server: WIP tantivy, cache slurps, use shared::compute_color,

This commit is contained in:
2024-09-19 15:53:09 -07:00
parent e7cbf9cc45
commit 30f510bb03
10 changed files with 1341 additions and 177 deletions

View File

@@ -1,24 +1,26 @@
use std::hash::{DefaultHasher, Hash, Hasher};
use std::sync::Arc;
use cacher::FilesystemCacher;
use log::info;
use maplit::hashmap;
use scraper::Selector;
use shared::compute_color;
use sqlx::postgres::PgPool;
use tokio::sync::Mutex;
use url::Url;
use crate::Query;
const TAG_PREFIX: &'static str = "News/";
const THREAD_PREFIX: &'static str = "news:";
use crate::{
compute_offset_limit,
config::Config,
error::ServerError,
graphql::{NewsPost, Tag, Thread, ThreadSummary},
AddOutlink, EscapeHtml, FrameImages, InlineStyle, SanitizeHtml, SlurpContents, StripHtml,
Transformer,
AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml, SlurpContents,
StripHtml, Transformer,
};
const TAG_PREFIX: &'static str = "News/";
const THREAD_PREFIX: &'static str = "news:";
pub fn is_newsreader_search(query: &str) -> bool {
query.contains(TAG_PREFIX)
}
@@ -128,11 +130,9 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
let tags = tags
.into_iter()
.map(|tag| {
let mut hasher = DefaultHasher::new();
tag.site.hash(&mut hasher);
let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
let hex = compute_color(&name);
Tag {
name,
fg_color: "white".to_string(),
@@ -144,7 +144,11 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
Ok(tags)
}
pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerError> {
pub async fn thread(
config: &Config,
pool: &PgPool,
thread_id: String,
) -> Result<Thread, ServerError> {
let id = thread_id
.strip_prefix(THREAD_PREFIX)
.expect("news thread doesn't start with '{THREAD_PREFIX}'")
@@ -173,8 +177,10 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
// TODO: add site specific cleanups. For example:
// * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div>
// * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
let cacher = Arc::new(Mutex::new(FilesystemCacher::new(&config.slurp_cache_path)?));
let body_tranformers: Vec<Box<dyn Transformer>> = vec![
Box::new(SlurpContents {
cacher,
site_selectors: hashmap![
"atmeta.com".to_string() => vec![
Selector::parse("div.entry-content").unwrap(),