server: WIP tantivy, cache slurps, use shared::compute_color,

2024-09-19 15:53:09 -07:00
parent e7cbf9cc45
commit 30f510bb03
10 changed files with 1341 additions and 177 deletions
--- a/server/src/newsreader.rs
+++ b/server/src/newsreader.rs
@@ -1,24 +1,26 @@
-use std::hash::{DefaultHasher, Hash, Hasher};
+use std::sync::Arc;

+use cacher::FilesystemCacher;
 use log::info;
 use maplit::hashmap;
 use scraper::Selector;
+use shared::compute_color;
 use sqlx::postgres::PgPool;
+use tokio::sync::Mutex;
 use url::Url;

-use crate::Query;
-
-const TAG_PREFIX: &'static str = "News/";
-const THREAD_PREFIX: &'static str = "news:";
-
 use crate::{
    compute_offset_limit,
+    config::Config,
    error::ServerError,
    graphql::{NewsPost, Tag, Thread, ThreadSummary},
-    AddOutlink, EscapeHtml, FrameImages, InlineStyle, SanitizeHtml, SlurpContents, StripHtml,
-    Transformer,
+    AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml, SlurpContents,
+    StripHtml, Transformer,
 };

+const TAG_PREFIX: &'static str = "News/";
+const THREAD_PREFIX: &'static str = "news:";
+
 /// Reports whether `query` should be treated as a newsreader search.
 ///
 /// This is a plain substring test: it returns `true` when `query` contains
 /// `TAG_PREFIX` (`"News/"`) anywhere in the string, not only at the start.
 pub fn is_newsreader_search(query: &str) -> bool {
    query.contains(TAG_PREFIX)
 }
@@ -128,11 +130,9 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
    let tags = tags
        .into_iter()
        .map(|tag| {
-            let mut hasher = DefaultHasher::new();
-            tag.site.hash(&mut hasher);
-            let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
            let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
            let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
+            let hex = compute_color(&name);
            Tag {
                name,
                fg_color: "white".to_string(),
@@ -144,7 +144,11 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
    Ok(tags)
 }

-pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerError> {
+pub async fn thread(
+    config: &Config,
+    pool: &PgPool,
+    thread_id: String,
+) -> Result<Thread, ServerError> {
    let id = thread_id
        .strip_prefix(THREAD_PREFIX)
        .expect("news thread doesn't start with '{THREAD_PREFIX}'")
@@ -173,8 +177,10 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
    // TODO: add site specific cleanups. For example:
    // * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div>
    // * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
+    let cacher = Arc::new(Mutex::new(FilesystemCacher::new(&config.slurp_cache_path)?));
    let body_tranformers: Vec<Box<dyn Transformer>> = vec![
        Box::new(SlurpContents {
+            cacher,
            site_selectors: hashmap![
                "atmeta.com".to_string() => vec![
                    Selector::parse("div.entry-content").unwrap(),