Compare commits

f8134dad7a646a6e9131fd8a5563411f8b20268c..e7cbf9cc45e4e478be78a0b481ee3ceaa525c086

No commits in common. "f8134dad7a646a6e9131fd8a5563411f8b20268c" and "e7cbf9cc45e4e478be78a0b481ee3ceaa525c086" have entirely different histories.

14 changed files with 189 additions and 1353 deletions

Cargo.lock (generated)

1,263 changed lines. File diff suppressed because it is too large.

notmuch/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "notmuch"
-version = "0.0.23"
+version = "0.0.22"
 edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

procmail2notmuch/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "procmail2notmuch"
-version = "0.0.23"
+version = "0.0.22"
 edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

server/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "server"
-version = "0.0.23"
+version = "0.0.22"
 edition = "2021"
 default-run = "server"
@@ -13,7 +13,6 @@ async-graphql = { version = "6.0.11", features = ["log"] }
 async-graphql-rocket = "6.0.11"
 async-trait = "0.1.81"
 build-info = "0.0.38"
-cacher = {git = "https://git-private.z.xinu.tv/wathiede/cacher"}
 css-inline = "0.13.0"
 glog = "0.1.0"
 html-escape = "0.2.13"

server/Rocket.toml

@@ -1,8 +1,6 @@
 [release]
 address = "0.0.0.0"
 port = 9345
-newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
-newsreader_tantivy_db_path = "../target/database/newsreader"
 
 [debug]
 address = "0.0.0.0"
@@ -10,5 +8,3 @@ port = 9345
 # Uncomment to make it production like.
 #log_level = "critical"
 newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
-newsreader_tantivy_db_path = "../target/database/newsreader"
-slurp_cache_path = "/net/nasx/x/letterbox/slurp"
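For context on how these keys are consumed: Rocket 0.5 deserializes extra `Rocket.toml` keys into any `Deserialize` struct attached with `AdHoc::config`, which is what `main.rs` below does after this change. A minimal sketch of the pattern (only the `newsreader_database_url` field comes from this diff; the rest is generic boilerplate, not code from this repo):

```rust
use rocket::fairing::AdHoc;
use serde::Deserialize;

// Custom keys in the [release]/[debug] tables of Rocket.toml are read
// from Rocket's figment alongside the built-in address/port keys.
#[derive(Deserialize)]
struct Config {
    newsreader_database_url: String,
}

#[rocket::launch]
fn rocket() -> _ {
    rocket::build()
        // Fails at startup if a required key is missing from the
        // active profile (or from ROCKET_* environment overrides).
        .attach(AdHoc::config::<Config>())
}
```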

server/sql/all-posts.sql

@@ -1,10 +0,0 @@
-SELECT
-    site,
-    title,
-    summary,
-    link,
-    date,
-    is_read,
-    uid,
-    id
-FROM post
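This query backed the `sqlx::query_file!("sql/all-posts.sql")` call in the removed `reindex_news_db` route below; once the route goes, the file can go too, because the macro embeds and checks the SQL at compile time. A small sketch of that mechanism (hypothetical wrapper function, not repo code):

```rust
use sqlx::postgres::PgPool;

// sqlx::query_file! reads the SQL file at compile time (path relative
// to the crate root) and type-checks the returned columns against the
// database named by DATABASE_URL during the build.
async fn count_posts(pool: &PgPool) -> Result<usize, sqlx::Error> {
    let rows = sqlx::query_file!("sql/all-posts.sql")
        .fetch_all(pool)
        .await?;
    Ok(rows.len())
}
```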

server/src/bin/main.rs

@@ -18,14 +18,18 @@ use rocket::{
     Response, State,
 };
 use rocket_cors::{AllowedHeaders, AllowedOrigins};
+use serde::Deserialize;
 use server::{
-    config::Config,
-    error::ServerError,
     graphql::{Attachment, GraphqlSchema, Mutation, QueryRoot},
     nm::{attachment_bytes, cid_attachment_bytes},
 };
 use sqlx::postgres::PgPool;
-use tantivy::{Index, IndexWriter};
+
+#[derive(Deserialize)]
+struct Config {
+    newsreader_database_url: String,
+}
 
 #[get("/refresh")]
 async fn refresh(nm: &State<Notmuch>) -> Result<Json<String>, Debug<NotmuchError>> {
@@ -166,122 +170,6 @@ fn graphiql() -> content::RawHtml<String> {
     content::RawHtml(GraphiQLSource::build().endpoint("/api/graphql").finish())
 }
 
-#[rocket::post("/create-news-db")]
-fn create_news_db(config: &State<Config>) -> Result<String, Debug<ServerError>> {
-    std::fs::remove_dir_all(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
-    std::fs::create_dir_all(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
-    use tantivy::schema::*;
-    let mut schema_builder = Schema::builder();
-    schema_builder.add_text_field("site", STRING | STORED);
-    schema_builder.add_text_field("title", TEXT | STORED);
-    schema_builder.add_text_field("summary", TEXT);
-    schema_builder.add_text_field("link", STRING | STORED);
-    schema_builder.add_date_field("date", FAST);
-    schema_builder.add_bool_field("is_read", FAST);
-    schema_builder.add_text_field("uid", STRING | STORED);
-    schema_builder.add_i64_field("id", FAST);
-    let schema = schema_builder.build();
-    Index::create_in_dir(&config.newsreader_tantivy_db_path, schema).map_err(ServerError::from)?;
-    Ok(format!(
-        "DB created in {}\n",
-        config.newsreader_tantivy_db_path
-    ))
-}
-
-#[rocket::post("/reindex-news-db")]
-async fn reindex_news_db(
-    pool: &State<PgPool>,
-    config: &State<Config>,
-) -> Result<String, Debug<ServerError>> {
-    use tantivy::{doc, Term};
-    let start_time = std::time::Instant::now();
-    let pool: &PgPool = pool;
-    let index =
-        Index::open_in_dir(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
-    let mut index_writer = index.writer(50_000_000).map_err(ServerError::from)?;
-    let schema = index.schema();
-    let site = schema.get_field("site").map_err(ServerError::from)?;
-    let title = schema.get_field("title").map_err(ServerError::from)?;
-    let summary = schema.get_field("summary").map_err(ServerError::from)?;
-    let link = schema.get_field("link").map_err(ServerError::from)?;
-    let date = schema.get_field("date").map_err(ServerError::from)?;
-    let is_read = schema.get_field("is_read").map_err(ServerError::from)?;
-    let uid = schema.get_field("uid").map_err(ServerError::from)?;
-    let id = schema.get_field("id").map_err(ServerError::from)?;
-    let rows = sqlx::query_file!("sql/all-posts.sql")
-        .fetch_all(pool)
-        .await
-        .map_err(ServerError::from)?;
-    let total = rows.len();
-    for (i, r) in rows.into_iter().enumerate() {
-        if i % 10_000 == 0 {
-            info!(
-                "{i}/{total} processed, elapsed {:.2}s",
-                start_time.elapsed().as_secs_f32()
-            );
-        }
-        let id_term = Term::from_field_text(uid, &r.uid);
-        index_writer.delete_term(id_term);
-        index_writer
-            .add_document(doc!(
-                site => r.site.expect("UNKOWN_SITE"),
-                title => r.title.expect("UNKOWN_TITLE"),
-                // TODO: clean and extract text from HTML
-                summary => r.summary.expect("UNKNOWN_SUMMARY"),
-                link => r.link.expect("link"),
-                date => tantivy::DateTime::from_primitive(r.date.expect("date")),
-                is_read => r.is_read.expect("is_read"),
-                uid => r.uid,
-                id => r.id as i64,
-            ))
-            .map_err(ServerError::from)?;
-    }
-    index_writer.commit().map_err(ServerError::from)?;
-    info!("took {:.2}s to reindex", start_time.elapsed().as_secs_f32());
-    Ok(format!(
-        "DB openned in {}\n",
-        config.newsreader_tantivy_db_path
-    ))
-}
-
-#[rocket::get("/search-news-db")]
-fn search_news_db(
-    index: &State<tantivy::Index>,
-    reader: &State<tantivy::IndexReader>,
-) -> Result<String, Debug<ServerError>> {
-    use tantivy::{collector::TopDocs, query::QueryParser, Document, TantivyDocument};
-    let searcher = reader.searcher();
-    let schema = index.schema();
-    let site = schema.get_field("site").map_err(ServerError::from)?;
-    let title = schema.get_field("title").map_err(ServerError::from)?;
-    let summary = schema.get_field("summary").map_err(ServerError::from)?;
-    let query_parser = QueryParser::for_index(&index, vec![site, title, summary]);
-    let query = query_parser
-        .parse_query("grapheme")
-        .map_err(ServerError::from)?;
-    let top_docs = searcher
-        .search(&query, &TopDocs::with_limit(10))
-        .map_err(ServerError::from)?;
-    let mut results = vec![];
-    info!("search found {} docs", top_docs.len());
-    for (_score, doc_address) in top_docs {
-        let retrieved_doc: TantivyDocument =
-            searcher.doc(doc_address).map_err(ServerError::from)?;
-        results.push(format!("{}", retrieved_doc.to_json(&schema)));
-    }
-    Ok(format!("{}", results.join(" ")))
-}
-
 #[rocket::get("/graphql?<query..>")]
 async fn graphql_query(schema: &State<GraphqlSchema>, query: GraphQLQuery) -> GraphQLResponse {
     query.execute(schema.inner()).await
@@ -295,6 +183,7 @@ async fn graphql_request(
     request.execute(schema.inner()).await
 }
 
 #[rocket::main]
 async fn main() -> Result<(), Box<dyn Error>> {
     glog::new()
@@ -324,9 +213,6 @@ async fn main() -> Result<(), Box<dyn Error>> {
         .mount(
             shared::urls::MOUNT_POINT,
             routes![
-                create_news_db,
-                reindex_news_db,
-                search_news_db,
                 original,
                 refresh,
                 show_pretty,
@@ -343,26 +229,14 @@ async fn main() -> Result<(), Box<dyn Error>> {
         .attach(AdHoc::config::<Config>());
 
     let config: Config = rkt.figment().extract()?;
-    if !std::fs::exists(&config.slurp_cache_path)? {
-        info!("Creating slurp cache @ '{}'", &config.slurp_cache_path);
-        std::fs::create_dir_all(&config.slurp_cache_path)?;
-    }
     let pool = PgPool::connect(&config.newsreader_database_url).await?;
-    let tantivy_newsreader_index = Index::open_in_dir(&config.newsreader_tantivy_db_path)?;
-    let tantivy_newsreader_reader = tantivy_newsreader_index.reader()?;
 
     let schema = Schema::build(QueryRoot, Mutation, EmptySubscription)
         .data(Notmuch::default())
-        .data(config)
         .data(pool.clone())
         .extension(async_graphql::extensions::Logger)
         .finish();
-    let rkt = rkt
-        .manage(schema)
-        .manage(pool)
-        .manage(Notmuch::default())
-        .manage(tantivy_newsreader_index)
-        .manage(tantivy_newsreader_reader);
+    let rkt = rkt.manage(schema).manage(pool).manage(Notmuch::default());
     //.manage(Notmuch::with_config("../notmuch/testdata/notmuch.config"))
     rkt.launch().await?;
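The heart of the deleted `reindex_news_db` route is tantivy's delete-then-add upsert: segments are immutable, so replacing a post means deleting every document that carries its `uid` term, adding a fresh copy, and committing before readers see either change. A self-contained sketch of just that pattern, assuming the tantivy 0.2x API the removed code used (the field names mirror the deleted schema; everything else is illustrative):

```rust
use tantivy::{
    doc,
    schema::{Schema, STORED, STRING, TEXT},
    Index, Term,
};

fn main() -> tantivy::Result<()> {
    let mut builder = Schema::builder();
    let uid = builder.add_text_field("uid", STRING | STORED);
    let title = builder.add_text_field("title", TEXT | STORED);
    let index = Index::create_in_ram(builder.build());

    // Same 50 MB writer budget the deleted route used.
    let mut writer = index.writer(50_000_000)?;

    // Upsert: drop every document holding this uid term, then add the
    // replacement. Neither step is visible to readers until commit().
    writer.delete_term(Term::from_field_text(uid, "post-123"));
    writer.add_document(doc!(uid => "post-123", title => "fresh copy"))?;
    writer.commit()?;
    Ok(())
}
```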

server/src/config.rs

@@ -1,7 +0,0 @@
-use serde::Deserialize;
-#[derive(Deserialize)]
-pub struct Config {
-    pub newsreader_database_url: String,
-    pub newsreader_tantivy_db_path: String,
-    pub slurp_cache_path: String,
-}

server/src/error.rs

@@ -1,8 +1,6 @@
 use std::{convert::Infallible, str::Utf8Error, string::FromUtf8Error};
 
 use mailparse::MailParseError;
-use tantivy::TantivyError;
-use tantivy::query::QueryParserError;
 use thiserror::Error;
 
 use crate::TransformError;
@@ -31,10 +29,6 @@ pub enum ServerError {
     StringError(String),
     #[error("invalid url: {0}")]
     UrlParseError(#[from] url::ParseError),
-    #[error("tantivy error: {0}")]
-    TantivyError(#[from] TantivyError),
-    #[error("tantivy query parse error: {0}")]
-    QueryParseError(#[from] QueryParserError),
     #[error("impossible: {0}")]
     InfaillibleError(#[from] Infallible),
 }
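The two removed variants are what let every tantivy call in the deleted routes end with `.map_err(ServerError::from)?`: thiserror's `#[from]` derives the corresponding `From` impl. A reduced, standalone sketch of the mechanism (one variant only, not the full enum from this file):

```rust
use thiserror::Error;

#[derive(Debug, Error)]
enum ServerError {
    // #[from] generates `impl From<tantivy::TantivyError> for
    // ServerError`, which is what `?` and ServerError::from rely on.
    #[error("tantivy error: {0}")]
    Tantivy(#[from] tantivy::TantivyError),
}

fn open_index(path: &str) -> Result<tantivy::Index, ServerError> {
    // The TantivyError from open_in_dir converts automatically via `?`.
    Ok(tantivy::Index::open_in_dir(path)?)
}
```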

server/src/graphql.rs

@@ -8,7 +8,7 @@ use notmuch::Notmuch;
 use serde::{Deserialize, Serialize};
 use sqlx::postgres::PgPool;
 
-use crate::{config::Config, newsreader, nm, Query};
+use crate::{newsreader, nm, Query};
 
 /// # Number of seconds since the Epoch
 pub type UnixTime = isize;
@@ -384,7 +384,6 @@ impl QueryRoot {
     async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result<Thread, Error> {
         let nm = ctx.data_unchecked::<Notmuch>();
         let pool = ctx.data_unchecked::<PgPool>();
-        let config = ctx.data_unchecked::<Config>();
         let debug_content_tree = ctx
             .look_ahead()
             .field("messages")
@@ -393,7 +392,7 @@
             .exists();
         // TODO: look at thread_id and conditionally load newsreader
         if newsreader::is_newsreader_thread(&thread_id) {
-            Ok(newsreader::thread(config, pool, thread_id).await?)
+            Ok(newsreader::thread(pool, thread_id).await?)
         } else {
             Ok(nm::thread(nm, thread_id, debug_content_tree).await?)
         }
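The resolver still dispatches on the thread-id prefix; only the `config` argument changes. For reference, the check reduces to a prefix test against the `news:` constant defined in `newsreader.rs` (a sketch of the idea; the real `is_newsreader_thread` and id extraction live in that module and may differ in detail):

```rust
const THREAD_PREFIX: &str = "news:";

// "news:42" routes to the newsreader backend; anything else falls
// through to notmuch.
fn is_newsreader_thread(thread_id: &str) -> bool {
    thread_id.starts_with(THREAD_PREFIX)
}

fn extract_id(thread_id: &str) -> Option<&str> {
    thread_id.strip_prefix(THREAD_PREFIX)
}
```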

server/src/lib.rs

@@ -1,16 +1,14 @@
-pub mod config;
 pub mod error;
 pub mod graphql;
 pub mod newsreader;
 pub mod nm;
 
-use std::{collections::HashMap, convert::Infallible, str::FromStr, sync::Arc};
+use std::{collections::HashMap, convert::Infallible, str::FromStr};
 
 use async_trait::async_trait;
-use cacher::{Cacher, FilesystemCacher};
 use css_inline::{CSSInliner, InlineError, InlineOptions};
 use linkify::{LinkFinder, LinkKind};
-use log::{error, info, warn};
+use log::{error, warn};
 use lol_html::{
     element, errors::RewritingError, html_content::ContentType, rewrite_str, text,
     RewriteStrSettings,
@@ -18,7 +16,6 @@ use lol_html::{
 use maplit::{hashmap, hashset};
 use scraper::{Html, Selector};
 use thiserror::Error;
-use tokio::sync::Mutex;
 use url::Url;
 
 use crate::newsreader::{extract_thread_id, is_newsreader_thread};
@@ -112,17 +109,16 @@ impl Transformer for InlineStyle {
             include_str!("custom.css"),
         );
         let inline_opts = InlineOptions {
-            inline_style_tags: true,
+            inline_style_tags: false,
             keep_style_tags: false,
-            keep_link_tags: true,
+            keep_link_tags: false,
             base_url: None,
-            load_remote_stylesheets: true,
+            load_remote_stylesheets: false,
             extra_css: Some(css.into()),
-            preallocate_node_capacity: 32,
+            ..InlineOptions::default()
         };
-        //info!("HTML:\n{html}");
         Ok(match CSSInliner::new(inline_opts).inline(&html) {
             Ok(inlined_html) => inlined_html,
             Err(err) => {
@@ -216,7 +212,6 @@ impl Transformer for AddOutlink {
 }
 
 struct SlurpContents {
-    cacher: Arc<Mutex<FilesystemCacher>>,
     site_selectors: HashMap<String, Vec<Selector>>,
 }
@@ -246,26 +241,19 @@ impl Transformer for SlurpContents {
         let Some(selectors) = self.get_selectors(&link) else {
             return Ok(html.to_string());
         };
-        let mut cacher = self.cacher.lock().await;
-        let body = if let Some(body) = cacher.get(link.as_str()) {
-            info!("cache hit for {link}");
-            String::from_utf8_lossy(&body).to_string()
-        } else {
-            let body = reqwest::get(link.as_str()).await?.text().await?;
-            cacher.set(link.as_str(), body.as_bytes());
-            body
-        };
+        let body = reqwest::get(link.as_str()).await?.text().await?;
         let doc = Html::parse_document(&body);
         let mut results = Vec::new();
         for selector in selectors {
-            for frag in doc.select(&selector) {
+            if let Some(frag) = doc.select(&selector).next() {
                 results.push(frag.html())
-                // TODO: figure out how to warn if there were no hits
-                //warn!("couldn't find '{:?}' in {}", selector, link);
+            } else {
+                warn!("couldn't find '{:?}' in {}", selector, link);
+                return Ok(html.to_string());
             }
         }
-        Ok(results.join(""))
+        Ok(results.join("<br><br>"))
     }
 }
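The lines dropped above had wrapped the page fetch in a read-through cache. As a standalone sketch of that pattern, mirroring how the removed code called the private `cacher` crate (its `get`/`set` signatures are inferred from this diff, so treat them as assumptions):

```rust
use std::sync::Arc;

use tokio::sync::Mutex;

// Read-through cache around the page fetch: serve the body from the
// FilesystemCacher when present, otherwise hit the network and
// populate the cache for next time.
async fn fetch_cached(
    cacher: Arc<Mutex<cacher::FilesystemCacher>>,
    link: &url::Url,
) -> Result<String, reqwest::Error> {
    let mut cacher = cacher.lock().await;
    if let Some(body) = cacher.get(link.as_str()) {
        return Ok(String::from_utf8_lossy(&body).to_string());
    }
    let body = reqwest::get(link.as_str()).await?.text().await?;
    cacher.set(link.as_str(), body.as_bytes());
    Ok(body)
}
```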
@@ -304,7 +292,7 @@ pub fn sanitize_html(
 ) -> Result<String, TransformError> {
     let inline_opts = InlineOptions {
         inline_style_tags: true,
-        keep_style_tags: true,
+        keep_style_tags: false,
         keep_link_tags: false,
         base_url: None,
         load_remote_stylesheets: false,
@@ -347,30 +335,6 @@ pub fn sanitize_html(
             el.set_attribute("src", &src)?;
             Ok(())
         }),
-        // Add https to href with //<domain name>
-        element!("link[href]", |el| {
-            info!("found link[href] {el:?}");
-            let mut href = el.get_attribute("href").expect("href was required");
-            if href.starts_with("//") {
-                warn!("adding https to {href}");
-                href.insert_str(0, "https:");
-            }
-            el.set_attribute("href", &href)?;
-            Ok(())
-        }),
-        // Add https to src with //<domain name>
-        element!("style[src]", |el| {
-            let mut src = el.get_attribute("src").expect("src was required");
-            if src.starts_with("//") {
-                src.insert_str(0, "https:");
-            }
-            el.set_attribute("src", &src)?;
-            Ok(())
-        }),
     ];
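The two deleted handlers normalized protocol-relative URLs before inlining. A standalone version of that rewrite using the same `lol_html` entry points this file already imports (`element!`, `rewrite_str`); the function name is illustrative, not from the repo:

```rust
use lol_html::{element, rewrite_str, RewriteStrSettings};

// Give protocol-relative URLs ("//cdn.example.com/app.css") an explicit
// https: scheme so they resolve outside a browser, as the deleted
// link[href] handler did.
fn https_for_protocol_relative(html: &str) -> Result<String, lol_html::errors::RewritingError> {
    rewrite_str(
        html,
        RewriteStrSettings {
            element_content_handlers: vec![element!("link[href]", |el| {
                let mut href = el.get_attribute("href").expect("selector guarantees href");
                if href.starts_with("//") {
                    href.insert_str(0, "https:");
                    el.set_attribute("href", &href)?;
                }
                Ok(())
            })],
            ..RewriteStrSettings::default()
        },
    )
}
```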

server/src/newsreader.rs

@@ -1,26 +1,24 @@
-use std::sync::Arc;
+use std::hash::{DefaultHasher, Hash, Hasher};
 
-use cacher::FilesystemCacher;
 use log::info;
 use maplit::hashmap;
 use scraper::Selector;
-use shared::compute_color;
 use sqlx::postgres::PgPool;
-use tokio::sync::Mutex;
 use url::Url;
 
-use crate::{
-    compute_offset_limit,
-    config::Config,
-    error::ServerError,
-    graphql::{NewsPost, Tag, Thread, ThreadSummary},
-    AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml, SlurpContents,
-    StripHtml, Transformer,
-};
+use crate::Query;
 
 const TAG_PREFIX: &'static str = "News/";
 const THREAD_PREFIX: &'static str = "news:";
+use crate::{
+    compute_offset_limit,
+    error::ServerError,
+    graphql::{NewsPost, Tag, Thread, ThreadSummary},
+    AddOutlink, EscapeHtml, FrameImages, InlineStyle, SanitizeHtml, SlurpContents, StripHtml,
+    Transformer,
+};
 
 pub fn is_newsreader_search(query: &str) -> bool {
     query.contains(TAG_PREFIX)
 }
@@ -130,9 +128,11 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, ServerError> {
     let tags = tags
         .into_iter()
         .map(|tag| {
+            let mut hasher = DefaultHasher::new();
+            tag.site.hash(&mut hasher);
+            let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
             let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
             let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
-            let hex = compute_color(&name);
             Tag {
                 name,
                 fg_color: "white".to_string(),
@@ -144,11 +144,7 @@
     Ok(tags)
 }
 
-pub async fn thread(
-    config: &Config,
-    pool: &PgPool,
-    thread_id: String,
-) -> Result<Thread, ServerError> {
+pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerError> {
     let id = thread_id
         .strip_prefix(THREAD_PREFIX)
         .expect("news thread doesn't start with '{THREAD_PREFIX}'")
@@ -177,10 +173,8 @@ pub async fn thread(
     // TODO: add site specific cleanups. For example:
     // * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div>
     // * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
-    let cacher = Arc::new(Mutex::new(FilesystemCacher::new(&config.slurp_cache_path)?));
     let body_tranformers: Vec<Box<dyn Transformer>> = vec![
         Box::new(SlurpContents {
-            cacher,
             site_selectors: hashmap![
                 "atmeta.com".to_string() => vec![
                     Selector::parse("div.entry-content").unwrap(),
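One behavioral wrinkle in the `tags()` hunk above: the `-` side colored a tag via `shared::compute_color(&name)`, hashing the `News/`-prefixed name, while the `+` side hashes the raw site with `DefaultHasher` and keeps the low 24 bits, so the same feed can render a different color across the two commits. The inline derivation, lifted into a runnable function (names are illustrative):

```rust
use std::hash::{DefaultHasher, Hash, Hasher};

// Hash the site name and keep the low 24 bits as an RGB hex color,
// exactly as the inline code in tags() does.
fn color_for(site: &str) -> String {
    let mut hasher = DefaultHasher::new();
    site.hash(&mut hasher);
    format!("#{:06x}", hasher.finish() % (1 << 24))
}

fn main() {
    // Deterministic for a given std release; prints something like
    // "#3ac1f2".
    println!("{}", color_for("lwn.net"));
}
```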

shared/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "shared"
-version = "0.0.23"
+version = "0.0.22"
 edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

Cargo.toml (package "letterbox")

@@ -1,5 +1,5 @@
 [package]
-version = "0.0.23"
+version = "0.0.22"
 name = "letterbox"
 repository = "https://github.com/seed-rs/seed-quickstart"
 authors = ["Bill Thiede <git@xinu.tv>"]