server: WIP tantivy integration

This commit is contained in:
2024-09-28 11:17:52 -07:00
parent 005a457348
commit ebf32a9905
8 changed files with 285 additions and 99 deletions

View File

@@ -18,11 +18,19 @@ use lol_html::{
};
use maplit::{hashmap, hashset};
use scraper::{Html, Selector};
use sqlx::{postgres::PgPool, types::time::PrimitiveDateTime};
use thiserror::Error;
use tokio::sync::Mutex;
use url::Url;
use crate::newsreader::{extract_thread_id, is_newsreader_thread};
use crate::{
error::ServerError,
graphql::ThreadSummary,
newsreader::{extract_thread_id, is_newsreader_thread},
};
const NEWSREADER_TAG_PREFIX: &'static str = "News/";
const NEWSREADER_THREAD_PREFIX: &'static str = "news:";
// TODO: figure out how to use Cow
#[async_trait]
@@ -604,6 +612,7 @@ pub struct Query {
pub remainder: Vec<String>,
pub is_notmuch: bool,
pub is_newsreader: bool,
pub is_tantivy: bool,
}
impl Query {
@@ -638,6 +647,7 @@ impl FromStr for Query {
let mut remainder = Vec::new();
let mut is_notmuch = false;
let mut is_newsreader = false;
let mut is_tantivy = false;
for word in s.split_whitespace() {
if word == "is:unread" {
unread_only = true
@@ -664,6 +674,8 @@ impl FromStr for Query {
is_newsreader = true;
is_notmuch = true;
}
// TODO: decide if tantivy gets its own life or replaces newsreader
is_tantivy = is_newsreader;
Ok(Query {
unread_only,
tag,
@@ -671,6 +683,53 @@ impl FromStr for Query {
remainder,
is_notmuch,
is_newsreader,
is_tantivy,
})
}
}
/// Raw per-post fields consumed by `thread_summary_from_row` to build a `ThreadSummary`.
///
/// Every field except `uid` is optional; `thread_summary_from_row` supplies fallbacks for
/// most of them but panics when `date` is `None`.
// NOTE(review): presumably populated from a database row (see the `_from_row` consumer) —
// confirm against the callers that construct this type.
pub struct ThreadSummaryRecord {
    /// Site name; used to build the `News/`-prefixed tag and as the fallback author.
    pub site: Option<String>,
    /// Post date; interpreted as UTC when converted to a Unix timestamp.
    pub date: Option<PrimitiveDateTime>,
    /// Read flag; `None` is treated the same as `Some(true)` (no "unread" tag).
    pub is_read: Option<bool>,
    /// Raw title; passed through `clean_title` before becoming the subject.
    pub title: Option<String>,
    /// Identifier appended to the `news:` prefix to form the thread id.
    pub uid: String,
    /// Author name; when `None` the site name is used instead.
    pub name: Option<String>,
}
/// Builds a `ThreadSummary` for a newsreader post from its `ThreadSummaryRecord`.
///
/// * The thread id is `NEWSREADER_THREAD_PREFIX` followed by the record's `uid`.
/// * Tags always contain `NEWSREADER_TAG_PREFIX` + site, plus `"unread"` when the record is
///   explicitly marked as not read.
/// * The subject is the record's title (or `"NO TITLE"`) after `clean_title`.
/// * The author is the record's `name`, falling back to the site name.
/// * `matched` / `total` are fixed at `0` / `1`, and `date_relative` is still a placeholder.
///
/// # Panics
///
/// Panics if `clean_title` returns an error or if the record has no `date`.
async fn thread_summary_from_row(r: ThreadSummaryRecord) -> ThreadSummary {
    // The fallback text is emitted as part of a tag, so its spelling is kept unchanged.
    let site = r.site.unwrap_or_else(|| "UNKOWN TAG".to_string());
    let mut tags = vec![format!("{NEWSREADER_TAG_PREFIX}{site}")];
    // A missing read flag counts as read; only an explicit `false` adds the tag.
    if !r.is_read.unwrap_or(true) {
        tags.push("unread".to_string());
    }

    let raw_title = r.title.unwrap_or_else(|| "NO TITLE".to_string());
    let subject = clean_title(&raw_title)
        .await
        .expect("failed to clean title");

    // The stored date carries no offset; it is taken to be UTC.
    let timestamp = r
        .date
        .expect("post missing date")
        .assume_utc()
        .unix_timestamp() as isize;

    ThreadSummary {
        thread: format!("{NEWSREADER_THREAD_PREFIX}{}", r.uid),
        timestamp,
        date_relative: "TODO date_relative".to_string(),
        matched: 0,
        total: 1,
        // `site` is not used past this point, so move it rather than clone it.
        authors: r.name.unwrap_or(site),
        subject,
        tags,
    }
}
/// Cleans a raw post title by running it through the title transformers.
///
/// The title is wrapped in a single `<html>` element and then handed to `EscapeHtml`
/// followed by `StripHtml`; each transformer is applied only when its `should_run` check
/// accepts the current text.
///
/// # Errors
///
/// Propagates the error of the first transformer whose `transform` call fails.
async fn clean_title(title: &str) -> Result<String, ServerError> {
    // Make title HTML so html parsers work. One wrapper is enough; wrapping a second time
    // only nests a redundant `<html>` element.
    let mut title = format!("<html>{title}</html>");
    let title_transformers: Vec<Box<dyn Transformer>> =
        vec![Box::new(EscapeHtml), Box::new(StripHtml)];
    for t in &title_transformers {
        if t.should_run(&None, &title) {
            title = t.transform(&None, &title).await?;
        }
    }
    Ok(title)
}