web & server: using tantivy for news post search
This commit is contained in:
@@ -18,15 +18,16 @@ use lol_html::{
|
||||
};
|
||||
use maplit::{hashmap, hashset};
|
||||
use scraper::{Html, Selector};
|
||||
use sqlx::{postgres::PgPool, types::time::PrimitiveDateTime};
|
||||
use sqlx::types::time::PrimitiveDateTime;
|
||||
use thiserror::Error;
|
||||
use tokio::sync::Mutex;
|
||||
use url::Url;
|
||||
|
||||
use crate::{
|
||||
error::ServerError,
|
||||
graphql::ThreadSummary,
|
||||
newsreader::{extract_thread_id, is_newsreader_thread},
|
||||
graphql::{Corpus, ThreadSummary},
|
||||
newsreader::is_newsreader_thread,
|
||||
nm::is_notmuch_thread_or_id,
|
||||
};
|
||||
|
||||
const NEWSREADER_TAG_PREFIX: &'static str = "News/";
|
||||
@@ -607,12 +608,13 @@ fn compute_offset_limit(
|
||||
#[derive(Debug)]
|
||||
pub struct Query {
|
||||
pub unread_only: bool,
|
||||
pub tag: Option<String>,
|
||||
pub uid: Option<String>,
|
||||
pub tags: Vec<String>,
|
||||
pub uids: Vec<String>,
|
||||
pub remainder: Vec<String>,
|
||||
pub is_notmuch: bool,
|
||||
pub is_newsreader: bool,
|
||||
pub is_tantivy: bool,
|
||||
pub corpus: Option<Corpus>,
|
||||
}
|
||||
|
||||
impl Query {
|
||||
@@ -627,10 +629,10 @@ impl Query {
|
||||
if self.unread_only {
|
||||
parts.push("is:unread".to_string());
|
||||
}
|
||||
if let Some(site) = &self.tag {
|
||||
parts.push(format!("tag:{site}"));
|
||||
for tag in &self.tags {
|
||||
parts.push(format!("tag:{tag}"));
|
||||
}
|
||||
if let Some(uid) = &self.uid {
|
||||
for uid in &self.uids {
|
||||
parts.push(uid.clone());
|
||||
}
|
||||
parts.extend(self.remainder.clone());
|
||||
@@ -642,48 +644,60 @@ impl FromStr for Query {
|
||||
type Err = Infallible;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let mut unread_only = false;
|
||||
let mut tag = None;
|
||||
let mut uid = None;
|
||||
let mut tags = Vec::new();
|
||||
let mut uids = Vec::new();
|
||||
let mut remainder = Vec::new();
|
||||
let mut is_notmuch = false;
|
||||
let mut is_newsreader = false;
|
||||
let is_newsreader = false;
|
||||
let mut is_tantivy = false;
|
||||
let mut corpus = None;
|
||||
for word in s.split_whitespace() {
|
||||
if word == "is:unread" {
|
||||
unread_only = true
|
||||
} else if word.starts_with("tag:") {
|
||||
tag = Some(word["tag:".len()..].to_string())
|
||||
tags.push(word["tag:".len()..].to_string());
|
||||
|
||||
/*
|
||||
} else if word.starts_with("tag:") {
|
||||
// Any tag that doesn't match site_prefix should explicitly set the site to something not in the
|
||||
// database
|
||||
site = Some(NON_EXISTENT_SITE_NAME.to_string());
|
||||
*/
|
||||
} else if word.starts_with("corpus:") {
|
||||
let c = word["corpus:".len()..].to_string();
|
||||
corpus = c.parse::<Corpus>().map(|c| Some(c)).unwrap_or_else(|e| {
|
||||
warn!("Error parsing corpus '{c}': {e:?}");
|
||||
None
|
||||
});
|
||||
} else if is_newsreader_thread(word) {
|
||||
uid = Some(extract_thread_id(word).to_string())
|
||||
uids.push(word.to_string());
|
||||
} else if is_notmuch_thread_or_id(word) {
|
||||
uids.push(word.to_string());
|
||||
} else if word == "is:mail" || word == "is:email" || word == "is:notmuch" {
|
||||
is_notmuch = true;
|
||||
} else if word == "is:news" || word == "is:newsreader" {
|
||||
is_newsreader = true;
|
||||
is_tantivy = true;
|
||||
} else {
|
||||
remainder.push(word.to_string());
|
||||
}
|
||||
}
|
||||
// If we don't see any explicit filters for a corpus, flip them all on
|
||||
if !(is_notmuch || is_newsreader) {
|
||||
is_newsreader = true;
|
||||
if corpus.is_none() && !(is_newsreader || is_notmuch || is_tantivy) {
|
||||
// Don't set is_newsreader unless debugging, assume tantivy can handle it.
|
||||
// Explicitly setting corpus:newsreader will bypass this logic
|
||||
// is_newsreader = true;
|
||||
is_notmuch = true;
|
||||
is_tantivy = true;
|
||||
}
|
||||
// TODO: decide if tantivy gets its own life or replaces newsreader
|
||||
is_tantivy = is_newsreader;
|
||||
Ok(Query {
|
||||
unread_only,
|
||||
tag,
|
||||
uid,
|
||||
tags,
|
||||
uids,
|
||||
remainder,
|
||||
is_notmuch,
|
||||
is_newsreader,
|
||||
is_tantivy,
|
||||
corpus,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -694,6 +708,7 @@ pub struct ThreadSummaryRecord {
|
||||
pub title: Option<String>,
|
||||
pub uid: String,
|
||||
pub name: Option<String>,
|
||||
pub corpus: Corpus,
|
||||
}
|
||||
|
||||
async fn thread_summary_from_row(r: ThreadSummaryRecord) -> ThreadSummary {
|
||||
@@ -711,12 +726,14 @@ async fn thread_summary_from_row(r: ThreadSummaryRecord) -> ThreadSummary {
|
||||
.expect("post missing date")
|
||||
.assume_utc()
|
||||
.unix_timestamp() as isize,
|
||||
date_relative: "TODO date_relative".to_string(),
|
||||
date_relative: format!("{:?}", r.date),
|
||||
//date_relative: "TODO date_relative".to_string(),
|
||||
matched: 0,
|
||||
total: 1,
|
||||
authors: r.name.unwrap_or_else(|| site.clone()),
|
||||
subject: title,
|
||||
tags,
|
||||
corpus: r.corpus,
|
||||
}
|
||||
}
|
||||
async fn clean_title(title: &str) -> Result<String, ServerError> {
|
||||
|
||||
Reference in New Issue
Block a user