server: add postgres based newsreader search and disable tantivy
This commit is contained in:
parent
e36f4f97f9
commit
13eaf33b1a
@ -31,7 +31,7 @@ serde = { version = "1.0.147", features = ["derive"] }
|
||||
serde_json = "1.0.87"
|
||||
shared = { path = "../shared" }
|
||||
sqlx = { version = "0.7.4", features = ["postgres", "runtime-tokio", "time"] }
|
||||
tantivy = "0.22.0"
|
||||
tantivy = { version = "0.22.0", optional = true }
|
||||
thiserror = "1.0.37"
|
||||
tokio = "1.26.0"
|
||||
tracing = "0.1.41"
|
||||
@ -42,3 +42,7 @@ xtracing = { git = "http://git-private.h.xinu.tv/wathiede/xtracing.git" }
|
||||
|
||||
[build-dependencies]
|
||||
build-info-build = "0.0.38"
|
||||
|
||||
[features]
|
||||
default = [ "tantivy" ]
|
||||
tantivy = [ "dep:tantivy" ]
|
||||
|
||||
@ -1 +1,3 @@
|
||||
CREATE INDEX post_summary_idx ON post USING GIN (to_tsvector('english', summary));
|
||||
CREATE INDEX post_site_idx ON post USING GIN (to_tsvector('english', site));
|
||||
CREATE INDEX post_title_idx ON post USING GIN (to_tsvector('english', title));
|
||||
|
||||
@ -11,3 +11,7 @@ WHERE
|
||||
NOT $2
|
||||
OR NOT is_read
|
||||
)
|
||||
AND (
|
||||
$3 :: text IS NULL
|
||||
OR to_tsvector('english', summary) @@ websearch_to_tsquery('english', $3)
|
||||
)
|
||||
|
||||
@ -14,6 +14,10 @@ WHERE
|
||||
NOT $2
|
||||
OR NOT is_read
|
||||
)
|
||||
AND (
|
||||
$5 :: text IS NULL
|
||||
OR to_tsvector('english', summary) @@ websearch_to_tsquery('english', $5)
|
||||
)
|
||||
ORDER BY
|
||||
date DESC,
|
||||
title OFFSET $3
|
||||
|
||||
@ -17,12 +17,13 @@ use rocket::{
|
||||
Response, State,
|
||||
};
|
||||
use rocket_cors::{AllowedHeaders, AllowedOrigins};
|
||||
#[cfg(feature = "tantivy")]
|
||||
use server::tantivy::TantivyConnection;
|
||||
use server::{
|
||||
config::Config,
|
||||
error::ServerError,
|
||||
graphql::{Attachment, GraphqlSchema, Mutation, QueryRoot},
|
||||
nm::{attachment_bytes, cid_attachment_bytes},
|
||||
tantivy::TantivyConnection,
|
||||
};
|
||||
use sqlx::postgres::PgPool;
|
||||
|
||||
@ -216,15 +217,18 @@ async fn main() -> Result<(), Box<dyn Error>> {
|
||||
}
|
||||
let pool = PgPool::connect(&config.newsreader_database_url).await?;
|
||||
sqlx::migrate!("./migrations").run(&pool).await?;
|
||||
#[cfg(feature = "tantivy")]
|
||||
let tantivy_conn = TantivyConnection::new(&config.newsreader_tantivy_db_path)?;
|
||||
|
||||
let schema = Schema::build(QueryRoot, Mutation, EmptySubscription)
|
||||
.data(Notmuch::default())
|
||||
.data(config)
|
||||
.data(pool.clone())
|
||||
.data(tantivy_conn)
|
||||
.extension(async_graphql::extensions::Logger)
|
||||
.finish();
|
||||
.data(pool.clone());
|
||||
|
||||
#[cfg(feature = "tantivy")]
|
||||
let schema = schema.data(tantivy_conn);
|
||||
|
||||
let schema = schema.extension(async_graphql::extensions::Logger).finish();
|
||||
|
||||
let rkt = rkt.manage(schema).manage(pool).manage(Notmuch::default());
|
||||
//.manage(Notmuch::with_config("../notmuch/testdata/notmuch.config"))
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
use std::{convert::Infallible, str::Utf8Error, string::FromUtf8Error};
|
||||
|
||||
use mailparse::MailParseError;
|
||||
use tantivy::TantivyError;
|
||||
use tantivy::query::QueryParserError;
|
||||
#[cfg(feature = "tantivy")]
|
||||
use tantivy::{query::QueryParserError, TantivyError};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::TransformError;
|
||||
@ -31,8 +31,10 @@ pub enum ServerError {
|
||||
StringError(String),
|
||||
#[error("invalid url: {0}")]
|
||||
UrlParseError(#[from] url::ParseError),
|
||||
#[cfg(feature = "tantivy")]
|
||||
#[error("tantivy error: {0}")]
|
||||
TantivyError(#[from] TantivyError),
|
||||
#[cfg(feature = "tantivy")]
|
||||
#[error("tantivy query parse error: {0}")]
|
||||
QueryParseError(#[from] QueryParserError),
|
||||
#[error("impossible: {0}")]
|
||||
|
||||
@ -12,7 +12,9 @@ use sqlx::postgres::PgPool;
|
||||
use tokio::join;
|
||||
use tracing::instrument;
|
||||
|
||||
use crate::{config::Config, newsreader, nm, tantivy::TantivyConnection, Query};
|
||||
#[cfg(feature = "tantivy")]
|
||||
use crate::tantivy::TantivyConnection;
|
||||
use crate::{config::Config, newsreader, nm, Query};
|
||||
|
||||
/// # Number of seconds since the Epoch
|
||||
pub type UnixTime = isize;
|
||||
@ -275,13 +277,18 @@ impl QueryRoot {
|
||||
async fn count<'ctx>(&self, ctx: &Context<'ctx>, query: String) -> Result<usize, Error> {
|
||||
let nm = ctx.data_unchecked::<Notmuch>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
#[cfg(feature = "tantivy")]
|
||||
let tantivy = ctx.data_unchecked::<TantivyConnection>();
|
||||
|
||||
let newsreader_query: Query = query.parse()?;
|
||||
|
||||
let newsreader_count = newsreader::count(pool, &newsreader_query).await?;
|
||||
let notmuch_count = nm::count(nm, &newsreader_query).await?;
|
||||
#[cfg(feature = "tantivy")]
|
||||
let tantivy_count = tantivy.count(&newsreader_query).await?;
|
||||
#[cfg(not(feature = "tantivy"))]
|
||||
let tantivy_count = 0;
|
||||
|
||||
let total = newsreader_count + notmuch_count + tantivy_count;
|
||||
info!("count {newsreader_query:?} newsreader count {newsreader_count} notmuch count {notmuch_count} tantivy count {tantivy_count} total {total}");
|
||||
Ok(total)
|
||||
@ -302,6 +309,7 @@ impl QueryRoot {
|
||||
info!("search({after:?} {before:?} {first:?} {last:?} {query:?})",);
|
||||
let nm = ctx.data_unchecked::<Notmuch>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
#[cfg(feature = "tantivy")]
|
||||
let tantivy = ctx.data_unchecked::<TantivyConnection>();
|
||||
|
||||
Ok(connection::query(
|
||||
@ -341,6 +349,7 @@ impl QueryRoot {
|
||||
);
|
||||
let notmuch_fut =
|
||||
notmuch_search(nm, notmuch_after, notmuch_before, first, last, &query);
|
||||
#[cfg(feature = "tantivy")]
|
||||
let tantivy_fut = tantivy_search(
|
||||
tantivy,
|
||||
pool,
|
||||
@ -350,6 +359,10 @@ impl QueryRoot {
|
||||
last,
|
||||
&query,
|
||||
);
|
||||
#[cfg(not(feature = "tantivy"))]
|
||||
let tantivy_fut =
|
||||
async { Ok::<Vec<ThreadSummaryCursor>, async_graphql::Error>(Vec::new()) };
|
||||
|
||||
let (newsreader_results, notmuch_results, tantivy_results) =
|
||||
join!(newsreader_fut, notmuch_fut, tantivy_fut);
|
||||
|
||||
@ -492,6 +505,7 @@ async fn notmuch_search(
|
||||
.collect())
|
||||
}
|
||||
|
||||
#[cfg(feature = "tantivy")]
|
||||
async fn tantivy_search(
|
||||
tantivy: &TantivyConnection,
|
||||
pool: &PgPool,
|
||||
@ -521,10 +535,12 @@ impl Mutation {
|
||||
) -> Result<bool, Error> {
|
||||
let nm = ctx.data_unchecked::<Notmuch>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
#[cfg(feature = "tantivy")]
|
||||
let tantivy = ctx.data_unchecked::<TantivyConnection>();
|
||||
|
||||
let query: Query = query.parse()?;
|
||||
newsreader::set_read_status(pool, &query, unread).await?;
|
||||
#[cfg(feature = "tantivy")]
|
||||
tantivy.reindex_thread(pool, &query).await?;
|
||||
nm::set_read_status(nm, &query, unread).await?;
|
||||
Ok(true)
|
||||
@ -554,6 +570,7 @@ impl Mutation {
|
||||
Ok(true)
|
||||
}
|
||||
/// Drop and recreate tantivy index. Warning this is slow
|
||||
#[cfg(feature = "tantivy")]
|
||||
async fn drop_and_load_index<'ctx>(&self, ctx: &Context<'ctx>) -> Result<bool, Error> {
|
||||
let tantivy = ctx.data_unchecked::<TantivyConnection>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
@ -566,11 +583,14 @@ impl Mutation {
|
||||
#[instrument(skip_all)]
|
||||
async fn refresh<'ctx>(&self, ctx: &Context<'ctx>) -> Result<bool, Error> {
|
||||
let nm = ctx.data_unchecked::<Notmuch>();
|
||||
let tantivy = ctx.data_unchecked::<TantivyConnection>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
// TODO: parallelize
|
||||
info!("{}", String::from_utf8_lossy(&nm.new()?));
|
||||
tantivy.refresh(pool).await?;
|
||||
#[cfg(feature = "tantivy")]
|
||||
{
|
||||
let tantivy = ctx.data_unchecked::<TantivyConnection>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
// TODO: parallelize
|
||||
info!("{}", String::from_utf8_lossy(&nm.new()?));
|
||||
tantivy.refresh(pool).await?;
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
|
||||
@ -3,6 +3,7 @@ pub mod error;
|
||||
pub mod graphql;
|
||||
pub mod newsreader;
|
||||
pub mod nm;
|
||||
#[cfg(feature = "tantivy")]
|
||||
pub mod tantivy;
|
||||
|
||||
use std::{collections::HashMap, convert::Infallible, fmt, str::FromStr, sync::Arc};
|
||||
@ -612,6 +613,7 @@ pub struct Query {
|
||||
pub uids: Vec<String>,
|
||||
pub remainder: Vec<String>,
|
||||
pub is_notmuch: bool,
|
||||
pub is_newsreader: bool,
|
||||
pub is_tantivy: bool,
|
||||
pub corpus: Option<Corpus>,
|
||||
}
|
||||
@ -630,6 +632,9 @@ impl fmt::Display for Query {
|
||||
if self.is_notmuch {
|
||||
write!(f, "is:mail ")?;
|
||||
}
|
||||
if self.is_newsreader {
|
||||
write!(f, "is:newsreader ")?;
|
||||
}
|
||||
if self.is_tantivy {
|
||||
write!(f, "is:news ")?;
|
||||
}
|
||||
@ -675,6 +680,7 @@ impl FromStr for Query {
|
||||
let mut uids = Vec::new();
|
||||
let mut remainder = Vec::new();
|
||||
let mut is_notmuch = false;
|
||||
let mut is_newsreader = false;
|
||||
let mut is_tantivy = false;
|
||||
let mut corpus = None;
|
||||
for word in s.split_whitespace() {
|
||||
@ -701,15 +707,18 @@ impl FromStr for Query {
|
||||
uids.push(word.to_string());
|
||||
} else if word == "is:mail" || word == "is:email" || word == "is:notmuch" {
|
||||
is_notmuch = true;
|
||||
} else if word == "is:news" || word == "is:newsreader" {
|
||||
} else if word == "is:news" {
|
||||
is_tantivy = true;
|
||||
} else if word == "is:newsreader" {
|
||||
is_newsreader = true;
|
||||
} else {
|
||||
remainder.push(word.to_string());
|
||||
}
|
||||
}
|
||||
// If we don't see any explicit filters for a corpus, flip them all on
|
||||
if corpus.is_none() && !(is_notmuch || is_tantivy) {
|
||||
if corpus.is_none() && !(is_notmuch || is_tantivy || is_newsreader) {
|
||||
is_notmuch = true;
|
||||
is_newsreader = true;
|
||||
is_tantivy = true;
|
||||
}
|
||||
Ok(Query {
|
||||
@ -718,6 +727,7 @@ impl FromStr for Query {
|
||||
uids,
|
||||
remainder,
|
||||
is_notmuch,
|
||||
is_newsreader,
|
||||
is_tantivy,
|
||||
corpus,
|
||||
})
|
||||
|
||||
@ -21,7 +21,7 @@ use crate::{
|
||||
};
|
||||
|
||||
pub fn is_newsreader_query(query: &Query) -> bool {
|
||||
query.corpus == Some(Corpus::Newsreader)
|
||||
query.is_newsreader || query.corpus == Some(Corpus::Newsreader)
|
||||
}
|
||||
|
||||
pub fn is_newsreader_thread(query: &str) -> bool {
|
||||
@ -58,12 +58,6 @@ pub async fn count(pool: &PgPool, query: &Query) -> Result<usize, ServerError> {
|
||||
if !is_newsreader_query(query) {
|
||||
return Ok(0);
|
||||
}
|
||||
if !query.remainder.is_empty() {
|
||||
// TODO: handle full text search against all sites, for now, early return if search words
|
||||
// are specified.
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let site = site_from_tags(&query.tags);
|
||||
if !query.tags.is_empty() && site.is_none() {
|
||||
// Newsreader can only handle all sites read/unread queries, anything with a non-site tag
|
||||
@ -71,7 +65,15 @@ pub async fn count(pool: &PgPool, query: &Query) -> Result<usize, ServerError> {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let row = sqlx::query_file!("sql/count.sql", site, query.unread_only)
|
||||
let search_term = query.remainder.join(" ");
|
||||
let search_term = search_term.trim();
|
||||
let search_term = if search_term.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(search_term)
|
||||
};
|
||||
// TODO: add support for looking for search_term in title and site
|
||||
let row = sqlx::query_file!("sql/count.sql", site, query.unread_only, search_term)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok(row.count.unwrap_or(0).try_into().unwrap_or(0))
|
||||
@ -90,12 +92,6 @@ pub async fn search(
|
||||
if !is_newsreader_query(query) {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
if !query.remainder.is_empty() {
|
||||
// TODO: handle full text search against all sites, for now, early return if search words
|
||||
// are specified.
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let site = site_from_tags(&query.tags);
|
||||
if !query.tags.is_empty() && site.is_none() {
|
||||
// Newsreader can only handle all sites read/unread queries, anything with a non-site tag
|
||||
@ -115,14 +111,22 @@ pub async fn search(
|
||||
"search offset {offset} limit {limit} site {site:?} unread_only {}",
|
||||
query.unread_only
|
||||
);
|
||||
let search_term = query.remainder.join(" ");
|
||||
let search_term = search_term.trim();
|
||||
let search_term = if search_term.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(search_term)
|
||||
};
|
||||
|
||||
// TODO: further limit results to include query.remainder if set
|
||||
// TODO: add support for looking for search_term in title and site
|
||||
let rows = sqlx::query_file!(
|
||||
"sql/threads.sql",
|
||||
site,
|
||||
query.unread_only,
|
||||
offset as i64,
|
||||
limit as i64
|
||||
limit as i64,
|
||||
search_term
|
||||
)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user