web & server: using tantivy for news post search
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use async_graphql::{
|
||||
connection::{self, Connection, Edge, OpaqueCursor},
|
||||
Context, EmptySubscription, Enum, Error, FieldResult, InputObject, Object, Schema,
|
||||
@@ -16,6 +18,26 @@ pub type UnixTime = isize;
|
||||
/// # Thread ID, sans "thread:"
|
||||
pub type ThreadId = String;
|
||||
|
||||
// Tag naming one of the three search backends; `ThreadSummary::corpus` carries it,
// and the `FromStr` impl below accepts the lowercase spelling of each variant.
// NOTE(review): kept as plain `//` comments on purpose — presumably `///` here would
// surface as a GraphQL description through the `Enum` derive; confirm before converting.
#[derive(Debug, Enum, Copy, Clone, Eq, PartialEq)]
|
||||
pub enum Corpus {
|
||||
Notmuch,
|
||||
Newsreader,
|
||||
Tantivy,
|
||||
}
|
||||
|
||||
impl FromStr for Corpus {
|
||||
type Err = String;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"notmuch" => Corpus::Notmuch,
|
||||
"newsreader" => Corpus::Newsreader,
|
||||
"tantivy" => Corpus::Tantivy,
|
||||
s => return Err(format!("unknown corpus: '{s}'")),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: add is_read field and remove all use of 'tag:unread'
|
||||
#[derive(Debug, SimpleObject)]
|
||||
pub struct ThreadSummary {
|
||||
pub thread: ThreadId,
|
||||
@@ -30,6 +52,7 @@ pub struct ThreadSummary {
|
||||
pub authors: String,
|
||||
pub subject: String,
|
||||
pub tags: Vec<String>,
|
||||
pub corpus: Corpus,
|
||||
}
|
||||
|
||||
#[derive(Debug, Union)]
|
||||
@@ -237,13 +260,16 @@ impl QueryRoot {
|
||||
// Parses `query` and returns the sum of the per-backend match counts.
// NOTE(review): this listing is a diff rendered without +/- markers, so statements
// from before and after the change appear back to back — see the notes below.
async fn count<'ctx>(&self, ctx: &Context<'ctx>, query: String) -> Result<usize, Error> {
|
||||
let nm = ctx.data_unchecked::<Notmuch>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
let tantivy = ctx.data_unchecked::<TantivyConnection>();
|
||||
|
||||
let newsreader_query: Query = query.parse()?;
|
||||
|
||||
let newsreader_count = newsreader::count(pool, &newsreader_query).await?;
// NOTE(review): the next three statements (through `Ok(newsreader_count + notmuch_count)`)
// look like the pre-change lines that the statements after them supersede — confirm
// against the real diff.
|
||||
let notmuch_count = nm::count(nm, &newsreader_query.to_notmuch()).await?;
|
||||
info!("count {newsreader_query:?} newsreader count {newsreader_count} notmuch count {notmuch_count}");
|
||||
Ok(newsreader_count + notmuch_count)
// Apparent post-change version: the parsed query is handed to notmuch directly and a
// tantivy count is added to the total.
|
||||
let notmuch_count = nm::count(nm, &newsreader_query).await?;
|
||||
let tantivy_count = tantivy.count(&newsreader_query).await?;
|
||||
let total = newsreader_count + notmuch_count + tantivy_count;
|
||||
info!("count {newsreader_query:?} newsreader count {newsreader_count} notmuch count {notmuch_count} tantivy count {tantivy_count} total {total}");
|
||||
Ok(total)
|
||||
}
|
||||
|
||||
async fn search<'ctx>(
|
||||
@@ -255,18 +281,11 @@ impl QueryRoot {
|
||||
last: Option<i32>,
|
||||
query: String,
|
||||
) -> Result<Connection<OpaqueCursor<SearchCursor>, ThreadSummary>, Error> {
|
||||
// TODO: add keywords to limit search to one corpus, i.e. is:news or is:mail
|
||||
info!("search({after:?} {before:?} {first:?} {last:?} {query:?})",);
|
||||
let nm = ctx.data_unchecked::<Notmuch>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
let tantivy = ctx.data_unchecked::<TantivyConnection>();
|
||||
|
||||
#[derive(Debug)]
|
||||
enum ThreadSummaryCursor {
|
||||
Newsreader(i32, ThreadSummary),
|
||||
Notmuch(i32, ThreadSummary),
|
||||
Tantivy(i32, ThreadSummary),
|
||||
}
|
||||
Ok(connection::query(
|
||||
after,
|
||||
before,
|
||||
@@ -277,7 +296,7 @@ impl QueryRoot {
|
||||
first: Option<usize>,
|
||||
last: Option<usize>| async move {
|
||||
info!(
|
||||
"search({:?} {:?} {first:?} {last:?} {query:?})",
|
||||
"search(after {:?} before {:?} first {first:?} last {last:?} query: {query:?})",
|
||||
after.as_ref().map(|v| &v.0),
|
||||
before.as_ref().map(|v| &v.0)
|
||||
);
|
||||
@@ -288,65 +307,40 @@ impl QueryRoot {
|
||||
let newsreader_before = before.as_ref().map(|sc| sc.newsreader_offset);
|
||||
let notmuch_before = before.as_ref().map(|sc| sc.notmuch_offset);
|
||||
let tantivy_before = before.as_ref().map(|sc| sc.tantivy_offset);
|
||||
let first = first.map(|v| v as i32);
|
||||
let last = last.map(|v| v as i32);
|
||||
|
||||
let newsreader_query: Query = query.parse()?;
|
||||
info!("newsreader_query {newsreader_query:?}");
|
||||
let newsreader_results = if newsreader_query.is_newsreader {
|
||||
newsreader::search(
|
||||
pool,
|
||||
newsreader_after,
|
||||
newsreader_before,
|
||||
first.map(|v| v as i32),
|
||||
last.map(|v| v as i32),
|
||||
&newsreader_query,
|
||||
)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|(cur, ts)| ThreadSummaryCursor::Newsreader(cur, ts))
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let notmuch_results = if newsreader_query.is_notmuch {
|
||||
nm::search(
|
||||
nm,
|
||||
notmuch_after,
|
||||
notmuch_before,
|
||||
first.map(|v| v as i32),
|
||||
last.map(|v| v as i32),
|
||||
newsreader_query.to_notmuch(),
|
||||
)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|(cur, ts)| ThreadSummaryCursor::Notmuch(cur, ts))
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let tantivy_results = if newsreader_query.is_tantivy {
|
||||
tantivy
|
||||
.search(
|
||||
pool,
|
||||
tantivy_after,
|
||||
tantivy_before,
|
||||
first.map(|v| v as i32),
|
||||
last.map(|v| v as i32),
|
||||
&newsreader_query,
|
||||
)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|(cur, ts)| ThreadSummaryCursor::Tantivy(cur, ts))
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
let query: Query = query.parse()?;
|
||||
info!("newsreader_query {query:?}");
|
||||
|
||||
let newsreader_results = newsreader_search(
|
||||
pool,
|
||||
newsreader_after,
|
||||
newsreader_before,
|
||||
first,
|
||||
last,
|
||||
&query,
|
||||
)
|
||||
.await?;
|
||||
let notmuch_results =
|
||||
notmuch_search(nm, notmuch_after, notmuch_before, first, last, &query).await?;
|
||||
let tantivy_results = tantivy_search(
|
||||
tantivy,
|
||||
pool,
|
||||
tantivy_after,
|
||||
tantivy_before,
|
||||
first,
|
||||
last,
|
||||
&query,
|
||||
)
|
||||
.await?;
|
||||
info!(
|
||||
"tantivy results:\nis_tantivy:{} {tantivy_results:#?}",
|
||||
newsreader_query.is_tantivy
|
||||
"newsreader_results ({}) notmuch_results ({}) tantivy_results ({})",
|
||||
newsreader_results.len(),
|
||||
notmuch_results.len(),
|
||||
tantivy_results.len()
|
||||
);
|
||||
|
||||
let mut results: Vec<_> = newsreader_results
|
||||
.into_iter()
|
||||
.chain(notmuch_results)
|
||||
@@ -362,6 +356,7 @@ impl QueryRoot {
|
||||
|
||||
let mut has_next_page = before.is_some();
|
||||
if let Some(first) = first {
|
||||
let first = first as usize;
|
||||
if results.len() > first {
|
||||
has_next_page = true;
|
||||
results.truncate(first);
|
||||
@@ -370,6 +365,7 @@ impl QueryRoot {
|
||||
|
||||
let mut has_previous_page = after.is_some();
|
||||
if let Some(last) = last {
|
||||
let last = last as usize;
|
||||
if results.len() > last {
|
||||
has_previous_page = true;
|
||||
results.truncate(last);
|
||||
@@ -437,6 +433,59 @@ impl QueryRoot {
|
||||
}
|
||||
}
|
||||
|
||||
/// A search hit tagged with the backend that produced it.
///
/// Each variant holds the `i32` cursor value that the backend's search call
/// returned alongside the hit, followed by the [`ThreadSummary`] itself.
#[derive(Debug)]
|
||||
enum ThreadSummaryCursor {
|
||||
Newsreader(i32, ThreadSummary),
|
||||
Notmuch(i32, ThreadSummary),
|
||||
Tantivy(i32, ThreadSummary),
|
||||
}
|
||||
async fn newsreader_search(
|
||||
pool: &PgPool,
|
||||
after: Option<i32>,
|
||||
before: Option<i32>,
|
||||
first: Option<i32>,
|
||||
last: Option<i32>,
|
||||
query: &Query,
|
||||
) -> Result<Vec<ThreadSummaryCursor>, async_graphql::Error> {
|
||||
Ok(newsreader::search(pool, after, before, first, last, &query)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|(cur, ts)| ThreadSummaryCursor::Newsreader(cur, ts))
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn notmuch_search(
|
||||
nm: &Notmuch,
|
||||
after: Option<i32>,
|
||||
before: Option<i32>,
|
||||
first: Option<i32>,
|
||||
last: Option<i32>,
|
||||
query: &Query,
|
||||
) -> Result<Vec<ThreadSummaryCursor>, async_graphql::Error> {
|
||||
Ok(nm::search(nm, after, before, first, last, &query)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|(cur, ts)| ThreadSummaryCursor::Notmuch(cur, ts))
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn tantivy_search(
|
||||
tantivy: &TantivyConnection,
|
||||
pool: &PgPool,
|
||||
after: Option<i32>,
|
||||
before: Option<i32>,
|
||||
first: Option<i32>,
|
||||
last: Option<i32>,
|
||||
query: &Query,
|
||||
) -> Result<Vec<ThreadSummaryCursor>, async_graphql::Error> {
|
||||
Ok(tantivy
|
||||
.search(pool, after, before, first, last, &query)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|(cur, ts)| ThreadSummaryCursor::Tantivy(cur, ts))
|
||||
.collect())
|
||||
}
|
||||
|
||||
pub struct Mutation;
|
||||
#[Object]
|
||||
impl Mutation {
|
||||
@@ -448,14 +497,12 @@ impl Mutation {
|
||||
) -> Result<bool, Error> {
|
||||
let nm = ctx.data_unchecked::<Notmuch>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
let tantivy = ctx.data_unchecked::<TantivyConnection>();
|
||||
|
||||
for q in query.split_whitespace() {
|
||||
if newsreader::is_newsreader_thread(&q) {
|
||||
newsreader::set_read_status(pool, &q, unread).await?;
|
||||
} else {
|
||||
nm::set_read_status(nm, q, unread).await?;
|
||||
}
|
||||
}
|
||||
let query: Query = query.parse()?;
|
||||
newsreader::set_read_status(pool, &query, unread).await?;
|
||||
tantivy.reindex_thread(pool, &query).await?;
|
||||
nm::set_read_status(nm, &query, unread).await?;
|
||||
Ok(true)
|
||||
}
|
||||
async fn tag_add<'ctx>(
|
||||
@@ -486,10 +533,19 @@ impl Mutation {
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
|
||||
tantivy.drop_and_load_index()?;
|
||||
tantivy.reindex(pool).await?;
|
||||
tantivy.reindex_all(pool).await?;
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
async fn refresh<'ctx>(&self, ctx: &Context<'ctx>) -> Result<bool, Error> {
|
||||
let nm = ctx.data_unchecked::<Notmuch>();
|
||||
let tantivy = ctx.data_unchecked::<TantivyConnection>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
// TODO: parallelize
|
||||
info!("{}", String::from_utf8_lossy(&nm.new()?));
|
||||
tantivy.refresh(pool).await?;
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
|
||||
pub type GraphqlSchema = Schema<QueryRoot, Mutation, EmptySubscription>;
|
||||
|
||||
Reference in New Issue
Block a user