server: improve tantivy performance by reusing IndexReader
Also improve a bunch of trace logging
This commit is contained in:
parent
05cdcec244
commit
6d8b2de608
@ -505,7 +505,7 @@ async fn tantivy_search(
|
|||||||
pub struct Mutation;
|
pub struct Mutation;
|
||||||
#[Object]
|
#[Object]
|
||||||
impl Mutation {
|
impl Mutation {
|
||||||
#[instrument(skip_all, fields(query, bool))]
|
#[instrument(skip_all, fields(query=query, unread=unread))]
|
||||||
async fn set_read_status<'ctx>(
|
async fn set_read_status<'ctx>(
|
||||||
&self,
|
&self,
|
||||||
ctx: &Context<'ctx>,
|
ctx: &Context<'ctx>,
|
||||||
@ -522,7 +522,7 @@ impl Mutation {
|
|||||||
nm::set_read_status(nm, &query, unread).await?;
|
nm::set_read_status(nm, &query, unread).await?;
|
||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
#[instrument(skip_all, fields(query, tag))]
|
#[instrument(skip_all, fields(query=query, tag=tag))]
|
||||||
async fn tag_add<'ctx>(
|
async fn tag_add<'ctx>(
|
||||||
&self,
|
&self,
|
||||||
ctx: &Context<'ctx>,
|
ctx: &Context<'ctx>,
|
||||||
@ -534,7 +534,7 @@ impl Mutation {
|
|||||||
nm.tag_add(&tag, &query)?;
|
nm.tag_add(&tag, &query)?;
|
||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
#[instrument(skip_all, fields(query, tag))]
|
#[instrument(skip_all, fields(query=query, tag=tag))]
|
||||||
async fn tag_remove<'ctx>(
|
async fn tag_remove<'ctx>(
|
||||||
&self,
|
&self,
|
||||||
ctx: &Context<'ctx>,
|
ctx: &Context<'ctx>,
|
||||||
|
|||||||
@ -5,7 +5,7 @@ pub mod newsreader;
|
|||||||
pub mod nm;
|
pub mod nm;
|
||||||
pub mod tantivy;
|
pub mod tantivy;
|
||||||
|
|
||||||
use std::{collections::HashMap, convert::Infallible, str::FromStr, sync::Arc};
|
use std::{collections::HashMap, convert::Infallible, fmt, str::FromStr, sync::Arc};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use cacher::{Cacher, FilesystemCacher};
|
use cacher::{Cacher, FilesystemCacher};
|
||||||
@ -612,11 +612,38 @@ pub struct Query {
|
|||||||
pub uids: Vec<String>,
|
pub uids: Vec<String>,
|
||||||
pub remainder: Vec<String>,
|
pub remainder: Vec<String>,
|
||||||
pub is_notmuch: bool,
|
pub is_notmuch: bool,
|
||||||
pub is_newsreader: bool,
|
|
||||||
pub is_tantivy: bool,
|
pub is_tantivy: bool,
|
||||||
pub corpus: Option<Corpus>,
|
pub corpus: Option<Corpus>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Query {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
|
||||||
|
if self.unread_only {
|
||||||
|
write!(f, "is:unread ")?;
|
||||||
|
}
|
||||||
|
for tag in &self.tags {
|
||||||
|
write!(f, "tag:{tag} ")?;
|
||||||
|
}
|
||||||
|
for uid in &self.uids {
|
||||||
|
write!(f, "id:{uid} ")?;
|
||||||
|
}
|
||||||
|
if self.is_notmuch {
|
||||||
|
write!(f, "is:mail ")?;
|
||||||
|
}
|
||||||
|
if self.is_tantivy {
|
||||||
|
write!(f, "is:news ")?;
|
||||||
|
}
|
||||||
|
match self.corpus {
|
||||||
|
Some(c) => write!(f, "corpus:{c:?}")?,
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
for rem in &self.remainder {
|
||||||
|
write!(f, "{rem} ")?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Query {
|
impl Query {
|
||||||
// Converts the internal state of Query to something suitable for notmuch queries. Removes any
|
// Converts the internal state of Query to something suitable for notmuch queries. Removes any
|
||||||
// letterbox-specific '<key>:<value>' tags
|
// letterbox-specific '<key>:<value>' tags
|
||||||
@ -648,7 +675,6 @@ impl FromStr for Query {
|
|||||||
let mut uids = Vec::new();
|
let mut uids = Vec::new();
|
||||||
let mut remainder = Vec::new();
|
let mut remainder = Vec::new();
|
||||||
let mut is_notmuch = false;
|
let mut is_notmuch = false;
|
||||||
let is_newsreader = false;
|
|
||||||
let mut is_tantivy = false;
|
let mut is_tantivy = false;
|
||||||
let mut corpus = None;
|
let mut corpus = None;
|
||||||
for word in s.split_whitespace() {
|
for word in s.split_whitespace() {
|
||||||
@ -682,10 +708,7 @@ impl FromStr for Query {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If we don't see any explicit filters for a corpus, flip them all on
|
// If we don't see any explicit filters for a corpus, flip them all on
|
||||||
if corpus.is_none() && !(is_newsreader || is_notmuch || is_tantivy) {
|
if corpus.is_none() && !(is_notmuch || is_tantivy) {
|
||||||
// Don't set is_newsreader unless debugging, assume tantivy can handle it.
|
|
||||||
// Explicitely setting corpus:newsreader will by-pass this logic
|
|
||||||
// is_newsreader = true;
|
|
||||||
is_notmuch = true;
|
is_notmuch = true;
|
||||||
is_tantivy = true;
|
is_tantivy = true;
|
||||||
}
|
}
|
||||||
@ -695,7 +718,6 @@ impl FromStr for Query {
|
|||||||
uids,
|
uids,
|
||||||
remainder,
|
remainder,
|
||||||
is_notmuch,
|
is_notmuch,
|
||||||
is_newsreader,
|
|
||||||
is_tantivy,
|
is_tantivy,
|
||||||
corpus,
|
corpus,
|
||||||
})
|
})
|
||||||
|
|||||||
@ -20,7 +20,7 @@ use crate::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
pub fn is_newsreader_query(query: &Query) -> bool {
|
pub fn is_newsreader_query(query: &Query) -> bool {
|
||||||
query.is_newsreader || query.corpus == Some(Corpus::Newsreader)
|
query.corpus == Some(Corpus::Newsreader)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_newsreader_thread(query: &str) -> bool {
|
pub fn is_newsreader_thread(query: &str) -> bool {
|
||||||
|
|||||||
@ -49,7 +49,7 @@ pub fn threadset_to_messages(thread_set: notmuch::ThreadSet) -> Result<Vec<Messa
|
|||||||
Ok(Vec::new())
|
Ok(Vec::new())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(name="nm::count", skip_all, fields(query=?query))]
|
#[instrument(name="nm::count", skip_all, fields(query=%query))]
|
||||||
pub async fn count(nm: &Notmuch, query: &Query) -> Result<usize, ServerError> {
|
pub async fn count(nm: &Notmuch, query: &Query) -> Result<usize, ServerError> {
|
||||||
if !is_notmuch_query(query) {
|
if !is_notmuch_query(query) {
|
||||||
return Ok(0);
|
return Ok(0);
|
||||||
@ -58,7 +58,7 @@ pub async fn count(nm: &Notmuch, query: &Query) -> Result<usize, ServerError> {
|
|||||||
Ok(nm.count(&query)?)
|
Ok(nm.count(&query)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(name="nm::search", skip_all, fields(query=?query))]
|
#[instrument(name="nm::search", skip_all, fields(query=%query))]
|
||||||
pub async fn search(
|
pub async fn search(
|
||||||
nm: &Notmuch,
|
nm: &Notmuch,
|
||||||
after: Option<i32>,
|
after: Option<i32>,
|
||||||
@ -856,7 +856,7 @@ fn render_content_type_tree(m: &ParsedMail) -> String {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(name="nm::set_read_status", skip_all, fields(query=?query, unread=unread))]
|
#[instrument(name="nm::set_read_status", skip_all, fields(query=%query, unread=unread))]
|
||||||
pub async fn set_read_status<'ctx>(
|
pub async fn set_read_status<'ctx>(
|
||||||
nm: &Notmuch,
|
nm: &Notmuch,
|
||||||
query: &Query,
|
query: &Query,
|
||||||
|
|||||||
@ -7,7 +7,7 @@ use tantivy::{
|
|||||||
doc, query,
|
doc, query,
|
||||||
query::{AllQuery, BooleanQuery, Occur, QueryParser, TermQuery},
|
query::{AllQuery, BooleanQuery, Occur, QueryParser, TermQuery},
|
||||||
schema::{Facet, IndexRecordOption, Value},
|
schema::{Facet, IndexRecordOption, Value},
|
||||||
DocAddress, Index, Searcher, TantivyDocument, TantivyError, Term,
|
DocAddress, Index, IndexReader, Searcher, TantivyDocument, TantivyError, Term,
|
||||||
};
|
};
|
||||||
use tracing::instrument;
|
use tracing::instrument;
|
||||||
|
|
||||||
@ -24,23 +24,29 @@ pub fn is_tantivy_query(query: &Query) -> bool {
|
|||||||
}
|
}
|
||||||
pub struct TantivyConnection {
|
pub struct TantivyConnection {
|
||||||
db_path: String,
|
db_path: String,
|
||||||
//index: Index,
|
index: Index,
|
||||||
|
reader: IndexReader,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TantivyConnection {
|
fn get_index(db_path: &str) -> Result<Index, TantivyError> {
|
||||||
fn get_index(&self) -> Result<Index, TantivyError> {
|
Ok(match Index::open_in_dir(db_path) {
|
||||||
Ok(match Index::open_in_dir(&self.db_path) {
|
|
||||||
Ok(idx) => idx,
|
Ok(idx) => idx,
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
create_news_db(&self.db_path)?;
|
create_news_db(db_path)?;
|
||||||
Index::open_in_dir(&self.db_path)?
|
Index::open_in_dir(db_path)?
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl TantivyConnection {
|
||||||
pub fn new(tantivy_db_path: &str) -> Result<TantivyConnection, TantivyError> {
|
pub fn new(tantivy_db_path: &str) -> Result<TantivyConnection, TantivyError> {
|
||||||
|
let index = get_index(tantivy_db_path)?;
|
||||||
|
let reader = index.reader()?;
|
||||||
|
|
||||||
Ok(TantivyConnection {
|
Ok(TantivyConnection {
|
||||||
db_path: tantivy_db_path.to_string(),
|
db_path: tantivy_db_path.to_string(),
|
||||||
|
index,
|
||||||
|
reader,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
#[instrument(name = "tantivy::refresh", skip_all)]
|
#[instrument(name = "tantivy::refresh", skip_all)]
|
||||||
@ -61,7 +67,7 @@ impl TantivyConnection {
|
|||||||
let start_time = std::time::Instant::now();
|
let start_time = std::time::Instant::now();
|
||||||
let (searcher, _query) = self.searcher_and_query(&Query::default())?;
|
let (searcher, _query) = self.searcher_and_query(&Query::default())?;
|
||||||
let docs = searcher.search(&AllQuery, &DocSetCollector)?;
|
let docs = searcher.search(&AllQuery, &DocSetCollector)?;
|
||||||
let uid = self.get_index()?.schema().get_field("uid")?;
|
let uid = self.index.schema().get_field("uid")?;
|
||||||
let t_uids: Vec<_> = docs
|
let t_uids: Vec<_> = docs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|doc_address| {
|
.map(|doc_address| {
|
||||||
@ -112,9 +118,8 @@ impl TantivyConnection {
|
|||||||
let start_time = std::time::Instant::now();
|
let start_time = std::time::Instant::now();
|
||||||
let pool: &PgPool = pool;
|
let pool: &PgPool = pool;
|
||||||
|
|
||||||
let index = self.get_index()?;
|
let mut index_writer = self.index.writer(50_000_000)?;
|
||||||
let mut index_writer = index.writer(50_000_000)?;
|
let schema = self.index.schema();
|
||||||
let schema = index.schema();
|
|
||||||
let site = schema.get_field("site")?;
|
let site = schema.get_field("site")?;
|
||||||
let title = schema.get_field("title")?;
|
let title = schema.get_field("title")?;
|
||||||
let summary = schema.get_field("summary")?;
|
let summary = schema.get_field("summary")?;
|
||||||
@ -169,7 +174,7 @@ impl TantivyConnection {
|
|||||||
index_writer.commit()?;
|
index_writer.commit()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
#[instrument(name = "tantivy::reindex_thread", skip_all, fields(query=?query))]
|
#[instrument(name = "tantivy::reindex_thread", skip_all, fields(query=%query))]
|
||||||
pub async fn reindex_thread(&self, pool: &PgPool, query: &Query) -> Result<(), ServerError> {
|
pub async fn reindex_thread(&self, pool: &PgPool, query: &Query) -> Result<(), ServerError> {
|
||||||
let uids: Vec<_> = query
|
let uids: Vec<_> = query
|
||||||
.uids
|
.uids
|
||||||
@ -193,7 +198,6 @@ impl TantivyConnection {
|
|||||||
&self,
|
&self,
|
||||||
query: &Query,
|
query: &Query,
|
||||||
) -> Result<(Searcher, Box<dyn query::Query>), ServerError> {
|
) -> Result<(Searcher, Box<dyn query::Query>), ServerError> {
|
||||||
let index = self.get_index()?;
|
|
||||||
// TODO: only create one reader
|
// TODO: only create one reader
|
||||||
// From https://tantivy-search.github.io/examples/basic_search.html
|
// From https://tantivy-search.github.io/examples/basic_search.html
|
||||||
// "For a search server you will typically create one reader for the entire lifetime of
|
// "For a search server you will typically create one reader for the entire lifetime of
|
||||||
@ -202,12 +206,11 @@ impl TantivyConnection {
|
|||||||
// I think there's some challenge in making the reader work if we reindex, so the reader may
|
// I think there's some challenge in making the reader work if we reindex, so the reader may
|
||||||
// need to be stored indirectly, and be recreated on reindex
|
// need to be stored indirectly, and be recreated on reindex
|
||||||
// I think creating a reader takes 200-300 ms.
|
// I think creating a reader takes 200-300 ms.
|
||||||
let reader = index.reader()?;
|
let schema = self.index.schema();
|
||||||
let schema = index.schema();
|
let searcher = self.reader.searcher();
|
||||||
let searcher = reader.searcher();
|
|
||||||
let title = schema.get_field("title")?;
|
let title = schema.get_field("title")?;
|
||||||
let summary = schema.get_field("summary")?;
|
let summary = schema.get_field("summary")?;
|
||||||
let query_parser = QueryParser::for_index(&index, vec![title, summary]);
|
let query_parser = QueryParser::for_index(&self.index, vec![title, summary]);
|
||||||
// Tantivy uses '*' to match all docs, not empty string
|
// Tantivy uses '*' to match all docs, not empty string
|
||||||
let term = &query.remainder.join(" ");
|
let term = &query.remainder.join(" ");
|
||||||
let term = if term.is_empty() { "*" } else { term };
|
let term = if term.is_empty() { "*" } else { term };
|
||||||
@ -215,8 +218,8 @@ impl TantivyConnection {
|
|||||||
|
|
||||||
let tantivy_query = query_parser.parse_query(&term)?;
|
let tantivy_query = query_parser.parse_query(&term)?;
|
||||||
|
|
||||||
let tag = self.get_index()?.schema().get_field("tag")?;
|
let tag = schema.get_field("tag")?;
|
||||||
let is_read = self.get_index()?.schema().get_field("is_read")?;
|
let is_read = schema.get_field("is_read")?;
|
||||||
let mut terms = vec![(Occur::Must, tantivy_query)];
|
let mut terms = vec![(Occur::Must, tantivy_query)];
|
||||||
for t in &query.tags {
|
for t in &query.tags {
|
||||||
let facet = Facet::from(&format!("/{t}"));
|
let facet = Facet::from(&format!("/{t}"));
|
||||||
@ -236,7 +239,7 @@ impl TantivyConnection {
|
|||||||
Ok((searcher, Box::new(search_query)))
|
Ok((searcher, Box::new(search_query)))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(name="tantivy::count", skip_all, fields(query=?query))]
|
#[instrument(name="tantivy::count", skip_all, fields(query=%query))]
|
||||||
pub async fn count(&self, query: &Query) -> Result<usize, ServerError> {
|
pub async fn count(&self, query: &Query) -> Result<usize, ServerError> {
|
||||||
if !is_tantivy_query(query) {
|
if !is_tantivy_query(query) {
|
||||||
return Ok(0);
|
return Ok(0);
|
||||||
@ -246,7 +249,7 @@ impl TantivyConnection {
|
|||||||
let (searcher, query) = self.searcher_and_query(&query)?;
|
let (searcher, query) = self.searcher_and_query(&query)?;
|
||||||
Ok(searcher.search(&query, &Count)?)
|
Ok(searcher.search(&query, &Count)?)
|
||||||
}
|
}
|
||||||
#[instrument(name="tantivy::search", skip_all, fields(query=?query))]
|
#[instrument(name="tantivy::search", skip_all, fields(query=%query))]
|
||||||
pub async fn search(
|
pub async fn search(
|
||||||
&self,
|
&self,
|
||||||
pool: &PgPool,
|
pool: &PgPool,
|
||||||
@ -276,7 +279,7 @@ impl TantivyConnection {
|
|||||||
.order_by_u64_field("date", tantivy::index::Order::Desc),
|
.order_by_u64_field("date", tantivy::index::Order::Desc),
|
||||||
)?;
|
)?;
|
||||||
info!("search found {} docs", top_docs.len());
|
info!("search found {} docs", top_docs.len());
|
||||||
let uid = self.get_index()?.schema().get_field("uid")?;
|
let uid = self.index.schema().get_field("uid")?;
|
||||||
let uids = top_docs
|
let uids = top_docs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(_, doc_address): (u64, DocAddress)| {
|
.map(|(_, doc_address): (u64, DocAddress)| {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user