server: WIP tantivy integration
This commit is contained in:
@@ -1,23 +1,31 @@
|
||||
use log::info;
|
||||
use sqlx::postgres::PgPool;
|
||||
use tantivy::{Index, IndexWriter, TantivyError};
|
||||
use tantivy::{schema::Value, Index, TantivyError};
|
||||
|
||||
use crate::error::ServerError;
|
||||
use crate::{
|
||||
error::ServerError, graphql::ThreadSummary, thread_summary_from_row, Query, ThreadSummaryRecord,
|
||||
};
|
||||
|
||||
/// Handle used by the server to reach the tantivy search index.
///
/// `db_path` is the directory handed to `Index::open_in_dir` and
/// `create_news_db` by the methods of this type.
pub struct TantivyConnection {
|
||||
index: Index,
|
||||
db_path: String,
|
||||
//index: Index,
|
||||
}
|
||||
|
||||
impl TantivyConnection {
|
||||
pub fn new(tantivy_db_path: &str) -> Result<TantivyConnection, TantivyError> {
|
||||
let index = match Index::open_in_dir(tantivy_db_path) {
|
||||
/// Opens the tantivy index located at `self.db_path`.
///
/// If `Index::open_in_dir` fails for any reason (the error is discarded by
/// the `Err(_)` arm), `create_news_db` is called for the same path and the
/// open is attempted once more; an error from either of those two steps is
/// returned to the caller.
///
/// NOTE(review): `create_news_db` starts with `std::fs::remove_dir_all`, so
/// this fallback removes whatever is at the path, and presumably fails when
/// the directory does not exist yet — confirm first-run behaviour.
fn get_index(&self) -> Result<Index, TantivyError> {
|
||||
Ok(match Index::open_in_dir(&self.db_path) {
|
||||
Ok(idx) => idx,
|
||||
Err(_) => {
|
||||
create_news_db(tantivy_db_path)?;
|
||||
Index::open_in_dir(tantivy_db_path)?
|
||||
create_news_db(&self.db_path)?;
|
||||
Index::open_in_dir(&self.db_path)?
|
||||
}
|
||||
};
|
||||
Ok(TantivyConnection { index })
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a connection that remembers `tantivy_db_path`.
///
/// No index is opened or created here: the path is only copied into
/// `db_path`, so the body shown always returns `Ok`.
pub fn new(tantivy_db_path: &str) -> Result<TantivyConnection, TantivyError> {
|
||||
Ok(TantivyConnection {
|
||||
db_path: tantivy_db_path.to_string(),
|
||||
})
|
||||
}
|
||||
pub async fn reindex(&self, pool: &PgPool) -> Result<(), ServerError> {
|
||||
use tantivy::{doc, Term};
|
||||
@@ -25,8 +33,9 @@ impl TantivyConnection {
|
||||
let start_time = std::time::Instant::now();
|
||||
let pool: &PgPool = pool;
|
||||
|
||||
let mut index_writer = self.index.writer(50_000_000)?;
|
||||
let schema = self.index.schema();
|
||||
let index = self.get_index()?;
|
||||
let mut index_writer = index.writer(50_000_000)?;
|
||||
let schema = index.schema();
|
||||
let site = schema.get_field("site")?;
|
||||
let title = schema.get_field("title")?;
|
||||
let summary = schema.get_field("summary")?;
|
||||
@@ -68,30 +77,76 @@ impl TantivyConnection {
|
||||
info!("took {:.2}s to reindex", start_time.elapsed().as_secs_f32());
|
||||
Ok(())
|
||||
}
|
||||
pub fn search(&self) -> Result<String, TantivyError> {
|
||||
/// Runs a full-text search in tantivy and returns the matching threads
/// loaded from Postgres.
///
/// The query text is `query.remainder` joined with single spaces and parsed
/// with a `QueryParser` over the `title` and `summary` fields. The stored
/// `uid` of each of the top 10 hits is collected and passed to
/// `sql/threads-from-uid.sql`; every returned row is converted with
/// `thread_summary_from_row` and paired with its row position plus `offset`.
///
/// NOTE(review): `after`, `before`, `first` and `last` are not read in the
/// body shown here — `offset` is hard-coded to 0 and the hit limit to 10
/// (see the TODO below).
///
/// # Errors
/// Errors from opening the index, schema field lookup, query parsing, the
/// tantivy search and the SQL query are propagated with `?`.
///
/// # Panics
/// Panics through `expect` when a hit has no `uid` value or when `as_str()`
/// on that value yields nothing.
pub async fn search(
|
||||
&self,
|
||||
pool: &PgPool,
|
||||
after: Option<i32>,
|
||||
before: Option<i32>,
|
||||
first: Option<i32>,
|
||||
last: Option<i32>,
|
||||
query: &Query,
|
||||
) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
|
||||
use tantivy::{collector::TopDocs, query::QueryParser, Document, TantivyDocument};
|
||||
// TODO: set based on function parameters
|
||||
let offset = 0;
|
||||

||||
let reader = self.index.reader()?;
|
||||
let schema = self.index.schema();
|
||||
let index = self.get_index()?;
|
||||
let reader = index.reader()?;
|
||||
let schema = index.schema();
|
||||
let searcher = reader.searcher();
|
||||
let site = schema.get_field("site")?;
|
||||
let uid = schema.get_field("uid")?;
|
||||
let title = schema.get_field("title")?;
|
||||
let summary = schema.get_field("summary")?;
|
||||
let query_parser = QueryParser::for_index(&self.index, vec![site, title, summary]);
|
||||
let date = schema.get_field("date")?;
|
||||
let query_parser = QueryParser::for_index(&index, vec![title, summary]);
|
||||

||||
let query = query_parser.parse_query("grapheme")?;
|
||||
let query = query_parser.parse_query(&query.remainder.join(" "))?;
|
||||
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
|
||||
let mut results = vec![];
|
||||
info!("search found {} docs", top_docs.len());
|
||||
for (_score, doc_address) in top_docs {
|
||||
let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
|
||||
results.push(format!("{}", retrieved_doc.to_json(&schema)));
|
||||
let uids = top_docs
|
||||
.into_iter()
|
||||
.map(|(_, doc_address)| {
|
||||
searcher.doc(doc_address).map(|doc: TantivyDocument| {
|
||||
doc.get_first(uid)
|
||||
.expect("doc missing uid")
|
||||
.as_str()
|
||||
.expect("doc str missing")
|
||||
.to_string()
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<String>, TantivyError>>()?;
|
||||

||||
//let uids = format!("'{}'", uids.join("','"));
|
||||
info!("uids {uids:?}");
|
||||
let rows = sqlx::query_file!("sql/threads-from-uid.sql", &uids as &[String])
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
let mut res = Vec::new();
|
||||
info!("found {} hits joining w/ tantivy", rows.len());
|
||||
for (i, r) in rows.into_iter().enumerate() {
|
||||
res.push((
|
||||
i as i32 + offset,
|
||||
thread_summary_from_row(ThreadSummaryRecord {
|
||||
site: r.site,
|
||||
date: r.date,
|
||||
is_read: r.is_read,
|
||||
title: r.title,
|
||||
uid: r.uid,
|
||||
name: r.name,
|
||||
})
|
||||
.await,
|
||||
));
|
||||
}
|
||||
Ok(results.join(" "))
|
||||
Ok(res)
|
||||
}
|
||||
/// Delegates to `create_news_db` with `self.db_path` and returns its result.
///
/// This method performs no other work itself; what is dropped or created
/// is determined entirely by `create_news_db`.
pub fn drop_and_load_index(&self) -> Result<(), TantivyError> {
|
||||
create_news_db(&self.db_path)
|
||||
}
|
||||
}
|
||||
|
||||
fn create_news_db(tantivy_db_path: &str) -> Result<(), TantivyError> {
|
||||
info!("create_news_db");
|
||||
std::fs::remove_dir_all(tantivy_db_path)?;
|
||||
std::fs::create_dir_all(tantivy_db_path)?;
|
||||
use tantivy::schema::*;
|
||||
@@ -100,7 +155,7 @@ fn create_news_db(tantivy_db_path: &str) -> Result<(), TantivyError> {
|
||||
schema_builder.add_text_field("title", TEXT | STORED);
|
||||
schema_builder.add_text_field("summary", TEXT);
|
||||
schema_builder.add_text_field("link", STRING | STORED);
|
||||
schema_builder.add_date_field("date", FAST);
|
||||
schema_builder.add_date_field("date", FAST | INDEXED | STORED);
|
||||
schema_builder.add_bool_field("is_read", FAST);
|
||||
schema_builder.add_text_field("uid", STRING | STORED);
|
||||
schema_builder.add_i64_field("id", FAST);
|
||||
|
||||
Reference in New Issue
Block a user