web & server: using tantivy for news post search
parent f36d1e0c29
commit 3ec1741f10
@@ -1,6 +1,6 @@
 {
   "db_name": "PostgreSQL",
-  "query": "SELECT\n COUNT(*) count\nFROM\n post\nWHERE\n ($1::text IS NULL OR site = $1)\n AND (\n NOT $2\n OR NOT is_read\n )\n",
+  "query": "SELECT\n COUNT(*) count\nFROM\n post\nWHERE\n (\n $1 :: text IS NULL\n OR site = $1\n )\n AND (\n NOT $2\n OR NOT is_read\n )\n",
   "describe": {
     "columns": [
       {
@@ -19,5 +19,5 @@
       null
     ]
   },
-  "hash": "e28b890e308f483aa6bd08617548ae66294ae1e99b1cab49f5f4211e0fd7d419"
+  "hash": "13a9193d91264b678c18df032cc61b523e6a804ae9569da18455eb380eadb515"
 }
@@ -1,6 +1,6 @@
 {
   "db_name": "PostgreSQL",
-  "query": "SELECT\n site,\n title,\n summary,\n link,\n date,\n is_read,\n uid,\n id\nFROM post\n",
+  "query": "SELECT\n site,\n title,\n summary,\n link,\n date,\n is_read,\n uid,\n p.id id\nFROM\n post AS p\n JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts\nORDER BY\n date DESC;\n",
   "describe": {
     "columns": [
       {
@@ -58,5 +58,5 @@
       false
     ]
   },
-  "hash": "1b2244c9b9b64a1395d8d266f5df5352242bbe5efe481b0852e1c1d4b40584a7"
+  "hash": "4cbb6252a0b76f6a452ee09abb9f4b99308a74db33cdfb91baaa1bb98dc53a82"
 }
server/.sqlx/query-700757f6fb175b1aa246816ac60501576a38db222380c034ae7bc030ba6a915e.json (generated, new file, 64 lines)
@@ -0,0 +1,64 @@
+{
+  "db_name": "PostgreSQL",
+  "query": "SELECT\n site AS \"site!\",\n title AS \"title!\",\n summary AS \"summary!\",\n link AS \"link!\",\n date AS \"date!\",\n is_read AS \"is_read!\",\n uid AS \"uid!\",\n p.id id\nFROM\n post p\n JOIN feed f ON p.site = f.slug\nWHERE\n uid = ANY ($1);\n",
+  "describe": {
+    "columns": [
+      {
+        "ordinal": 0,
+        "name": "site!",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 1,
+        "name": "title!",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 2,
+        "name": "summary!",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 3,
+        "name": "link!",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 4,
+        "name": "date!",
+        "type_info": "Timestamp"
+      },
+      {
+        "ordinal": 5,
+        "name": "is_read!",
+        "type_info": "Bool"
+      },
+      {
+        "ordinal": 6,
+        "name": "uid!",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 7,
+        "name": "id",
+        "type_info": "Int4"
+      }
+    ],
+    "parameters": {
+      "Left": [
+        "TextArray"
+      ]
+    },
+    "nullable": [
+      true,
+      true,
+      true,
+      true,
+      true,
+      true,
+      false,
+      false
+    ]
+  },
+  "hash": "700757f6fb175b1aa246816ac60501576a38db222380c034ae7bc030ba6a915e"
+}
server/.sqlx/query-c16821d20c1e6012d6079008889af16eed687c74d9d77957ad08aaaeefcd1ff2.json (generated, new file, 20 lines)
@@ -0,0 +1,20 @@
+{
+  "db_name": "PostgreSQL",
+  "query": "SELECT\n uid\nFROM\n post AS p\n JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts\n;\n",
+  "describe": {
+    "columns": [
+      {
+        "ordinal": 0,
+        "name": "uid",
+        "type_info": "Text"
+      }
+    ],
+    "parameters": {
+      "Left": []
+    },
+    "nullable": [
+      false
+    ]
+  },
+  "hash": "c16821d20c1e6012d6079008889af16eed687c74d9d77957ad08aaaeefcd1ff2"
+}
server/.sqlx/query-ed34545b5fc681e9df9fc47c6ff93e5eac874eb8f95f55a27859537c51bca4f4.json (generated, new file, 52 lines)
@@ -0,0 +1,52 @@
+{
+  "db_name": "PostgreSQL",
+  "query": "SELECT\n site,\n date,\n is_read,\n title,\n uid,\n name\nFROM\n post p\n JOIN feed f ON p.site = f.slug\nWHERE\n uid = ANY ($1)\nORDER BY\n date DESC;\n",
+  "describe": {
+    "columns": [
+      {
+        "ordinal": 0,
+        "name": "site",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 1,
+        "name": "date",
+        "type_info": "Timestamp"
+      },
+      {
+        "ordinal": 2,
+        "name": "is_read",
+        "type_info": "Bool"
+      },
+      {
+        "ordinal": 3,
+        "name": "title",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 4,
+        "name": "uid",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 5,
+        "name": "name",
+        "type_info": "Text"
+      }
+    ],
+    "parameters": {
+      "Left": [
+        "TextArray"
+      ]
+    },
+    "nullable": [
+      true,
+      true,
+      true,
+      true,
+      false,
+      true
+    ]
+  },
+  "hash": "ed34545b5fc681e9df9fc47c6ff93e5eac874eb8f95f55a27859537c51bca4f4"
+}
@@ -6,6 +6,9 @@ SELECT
     date,
     is_read,
     uid,
-    id
-FROM post
-WHERE title ILIKE '%grapheme%' OR summary ILIKE '%grapheme%';
+    p.id id
+FROM
+    post AS p
+    JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts
+ORDER BY
+    date DESC;
server/sql/all-uids.sql (new file, 6 lines)
@@ -0,0 +1,6 @@
+SELECT
+    uid
+FROM
+    post AS p
+    JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts
+;
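For context, this file feeds the incremental `refresh` path added in server/src/tantivy.rs further down; a minimal sketch of the call site (mirrors the code in that hunk, `pool` is an sqlx `PgPool`):

```rust
// Every uid currently in Postgres; the feed join filters orphaned nzb posts.
let p_uids: Vec<String> = sqlx::query_file!("sql/all-uids.sql")
    .fetch_all(pool)
    .await?
    .into_iter()
    .map(|r| r.uid)
    .collect();
```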
@@ -3,7 +3,10 @@ SELECT
 FROM
     post
 WHERE
-    ($1::text IS NULL OR site = $1)
+    (
+        $1 :: text IS NULL
+        OR site = $1
+    )
     AND (
         NOT $2
         OR NOT is_read
server/sql/posts-from-uids.sql (new file, 14 lines)
@@ -0,0 +1,14 @@
+SELECT
+    site AS "site!",
+    title AS "title!",
+    summary AS "summary!",
+    link AS "link!",
+    date AS "date!",
+    is_read AS "is_read!",
+    uid AS "uid!",
+    p.id id
+FROM
+    post p
+    JOIN feed f ON p.site = f.slug
+WHERE
+    uid = ANY ($1);
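The `AS "site!"` aliases use sqlx's column override syntax: the trailing `!` asserts to the offline checker that the joined column is never NULL, which is why the generated metadata above lists the columns as nullable while the Rust side gets non-optional fields. A minimal sketch of the consuming side (the `PostgresDoc` struct is defined in the tantivy.rs hunk below; the uid value here is hypothetical):

```rust
// `uid = ANY($1)` binds a Postgres TEXT[]; sqlx checks the row mapping at compile time.
let uids: Vec<String> = vec!["example-uid".to_string()];
let rows: Vec<PostgresDoc> =
    sqlx::query_file_as!(PostgresDoc, "sql/posts-from-uids.sql", &uids[..])
        .fetch_all(pool)
        .await?;
```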
@@ -10,4 +10,5 @@ FROM
     JOIN feed f ON p.site = f.slug
 WHERE
     uid = ANY ($1)
-;
+ORDER BY
+    date DESC;
@@ -27,11 +27,6 @@ use server::{
 };
 use sqlx::postgres::PgPool;
 
-#[get("/refresh")]
-async fn refresh(nm: &State<Notmuch>) -> Result<Json<String>, Debug<NotmuchError>> {
-    Ok(Json(String::from_utf8_lossy(&nm.new()?).to_string()))
-}
-
 #[get("/show/<query>/pretty")]
 async fn show_pretty(
     nm: &State<Notmuch>,
@@ -209,7 +204,6 @@ async fn main() -> Result<(), Box<dyn Error>> {
         shared::urls::MOUNT_POINT,
         routes![
             original,
-            refresh,
             show_pretty,
             show,
             graphql_query,
@@ -1,3 +1,5 @@
+use std::str::FromStr;
+
 use async_graphql::{
     connection::{self, Connection, Edge, OpaqueCursor},
     Context, EmptySubscription, Enum, Error, FieldResult, InputObject, Object, Schema,
@@ -16,6 +18,26 @@ pub type UnixTime = isize;
 /// # Thread ID, sans "thread:"
 pub type ThreadId = String;
 
+#[derive(Debug, Enum, Copy, Clone, Eq, PartialEq)]
+pub enum Corpus {
+    Notmuch,
+    Newsreader,
+    Tantivy,
+}
+
+impl FromStr for Corpus {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            "notmuch" => Corpus::Notmuch,
+            "newsreader" => Corpus::Newsreader,
+            "tantivy" => Corpus::Tantivy,
+            s => return Err(format!("unknown corpus: '{s}'")),
+        })
+    }
+}
+
 // TODO: add is_read field and remove all use of 'tag:unread'
 #[derive(Debug, SimpleObject)]
 pub struct ThreadSummary {
     pub thread: ThreadId,
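A quick illustration of how the new `Corpus` parser behaves — a sketch, assuming only the enum and `FromStr` impl above:

```rust
use std::str::FromStr;

fn demo() {
    // Known names parse; anything else reports the offending string.
    assert_eq!(Corpus::from_str("tantivy"), Ok(Corpus::Tantivy));
    assert_eq!(
        Corpus::from_str("gmail"),
        Err("unknown corpus: 'gmail'".to_string())
    );
}
```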
@@ -30,6 +52,7 @@ pub struct ThreadSummary {
     pub authors: String,
     pub subject: String,
     pub tags: Vec<String>,
+    pub corpus: Corpus,
 }
 
 #[derive(Debug, Union)]
@@ -237,13 +260,16 @@ impl QueryRoot {
     async fn count<'ctx>(&self, ctx: &Context<'ctx>, query: String) -> Result<usize, Error> {
         let nm = ctx.data_unchecked::<Notmuch>();
         let pool = ctx.data_unchecked::<PgPool>();
+        let tantivy = ctx.data_unchecked::<TantivyConnection>();
 
         let newsreader_query: Query = query.parse()?;
 
         let newsreader_count = newsreader::count(pool, &newsreader_query).await?;
-        let notmuch_count = nm::count(nm, &newsreader_query.to_notmuch()).await?;
-        info!("count {newsreader_query:?} newsreader count {newsreader_count} notmuch count {notmuch_count}");
-        Ok(newsreader_count + notmuch_count)
+        let notmuch_count = nm::count(nm, &newsreader_query).await?;
+        let tantivy_count = tantivy.count(&newsreader_query).await?;
+        let total = newsreader_count + notmuch_count + tantivy_count;
+        info!("count {newsreader_query:?} newsreader count {newsreader_count} notmuch count {notmuch_count} tantivy count {tantivy_count} total {total}");
+        Ok(total)
     }
 
     async fn search<'ctx>(
@@ -255,18 +281,11 @@ impl QueryRoot {
         last: Option<i32>,
         query: String,
     ) -> Result<Connection<OpaqueCursor<SearchCursor>, ThreadSummary>, Error> {
-        // TODO: add keywords to limit search to one corpus, i.e. is:news or is:mail
         info!("search({after:?} {before:?} {first:?} {last:?} {query:?})",);
         let nm = ctx.data_unchecked::<Notmuch>();
         let pool = ctx.data_unchecked::<PgPool>();
+        let tantivy = ctx.data_unchecked::<TantivyConnection>();
 
-        #[derive(Debug)]
-        enum ThreadSummaryCursor {
-            Newsreader(i32, ThreadSummary),
-            Notmuch(i32, ThreadSummary),
-            Tantivy(i32, ThreadSummary),
-        }
         Ok(connection::query(
             after,
             before,
@@ -277,7 +296,7 @@ impl QueryRoot {
              first: Option<usize>,
              last: Option<usize>| async move {
                 info!(
-                    "search({:?} {:?} {first:?} {last:?} {query:?})",
+                    "search(after {:?} before {:?} first {first:?} last {last:?} query: {query:?})",
                     after.as_ref().map(|v| &v.0),
                     before.as_ref().map(|v| &v.0)
                 );
@@ -288,65 +307,40 @@ impl QueryRoot {
                 let newsreader_before = before.as_ref().map(|sc| sc.newsreader_offset);
                 let notmuch_before = before.as_ref().map(|sc| sc.notmuch_offset);
                 let tantivy_before = before.as_ref().map(|sc| sc.tantivy_offset);
+                let first = first.map(|v| v as i32);
+                let last = last.map(|v| v as i32);
 
-                let newsreader_query: Query = query.parse()?;
-                info!("newsreader_query {newsreader_query:?}");
-                let newsreader_results = if newsreader_query.is_newsreader {
-                    newsreader::search(
-                        pool,
-                        newsreader_after,
-                        newsreader_before,
-                        first.map(|v| v as i32),
-                        last.map(|v| v as i32),
-                        &newsreader_query,
-                    )
-                    .await?
-                    .into_iter()
-                    .map(|(cur, ts)| ThreadSummaryCursor::Newsreader(cur, ts))
-                    .collect()
-                } else {
-                    Vec::new()
-                };
-
-                let notmuch_results = if newsreader_query.is_notmuch {
-                    nm::search(
-                        nm,
-                        notmuch_after,
-                        notmuch_before,
-                        first.map(|v| v as i32),
-                        last.map(|v| v as i32),
-                        newsreader_query.to_notmuch(),
-                    )
-                    .await?
-                    .into_iter()
-                    .map(|(cur, ts)| ThreadSummaryCursor::Notmuch(cur, ts))
-                    .collect()
-                } else {
-                    Vec::new()
-                };
-
-                let tantivy_results = if newsreader_query.is_tantivy {
-                    tantivy
-                        .search(
-                            pool,
-                            tantivy_after,
-                            tantivy_before,
-                            first.map(|v| v as i32),
-                            last.map(|v| v as i32),
-                            &newsreader_query,
-                        )
-                        .await?
-                        .into_iter()
-                        .map(|(cur, ts)| ThreadSummaryCursor::Tantivy(cur, ts))
-                        .collect()
-                } else {
-                    Vec::new()
-                };
+                let query: Query = query.parse()?;
+                info!("newsreader_query {query:?}");
+
+                let newsreader_results = newsreader_search(
+                    pool,
+                    newsreader_after,
+                    newsreader_before,
+                    first,
+                    last,
+                    &query,
+                )
+                .await?;
+                let notmuch_results =
+                    notmuch_search(nm, notmuch_after, notmuch_before, first, last, &query).await?;
+                let tantivy_results = tantivy_search(
+                    tantivy,
+                    pool,
+                    tantivy_after,
+                    tantivy_before,
+                    first,
+                    last,
+                    &query,
+                )
+                .await?;
                 info!(
-                    "tantivy results:\nis_tantivy:{} {tantivy_results:#?}",
-                    newsreader_query.is_tantivy
+                    "newsreader_results ({}) notmuch_results ({}) tantivy_results ({})",
+                    newsreader_results.len(),
+                    notmuch_results.len(),
+                    tantivy_results.len()
                 );
 
                 let mut results: Vec<_> = newsreader_results
                     .into_iter()
                     .chain(notmuch_results)
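The refactored helpers fan the query out to all three corpora and the combined list is then paged. The key trick, visible in the truncate hunks below, is that each backend is asked for one extra row so the connection can report `hasNextPage` without a second query; a minimal sketch of that rule with simplified types:

```rust
// Fetch `first + 1` rows, then use the overflow row only as a "more pages" signal.
fn page<T>(mut results: Vec<T>, first: usize) -> (Vec<T>, bool) {
    let mut has_next_page = false;
    if results.len() > first {
        has_next_page = true;
        results.truncate(first);
    }
    (results, has_next_page)
}
```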
@@ -362,6 +356,7 @@ impl QueryRoot {
 
                 let mut has_next_page = before.is_some();
                 if let Some(first) = first {
+                    let first = first as usize;
                     if results.len() > first {
                         has_next_page = true;
                         results.truncate(first);
@@ -370,6 +365,7 @@ impl QueryRoot {
 
                 let mut has_previous_page = after.is_some();
                 if let Some(last) = last {
+                    let last = last as usize;
                     if results.len() > last {
                         has_previous_page = true;
                         results.truncate(last);
@@ -437,6 +433,59 @@ impl QueryRoot {
     }
 }
 
+#[derive(Debug)]
+enum ThreadSummaryCursor {
+    Newsreader(i32, ThreadSummary),
+    Notmuch(i32, ThreadSummary),
+    Tantivy(i32, ThreadSummary),
+}
+async fn newsreader_search(
+    pool: &PgPool,
+    after: Option<i32>,
+    before: Option<i32>,
+    first: Option<i32>,
+    last: Option<i32>,
+    query: &Query,
+) -> Result<Vec<ThreadSummaryCursor>, async_graphql::Error> {
+    Ok(newsreader::search(pool, after, before, first, last, &query)
+        .await?
+        .into_iter()
+        .map(|(cur, ts)| ThreadSummaryCursor::Newsreader(cur, ts))
+        .collect())
+}
+
+async fn notmuch_search(
+    nm: &Notmuch,
+    after: Option<i32>,
+    before: Option<i32>,
+    first: Option<i32>,
+    last: Option<i32>,
+    query: &Query,
+) -> Result<Vec<ThreadSummaryCursor>, async_graphql::Error> {
+    Ok(nm::search(nm, after, before, first, last, &query)
+        .await?
+        .into_iter()
+        .map(|(cur, ts)| ThreadSummaryCursor::Notmuch(cur, ts))
+        .collect())
+}
+
+async fn tantivy_search(
+    tantivy: &TantivyConnection,
+    pool: &PgPool,
+    after: Option<i32>,
+    before: Option<i32>,
+    first: Option<i32>,
+    last: Option<i32>,
+    query: &Query,
+) -> Result<Vec<ThreadSummaryCursor>, async_graphql::Error> {
+    Ok(tantivy
+        .search(pool, after, before, first, last, &query)
+        .await?
+        .into_iter()
+        .map(|(cur, ts)| ThreadSummaryCursor::Tantivy(cur, ts))
+        .collect())
+}
+
 pub struct Mutation;
 #[Object]
 impl Mutation {
@@ -448,14 +497,12 @@ impl Mutation {
     ) -> Result<bool, Error> {
         let nm = ctx.data_unchecked::<Notmuch>();
         let pool = ctx.data_unchecked::<PgPool>();
+        let tantivy = ctx.data_unchecked::<TantivyConnection>();
 
-        for q in query.split_whitespace() {
-            if newsreader::is_newsreader_thread(&q) {
-                newsreader::set_read_status(pool, &q, unread).await?;
-            } else {
-                nm::set_read_status(nm, q, unread).await?;
-            }
-        }
+        let query: Query = query.parse()?;
+        newsreader::set_read_status(pool, &query, unread).await?;
+        tantivy.reindex_thread(pool, &query).await?;
+        nm::set_read_status(nm, &query, unread).await?;
         Ok(true)
     }
     async fn tag_add<'ctx>(
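Read state now lives in three stores, which is why the mutation fans out instead of branching per token; a commented restatement of the flow above (the raw `query` is the GraphQL argument):

```rust
let query: Query = query.parse()?;
newsreader::set_read_status(pool, &query, unread).await?; // Postgres is_read column
tantivy.reindex_thread(pool, &query).await?; // tantivy doc: is_read is indexed, so reindex
nm::set_read_status(nm, &query, unread).await?; // notmuch "unread" tag
```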
@@ -486,10 +533,19 @@ impl Mutation {
         let pool = ctx.data_unchecked::<PgPool>();
 
         tantivy.drop_and_load_index()?;
-        tantivy.reindex(pool).await?;
+        tantivy.reindex_all(pool).await?;
 
         Ok(true)
     }
+    async fn refresh<'ctx>(&self, ctx: &Context<'ctx>) -> Result<bool, Error> {
+        let nm = ctx.data_unchecked::<Notmuch>();
+        let tantivy = ctx.data_unchecked::<TantivyConnection>();
+        let pool = ctx.data_unchecked::<PgPool>();
+        // TODO: parallelize
+        info!("{}", String::from_utf8_lossy(&nm.new()?));
+        tantivy.refresh(pool).await?;
+        Ok(true)
+    }
 }
 
 pub type GraphqlSchema = Schema<QueryRoot, Mutation, EmptySubscription>;
@@ -18,15 +18,16 @@ use lol_html::{
 };
 use maplit::{hashmap, hashset};
 use scraper::{Html, Selector};
-use sqlx::{postgres::PgPool, types::time::PrimitiveDateTime};
+use sqlx::types::time::PrimitiveDateTime;
 use thiserror::Error;
 use tokio::sync::Mutex;
 use url::Url;
 
 use crate::{
     error::ServerError,
-    graphql::ThreadSummary,
-    newsreader::{extract_thread_id, is_newsreader_thread},
+    graphql::{Corpus, ThreadSummary},
+    newsreader::is_newsreader_thread,
+    nm::is_notmuch_thread_or_id,
 };
 
 const NEWSREADER_TAG_PREFIX: &'static str = "News/";
@@ -607,12 +608,13 @@ fn compute_offset_limit(
 #[derive(Debug)]
 pub struct Query {
     pub unread_only: bool,
-    pub tag: Option<String>,
-    pub uid: Option<String>,
+    pub tags: Vec<String>,
+    pub uids: Vec<String>,
     pub remainder: Vec<String>,
     pub is_notmuch: bool,
     pub is_newsreader: bool,
     pub is_tantivy: bool,
+    pub corpus: Option<Corpus>,
 }
 
 impl Query {
@@ -627,10 +629,10 @@ impl Query {
         if self.unread_only {
             parts.push("is:unread".to_string());
         }
-        if let Some(site) = &self.tag {
-            parts.push(format!("tag:{site}"));
+        for tag in &self.tags {
+            parts.push(format!("tag:{tag}"));
         }
-        if let Some(uid) = &self.uid {
+        for uid in &self.uids {
             parts.push(uid.clone());
         }
         parts.extend(self.remainder.clone());
@@ -642,48 +644,60 @@ impl FromStr for Query {
     type Err = Infallible;
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         let mut unread_only = false;
-        let mut tag = None;
-        let mut uid = None;
+        let mut tags = Vec::new();
+        let mut uids = Vec::new();
         let mut remainder = Vec::new();
         let mut is_notmuch = false;
-        let mut is_newsreader = false;
+        let is_newsreader = false;
         let mut is_tantivy = false;
+        let mut corpus = None;
         for word in s.split_whitespace() {
             if word == "is:unread" {
                 unread_only = true
             } else if word.starts_with("tag:") {
-                tag = Some(word["tag:".len()..].to_string())
+                tags.push(word["tag:".len()..].to_string());
+
+            /*
+            } else if word.starts_with("tag:") {
+                // Any tag that doesn't match site_prefix should explicitly set the site to something not in the
+                // database
+                site = Some(NON_EXISTENT_SITE_NAME.to_string());
+            */
+            } else if word.starts_with("corpus:") {
+                let c = word["corpus:".len()..].to_string();
+                corpus = c.parse::<Corpus>().map(|c| Some(c)).unwrap_or_else(|e| {
+                    warn!("Error parsing corpus '{c}': {e:?}");
+                    None
+                });
             } else if is_newsreader_thread(word) {
-                uid = Some(extract_thread_id(word).to_string())
+                uids.push(word.to_string());
+            } else if is_notmuch_thread_or_id(word) {
+                uids.push(word.to_string());
             } else if word == "is:mail" || word == "is:email" || word == "is:notmuch" {
                 is_notmuch = true;
             } else if word == "is:news" || word == "is:newsreader" {
-                is_newsreader = true;
+                is_tantivy = true;
             } else {
                 remainder.push(word.to_string());
             }
         }
-        // If we don't see any explicit filters for a corpus, flip them all on
-        if !(is_notmuch || is_newsreader) {
-            is_newsreader = true;
+        if corpus.is_none() && !(is_newsreader || is_notmuch || is_tantivy) {
+            // Don't set is_newsreader unless debugging, assume tantivy can handle it.
+            // Explicitely setting corpus:newsreader will by-pass this logic
+            // is_newsreader = true;
             is_notmuch = true;
+            is_tantivy = true;
         }
-        // TODO: decide if tantivy gets it's own life or replaces newsreader
-        is_tantivy = is_newsreader;
         Ok(Query {
             unread_only,
-            tag,
-            uid,
+            tags,
+            uids,
             remainder,
             is_notmuch,
             is_newsreader,
             is_tantivy,
+            corpus,
         })
     }
 }
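Putting the parser together, a hedged example of what a combined query string yields (the tag value and search word are hypothetical):

```rust
let q: Query = "is:unread tag:News/lwn corpus:tantivy rust".parse().unwrap();
assert!(q.unread_only);
assert_eq!(q.tags, ["News/lwn"]);
assert_eq!(q.corpus, Some(Corpus::Tantivy));
assert_eq!(q.remainder, ["rust"]);
// With an explicit corpus: the catch-all branch is skipped, so is_notmuch and
// is_tantivy stay false and only the named corpus is consulted.
```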
@@ -694,6 +708,7 @@ pub struct ThreadSummaryRecord {
     pub title: Option<String>,
     pub uid: String,
     pub name: Option<String>,
+    pub corpus: Corpus,
 }
 
 async fn thread_summary_from_row(r: ThreadSummaryRecord) -> ThreadSummary {
@@ -711,12 +726,14 @@ async fn thread_summary_from_row(r: ThreadSummaryRecord) -> ThreadSummary {
             .expect("post missing date")
             .assume_utc()
             .unix_timestamp() as isize,
-        date_relative: "TODO date_relative".to_string(),
+        date_relative: format!("{:?}", r.date),
+        //date_relative: "TODO date_relative".to_string(),
         matched: 0,
         total: 1,
         authors: r.name.unwrap_or_else(|| site.clone()),
         subject: title,
         tags,
+        corpus: r.corpus,
     }
 }
 async fn clean_title(title: &str) -> Result<String, ServerError> {
@@ -13,14 +13,14 @@ use crate::{
     clean_title, compute_offset_limit,
     config::Config,
     error::ServerError,
-    graphql::{NewsPost, Tag, Thread, ThreadSummary},
+    graphql::{Corpus, NewsPost, Tag, Thread, ThreadSummary},
     thread_summary_from_row, AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml,
-    SlurpContents, StripHtml, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX,
+    SlurpContents, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX,
     NEWSREADER_THREAD_PREFIX,
 };
 
-pub fn is_newsreader_search(query: &str) -> bool {
-    query.contains(NEWSREADER_TAG_PREFIX)
+pub fn is_newsreader_query(query: &Query) -> bool {
+    query.is_newsreader || query.corpus == Some(Corpus::Newsreader)
 }
 
 pub fn is_newsreader_thread(query: &str) -> bool {
@@ -28,7 +28,11 @@ pub fn is_newsreader_thread(query: &str) -> bool {
 }
 
 pub fn extract_thread_id(query: &str) -> &str {
-    &query[NEWSREADER_THREAD_PREFIX.len()..]
+    if query.starts_with(NEWSREADER_THREAD_PREFIX) {
+        &query[NEWSREADER_THREAD_PREFIX.len()..]
+    } else {
+        query
+    }
 }
 
 pub fn extract_site(tag: &str) -> &str {
@@ -39,14 +43,33 @@ pub fn make_news_tag(tag: &str) -> String {
     format!("tag:{NEWSREADER_TAG_PREFIX}{tag}")
 }
 
+fn site_from_tags(tags: &[String]) -> Option<String> {
+    for t in tags {
+        if t.starts_with(NEWSREADER_TAG_PREFIX) {
+            return Some(extract_site(t).to_string());
+        }
+    }
+    None
+}
+
 pub async fn count(pool: &PgPool, query: &Query) -> Result<usize, ServerError> {
+    if !is_newsreader_query(query) {
+        return Ok(0);
+    }
     if !query.remainder.is_empty() {
         // TODO: handle full text search against all sites, for now, early return if search words
         // are specified.
         return Ok(0);
     }
 
-    let row = sqlx::query_file!("sql/count.sql", query.tag, query.unread_only)
+    let site = site_from_tags(&query.tags);
+    if !query.tags.is_empty() && site.is_none() {
+        // Newsreader can only handle all sites read/unread queries, anything with a non-site tag
+        // isn't supported
+        return Ok(0);
+    }
+
+    let row = sqlx::query_file!("sql/count.sql", site, query.unread_only)
         .fetch_one(pool)
         .await?;
     Ok(row.count.unwrap_or(0).try_into().unwrap_or(0))
@@ -61,12 +84,22 @@ pub async fn search(
     query: &Query,
 ) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
     info!("search({after:?} {before:?} {first:?} {last:?} {query:?}");
+    if !is_newsreader_query(query) {
+        return Ok(Vec::new());
+    }
     if !query.remainder.is_empty() {
         // TODO: handle full text search against all sites, for now, early return if search words
         // are specified.
         return Ok(Vec::new());
     }
 
+    let site = site_from_tags(&query.tags);
+    if !query.tags.is_empty() && site.is_none() {
+        // Newsreader can only handle all sites read/unread queries, anything with a non-site tag
+        // isn't supported
+        return Ok(Vec::new());
+    }
+
     let (offset, mut limit) = compute_offset_limit(after, before, first, last);
     if before.is_none() {
         // When searching forward, the +1 is to see if there are more pages of data available.
@@ -75,7 +108,6 @@ pub async fn search(
         limit = limit + 1;
     }
 
-    let site = query.tag.as_ref().map(|t| extract_site(&t).to_string());
     info!(
         "search offset {offset} limit {limit} site {site:?} unread_only {}",
         query.unread_only
@@ -102,6 +134,7 @@ pub async fn search(
             title: r.title,
             uid: r.uid,
             name: r.name,
+            corpus: Corpus::Newsreader,
         })
         .await,
     ));
@@ -243,12 +276,22 @@ pub async fn thread(
 }
 pub async fn set_read_status<'ctx>(
     pool: &PgPool,
-    query: &str,
+    query: &Query,
     unread: bool,
 ) -> Result<bool, ServerError> {
-    let query: Query = query.parse()?;
-    sqlx::query_file!("sql/set_unread.sql", !unread, query.uid)
-        .execute(pool)
-        .await?;
+    // TODO: make single query when query.uids.len() > 1
+    let uids: Vec<_> = query
+        .uids
+        .iter()
+        .filter(|uid| is_newsreader_thread(uid))
+        .map(
+            |uid| extract_thread_id(uid), // TODO strip prefix
+        )
+        .collect();
+    for uid in uids {
+        sqlx::query_file!("sql/set_unread.sql", !unread, uid)
+            .execute(pool)
+            .await?;
+    }
     Ok(true)
 }
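The loop issues one UPDATE per uid; the TODO hints at batching. One possible single-query shape, reusing the same `ANY`-binding as posts-from-uids.sql (hypothetical, not part of this commit, and it assumes the uids are collected as owned `String`s):

```rust
// Hypothetical batched variant of sql/set_unread.sql.
sqlx::query!(
    "UPDATE post SET is_read = $1 WHERE uid = ANY($2)",
    !unread,
    &uids[..]
)
.execute(pool)
.await?;
```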
@@ -14,10 +14,10 @@ use crate::{
     compute_offset_limit,
     error::ServerError,
     graphql::{
-        Attachment, Body, DispositionType, Email, EmailThread, Header, Html, Message, PlainText,
-        Tag, Thread, ThreadSummary, UnhandledContentType,
+        Attachment, Body, Corpus, DispositionType, Email, EmailThread, Header, Html, Message,
+        PlainText, Tag, Thread, ThreadSummary, UnhandledContentType,
     },
-    linkify_html, InlineStyle, SanitizeHtml, Transformer,
+    linkify_html, InlineStyle, Query, SanitizeHtml, Transformer,
 };
 
 const TEXT_PLAIN: &'static str = "text/plain";
@ -31,6 +31,14 @@ const MULTIPART_RELATED: &'static str = "multipart/related";
|
||||
|
||||
const MAX_RAW_MESSAGE_SIZE: usize = 100_000;
|
||||
|
||||
fn is_notmuch_query(query: &Query) -> bool {
|
||||
query.is_notmuch || query.corpus == Some(Corpus::Notmuch)
|
||||
}
|
||||
|
||||
pub fn is_notmuch_thread_or_id(id: &str) -> bool {
|
||||
id.starts_with("id:") || id.starts_with("thread:")
|
||||
}
|
||||
|
||||
// TODO(wathiede): decide good error type
|
||||
pub fn threadset_to_messages(thread_set: notmuch::ThreadSet) -> Result<Vec<Message>, ServerError> {
|
||||
for t in thread_set.0 {
|
||||
@@ -39,8 +47,12 @@ pub fn threadset_to_messages(thread_set: notmuch::ThreadSet) -> Result<Vec<Messa
     Ok(Vec::new())
 }
 
-pub async fn count(nm: &Notmuch, query: &str) -> Result<usize, ServerError> {
-    Ok(nm.count(query)?)
+pub async fn count(nm: &Notmuch, query: &Query) -> Result<usize, ServerError> {
+    if !is_notmuch_query(query) {
+        return Ok(0);
+    }
+    let query = query.to_notmuch();
+    Ok(nm.count(&query)?)
 }
 
 pub async fn search(
@@ -49,8 +61,12 @@ pub async fn search(
     before: Option<i32>,
     first: Option<i32>,
     last: Option<i32>,
-    query: String,
+    query: &Query,
 ) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
+    if !is_notmuch_query(query) {
+        return Ok(Vec::new());
+    }
+    let query = query.to_notmuch();
     let (offset, mut limit) = compute_offset_limit(after, before, first, last);
     if before.is_none() {
         // When searching forward, the +1 is to see if there are more pages of data available.
@@ -75,6 +91,7 @@ pub async fn search(
                     authors: ts.authors,
                     subject: ts.subject,
                     tags: ts.tags,
+                    corpus: Corpus::Notmuch,
                 },
             )
         })
@@ -770,13 +787,21 @@ fn render_content_type_tree(m: &ParsedMail) -> String {
 
 pub async fn set_read_status<'ctx>(
     nm: &Notmuch,
-    query: &str,
+    query: &Query,
     unread: bool,
 ) -> Result<bool, ServerError> {
-    if unread {
-        nm.tag_add("unread", &format!("{query}"))?;
-    } else {
-        nm.tag_remove("unread", &format!("{query}"))?;
+    let uids: Vec<_> = query
+        .uids
+        .iter()
+        .filter(|uid| is_notmuch_thread_or_id(uid))
+        .collect();
+    info!("set_read_status({unread} {uids:?})");
+    for uid in uids {
+        if unread {
+            nm.tag_add("unread", uid)?;
+        } else {
+            nm.tag_remove("unread", uid)?;
+        }
     }
     Ok(true)
 }
@@ -1,11 +1,26 @@
-use log::info;
-use sqlx::postgres::PgPool;
-use tantivy::{schema::Value, Index, TantivyError};
+use std::collections::HashSet;
 
-use crate::{
-    error::ServerError, graphql::ThreadSummary, thread_summary_from_row, Query, ThreadSummaryRecord,
+use log::{debug, error, info};
+use sqlx::{postgres::PgPool, types::time::PrimitiveDateTime};
+use tantivy::{
+    collector::{DocSetCollector, TopDocs},
+    query,
+    query::{AllQuery, BooleanQuery, Occur, QueryParser, TermQuery},
+    schema::{Facet, IndexRecordOption, Value},
+    DocAddress, Index, Searcher, TantivyDocument, TantivyError, Term,
+};
+
+use crate::{
+    compute_offset_limit,
+    error::ServerError,
+    graphql::{Corpus, ThreadSummary},
+    newsreader::{extract_thread_id, is_newsreader_thread},
+    thread_summary_from_row, Query, ThreadSummaryRecord,
 };
 
+pub fn is_tantivy_query(query: &Query) -> bool {
+    query.is_tantivy || query.corpus == Some(Corpus::Tantivy)
+}
 pub struct TantivyConnection {
     db_path: String,
     //index: Index,
@@ -27,7 +42,67 @@ impl TantivyConnection {
             db_path: tantivy_db_path.to_string(),
         })
     }
-    pub async fn reindex(&self, pool: &PgPool) -> Result<(), ServerError> {
+    pub async fn refresh(&self, pool: &PgPool) -> Result<(), ServerError> {
+        let start_time = std::time::Instant::now();
+        let p_uids: Vec<_> = sqlx::query_file!("sql/all-uids.sql")
+            .fetch_all(pool)
+            .await?
+            .into_iter()
+            .map(|r| r.uid)
+            .collect();
+        info!(
+            "refresh from postgres got {} uids in {}",
+            p_uids.len(),
+            start_time.elapsed().as_secs_f32()
+        );
+
+        let start_time = std::time::Instant::now();
+        let (searcher, _query) = self.searcher_and_query("")?;
+        let docs = searcher.search(&AllQuery, &DocSetCollector)?;
+        let uid = self.get_index()?.schema().get_field("uid")?;
+        let t_uids: Vec<_> = docs
+            .into_iter()
+            .map(|doc_address| {
+                searcher
+                    .doc(doc_address)
+                    .map(|doc: TantivyDocument| {
+                        debug!("doc: {doc:#?}");
+                        doc.get_first(uid)
+                            .expect("uid")
+                            .as_str()
+                            .expect("as_str")
+                            .to_string()
+                    })
+                    .expect("searcher.doc")
+            })
+            .collect();
+
+        info!(
+            "refresh tantivy got {} uids in {}",
+            t_uids.len(),
+            start_time.elapsed().as_secs_f32()
+        );
+        let t_set: HashSet<_> = t_uids.into_iter().collect();
+        let need: Vec<_> = p_uids
+            .into_iter()
+            .filter(|uid| !t_set.contains(uid.as_str()))
+            .collect();
+        if !need.is_empty() {
+            info!(
+                "need to reindex {} uids: {:?}...",
+                need.len(),
+                &need[..need.len().min(10)]
+            );
+        }
+        let batch_size = 1000;
+        let uids: Vec<_> = need[..need.len().min(batch_size)]
+            .into_iter()
+            .cloned()
+            .collect();
+        self.reindex_uids(pool, &uids).await
+    }
+    async fn reindex_uids(&self, pool: &PgPool, uids: &[String]) -> Result<(), ServerError> {
         // TODO: add SlurpContents and convert HTML to text
         use tantivy::{doc, Term};
 
         let start_time = std::time::Instant::now();
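`refresh` is an incremental reconciliation: pull every uid from Postgres, pull every uid already in tantivy, and index only the difference, capped at `batch_size` per call, so repeated refreshes converge without rebuilding the index. The core set logic as a standalone sketch:

```rust
use std::collections::HashSet;

// Index only what Postgres has and tantivy lacks (first `batch_size` per call).
fn to_reindex(p_uids: Vec<String>, t_uids: Vec<String>, batch_size: usize) -> Vec<String> {
    let t_set: HashSet<String> = t_uids.into_iter().collect();
    let need: Vec<String> = p_uids
        .into_iter()
        .filter(|uid| !t_set.contains(uid))
        .collect();
    need[..need.len().min(batch_size)].to_vec()
}
```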
@@ -44,11 +119,20 @@ impl TantivyConnection {
         let is_read = schema.get_field("is_read")?;
         let uid = schema.get_field("uid")?;
         let id = schema.get_field("id")?;
+        let tag = schema.get_field("tag")?;
 
-        let rows = sqlx::query_file!("sql/all-posts.sql")
+        info!("reindexing {} posts", uids.len());
+        let rows = sqlx::query_file_as!(PostgresDoc, "sql/posts-from-uids.sql", uids)
            .fetch_all(pool)
            .await?;
 
+        if uids.len() != rows.len() {
+            error!(
+                "Had {} uids and only got {} rows: uids {uids:?}",
+                uids.len(),
+                rows.len()
+            );
+        }
         let total = rows.len();
         for (i, r) in rows.into_iter().enumerate() {
             if i % 10_000 == 0 {
@@ -57,26 +141,76 @@ impl TantivyConnection {
                     start_time.elapsed().as_secs_f32()
                 );
             }
 
             let id_term = Term::from_field_text(uid, &r.uid);
             index_writer.delete_term(id_term);
+            let slug = r.site;
+            let tag_facet = Facet::from(&format!("/News/{slug}"));
             index_writer.add_document(doc!(
-                site => r.site.expect("UNKOWN_SITE"),
-                title => r.title.expect("UNKOWN_TITLE"),
+                site => slug.clone(),
+                title => r.title,
                 // TODO: clean and extract text from HTML
-                summary => r.summary.expect("UNKNOWN_SUMMARY"),
-                link => r.link.expect("link"),
-                date => tantivy::DateTime::from_primitive(r.date.expect("date")),
-                is_read => r.is_read.expect("is_read"),
+                summary => r.summary,
+                link => r.link,
+                date => tantivy::DateTime::from_primitive(r.date),
+                is_read => r.is_read,
                 uid => r.uid,
-                id => r.id as i64,
+                id => r.id as u64,
+                tag => tag_facet,
             ))?;
         }
 
-        index_writer.commit()?;
-
         info!("took {:.2}s to reindex", start_time.elapsed().as_secs_f32());
 
+        index_writer.commit()?;
         Ok(())
     }
+    pub async fn reindex_thread(&self, pool: &PgPool, query: &Query) -> Result<(), ServerError> {
+        let uids: Vec<_> = query
+            .uids
+            .iter()
+            .filter(|uid| is_newsreader_thread(uid))
+            .map(|uid| extract_thread_id(uid).to_string())
+            .collect();
+        Ok(self.reindex_uids(pool, &uids).await?)
+    }
+    pub async fn reindex_all(&self, pool: &PgPool) -> Result<(), ServerError> {
+        let rows = sqlx::query_file!("sql/all-posts.sql")
+            .fetch_all(pool)
+            .await?;
+
+        let uids: Vec<String> = rows.into_iter().map(|r| r.uid).collect();
+        self.reindex_uids(pool, &uids).await?;
+        Ok(())
+    }
+    fn searcher_and_query(
+        &self,
+        term: &str,
+    ) -> Result<(Searcher, Box<dyn query::Query>), ServerError> {
+        let index = self.get_index()?;
+        let reader = index.reader()?;
+        let schema = index.schema();
+        let searcher = reader.searcher();
+        let title = schema.get_field("title")?;
+        let summary = schema.get_field("summary")?;
+        let query_parser = QueryParser::for_index(&index, vec![title, summary]);
+        // Tantivy uses '*' to match all docs, not empty string
+        let term = if term.is_empty() { "*" } else { term };
+
+        info!("query_parser('{term}')");
+        let query = query_parser.parse_query(&term)?;
+        Ok((searcher, query))
+    }
+
+    pub async fn count(&self, query: &Query) -> Result<usize, ServerError> {
+        if !is_tantivy_query(query) {
+            return Ok(0);
+        }
+        use tantivy::collector::Count;
+        let term = query.remainder.join(" ");
+        let (searcher, query) = self.searcher_and_query(&term)?;
+        Ok(searcher.search(&query, &Count)?)
+    }
     pub async fn search(
         &self,
         pool: &PgPool,
@@ -86,28 +220,51 @@ impl TantivyConnection {
         last: Option<i32>,
         query: &Query,
     ) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
-        use tantivy::{collector::TopDocs, query::QueryParser, Document, TantivyDocument};
-        // TODO: set based on function parameters
-        let offset = 0;
+        if !is_tantivy_query(query) {
+            return Ok(Vec::new());
+        }
+        let (offset, mut limit) = compute_offset_limit(after, before, first, last);
+        if before.is_none() {
+            // When searching forward, the +1 is to see if there are more pages of data available.
+            // Searching backwards implies there's more pages forward, because the value represented by
+            // `before` is on the next page.
+            limit = limit + 1;
+        }
 
-        let index = self.get_index()?;
-        let reader = index.reader()?;
-        let schema = index.schema();
-        let searcher = reader.searcher();
-        let site = schema.get_field("site")?;
-        let uid = schema.get_field("uid")?;
-        let title = schema.get_field("title")?;
-        let summary = schema.get_field("summary")?;
-        let date = schema.get_field("date")?;
-        let query_parser = QueryParser::for_index(&index, vec![title, summary]);
-
-        let query = query_parser.parse_query(&query.remainder.join(" "))?;
-        let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
+        let term = query.remainder.join(" ");
+        let (searcher, tantivy_query) = self.searcher_and_query(&term)?;
+        let tag = self.get_index()?.schema().get_field("tag")?;
+        let is_read = self.get_index()?.schema().get_field("is_read")?;
+        let mut terms = vec![(Occur::Must, tantivy_query)];
+        for t in &query.tags {
+            let facet = Facet::from(&format!("/{t}"));
+            let facet_term = Term::from_facet(tag, &facet);
+            let facet_term_query = Box::new(TermQuery::new(facet_term, IndexRecordOption::Basic));
+            terms.push((Occur::Must, facet_term_query));
+        }
+        if query.unread_only {
+            info!("searching for unread only");
+            let term = Term::from_field_bool(is_read, false);
+            terms.push((
+                Occur::Must,
+                Box::new(TermQuery::new(term, IndexRecordOption::Basic)),
+            ));
+        }
+        let search_query = BooleanQuery::new(terms);
+        info!("Tantivy::search(term '{term}', off {offset}, lim {limit}, search_query {search_query:?})");
+        let top_docs = searcher.search(
+            &search_query,
+            &TopDocs::with_limit(limit as usize)
+                .and_offset(offset as usize)
+                .order_by_u64_field("date", tantivy::index::Order::Desc),
+        )?;
+        info!("search found {} docs", top_docs.len());
+        let uid = self.get_index()?.schema().get_field("uid")?;
         let uids = top_docs
             .into_iter()
-            .map(|(_, doc_address)| {
+            .map(|(_, doc_address): (u64, DocAddress)| {
                 searcher.doc(doc_address).map(|doc: TantivyDocument| {
                     debug!("doc: {doc:#?}");
                     doc.get_first(uid)
                         .expect("doc missing uid")
                         .as_str()
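The facet paths line up across indexing and querying: documents are indexed with a `/News/<site>` facet, and a query tag like `News/lwn` becomes the facet `/News/lwn`, so `tag:` filters translate into exact facet term matches ANDed with the full-text query. A sketch of that filter construction in isolation (the `tag` field handle comes from the schema; the tag value is hypothetical):

```rust
use tantivy::query::{BooleanQuery, Occur, Query, TermQuery};
use tantivy::schema::{Facet, IndexRecordOption};
use tantivy::Term;

// AND a full-text query with a facet filter equivalent to tag:News/lwn.
fn with_tag_filter(text_query: Box<dyn Query>, tag: tantivy::schema::Field) -> BooleanQuery {
    let facet = Facet::from("/News/lwn");
    let term = Term::from_facet(tag, &facet);
    let filter: Box<dyn Query> = Box::new(TermQuery::new(term, IndexRecordOption::Basic));
    BooleanQuery::new(vec![(Occur::Must, text_query), (Occur::Must, filter)])
}
```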
@@ -134,6 +291,7 @@ impl TantivyConnection {
             title: r.title,
             uid: r.uid,
             name: r.name,
+            corpus: Corpus::Tantivy,
         })
         .await,
     ));
@@ -157,11 +315,23 @@ fn create_news_db(tantivy_db_path: &str) -> Result<(), TantivyError> {
     schema_builder.add_text_field("summary", TEXT);
     schema_builder.add_text_field("link", STRING | STORED);
     schema_builder.add_date_field("date", FAST | INDEXED | STORED);
-    schema_builder.add_bool_field("is_read", FAST);
+    schema_builder.add_bool_field("is_read", FAST | INDEXED | STORED);
     schema_builder.add_text_field("uid", STRING | STORED);
-    schema_builder.add_i64_field("id", FAST);
+    schema_builder.add_u64_field("id", FAST);
+    schema_builder.add_facet_field("tag", FacetOptions::default());
 
     let schema = schema_builder.build();
     Index::create_in_dir(tantivy_db_path, schema)?;
     Ok(())
 }
+
+struct PostgresDoc {
+    site: String,
+    title: String,
+    summary: String,
+    link: String,
+    date: PrimitiveDateTime,
+    is_read: bool,
+    uid: String,
+    id: i32,
+}
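Assembled, the updated index schema looks roughly like this — a sketch; the site and title lines are assumptions inferred from the fields read elsewhere in the commit, and only the lines shown in the hunk above are confirmed:

```rust
use tantivy::schema::{FacetOptions, Schema, FAST, INDEXED, STORED, STRING, TEXT};
use tantivy::{Index, TantivyError};

fn create_news_db(tantivy_db_path: &str) -> Result<(), TantivyError> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("site", STRING | STORED); // assumed
    schema_builder.add_text_field("title", TEXT | STORED); // assumed
    schema_builder.add_text_field("summary", TEXT);
    schema_builder.add_text_field("link", STRING | STORED);
    schema_builder.add_date_field("date", FAST | INDEXED | STORED);
    // is_read is now INDEXED so the unread-only TermQuery can hit it.
    schema_builder.add_bool_field("is_read", FAST | INDEXED | STORED);
    schema_builder.add_text_field("uid", STRING | STORED);
    // id switched from i64 to u64 to match the `r.id as u64` cast above.
    schema_builder.add_u64_field("id", FAST);
    // New facet field carrying "/News/<site>" paths for tag filtering.
    schema_builder.add_facet_field("tag", FacetOptions::default());
    let schema = schema_builder.build();
    Index::create_in_dir(tantivy_db_path, schema)?;
    Ok(())
}
```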
@@ -14,6 +14,7 @@ query FrontPageQuery($query: String!, $after: String $before: String, $first: In
           subject
           authors
           tags
+          corpus
         }
       }
       tags {
web/graphql/refresh.graphql (new file, 3 lines)
@@ -0,0 +1,3 @@
+mutation RefreshMutation {
+  refresh
+}
@@ -232,6 +232,35 @@
       "name": "Boolean",
       "possibleTypes": null
     },
+    {
+      "description": null,
+      "enumValues": [
+        {
+          "deprecationReason": null,
+          "description": null,
+          "isDeprecated": false,
+          "name": "NOTMUCH"
+        },
+        {
+          "deprecationReason": null,
+          "description": null,
+          "isDeprecated": false,
+          "name": "NEWSREADER"
+        },
+        {
+          "deprecationReason": null,
+          "description": null,
+          "isDeprecated": false,
+          "name": "TANTIVY"
+        }
+      ],
+      "fields": null,
+      "inputFields": null,
+      "interfaces": null,
+      "kind": "ENUM",
+      "name": "Corpus",
+      "possibleTypes": null
+    },
     {
       "description": null,
       "enumValues": [
@@ -850,6 +879,38 @@
             "ofType": null
           }
         }
       },
+      {
+        "args": [],
+        "deprecationReason": null,
+        "description": "Drop and recreate tantivy index. Warning this is slow",
+        "isDeprecated": false,
+        "name": "dropAndLoadIndex",
+        "type": {
+          "kind": "NON_NULL",
+          "name": null,
+          "ofType": {
+            "kind": "SCALAR",
+            "name": "Boolean",
+            "ofType": null
+          }
+        }
+      },
+      {
+        "args": [],
+        "deprecationReason": null,
+        "description": null,
+        "isDeprecated": false,
+        "name": "refresh",
+        "type": {
+          "kind": "NON_NULL",
+          "name": null,
+          "ofType": {
+            "kind": "SCALAR",
+            "name": "Boolean",
+            "ofType": null
+          }
+        }
+      }
     ],
     "inputFields": null,
@@ -1536,6 +1597,22 @@
           }
         }
       },
+      {
+        "args": [],
+        "deprecationReason": null,
+        "description": null,
+        "isDeprecated": false,
+        "name": "corpus",
+        "type": {
+          "kind": "NON_NULL",
+          "name": null,
+          "ofType": {
+            "kind": "ENUM",
+            "name": "Corpus",
+            "ofType": null
+          }
+        }
+      }
     ],
     "inputFields": null,
@@ -44,6 +44,14 @@ pub struct AddTagMutation;
 )]
 pub struct RemoveTagMutation;
 
+#[derive(GraphQLQuery)]
+#[graphql(
+    schema_path = "graphql/schema.json",
+    query_path = "graphql/refresh.graphql",
+    response_derives = "Debug"
+)]
+pub struct RefreshMutation;
+
 pub async fn send_graphql<Body, Resp>(body: Body) -> Result<graphql_client::Response<Resp>, Error>
 where
     Body: Serialize,
@@ -111,7 +111,17 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
         Msg::Noop => {}
         Msg::RefreshStart => {
             model.refreshing_state = RefreshingState::Loading;
-            orders.perform_cmd(async move { Msg::RefreshDone(api::refresh_request().await.err()) });
+            orders.perform_cmd(async move {
+                Msg::RefreshDone(
+                    send_graphql::<_, graphql::refresh_mutation::ResponseData>(
+                        graphql::RefreshMutation::build_query(
+                            graphql::refresh_mutation::Variables {},
+                        ),
+                    )
+                    .await
+                    .err(),
+                )
+            });
         }
         Msg::RefreshDone(err) => {
             model.refreshing_state = if let Some(err) = err {
@@ -203,7 +203,7 @@ fn view_search_results(
             }),
         ]],
         td![
-            C!["from"],
+            C!["from", format!("corpus-{:?} ", r.corpus)],
            a![
                 C!["has-text-light", "text"],
                 attrs! {
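Each result row now also carries a corpus-derived CSS class, handy for styling or for eyeballing which backend produced a hit. The class text comes straight from the derived `Debug` impl:

```rust
// e.g. the class emitted for a tantivy hit (note the trailing space in the format string):
assert_eq!(format!("corpus-{:?} ", Corpus::Tantivy), "corpus-Tantivy ");
```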