web & server: using tantivy for news post search

Bill Thiede 2024-09-29 16:28:05 -07:00
parent f36d1e0c29
commit 3ec1741f10
22 changed files with 737 additions and 170 deletions
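For context on the new backend, a minimal sketch of the tantivy index-and-search flow this commit adopts, assuming a recent tantivy 0.2x API; field names loosely mirror the schema added in the tantivy module below, and an in-RAM index stands in for the on-disk one the server creates:

```rust
// A hypothetical, self-contained example; not the server's actual code.
use tantivy::{
    collector::TopDocs,
    doc,
    query::QueryParser,
    schema::{Schema, STORED, STRING, TEXT},
    Index, TantivyDocument,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Text fields similar to the news-post schema: title/summary are tokenized,
    // uid is stored verbatim so hits can be joined back to Postgres rows.
    let mut builder = Schema::builder();
    let title = builder.add_text_field("title", TEXT | STORED);
    let summary = builder.add_text_field("summary", TEXT);
    let uid = builder.add_text_field("uid", STRING | STORED);
    let schema = builder.build();

    // The server uses Index::create_in_dir; RAM keeps the sketch self-contained.
    let index = Index::create_in_ram(schema);
    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(
        title => "Using tantivy for news post search",
        summary => "Full-text search over feed posts stored in Postgres",
        uid => "example-uid-1",
    ))?;
    writer.commit()?;

    // Parse a free-text query against title/summary and fetch the top hits.
    let reader = index.reader()?;
    let searcher = reader.searcher();
    let query = QueryParser::for_index(&index, vec![title, summary]).parse_query("news search")?;
    for (_score, addr) in searcher.search(&query, &TopDocs::with_limit(10))? {
        let hit: TantivyDocument = searcher.doc(addr)?;
        println!("{}", hit.to_json(&index.schema()));
    }
    Ok(())
}
```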

View File

@ -1,6 +1,6 @@
{
"db_name": "PostgreSQL",
"query": "SELECT\n COUNT(*) count\nFROM\n post\nWHERE\n ($1::text IS NULL OR site = $1)\n AND (\n NOT $2\n OR NOT is_read\n )\n",
"query": "SELECT\n COUNT(*) count\nFROM\n post\nWHERE\n (\n $1 :: text IS NULL\n OR site = $1\n )\n AND (\n NOT $2\n OR NOT is_read\n )\n",
"describe": {
"columns": [
{
@ -19,5 +19,5 @@
null
]
},
"hash": "e28b890e308f483aa6bd08617548ae66294ae1e99b1cab49f5f4211e0fd7d419"
"hash": "13a9193d91264b678c18df032cc61b523e6a804ae9569da18455eb380eadb515"
}
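The cached query above backs sql/count.sql. A hedged sketch of a standalone caller, mirroring the call shape in the newsreader module: the optional site filter binds an Option<String> to $1, so passing None disables the site = $1 clause.

```rust
// Hypothetical standalone caller; the real call lives in the newsreader count path.
// sqlx::query_file! checks sql/count.sql at compile time, so an Option<String>
// binds to the "$1::text IS NULL OR site = $1" pattern and a bool drives $2.
async fn count_posts(
    pool: &sqlx::PgPool,
    site: Option<String>,
    unread_only: bool,
) -> Result<i64, sqlx::Error> {
    let row = sqlx::query_file!("sql/count.sql", site, unread_only)
        .fetch_one(pool)
        .await?;
    // COUNT(*) is inferred as nullable, hence the Option.
    Ok(row.count.unwrap_or(0))
}
```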

View File

@ -1,6 +1,6 @@
{
"db_name": "PostgreSQL",
"query": "SELECT\n site,\n title,\n summary,\n link,\n date,\n is_read,\n uid,\n id\nFROM post\n",
"query": "SELECT\n site,\n title,\n summary,\n link,\n date,\n is_read,\n uid,\n p.id id\nFROM\n post AS p\n JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts\nORDER BY\n date DESC;\n",
"describe": {
"columns": [
{
@ -58,5 +58,5 @@
false
]
},
"hash": "1b2244c9b9b64a1395d8d266f5df5352242bbe5efe481b0852e1c1d4b40584a7"
"hash": "4cbb6252a0b76f6a452ee09abb9f4b99308a74db33cdfb91baaa1bb98dc53a82"
}

View File

@ -0,0 +1,64 @@
{
"db_name": "PostgreSQL",
"query": "SELECT\n site AS \"site!\",\n title AS \"title!\",\n summary AS \"summary!\",\n link AS \"link!\",\n date AS \"date!\",\n is_read AS \"is_read!\",\n uid AS \"uid!\",\n p.id id\nFROM\n post p\n JOIN feed f ON p.site = f.slug\nWHERE\n uid = ANY ($1);\n",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "site!",
"type_info": "Text"
},
{
"ordinal": 1,
"name": "title!",
"type_info": "Text"
},
{
"ordinal": 2,
"name": "summary!",
"type_info": "Text"
},
{
"ordinal": 3,
"name": "link!",
"type_info": "Text"
},
{
"ordinal": 4,
"name": "date!",
"type_info": "Timestamp"
},
{
"ordinal": 5,
"name": "is_read!",
"type_info": "Bool"
},
{
"ordinal": 6,
"name": "uid!",
"type_info": "Text"
},
{
"ordinal": 7,
"name": "id",
"type_info": "Int4"
}
],
"parameters": {
"Left": [
"TextArray"
]
},
"nullable": [
true,
true,
true,
true,
true,
true,
false,
false
]
},
"hash": "700757f6fb175b1aa246816ac60501576a38db222380c034ae7bc030ba6a915e"
}

View File

@ -0,0 +1,20 @@
{
"db_name": "PostgreSQL",
"query": "SELECT\n uid\nFROM\n post AS p\n JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts\n;\n",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "uid",
"type_info": "Text"
}
],
"parameters": {
"Left": []
},
"nullable": [
false
]
},
"hash": "c16821d20c1e6012d6079008889af16eed687c74d9d77957ad08aaaeefcd1ff2"
}

View File

@ -0,0 +1,52 @@
{
"db_name": "PostgreSQL",
"query": "SELECT\n site,\n date,\n is_read,\n title,\n uid,\n name\nFROM\n post p\n JOIN feed f ON p.site = f.slug\nWHERE\n uid = ANY ($1)\nORDER BY\n date DESC;\n",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "site",
"type_info": "Text"
},
{
"ordinal": 1,
"name": "date",
"type_info": "Timestamp"
},
{
"ordinal": 2,
"name": "is_read",
"type_info": "Bool"
},
{
"ordinal": 3,
"name": "title",
"type_info": "Text"
},
{
"ordinal": 4,
"name": "uid",
"type_info": "Text"
},
{
"ordinal": 5,
"name": "name",
"type_info": "Text"
}
],
"parameters": {
"Left": [
"TextArray"
]
},
"nullable": [
true,
true,
true,
true,
false,
true
]
},
"hash": "ed34545b5fc681e9df9fc47c6ff93e5eac874eb8f95f55a27859537c51bca4f4"
}

View File

@ -6,6 +6,9 @@ SELECT
date,
is_read,
uid,
id
FROM post
WHERE title ILIKE '%grapheme%' OR summary ILIKE '%grapheme%';
p.id id
FROM
post AS p
JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts
ORDER BY
date DESC;

server/sql/all-uids.sql (new file)
View File

@ -0,0 +1,6 @@
SELECT
uid
FROM
post AS p
JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts
;

View File

@ -3,7 +3,10 @@ SELECT
FROM
post
WHERE
($1::text IS NULL OR site = $1)
(
$1 :: text IS NULL
OR site = $1
)
AND (
NOT $2
OR NOT is_read

View File

@ -0,0 +1,14 @@
SELECT
site AS "site!",
title AS "title!",
summary AS "summary!",
link AS "link!",
date AS "date!",
is_read AS "is_read!",
uid AS "uid!",
p.id id
FROM
post p
JOIN feed f ON p.site = f.slug
WHERE
uid = ANY ($1);
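A hedged sketch of how this query is typically called: a Rust string slice binds to the text[] parameter consumed by "uid = ANY ($1)". The server itself uses query_file_as! into its PostgresDoc struct.

```rust
// Hypothetical standalone caller; field names come from the "title!" overrides above.
async fn titles_for_uids(
    pool: &sqlx::PgPool,
    uids: &[String],
) -> Result<Vec<String>, sqlx::Error> {
    let rows = sqlx::query_file!("sql/posts-from-uids.sql", uids)
        .fetch_all(pool)
        .await?;
    Ok(rows.into_iter().map(|r| r.title).collect())
}
```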

View File

@ -10,4 +10,5 @@ FROM
JOIN feed f ON p.site = f.slug
WHERE
uid = ANY ($1)
;
ORDER BY
date DESC;

View File

@ -27,11 +27,6 @@ use server::{
};
use sqlx::postgres::PgPool;
#[get("/refresh")]
async fn refresh(nm: &State<Notmuch>) -> Result<Json<String>, Debug<NotmuchError>> {
Ok(Json(String::from_utf8_lossy(&nm.new()?).to_string()))
}
#[get("/show/<query>/pretty")]
async fn show_pretty(
nm: &State<Notmuch>,
@ -209,7 +204,6 @@ async fn main() -> Result<(), Box<dyn Error>> {
shared::urls::MOUNT_POINT,
routes![
original,
refresh,
show_pretty,
show,
graphql_query,

View File

@ -1,3 +1,5 @@
use std::str::FromStr;
use async_graphql::{
connection::{self, Connection, Edge, OpaqueCursor},
Context, EmptySubscription, Enum, Error, FieldResult, InputObject, Object, Schema,
@ -16,6 +18,26 @@ pub type UnixTime = isize;
/// # Thread ID, sans "thread:"
pub type ThreadId = String;
#[derive(Debug, Enum, Copy, Clone, Eq, PartialEq)]
pub enum Corpus {
Notmuch,
Newsreader,
Tantivy,
}
impl FromStr for Corpus {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"notmuch" => Corpus::Notmuch,
"newsreader" => Corpus::Newsreader,
"tantivy" => Corpus::Tantivy,
s => return Err(format!("unknown corpus: '{s}'")),
})
}
}
// TODO: add is_read field and remove all use of 'tag:unread'
#[derive(Debug, SimpleObject)]
pub struct ThreadSummary {
pub thread: ThreadId,
@ -30,6 +52,7 @@ pub struct ThreadSummary {
pub authors: String,
pub subject: String,
pub tags: Vec<String>,
pub corpus: Corpus,
}
#[derive(Debug, Union)]
@ -237,13 +260,16 @@ impl QueryRoot {
async fn count<'ctx>(&self, ctx: &Context<'ctx>, query: String) -> Result<usize, Error> {
let nm = ctx.data_unchecked::<Notmuch>();
let pool = ctx.data_unchecked::<PgPool>();
let tantivy = ctx.data_unchecked::<TantivyConnection>();
let newsreader_query: Query = query.parse()?;
let newsreader_count = newsreader::count(pool, &newsreader_query).await?;
let notmuch_count = nm::count(nm, &newsreader_query.to_notmuch()).await?;
info!("count {newsreader_query:?} newsreader count {newsreader_count} notmuch count {notmuch_count}");
Ok(newsreader_count + notmuch_count)
let notmuch_count = nm::count(nm, &newsreader_query).await?;
let tantivy_count = tantivy.count(&newsreader_query).await?;
let total = newsreader_count + notmuch_count + tantivy_count;
info!("count {newsreader_query:?} newsreader count {newsreader_count} notmuch count {notmuch_count} tantivy count {tantivy_count} total {total}");
Ok(total)
}
async fn search<'ctx>(
@ -255,18 +281,11 @@ impl QueryRoot {
last: Option<i32>,
query: String,
) -> Result<Connection<OpaqueCursor<SearchCursor>, ThreadSummary>, Error> {
// TODO: add keywords to limit search to one corpus, i.e. is:news or is:mail
info!("search({after:?} {before:?} {first:?} {last:?} {query:?})",);
let nm = ctx.data_unchecked::<Notmuch>();
let pool = ctx.data_unchecked::<PgPool>();
let tantivy = ctx.data_unchecked::<TantivyConnection>();
#[derive(Debug)]
enum ThreadSummaryCursor {
Newsreader(i32, ThreadSummary),
Notmuch(i32, ThreadSummary),
Tantivy(i32, ThreadSummary),
}
Ok(connection::query(
after,
before,
@ -277,7 +296,7 @@ impl QueryRoot {
first: Option<usize>,
last: Option<usize>| async move {
info!(
"search({:?} {:?} {first:?} {last:?} {query:?})",
"search(after {:?} before {:?} first {first:?} last {last:?} query: {query:?})",
after.as_ref().map(|v| &v.0),
before.as_ref().map(|v| &v.0)
);
@ -288,65 +307,40 @@ impl QueryRoot {
let newsreader_before = before.as_ref().map(|sc| sc.newsreader_offset);
let notmuch_before = before.as_ref().map(|sc| sc.notmuch_offset);
let tantivy_before = before.as_ref().map(|sc| sc.tantivy_offset);
let first = first.map(|v| v as i32);
let last = last.map(|v| v as i32);
let newsreader_query: Query = query.parse()?;
info!("newsreader_query {newsreader_query:?}");
let newsreader_results = if newsreader_query.is_newsreader {
newsreader::search(
pool,
newsreader_after,
newsreader_before,
first.map(|v| v as i32),
last.map(|v| v as i32),
&newsreader_query,
)
.await?
.into_iter()
.map(|(cur, ts)| ThreadSummaryCursor::Newsreader(cur, ts))
.collect()
} else {
Vec::new()
};
let notmuch_results = if newsreader_query.is_notmuch {
nm::search(
nm,
notmuch_after,
notmuch_before,
first.map(|v| v as i32),
last.map(|v| v as i32),
newsreader_query.to_notmuch(),
)
.await?
.into_iter()
.map(|(cur, ts)| ThreadSummaryCursor::Notmuch(cur, ts))
.collect()
} else {
Vec::new()
};
let tantivy_results = if newsreader_query.is_tantivy {
tantivy
.search(
pool,
tantivy_after,
tantivy_before,
first.map(|v| v as i32),
last.map(|v| v as i32),
&newsreader_query,
)
.await?
.into_iter()
.map(|(cur, ts)| ThreadSummaryCursor::Tantivy(cur, ts))
.collect()
} else {
Vec::new()
};
let query: Query = query.parse()?;
info!("newsreader_query {query:?}");
let newsreader_results = newsreader_search(
pool,
newsreader_after,
newsreader_before,
first,
last,
&query,
)
.await?;
let notmuch_results =
notmuch_search(nm, notmuch_after, notmuch_before, first, last, &query).await?;
let tantivy_results = tantivy_search(
tantivy,
pool,
tantivy_after,
tantivy_before,
first,
last,
&query,
)
.await?;
info!(
"tantivy results:\nis_tantivy:{} {tantivy_results:#?}",
newsreader_query.is_tantivy
"newsreader_results ({}) notmuch_results ({}) tantivy_results ({})",
newsreader_results.len(),
notmuch_results.len(),
tantivy_results.len()
);
let mut results: Vec<_> = newsreader_results
.into_iter()
.chain(notmuch_results)
@ -362,6 +356,7 @@ impl QueryRoot {
let mut has_next_page = before.is_some();
if let Some(first) = first {
let first = first as usize;
if results.len() > first {
has_next_page = true;
results.truncate(first);
@ -370,6 +365,7 @@ impl QueryRoot {
let mut has_previous_page = after.is_some();
if let Some(last) = last {
let last = last as usize;
if results.len() > last {
has_previous_page = true;
results.truncate(last);
@ -437,6 +433,59 @@ impl QueryRoot {
}
}
#[derive(Debug)]
enum ThreadSummaryCursor {
Newsreader(i32, ThreadSummary),
Notmuch(i32, ThreadSummary),
Tantivy(i32, ThreadSummary),
}
async fn newsreader_search(
pool: &PgPool,
after: Option<i32>,
before: Option<i32>,
first: Option<i32>,
last: Option<i32>,
query: &Query,
) -> Result<Vec<ThreadSummaryCursor>, async_graphql::Error> {
Ok(newsreader::search(pool, after, before, first, last, &query)
.await?
.into_iter()
.map(|(cur, ts)| ThreadSummaryCursor::Newsreader(cur, ts))
.collect())
}
async fn notmuch_search(
nm: &Notmuch,
after: Option<i32>,
before: Option<i32>,
first: Option<i32>,
last: Option<i32>,
query: &Query,
) -> Result<Vec<ThreadSummaryCursor>, async_graphql::Error> {
Ok(nm::search(nm, after, before, first, last, &query)
.await?
.into_iter()
.map(|(cur, ts)| ThreadSummaryCursor::Notmuch(cur, ts))
.collect())
}
async fn tantivy_search(
tantivy: &TantivyConnection,
pool: &PgPool,
after: Option<i32>,
before: Option<i32>,
first: Option<i32>,
last: Option<i32>,
query: &Query,
) -> Result<Vec<ThreadSummaryCursor>, async_graphql::Error> {
Ok(tantivy
.search(pool, after, before, first, last, &query)
.await?
.into_iter()
.map(|(cur, ts)| ThreadSummaryCursor::Tantivy(cur, ts))
.collect())
}
pub struct Mutation;
#[Object]
impl Mutation {
@ -448,14 +497,12 @@ impl Mutation {
) -> Result<bool, Error> {
let nm = ctx.data_unchecked::<Notmuch>();
let pool = ctx.data_unchecked::<PgPool>();
let tantivy = ctx.data_unchecked::<TantivyConnection>();
for q in query.split_whitespace() {
if newsreader::is_newsreader_thread(&q) {
newsreader::set_read_status(pool, &q, unread).await?;
} else {
nm::set_read_status(nm, q, unread).await?;
}
}
let query: Query = query.parse()?;
newsreader::set_read_status(pool, &query, unread).await?;
tantivy.reindex_thread(pool, &query).await?;
nm::set_read_status(nm, &query, unread).await?;
Ok(true)
}
async fn tag_add<'ctx>(
@ -486,10 +533,19 @@ impl Mutation {
let pool = ctx.data_unchecked::<PgPool>();
tantivy.drop_and_load_index()?;
tantivy.reindex(pool).await?;
tantivy.reindex_all(pool).await?;
Ok(true)
}
async fn refresh<'ctx>(&self, ctx: &Context<'ctx>) -> Result<bool, Error> {
let nm = ctx.data_unchecked::<Notmuch>();
let tantivy = ctx.data_unchecked::<TantivyConnection>();
let pool = ctx.data_unchecked::<PgPool>();
// TODO: parallelize
info!("{}", String::from_utf8_lossy(&nm.new()?));
tantivy.refresh(pool).await?;
Ok(true)
}
}
pub type GraphqlSchema = Schema<QueryRoot, Mutation, EmptySubscription>;
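A hedged test sketch for the new Corpus parser, assuming the test module sits alongside it in graphql.rs; unknown values error out, which Query::from_str downgrades to a warning and corpus = None:

```rust
#[cfg(test)]
mod corpus_tests {
    use super::Corpus;

    #[test]
    fn parses_known_corpora_and_rejects_unknown() {
        assert_eq!("notmuch".parse::<Corpus>(), Ok(Corpus::Notmuch));
        assert_eq!("newsreader".parse::<Corpus>(), Ok(Corpus::Newsreader));
        assert_eq!("tantivy".parse::<Corpus>(), Ok(Corpus::Tantivy));
        assert!("sqlite".parse::<Corpus>().is_err());
    }
}
```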

View File

@ -18,15 +18,16 @@ use lol_html::{
};
use maplit::{hashmap, hashset};
use scraper::{Html, Selector};
use sqlx::{postgres::PgPool, types::time::PrimitiveDateTime};
use sqlx::types::time::PrimitiveDateTime;
use thiserror::Error;
use tokio::sync::Mutex;
use url::Url;
use crate::{
error::ServerError,
graphql::ThreadSummary,
newsreader::{extract_thread_id, is_newsreader_thread},
graphql::{Corpus, ThreadSummary},
newsreader::is_newsreader_thread,
nm::is_notmuch_thread_or_id,
};
const NEWSREADER_TAG_PREFIX: &'static str = "News/";
@ -607,12 +608,13 @@ fn compute_offset_limit(
#[derive(Debug)]
pub struct Query {
pub unread_only: bool,
pub tag: Option<String>,
pub uid: Option<String>,
pub tags: Vec<String>,
pub uids: Vec<String>,
pub remainder: Vec<String>,
pub is_notmuch: bool,
pub is_newsreader: bool,
pub is_tantivy: bool,
pub corpus: Option<Corpus>,
}
impl Query {
@ -627,10 +629,10 @@ impl Query {
if self.unread_only {
parts.push("is:unread".to_string());
}
if let Some(site) = &self.tag {
parts.push(format!("tag:{site}"));
for tag in &self.tags {
parts.push(format!("tag:{tag}"));
}
if let Some(uid) = &self.uid {
for uid in &self.uids {
parts.push(uid.clone());
}
parts.extend(self.remainder.clone());
@ -642,48 +644,60 @@ impl FromStr for Query {
type Err = Infallible;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut unread_only = false;
let mut tag = None;
let mut uid = None;
let mut tags = Vec::new();
let mut uids = Vec::new();
let mut remainder = Vec::new();
let mut is_notmuch = false;
let mut is_newsreader = false;
let is_newsreader = false;
let mut is_tantivy = false;
let mut corpus = None;
for word in s.split_whitespace() {
if word == "is:unread" {
unread_only = true
} else if word.starts_with("tag:") {
tag = Some(word["tag:".len()..].to_string())
tags.push(word["tag:".len()..].to_string());
/*
} else if word.starts_with("tag:") {
// Any tag that doesn't match site_prefix should explicitly set the site to something not in the
// database
site = Some(NON_EXISTENT_SITE_NAME.to_string());
*/
} else if word.starts_with("corpus:") {
let c = word["corpus:".len()..].to_string();
corpus = c.parse::<Corpus>().map(|c| Some(c)).unwrap_or_else(|e| {
warn!("Error parsing corpus '{c}': {e:?}");
None
});
} else if is_newsreader_thread(word) {
uid = Some(extract_thread_id(word).to_string())
uids.push(word.to_string());
} else if is_notmuch_thread_or_id(word) {
uids.push(word.to_string());
} else if word == "is:mail" || word == "is:email" || word == "is:notmuch" {
is_notmuch = true;
} else if word == "is:news" || word == "is:newsreader" {
is_newsreader = true;
is_tantivy = true;
} else {
remainder.push(word.to_string());
}
}
// If we don't see any explicit filters for a corpus, flip them all on
if !(is_notmuch || is_newsreader) {
is_newsreader = true;
if corpus.is_none() && !(is_newsreader || is_notmuch || is_tantivy) {
// Don't set is_newsreader unless debugging, assume tantivy can handle it.
// Explicitly setting corpus:newsreader will bypass this logic
// is_newsreader = true;
is_notmuch = true;
is_tantivy = true;
}
// TODO: decide if tantivy gets its own life or replaces newsreader
is_tantivy = is_newsreader;
Ok(Query {
unread_only,
tag,
uid,
tags,
uids,
remainder,
is_notmuch,
is_newsreader,
is_tantivy,
corpus,
})
}
}
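To make the new defaulting behavior concrete, a hedged test sketch assuming the test module sits alongside Query: with no is:/corpus: hint the query targets notmuch and tantivy but skips the raw newsreader path, while corpus: narrows explicitly.

```rust
#[cfg(test)]
mod query_tests {
    use super::Query;
    use crate::graphql::Corpus;

    #[test]
    fn default_query_targets_notmuch_and_tantivy() {
        let q: Query = "is:unread tag:News/hn rust".parse().unwrap();
        assert!(q.unread_only);
        assert_eq!(q.tags, vec!["News/hn".to_string()]);
        assert_eq!(q.remainder, vec!["rust".to_string()]);
        assert!(q.is_notmuch && q.is_tantivy && !q.is_newsreader);
        assert_eq!(q.corpus, None);
    }

    #[test]
    fn corpus_keyword_narrows_the_search() {
        let q: Query = "corpus:newsreader".parse().unwrap();
        assert_eq!(q.corpus, Some(Corpus::Newsreader));
    }
}
```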
@ -694,6 +708,7 @@ pub struct ThreadSummaryRecord {
pub title: Option<String>,
pub uid: String,
pub name: Option<String>,
pub corpus: Corpus,
}
async fn thread_summary_from_row(r: ThreadSummaryRecord) -> ThreadSummary {
@ -711,12 +726,14 @@ async fn thread_summary_from_row(r: ThreadSummaryRecord) -> ThreadSummary {
.expect("post missing date")
.assume_utc()
.unix_timestamp() as isize,
date_relative: "TODO date_relative".to_string(),
date_relative: format!("{:?}", r.date),
//date_relative: "TODO date_relative".to_string(),
matched: 0,
total: 1,
authors: r.name.unwrap_or_else(|| site.clone()),
subject: title,
tags,
corpus: r.corpus,
}
}
async fn clean_title(title: &str) -> Result<String, ServerError> {

View File

@ -13,14 +13,14 @@ use crate::{
clean_title, compute_offset_limit,
config::Config,
error::ServerError,
graphql::{NewsPost, Tag, Thread, ThreadSummary},
graphql::{Corpus, NewsPost, Tag, Thread, ThreadSummary},
thread_summary_from_row, AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml,
SlurpContents, StripHtml, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX,
SlurpContents, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX,
NEWSREADER_THREAD_PREFIX,
};
pub fn is_newsreader_search(query: &str) -> bool {
query.contains(NEWSREADER_TAG_PREFIX)
pub fn is_newsreader_query(query: &Query) -> bool {
query.is_newsreader || query.corpus == Some(Corpus::Newsreader)
}
pub fn is_newsreader_thread(query: &str) -> bool {
@ -28,7 +28,11 @@ pub fn is_newsreader_thread(query: &str) -> bool {
}
pub fn extract_thread_id(query: &str) -> &str {
&query[NEWSREADER_THREAD_PREFIX.len()..]
if query.starts_with(NEWSREADER_THREAD_PREFIX) {
&query[NEWSREADER_THREAD_PREFIX.len()..]
} else {
query
}
}
pub fn extract_site(tag: &str) -> &str {
@ -39,14 +43,33 @@ pub fn make_news_tag(tag: &str) -> String {
format!("tag:{NEWSREADER_TAG_PREFIX}{tag}")
}
fn site_from_tags(tags: &[String]) -> Option<String> {
for t in tags {
if t.starts_with(NEWSREADER_TAG_PREFIX) {
return Some(extract_site(t).to_string());
}
}
None
}
pub async fn count(pool: &PgPool, query: &Query) -> Result<usize, ServerError> {
if !is_newsreader_query(query) {
return Ok(0);
}
if !query.remainder.is_empty() {
// TODO: handle full text search against all sites, for now, early return if search words
// are specified.
return Ok(0);
}
let row = sqlx::query_file!("sql/count.sql", query.tag, query.unread_only)
let site = site_from_tags(&query.tags);
if !query.tags.is_empty() && site.is_none() {
// The newsreader backend only handles read/unread queries scoped to a site (or all
// sites); anything with a non-site tag isn't supported.
return Ok(0);
}
let row = sqlx::query_file!("sql/count.sql", site, query.unread_only)
.fetch_one(pool)
.await?;
Ok(row.count.unwrap_or(0).try_into().unwrap_or(0))
@ -61,12 +84,22 @@ pub async fn search(
query: &Query,
) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
info!("search({after:?} {before:?} {first:?} {last:?} {query:?}");
if !is_newsreader_query(query) {
return Ok(Vec::new());
}
if !query.remainder.is_empty() {
// TODO: handle full text search against all sites, for now, early return if search words
// are specified.
return Ok(Vec::new());
}
let site = site_from_tags(&query.tags);
if !query.tags.is_empty() && site.is_none() {
// The newsreader backend only handles read/unread queries scoped to a site (or all
// sites); anything with a non-site tag isn't supported.
return Ok(Vec::new());
}
let (offset, mut limit) = compute_offset_limit(after, before, first, last);
if before.is_none() {
// When searching forward, the +1 is to see if there are more pages of data available.
@ -75,7 +108,6 @@ pub async fn search(
limit = limit + 1;
}
let site = query.tag.as_ref().map(|t| extract_site(&t).to_string());
info!(
"search offset {offset} limit {limit} site {site:?} unread_only {}",
query.unread_only
@ -102,6 +134,7 @@ pub async fn search(
title: r.title,
uid: r.uid,
name: r.name,
corpus: Corpus::Newsreader,
})
.await,
));
@ -243,12 +276,22 @@ pub async fn thread(
}
pub async fn set_read_status<'ctx>(
pool: &PgPool,
query: &str,
query: &Query,
unread: bool,
) -> Result<bool, ServerError> {
let query: Query = query.parse()?;
sqlx::query_file!("sql/set_unread.sql", !unread, query.uid)
.execute(pool)
.await?;
// TODO: make single query when query.uids.len() > 1
let uids: Vec<_> = query
.uids
.iter()
.filter(|uid| is_newsreader_thread(uid))
.map(
|uid| extract_thread_id(uid), // TODO strip prefix
)
.collect();
for uid in uids {
sqlx::query_file!("sql/set_unread.sql", !unread, uid)
.execute(pool)
.await?;
}
Ok(true)
}
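A hedged test sketch for the more lenient extract_thread_id, assuming the test module sits in the newsreader module and that a bare uid does not itself start with NEWSREADER_THREAD_PREFIX:

```rust
#[cfg(test)]
mod newsreader_tests {
    use super::extract_thread_id;
    use crate::NEWSREADER_THREAD_PREFIX;

    #[test]
    fn extract_thread_id_handles_bare_and_prefixed_uids() {
        // A uid without the thread prefix is returned unchanged.
        assert_eq!(extract_thread_id("abc-123"), "abc-123");
        // A prefixed thread id still has the prefix stripped.
        let prefixed = format!("{NEWSREADER_THREAD_PREFIX}abc-123");
        assert_eq!(extract_thread_id(&prefixed), "abc-123");
    }
}
```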

View File

@ -14,10 +14,10 @@ use crate::{
compute_offset_limit,
error::ServerError,
graphql::{
Attachment, Body, DispositionType, Email, EmailThread, Header, Html, Message, PlainText,
Tag, Thread, ThreadSummary, UnhandledContentType,
Attachment, Body, Corpus, DispositionType, Email, EmailThread, Header, Html, Message,
PlainText, Tag, Thread, ThreadSummary, UnhandledContentType,
},
linkify_html, InlineStyle, SanitizeHtml, Transformer,
linkify_html, InlineStyle, Query, SanitizeHtml, Transformer,
};
const TEXT_PLAIN: &'static str = "text/plain";
@ -31,6 +31,14 @@ const MULTIPART_RELATED: &'static str = "multipart/related";
const MAX_RAW_MESSAGE_SIZE: usize = 100_000;
fn is_notmuch_query(query: &Query) -> bool {
query.is_notmuch || query.corpus == Some(Corpus::Notmuch)
}
pub fn is_notmuch_thread_or_id(id: &str) -> bool {
id.starts_with("id:") || id.starts_with("thread:")
}
// TODO(wathiede): decide good error type
pub fn threadset_to_messages(thread_set: notmuch::ThreadSet) -> Result<Vec<Message>, ServerError> {
for t in thread_set.0 {
@ -39,8 +47,12 @@ pub fn threadset_to_messages(thread_set: notmuch::ThreadSet) -> Result<Vec<Messa
Ok(Vec::new())
}
pub async fn count(nm: &Notmuch, query: &str) -> Result<usize, ServerError> {
Ok(nm.count(query)?)
pub async fn count(nm: &Notmuch, query: &Query) -> Result<usize, ServerError> {
if !is_notmuch_query(query) {
return Ok(0);
}
let query = query.to_notmuch();
Ok(nm.count(&query)?)
}
pub async fn search(
@ -49,8 +61,12 @@ pub async fn search(
before: Option<i32>,
first: Option<i32>,
last: Option<i32>,
query: String,
query: &Query,
) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
if !is_notmuch_query(query) {
return Ok(Vec::new());
}
let query = query.to_notmuch();
let (offset, mut limit) = compute_offset_limit(after, before, first, last);
if before.is_none() {
// When searching forward, the +1 is to see if there are more pages of data available.
@ -75,6 +91,7 @@ pub async fn search(
authors: ts.authors,
subject: ts.subject,
tags: ts.tags,
corpus: Corpus::Notmuch,
},
)
})
@ -770,13 +787,21 @@ fn render_content_type_tree(m: &ParsedMail) -> String {
pub async fn set_read_status<'ctx>(
nm: &Notmuch,
query: &str,
query: &Query,
unread: bool,
) -> Result<bool, ServerError> {
if unread {
nm.tag_add("unread", &format!("{query}"))?;
} else {
nm.tag_remove("unread", &format!("{query}"))?;
let uids: Vec<_> = query
.uids
.iter()
.filter(|uid| is_notmuch_thread_or_id(uid))
.collect();
info!("set_read_status({unread} {uids:?})");
for uid in uids {
if unread {
nm.tag_add("unread", uid)?;
} else {
nm.tag_remove("unread", uid)?;
}
}
Ok(true)
}

View File

@ -1,11 +1,26 @@
use log::info;
use sqlx::postgres::PgPool;
use tantivy::{schema::Value, Index, TantivyError};
use std::collections::HashSet;
use crate::{
error::ServerError, graphql::ThreadSummary, thread_summary_from_row, Query, ThreadSummaryRecord,
use log::{debug, error, info};
use sqlx::{postgres::PgPool, types::time::PrimitiveDateTime};
use tantivy::{
collector::{DocSetCollector, TopDocs},
query,
query::{AllQuery, BooleanQuery, Occur, QueryParser, TermQuery},
schema::{Facet, IndexRecordOption, Value},
DocAddress, Index, Searcher, TantivyDocument, TantivyError, Term,
};
use crate::{
compute_offset_limit,
error::ServerError,
graphql::{Corpus, ThreadSummary},
newsreader::{extract_thread_id, is_newsreader_thread},
thread_summary_from_row, Query, ThreadSummaryRecord,
};
pub fn is_tantivy_query(query: &Query) -> bool {
query.is_tantivy || query.corpus == Some(Corpus::Tantivy)
}
pub struct TantivyConnection {
db_path: String,
//index: Index,
@ -27,7 +42,67 @@ impl TantivyConnection {
db_path: tantivy_db_path.to_string(),
})
}
pub async fn reindex(&self, pool: &PgPool) -> Result<(), ServerError> {
pub async fn refresh(&self, pool: &PgPool) -> Result<(), ServerError> {
let start_time = std::time::Instant::now();
let p_uids: Vec<_> = sqlx::query_file!("sql/all-uids.sql")
.fetch_all(pool)
.await?
.into_iter()
.map(|r| r.uid)
.collect();
info!(
"refresh from postgres got {} uids in {}",
p_uids.len(),
start_time.elapsed().as_secs_f32()
);
let start_time = std::time::Instant::now();
let (searcher, _query) = self.searcher_and_query("")?;
let docs = searcher.search(&AllQuery, &DocSetCollector)?;
let uid = self.get_index()?.schema().get_field("uid")?;
let t_uids: Vec<_> = docs
.into_iter()
.map(|doc_address| {
searcher
.doc(doc_address)
.map(|doc: TantivyDocument| {
debug!("doc: {doc:#?}");
doc.get_first(uid)
.expect("uid")
.as_str()
.expect("as_str")
.to_string()
})
.expect("searcher.doc")
})
.collect();
info!(
"refresh tantivy got {} uids in {}",
t_uids.len(),
start_time.elapsed().as_secs_f32()
);
let t_set: HashSet<_> = t_uids.into_iter().collect();
let need: Vec<_> = p_uids
.into_iter()
.filter(|uid| !t_set.contains(uid.as_str()))
.collect();
if !need.is_empty() {
info!(
"need to reindex {} uids: {:?}...",
need.len(),
&need[..need.len().min(10)]
);
}
let batch_size = 1000;
let uids: Vec<_> = need[..need.len().min(batch_size)]
.into_iter()
.cloned()
.collect();
self.reindex_uids(pool, &uids).await
}
async fn reindex_uids(&self, pool: &PgPool, uids: &[String]) -> Result<(), ServerError> {
// TODO: add SlurpContents and convert HTML to text
use tantivy::{doc, Term};
let start_time = std::time::Instant::now();
@ -44,11 +119,20 @@ impl TantivyConnection {
let is_read = schema.get_field("is_read")?;
let uid = schema.get_field("uid")?;
let id = schema.get_field("id")?;
let tag = schema.get_field("tag")?;
let rows = sqlx::query_file!("sql/all-posts.sql")
info!("reindexing {} posts", uids.len());
let rows = sqlx::query_file_as!(PostgresDoc, "sql/posts-from-uids.sql", uids)
.fetch_all(pool)
.await?;
if uids.len() != rows.len() {
error!(
"Had {} uids and only got {} rows: uids {uids:?}",
uids.len(),
rows.len()
);
}
let total = rows.len();
for (i, r) in rows.into_iter().enumerate() {
if i % 10_000 == 0 {
@ -57,26 +141,76 @@ impl TantivyConnection {
start_time.elapsed().as_secs_f32()
);
}
let id_term = Term::from_field_text(uid, &r.uid);
index_writer.delete_term(id_term);
let slug = r.site;
let tag_facet = Facet::from(&format!("/News/{slug}"));
index_writer.add_document(doc!(
site => r.site.expect("UNKOWN_SITE"),
title => r.title.expect("UNKOWN_TITLE"),
site => slug.clone(),
title => r.title,
// TODO: clean and extract text from HTML
summary => r.summary.expect("UNKNOWN_SUMMARY"),
link => r.link.expect("link"),
date => tantivy::DateTime::from_primitive(r.date.expect("date")),
is_read => r.is_read.expect("is_read"),
summary => r.summary,
link => r.link,
date => tantivy::DateTime::from_primitive(r.date),
is_read => r.is_read,
uid => r.uid,
id => r.id as i64,
id => r.id as u64,
tag => tag_facet,
))?;
}
index_writer.commit()?;
info!("took {:.2}s to reindex", start_time.elapsed().as_secs_f32());
index_writer.commit()?;
Ok(())
}
pub async fn reindex_thread(&self, pool: &PgPool, query: &Query) -> Result<(), ServerError> {
let uids: Vec<_> = query
.uids
.iter()
.filter(|uid| is_newsreader_thread(uid))
.map(|uid| extract_thread_id(uid).to_string())
.collect();
Ok(self.reindex_uids(pool, &uids).await?)
}
pub async fn reindex_all(&self, pool: &PgPool) -> Result<(), ServerError> {
let rows = sqlx::query_file!("sql/all-posts.sql")
.fetch_all(pool)
.await?;
let uids: Vec<String> = rows.into_iter().map(|r| r.uid).collect();
self.reindex_uids(pool, &uids).await?;
Ok(())
}
fn searcher_and_query(
&self,
term: &str,
) -> Result<(Searcher, Box<dyn query::Query>), ServerError> {
let index = self.get_index()?;
let reader = index.reader()?;
let schema = index.schema();
let searcher = reader.searcher();
let title = schema.get_field("title")?;
let summary = schema.get_field("summary")?;
let query_parser = QueryParser::for_index(&index, vec![title, summary]);
// Tantivy uses '*' to match all docs, not empty string
let term = if term.is_empty() { "*" } else { term };
info!("query_parser('{term}')");
let query = query_parser.parse_query(&term)?;
Ok((searcher, query))
}
pub async fn count(&self, query: &Query) -> Result<usize, ServerError> {
if !is_tantivy_query(query) {
return Ok(0);
}
use tantivy::collector::Count;
let term = query.remainder.join(" ");
let (searcher, query) = self.searcher_and_query(&term)?;
Ok(searcher.search(&query, &Count)?)
}
pub async fn search(
&self,
pool: &PgPool,
@ -86,28 +220,51 @@ impl TantivyConnection {
last: Option<i32>,
query: &Query,
) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
use tantivy::{collector::TopDocs, query::QueryParser, Document, TantivyDocument};
// TODO: set based on function parameters
let offset = 0;
if !is_tantivy_query(query) {
return Ok(Vec::new());
}
let (offset, mut limit) = compute_offset_limit(after, before, first, last);
if before.is_none() {
// When searching forward, the +1 is to see if there are more pages of data available.
// Searching backwards implies there's more pages forward, because the value represented by
// `before` is on the next page.
limit = limit + 1;
}
let index = self.get_index()?;
let reader = index.reader()?;
let schema = index.schema();
let searcher = reader.searcher();
let site = schema.get_field("site")?;
let uid = schema.get_field("uid")?;
let title = schema.get_field("title")?;
let summary = schema.get_field("summary")?;
let date = schema.get_field("date")?;
let query_parser = QueryParser::for_index(&index, vec![title, summary]);
let query = query_parser.parse_query(&query.remainder.join(" "))?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
let term = query.remainder.join(" ");
let (searcher, tantivy_query) = self.searcher_and_query(&term)?;
let tag = self.get_index()?.schema().get_field("tag")?;
let is_read = self.get_index()?.schema().get_field("is_read")?;
let mut terms = vec![(Occur::Must, tantivy_query)];
for t in &query.tags {
let facet = Facet::from(&format!("/{t}"));
let facet_term = Term::from_facet(tag, &facet);
let facet_term_query = Box::new(TermQuery::new(facet_term, IndexRecordOption::Basic));
terms.push((Occur::Must, facet_term_query));
}
if query.unread_only {
info!("searching for unread only");
let term = Term::from_field_bool(is_read, false);
terms.push((
Occur::Must,
Box::new(TermQuery::new(term, IndexRecordOption::Basic)),
));
}
let search_query = BooleanQuery::new(terms);
info!("Tantivy::search(term '{term}', off {offset}, lim {limit}, search_query {search_query:?})");
let top_docs = searcher.search(
&search_query,
&TopDocs::with_limit(limit as usize)
.and_offset(offset as usize)
.order_by_u64_field("date", tantivy::index::Order::Desc),
)?;
info!("search found {} docs", top_docs.len());
let uid = self.get_index()?.schema().get_field("uid")?;
let uids = top_docs
.into_iter()
.map(|(_, doc_address)| {
.map(|(_, doc_address): (u64, DocAddress)| {
searcher.doc(doc_address).map(|doc: TantivyDocument| {
debug!("doc: {doc:#?}");
doc.get_first(uid)
.expect("doc missing uid")
.as_str()
@ -134,6 +291,7 @@ impl TantivyConnection {
title: r.title,
uid: r.uid,
name: r.name,
corpus: Corpus::Tantivy,
})
.await,
));
@ -157,11 +315,23 @@ fn create_news_db(tantivy_db_path: &str) -> Result<(), TantivyError> {
schema_builder.add_text_field("summary", TEXT);
schema_builder.add_text_field("link", STRING | STORED);
schema_builder.add_date_field("date", FAST | INDEXED | STORED);
schema_builder.add_bool_field("is_read", FAST);
schema_builder.add_bool_field("is_read", FAST | INDEXED | STORED);
schema_builder.add_text_field("uid", STRING | STORED);
schema_builder.add_i64_field("id", FAST);
schema_builder.add_u64_field("id", FAST);
schema_builder.add_facet_field("tag", FacetOptions::default());
let schema = schema_builder.build();
Index::create_in_dir(tantivy_db_path, schema)?;
Ok(())
}
struct PostgresDoc {
site: String,
title: String,
summary: String,
link: String,
date: PrimitiveDateTime,
is_read: bool,
uid: String,
id: i32,
}
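A hedged sketch of the filter composition search() now performs, assuming tantivy 0.2x APIs and passing field handles in for brevity: the parsed free-text query is AND-ed with a facet term per tag and, for unread-only searches, an is_read == false term.

```rust
use tantivy::{
    query::{BooleanQuery, Occur, Query as TantivyQuery, QueryParser, TermQuery},
    schema::{Facet, Field, IndexRecordOption},
    Index, Term,
};

// Hypothetical helper; the real logic lives in TantivyConnection::search().
fn unread_posts_in_facet(
    index: &Index,
    title: Field,
    summary: Field,
    tag: Field,
    is_read: Field,
    text: &str,
) -> Result<BooleanQuery, Box<dyn std::error::Error>> {
    // '*' matches all docs when no free-text terms were given.
    let parser = QueryParser::for_index(index, vec![title, summary]);
    let text_query = parser.parse_query(if text.is_empty() { "*" } else { text })?;
    let mut clauses: Vec<(Occur, Box<dyn TantivyQuery>)> = vec![(Occur::Must, text_query)];
    // Restrict to one news site via the hierarchical "tag" facet, e.g. /News/hn.
    let facet = Facet::from("/News/hn");
    clauses.push((
        Occur::Must,
        Box::new(TermQuery::new(Term::from_facet(tag, &facet), IndexRecordOption::Basic)),
    ));
    // Keep only unread posts.
    clauses.push((
        Occur::Must,
        Box::new(TermQuery::new(
            Term::from_field_bool(is_read, false),
            IndexRecordOption::Basic,
        )),
    ));
    Ok(BooleanQuery::new(clauses))
}
```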

View File

@ -14,6 +14,7 @@ query FrontPageQuery($query: String!, $after: String $before: String, $first: In
subject
authors
tags
corpus
}
}
tags {

View File

@ -0,0 +1,3 @@
mutation RefreshMutation {
refresh
}

View File

@ -232,6 +232,35 @@
"name": "Boolean",
"possibleTypes": null
},
{
"description": null,
"enumValues": [
{
"deprecationReason": null,
"description": null,
"isDeprecated": false,
"name": "NOTMUCH"
},
{
"deprecationReason": null,
"description": null,
"isDeprecated": false,
"name": "NEWSREADER"
},
{
"deprecationReason": null,
"description": null,
"isDeprecated": false,
"name": "TANTIVY"
}
],
"fields": null,
"inputFields": null,
"interfaces": null,
"kind": "ENUM",
"name": "Corpus",
"possibleTypes": null
},
{
"description": null,
"enumValues": [
@ -850,6 +879,38 @@
"ofType": null
}
}
},
{
"args": [],
"deprecationReason": null,
"description": "Drop and recreate tantivy index. Warning this is slow",
"isDeprecated": false,
"name": "dropAndLoadIndex",
"type": {
"kind": "NON_NULL",
"name": null,
"ofType": {
"kind": "SCALAR",
"name": "Boolean",
"ofType": null
}
}
},
{
"args": [],
"deprecationReason": null,
"description": null,
"isDeprecated": false,
"name": "refresh",
"type": {
"kind": "NON_NULL",
"name": null,
"ofType": {
"kind": "SCALAR",
"name": "Boolean",
"ofType": null
}
}
}
],
"inputFields": null,
@ -1536,6 +1597,22 @@
}
}
}
},
{
"args": [],
"deprecationReason": null,
"description": null,
"isDeprecated": false,
"name": "corpus",
"type": {
"kind": "NON_NULL",
"name": null,
"ofType": {
"kind": "ENUM",
"name": "Corpus",
"ofType": null
}
}
}
],
"inputFields": null,

View File

@ -44,6 +44,14 @@ pub struct AddTagMutation;
)]
pub struct RemoveTagMutation;
#[derive(GraphQLQuery)]
#[graphql(
schema_path = "graphql/schema.json",
query_path = "graphql/refresh.graphql",
response_derives = "Debug"
)]
pub struct RefreshMutation;
pub async fn send_graphql<Body, Resp>(body: Body) -> Result<graphql_client::Response<Resp>, Error>
where
Body: Serialize,

View File

@ -111,7 +111,17 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
Msg::Noop => {}
Msg::RefreshStart => {
model.refreshing_state = RefreshingState::Loading;
orders.perform_cmd(async move { Msg::RefreshDone(api::refresh_request().await.err()) });
orders.perform_cmd(async move {
Msg::RefreshDone(
send_graphql::<_, graphql::refresh_mutation::ResponseData>(
graphql::RefreshMutation::build_query(
graphql::refresh_mutation::Variables {},
),
)
.await
.err(),
)
});
}
Msg::RefreshDone(err) => {
model.refreshing_state = if let Some(err) = err {

View File

@ -203,7 +203,7 @@ fn view_search_results(
}),
]],
td![
C!["from"],
C!["from", format!("corpus-{:?} ", r.corpus)],
a![
C!["has-text-light", "text"],
attrs! {