Compare commits

...

3 Commits

Author SHA1 Message Date
80454cbc7e Bumping version to 0.0.122 2025-01-29 15:44:05 -08:00
78cf59333e cargo sqlx prepare 2025-01-29 15:44:04 -08:00
ab47f32b52 server: fetch search summaries in parallel 2025-01-29 15:43:46 -08:00
9 changed files with 58 additions and 27 deletions

11
Cargo.lock generated
View File

@ -2910,7 +2910,7 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]] [[package]]
name = "letterbox" name = "letterbox"
version = "0.0.121" version = "0.0.122"
dependencies = [ dependencies = [
"build-info", "build-info",
"build-info-build", "build-info-build",
@ -2936,7 +2936,7 @@ dependencies = [
[[package]] [[package]]
name = "letterbox-server" name = "letterbox-server"
version = "0.0.121" version = "0.0.122"
dependencies = [ dependencies = [
"ammonia", "ammonia",
"anyhow", "anyhow",
@ -2949,6 +2949,7 @@ dependencies = [
"chrono", "chrono",
"clap", "clap",
"css-inline", "css-inline",
"futures 0.3.31",
"html-escape", "html-escape",
"linkify", "linkify",
"log", "log",
@ -3456,7 +3457,7 @@ dependencies = [
[[package]] [[package]]
name = "notmuch" name = "notmuch"
version = "0.0.121" version = "0.0.122"
dependencies = [ dependencies = [
"itertools 0.10.5", "itertools 0.10.5",
"log", "log",
@ -4251,7 +4252,7 @@ dependencies = [
[[package]] [[package]]
name = "procmail2notmuch" name = "procmail2notmuch"
version = "0.0.121" version = "0.0.122"
dependencies = [ dependencies = [
"anyhow", "anyhow",
] ]
@ -5330,7 +5331,7 @@ dependencies = [
[[package]] [[package]]
name = "shared" name = "shared"
version = "0.0.121" version = "0.0.122"
dependencies = [ dependencies = [
"build-info", "build-info",
"notmuch", "notmuch",

View File

@ -1,6 +1,6 @@
[package] [package]
name = "notmuch" name = "notmuch"
version = "0.0.121" version = "0.0.122"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,6 +1,6 @@
[package] [package]
name = "procmail2notmuch" name = "procmail2notmuch"
version = "0.0.121" version = "0.0.122"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,6 +1,6 @@
{ {
"db_name": "PostgreSQL", "db_name": "PostgreSQL",
"query": "SELECT\n p.id,\n link,\n clean_summary\nFROM\n post AS p TABLESAMPLE SYSTEM (.1)\nINNER JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts\nWHERE search_summary IS NULL;\n", "query": "SELECT\n p.id,\n link,\n clean_summary\nFROM\n-- Remoe tablesample when db sufficiently indexed\n post AS p TABLESAMPLE SYSTEM (.1)\nINNER JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts\nWHERE search_summary IS NULL;\n",
"describe": { "describe": {
"columns": [ "columns": [
{ {
@ -28,5 +28,5 @@
true true
] ]
}, },
"hash": "078322a8e99faa6b91e3890501f6763e8787b181d5fd070267180b0d5180ca36" "hash": "26e150d5d294f2eb264bbae21766295c4e4e5fe1e076dd7d6bc168e5fb49dc84"
} }

View File

@ -1,6 +1,6 @@
[package] [package]
name = "letterbox-server" name = "letterbox-server"
version = "0.0.121" version = "0.0.122"
edition = "2021" edition = "2021"
default-run = "letterbox-server" default-run = "letterbox-server"
@ -17,6 +17,7 @@ cacher = { version = "0.1.0", registry = "xinu" }
chrono = "0.4.39" chrono = "0.4.39"
clap = { version = "4.5.23", features = ["derive"] } clap = { version = "4.5.23", features = ["derive"] }
css-inline = "0.13.0" css-inline = "0.13.0"
futures = "0.3.31"
html-escape = "0.2.13" html-escape = "0.2.13"
linkify = "0.10.0" linkify = "0.10.0"
log = "0.4.17" log = "0.4.17"

View File

@ -3,6 +3,7 @@ SELECT
link, link,
clean_summary clean_summary
FROM FROM
-- Remoe tablesample when db sufficiently indexed
post AS p TABLESAMPLE SYSTEM (.1) post AS p TABLESAMPLE SYSTEM (.1)
INNER JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts INNER JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts
WHERE search_summary IS NULL; WHERE search_summary IS NULL;

View File

@ -1,6 +1,7 @@
use std::collections::HashMap; use std::collections::HashMap;
use cacher::FilesystemCacher; use cacher::FilesystemCacher;
use futures::{stream::FuturesUnordered, StreamExt};
use log::info; use log::info;
use maplit::hashmap; use maplit::hashmap;
use scraper::Selector; use scraper::Selector;
@ -254,23 +255,25 @@ pub async fn set_read_status<'ctx>(
} }
#[instrument(name = "newsreader::refresh", skip_all)] #[instrument(name = "newsreader::refresh", skip_all)]
pub async fn refresh<'ctx>(pool: &PgPool, cacher: &FilesystemCacher) -> Result<bool, ServerError> { pub async fn refresh<'ctx>(pool: &PgPool, cacher: &FilesystemCacher) -> Result<bool, ServerError> {
let body_transformers: Vec<Box<dyn Transformer>> = vec![ async fn update_search_summary(
Box::new(SlurpContents { pool: &PgPool,
cacher, cacher: &FilesystemCacher,
inline_css: true, link: Url,
site_selectors: slurp_contents_selectors(), body: String,
}), id: i32,
Box::new(StripHtml), ) -> Result<(), ServerError> {
]; let body_transformers: Vec<Box<dyn Transformer>> = vec![
Box::new(SlurpContents {
cacher,
inline_css: true,
site_selectors: slurp_contents_selectors(),
}),
Box::new(StripHtml),
];
let rows = sqlx::query_file!("sql/need-search-summary.sql",)
.fetch_all(pool)
.await?;
for r in rows {
let link = Url::parse(&r.link)?;
info!("adding {link} to search index"); info!("adding {link} to search index");
let mut body = body;
let link = Some(link); let link = Some(link);
let mut body = r.clean_summary.unwrap_or("NO SUMMARY".to_string());
for t in body_transformers.iter() { for t in body_transformers.iter() {
if t.should_run(&link, &body) { if t.should_run(&link, &body) {
body = t.transform(&link, &body).await?; body = t.transform(&link, &body).await?;
@ -279,10 +282,35 @@ pub async fn refresh<'ctx>(pool: &PgPool, cacher: &FilesystemCacher) -> Result<b
sqlx::query!( sqlx::query!(
"UPDATE post SET search_summary = $1 WHERE id = $2", "UPDATE post SET search_summary = $1 WHERE id = $2",
body, body,
r.id id
) )
.execute(pool) .execute(pool)
.await?; .await?;
Ok(())
}
let mut unordered: FuturesUnordered<_> = sqlx::query_file!("sql/need-search-summary.sql",)
.fetch_all(pool)
.await?
.into_iter()
.map(|r| {
let link = Url::parse(&r.link).expect("failed to parse link");
let body = r.clean_summary.unwrap_or("NO SUMMARY".to_string());
let id = r.id;
update_search_summary(pool, cacher, link, body, id)
})
.collect();
while let Some(res) = unordered.next().await {
//let res = res;
match res {
Ok(()) => {}
Err(err) => {
info!("failed refresh {err:?}");
// TODO:
//fd.error = Some(err);
}
};
} }
Ok(true) Ok(true)
} }

View File

@ -1,6 +1,6 @@
[package] [package]
name = "shared" name = "shared"
version = "0.0.121" version = "0.0.122"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,5 +1,5 @@
[package] [package]
version = "0.0.121" version = "0.0.122"
name = "letterbox" name = "letterbox"
repository = "https://github.com/seed-rs/seed-quickstart" repository = "https://github.com/seed-rs/seed-quickstart"
authors = ["Bill Thiede <git@xinu.tv>"] authors = ["Bill Thiede <git@xinu.tv>"]