Compare commits

c7aa32b922 ... 29bf6d9b6d

3 commits

| Author | SHA1 | Date |
|---|---|---|
| | 29bf6d9b6d | |
| | 92bf45bd15 | |
| | 12c8e0e33b | |
Cargo.lock (11 lines changed, generated)
@@ -2910,7 +2910,7 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
 
 [[package]]
 name = "letterbox"
-version = "0.0.119"
+version = "0.0.120"
 dependencies = [
  "build-info",
  "build-info-build",
@@ -2936,7 +2936,7 @@ dependencies = [
 
 [[package]]
 name = "letterbox-server"
-version = "0.0.119"
+version = "0.0.120"
 dependencies = [
  "ammonia",
  "anyhow",
@@ -2958,6 +2958,7 @@ dependencies = [
  "memmap",
  "notmuch",
  "opentelemetry",
+ "regex",
  "reqwest 0.12.9",
  "rocket",
  "rocket_cors",
@@ -3455,7 +3456,7 @@ dependencies = [
 
 [[package]]
 name = "notmuch"
-version = "0.0.119"
+version = "0.0.120"
 dependencies = [
  "itertools 0.10.5",
  "log",
@@ -4250,7 +4251,7 @@ dependencies = [
 
 [[package]]
 name = "procmail2notmuch"
-version = "0.0.119"
+version = "0.0.120"
 dependencies = [
  "anyhow",
 ]
@@ -5329,7 +5330,7 @@ dependencies = [
 
 [[package]]
 name = "shared"
-version = "0.0.119"
+version = "0.0.120"
 dependencies = [
  "build-info",
  "notmuch",
notmuch/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "notmuch"
-version = "0.0.119"
+version = "0.0.120"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
procmail2notmuch/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "procmail2notmuch"
-version = "0.0.119"
+version = "0.0.120"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
server/.sqlx/query-3900728293604ff2c174c208386699b1898f9e74447963b931c7ba1c94d75b7b.json (32 lines, generated, new file)

@@ -0,0 +1,32 @@
+{
+  "db_name": "PostgreSQL",
+  "query": "SELECT\n    p.id,\n    link,\n    clean_summary\nFROM\n    post AS p\nINNER JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts\nWHERE search_summary IS NULL;\n",
+  "describe": {
+    "columns": [
+      {
+        "ordinal": 0,
+        "name": "id",
+        "type_info": "Int4"
+      },
+      {
+        "ordinal": 1,
+        "name": "link",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 2,
+        "name": "clean_summary",
+        "type_info": "Text"
+      }
+    ],
+    "parameters": {
+      "Left": []
+    },
+    "nullable": [
+      false,
+      false,
+      true
+    ]
+  },
+  "hash": "3900728293604ff2c174c208386699b1898f9e74447963b931c7ba1c94d75b7b"
+}
server/.sqlx/query-8c1b3c78649135e98b89092237750088433f7ff1b7c2ddeedec553406ea9f203.json (24 lines, generated, new file)

@@ -0,0 +1,24 @@
+{
+  "db_name": "PostgreSQL",
+  "query": "SELECT COUNT(*) AS count\nFROM\n    post\nWHERE\n    (\n        $1::text IS NULL\n        OR site = $1\n    )\n    AND (\n        NOT $2\n        OR NOT is_read\n    )\n    AND (\n        $3::text IS NULL\n        OR TO_TSVECTOR('english', search_summary)\n        @@ WEBSEARCH_TO_TSQUERY('english', $3)\n    )\n",
+  "describe": {
+    "columns": [
+      {
+        "ordinal": 0,
+        "name": "count",
+        "type_info": "Int8"
+      }
+    ],
+    "parameters": {
+      "Left": [
+        "Text",
+        "Bool",
+        "Text"
+      ]
+    },
+    "nullable": [
+      null
+    ]
+  },
+  "hash": "8c1b3c78649135e98b89092237750088433f7ff1b7c2ddeedec553406ea9f203"
+}
server/.sqlx/query-e118f546c628661023aa25803bb29affb6cd25eca63246e5ace5b90a845d76ac.json (24 lines, generated, deleted file)

@@ -1,24 +0,0 @@
-{
-  "db_name": "PostgreSQL",
-  "query": "SELECT\n    COUNT(*) count\nFROM\n    post\nWHERE\n    (\n        $1 :: text IS NULL\n        OR site = $1\n    )\n    AND (\n        NOT $2\n        OR NOT is_read\n    )\n    AND (\n        $3 :: text IS NULL\n        OR to_tsvector('english', summary) @@ websearch_to_tsquery('english', $3)\n    )\n",
-  "describe": {
-    "columns": [
-      {
-        "ordinal": 0,
-        "name": "count",
-        "type_info": "Int8"
-      }
-    ],
-    "parameters": {
-      "Left": [
-        "Text",
-        "Bool",
-        "Text"
-      ]
-    },
-    "nullable": [
-      null
-    ]
-  },
-  "hash": "e118f546c628661023aa25803bb29affb6cd25eca63246e5ace5b90a845d76ac"
-}
server/.sqlx/query-ef8327f039dbfa8f4e59b7a77a6411252a346bf51cf940024a17d9fbb2df173c.json (15 lines, generated, new file)

@@ -0,0 +1,15 @@
+{
+  "db_name": "PostgreSQL",
+  "query": "UPDATE post SET search_summary = $1 WHERE id = $2",
+  "describe": {
+    "columns": [],
+    "parameters": {
+      "Left": [
+        "Text",
+        "Int4"
+      ]
+    },
+    "nullable": []
+  },
+  "hash": "ef8327f039dbfa8f4e59b7a77a6411252a346bf51cf940024a17d9fbb2df173c"
+}
server/.sqlx/query-99114d4840067acb12d9a41ef036bdd8ecf87cfdde8ce4985821485816af5213.json → server/.sqlx/query-fc4607f02cc76a5f3a6629cce4507c74f52ae44820897b47365da3f339d1da06.json (generated)

@@ -1,6 +1,6 @@
 {
   "db_name": "PostgreSQL",
-  "query": "SELECT\n    site,\n    date,\n    is_read,\n    title,\n    uid,\n    name\nFROM\n    post p\n    JOIN feed f ON p.site = f.slug\nWHERE\n    ($1::text IS NULL OR site = $1)\n    AND (\n        NOT $2\n        OR NOT is_read\n    )\n    AND (\n        $5 :: text IS NULL\n        OR to_tsvector('english', summary) @@ websearch_to_tsquery('english', $5)\n    )\nORDER BY\n    date DESC,\n    title OFFSET $3\nLIMIT\n    $4\n",
+  "query": "SELECT\n    site,\n    date,\n    is_read,\n    title,\n    uid,\n    name\nFROM\n    post p\n    JOIN feed f ON p.site = f.slug\nWHERE\n    ($1::text IS NULL OR site = $1)\n    AND (\n        NOT $2\n        OR NOT is_read\n    )\n    AND (\n        $5 :: text IS NULL\n        OR to_tsvector('english', search_summary) @@ websearch_to_tsquery('english', $5)\n    )\nORDER BY\n    date DESC,\n    title OFFSET $3\nLIMIT\n    $4\n",
   "describe": {
     "columns": [
       {
@@ -52,5 +52,5 @@
       true
     ]
   },
-  "hash": "99114d4840067acb12d9a41ef036bdd8ecf87cfdde8ce4985821485816af5213"
+  "hash": "fc4607f02cc76a5f3a6629cce4507c74f52ae44820897b47365da3f339d1da06"
 }
server/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "letterbox-server"
-version = "0.0.119"
+version = "0.0.120"
 edition = "2021"
 default-run = "letterbox-server"
 
@@ -26,6 +26,7 @@ maplit = "1.0.2"
 memmap = "0.7.0"
 notmuch = { path = "../notmuch" }
 opentelemetry = "0.27.1"
+regex = "1.11.1"
 reqwest = { version = "0.12.7", features = ["blocking"] }
 rocket = { version = "0.5.0-rc.2", features = ["json"] }
 rocket_cors = "0.6.0"
server/migrations/20250128234348_add-search-summary.down.sql (15 lines, new file)

@@ -0,0 +1,15 @@
+-- Add down migration script here
+BEGIN;
+DROP INDEX IF EXISTS post_search_summary_idx;
+ALTER TABLE post DROP search_summary;
+
+-- CREATE INDEX post_summary_idx ON post USING gin (to_tsvector(
+--     'english',
+--     regexp_replace(
+--         regexp_replace(summary, '<[^>]+>', ' ', 'g'),
+--         '\s+',
+--         ' ',
+--         'g'
+--     )
+-- ));
+COMMIT;
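An aside, not part of the diff: after running either migration, a quick way to confirm which of the two full-text indexes is in place is the standard `pg_indexes` catalog view (table and index names taken from the migrations above):

```sql
-- Check which of the two GIN indexes currently exists on post.
SELECT indexname, indexdef
FROM pg_indexes
WHERE tablename = 'post'
  AND indexname IN ('post_summary_idx', 'post_search_summary_idx');
```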
server/migrations/20250128234348_add-search-summary.up.sql (14 lines, new file)

@@ -0,0 +1,14 @@
+-- Add up migration script here
+BEGIN;
+DROP INDEX IF EXISTS post_summary_idx;
+ALTER TABLE post ADD search_summary TEXT;
+CREATE INDEX post_search_summary_idx ON post USING gin (
+    to_tsvector('english', search_summary)
+);
+UPDATE post SET search_summary = regexp_replace(
+    regexp_replace(summary, '<[^>]+>', ' ', 'g'),
+    '\s+',
+    ' ',
+    'g'
+);
+COMMIT;
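An aside, not part of the diff: the nested `regexp_replace` pair in the backfill first turns HTML tags into spaces, then collapses runs of whitespace, so `search_summary` ends up holding plain text. A worked example of the same expression on a sample summary:

```sql
-- The migration's regexp pair applied to one summary:
-- tags become spaces, whitespace runs collapse to a single space.
SELECT regexp_replace(
    regexp_replace('<p>Hello,  <b>world</b>!</p>', '<[^>]+>', ' ', 'g'),
    '\s+',
    ' ',
    'g'
);
-- returns ' Hello, world ! '
```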
@@ -1,10 +1,9 @@
-SELECT
-    COUNT(*) count
+SELECT COUNT(*) AS count
 FROM
     post
 WHERE
     (
-        $1 :: text IS NULL
+        $1::text IS NULL
         OR site = $1
     )
     AND (
@@ -12,6 +11,7 @@ WHERE
         OR NOT is_read
     )
     AND (
-        $3 :: text IS NULL
-        OR to_tsvector('english', summary) @@ websearch_to_tsquery('english', $3)
+        $3::text IS NULL
+        OR TO_TSVECTOR('english', search_summary)
+        @@ WEBSEARCH_TO_TSQUERY('english', $3)
     )
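Another aside, not from the diff: `websearch_to_tsquery` is what allows `$3` to be passed through unparsed. It never raises a syntax error on user input, and it understands quoted phrases, `OR`, and a leading `-` for negation:

```sql
-- Web-style input maps onto tsquery operators without ever erroring.
SELECT websearch_to_tsquery('english', '"sharp knife" OR spoon -fork');
-- roughly: ('sharp' <-> 'knife') | ('spoon' & !'fork')
```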
server/sql/need-search-summary.sql (8 lines, new file)

@@ -0,0 +1,8 @@
+SELECT
+    p.id,
+    link,
+    clean_summary
+FROM
+    post AS p
+INNER JOIN feed AS f ON p.site = f.slug -- necessary to weed out nzb posts
+WHERE search_summary IS NULL;
@@ -16,7 +16,7 @@ WHERE
     )
     AND (
         $5 :: text IS NULL
-        OR to_tsvector('english', summary) @@ websearch_to_tsquery('english', $5)
+        OR to_tsvector('english', search_summary) @@ websearch_to_tsquery('english', $5)
     )
 ORDER BY
     date DESC,
@@ -7,6 +7,7 @@ use std::{error::Error, io::Cursor, str::FromStr};
 
 use async_graphql::{extensions, http::GraphiQLSource, EmptySubscription, Schema};
 use async_graphql_rocket::{GraphQLQuery, GraphQLRequest, GraphQLResponse};
+use cacher::FilesystemCacher;
 #[cfg(feature = "tantivy")]
 use letterbox_server::tantivy::TantivyConnection;
 use letterbox_server::{
@@ -220,9 +221,11 @@ async fn main() -> Result<(), Box<dyn Error>> {
     #[cfg(feature = "tantivy")]
     let tantivy_conn = TantivyConnection::new(&config.newsreader_tantivy_db_path)?;
 
+    let cacher = FilesystemCacher::new(&config.slurp_cache_path)?;
     let schema = Schema::build(QueryRoot, Mutation, EmptySubscription)
         .data(Notmuch::default())
         .data(config)
+        .data(cacher)
         .data(pool.clone());
 
     #[cfg(feature = "tantivy")]
@@ -5,6 +5,7 @@ use async_graphql::{
     Context, EmptySubscription, Enum, Error, FieldResult, InputObject, Object, Schema,
     SimpleObject, Union,
 };
+use cacher::FilesystemCacher;
 use log::info;
 use notmuch::Notmuch;
 use serde::{Deserialize, Serialize};
@@ -14,7 +15,7 @@ use tracing::instrument;
 
 #[cfg(feature = "tantivy")]
 use crate::tantivy::TantivyConnection;
-use crate::{config::Config, newsreader, nm, Query};
+use crate::{newsreader, nm, Query};
 
 /// # Number of seconds since the Epoch
 pub type UnixTime = isize;
@@ -478,8 +479,8 @@ impl QueryRoot {
     #[instrument(skip_all, fields(thread_id=thread_id, request_id=request_id()))]
     async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result<Thread, Error> {
         let nm = ctx.data_unchecked::<Notmuch>();
+        let cacher = ctx.data_unchecked::<FilesystemCacher>();
         let pool = ctx.data_unchecked::<PgPool>();
-        let config = ctx.data_unchecked::<Config>();
         let debug_content_tree = ctx
             .look_ahead()
             .field("messages")
@@ -487,7 +488,7 @@ impl QueryRoot {
             .field("contentTree")
             .exists();
         if newsreader::is_newsreader_thread(&thread_id) {
-            Ok(newsreader::thread(config, pool, thread_id).await?)
+            Ok(newsreader::thread(cacher, pool, thread_id).await?)
         } else {
             Ok(nm::thread(nm, pool, thread_id, debug_content_tree).await?)
         }
@@ -609,11 +610,13 @@ impl Mutation {
     #[instrument(skip_all, fields(request_id=request_id()))]
     async fn refresh<'ctx>(&self, ctx: &Context<'ctx>) -> Result<bool, Error> {
         let nm = ctx.data_unchecked::<Notmuch>();
+        let cacher = ctx.data_unchecked::<FilesystemCacher>();
+        let pool = ctx.data_unchecked::<PgPool>();
         info!("{}", String::from_utf8_lossy(&nm.new()?));
+        newsreader::refresh(pool, cacher).await?;
         #[cfg(feature = "tantivy")]
         {
             let tantivy = ctx.data_unchecked::<TantivyConnection>();
-            let pool = ctx.data_unchecked::<PgPool>();
             // TODO: parallelize
             tantivy.refresh(pool).await?;
         }
@@ -19,6 +19,7 @@ use lol_html::{
     RewriteStrSettings,
 };
 use maplit::{hashmap, hashset};
+use regex::Regex;
 use scraper::{Html, Selector};
 use sqlx::types::time::PrimitiveDateTime;
 use thiserror::Error;
@@ -105,6 +106,8 @@ impl Transformer for StripHtml {
                 ..RewriteStrSettings::default()
             },
         )?;
+        let re = Regex::new(r"\s+").expect("failed to parse regex");
+        let text = re.replace_all(&text, " ").to_string();
 
         Ok(text)
     }
@@ -250,13 +253,13 @@ impl Transformer for AddOutlink {
     }
 }
 
-struct SlurpContents {
-    cacher: Arc<Mutex<FilesystemCacher>>,
+struct SlurpContents<'c> {
+    cacher: &'c FilesystemCacher,
     inline_css: bool,
     site_selectors: HashMap<String, Vec<Selector>>,
 }
 
-impl SlurpContents {
+impl<'c> SlurpContents<'c> {
     fn get_selectors(&self, link: &Url) -> Option<&[Selector]> {
         for (host, selector) in self.site_selectors.iter() {
             if link.host_str().map(|h| h.contains(host)).unwrap_or(false) {
@@ -268,7 +271,7 @@ impl SlurpContents {
 }
 
 #[async_trait]
-impl Transformer for SlurpContents {
+impl<'c> Transformer for SlurpContents<'c> {
     fn should_run(&self, link: &Option<Url>, html: &str) -> bool {
         let mut will_slurp = false;
         if let Some(link) = link {
@@ -294,7 +297,7 @@ impl Transformer for SlurpContents {
         let Some(selectors) = self.get_selectors(&link) else {
             return Ok(html.to_string());
         };
-        let cacher = self.cacher.lock().await;
+        let cacher = self.cacher;
         let body = if let Some(body) = cacher.get(link.as_str()) {
             String::from_utf8_lossy(&body).to_string()
         } else {
@@ -1,4 +1,4 @@
-use std::sync::Arc;
+use std::collections::HashMap;
 
 use cacher::FilesystemCacher;
 use log::info;
@@ -6,17 +6,15 @@ use maplit::hashmap;
 use scraper::Selector;
 use shared::compute_color;
 use sqlx::postgres::PgPool;
-use tokio::sync::Mutex;
 use tracing::instrument;
 use url::Url;
 
 use crate::{
     clean_title, compute_offset_limit,
-    config::Config,
     error::ServerError,
     graphql::{Corpus, NewsPost, Tag, Thread, ThreadSummary},
     thread_summary_from_row, AddOutlink, FrameImages, Query, SanitizeHtml, SlurpContents,
-    ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX, NEWSREADER_THREAD_PREFIX,
+    StripHtml, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX, NEWSREADER_THREAD_PREFIX,
 };
 
 pub fn is_newsreader_query(query: &Query) -> bool {
@@ -173,7 +171,7 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
 
 #[instrument(name = "newsreader::thread", skip_all, fields(thread_id=%thread_id))]
 pub async fn thread(
-    config: &Config,
+    cacher: &FilesystemCacher,
     pool: &PgPool,
     thread_id: String,
 ) -> Result<Thread, ServerError> {
@@ -191,73 +189,11 @@ pub async fn thread(
     // TODO: remove the various places that have this as an Option
     let link = Some(Url::parse(&r.link)?);
     let mut body = r.clean_summary.unwrap_or("NO SUMMARY".to_string());
-    let cacher = Arc::new(Mutex::new(FilesystemCacher::new(&config.slurp_cache_path)?));
-    let body_tranformers: Vec<Box<dyn Transformer>> = vec![
+    let body_transformers: Vec<Box<dyn Transformer>> = vec![
         Box::new(SlurpContents {
             cacher,
             inline_css: true,
-            site_selectors: hashmap![
-                "atmeta.com".to_string() => vec![
-                    Selector::parse("div.entry-content").unwrap(),
-                ],
-                "blog.prusa3d.com".to_string() => vec![
-                    Selector::parse("article.content .post-block").unwrap(),
-                ],
-                "blog.cloudflare.com".to_string() => vec![
-                    Selector::parse(".author-lists .author-name-tooltip").unwrap(),
-                    Selector::parse(".post-full-content").unwrap()
-                ],
-                "blog.zsa.io".to_string() => vec![
-                    Selector::parse("section.blog-article").unwrap(),
-                ],
-                "engineering.fb.com".to_string() => vec![
-                    Selector::parse("article").unwrap(),
-                ],
-                "grafana.com".to_string() => vec![
-                    Selector::parse(".blog-content").unwrap(),
-                ],
-                "hackaday.com".to_string() => vec![
-                    Selector::parse("div.entry-featured-image").unwrap(),
-                    Selector::parse("div.entry-content").unwrap()
-                ],
-                "ingowald.blog".to_string() => vec![
-                    Selector::parse("article").unwrap(),
-                ],
-                "jvns.ca".to_string() => vec![
-                    Selector::parse("article").unwrap(),
-                ],
-                "mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()],
-                "natwelch.com".to_string() => vec![
-                    Selector::parse("article div.prose").unwrap(),
-                ],
-                "rustacean-station.org".to_string() => vec![
-                    Selector::parse("article").unwrap(),
-                ],
-                "slashdot.org".to_string() => vec![
-                    Selector::parse("span.story-byline").unwrap(),
-                    Selector::parse("div.p").unwrap(),
-                ],
-                "theonion.com".to_string() => vec![
-                    // Single image joke w/ title
-                    Selector::parse("article > section > div > figure").unwrap(),
-                    // Single cartoon
-                    Selector::parse("article > div > div > figure").unwrap(),
-                    // Image at top of article
-                    Selector::parse("article > header > div > div > figure").unwrap(),
-                    // Article body
-                    Selector::parse("article .entry-content > *").unwrap(),
-                ],
-                "trofi.github.io".to_string() => vec![
-                    Selector::parse("#content").unwrap(),
-                ],
-                "www.redox-os.org".to_string() => vec![
-                    Selector::parse("div.content").unwrap(),
-                ],
-                "www.smbc-comics.com".to_string() => vec![
-                    Selector::parse("img#cc-comic").unwrap(),
-                    Selector::parse("div#aftercomic img").unwrap(),
-                ],
-            ],
+            site_selectors: slurp_contents_selectors(),
         }),
         Box::new(FrameImages),
         Box::new(AddOutlink),
@@ -268,7 +204,7 @@ pub async fn thread(
             base_url: &link,
         }),
     ];
-    for t in body_tranformers.iter() {
+    for t in body_transformers.iter() {
         if t.should_run(&link, &body) {
             body = t.transform(&link, &body).await?;
         }
@@ -316,3 +252,102 @@ pub async fn set_read_status<'ctx>(
     }
     Ok(true)
 }
+#[instrument(name = "newsreader::refresh", skip_all)]
+pub async fn refresh<'ctx>(pool: &PgPool, cacher: &FilesystemCacher) -> Result<bool, ServerError> {
+    let body_transformers: Vec<Box<dyn Transformer>> = vec![
+        Box::new(SlurpContents {
+            cacher,
+            inline_css: true,
+            site_selectors: slurp_contents_selectors(),
+        }),
+        Box::new(StripHtml),
+    ];
+
+    let rows = sqlx::query_file!("sql/need-search-summary.sql",)
+        .fetch_all(pool)
+        .await?;
+    for r in rows {
+        let link = Url::parse(&r.link)?;
+        info!("adding {link} to search index");
+        let link = Some(link);
+        let mut body = r.clean_summary.unwrap_or("NO SUMMARY".to_string());
+        for t in body_transformers.iter() {
+            if t.should_run(&link, &body) {
+                body = t.transform(&link, &body).await?;
+            }
+        }
+        sqlx::query!(
+            "UPDATE post SET search_summary = $1 WHERE id = $2",
+            body,
+            r.id
+        )
+        .execute(pool)
+        .await?;
+    }
+    Ok(true)
+}
+
+fn slurp_contents_selectors() -> HashMap<String, Vec<Selector>> {
+    hashmap![
+        "atmeta.com".to_string() => vec![
+            Selector::parse("div.entry-content").unwrap(),
+        ],
+        "blog.prusa3d.com".to_string() => vec![
+            Selector::parse("article.content .post-block").unwrap(),
+        ],
+        "blog.cloudflare.com".to_string() => vec![
+            Selector::parse(".author-lists .author-name-tooltip").unwrap(),
+            Selector::parse(".post-full-content").unwrap()
+        ],
+        "blog.zsa.io".to_string() => vec![
+            Selector::parse("section.blog-article").unwrap(),
+        ],
+        "engineering.fb.com".to_string() => vec![
+            Selector::parse("article").unwrap(),
+        ],
+        "grafana.com".to_string() => vec![
+            Selector::parse(".blog-content").unwrap(),
+        ],
+        "hackaday.com".to_string() => vec![
+            Selector::parse("div.entry-featured-image").unwrap(),
+            Selector::parse("div.entry-content").unwrap()
+        ],
+        "ingowald.blog".to_string() => vec![
+            Selector::parse("article").unwrap(),
+        ],
+        "jvns.ca".to_string() => vec![
+            Selector::parse("article").unwrap(),
+        ],
+        "mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()],
+        "natwelch.com".to_string() => vec![
+            Selector::parse("article div.prose").unwrap(),
+        ],
+        "rustacean-station.org".to_string() => vec![
+            Selector::parse("article").unwrap(),
+        ],
+        "slashdot.org".to_string() => vec![
+            Selector::parse("span.story-byline").unwrap(),
+            Selector::parse("div.p").unwrap(),
+        ],
+        "theonion.com".to_string() => vec![
+            // Single image joke w/ title
+            Selector::parse("article > section > div > figure").unwrap(),
+            // Single cartoon
+            Selector::parse("article > div > div > figure").unwrap(),
+            // Image at top of article
+            Selector::parse("article > header > div > div > figure").unwrap(),
+            // Article body
+            Selector::parse("article .entry-content > *").unwrap(),
+        ],
+        "trofi.github.io".to_string() => vec![
+            Selector::parse("#content").unwrap(),
+        ],
+        "www.redox-os.org".to_string() => vec![
+            Selector::parse("div.content").unwrap(),
+        ],
+        "www.smbc-comics.com".to_string() => vec![
+            Selector::parse("img#cc-comic").unwrap(),
+            Selector::parse("div#aftercomic img").unwrap(),
+        ],
+    ]
+}
@@ -696,7 +696,6 @@ fn walk_attachments_inner<T, F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy>(
 fn extract_attachments(m: &ParsedMail, id: &str) -> Result<Vec<Attachment>, ServerError> {
     let mut attachments = Vec::new();
     for (idx, sp) in m.subparts.iter().enumerate() {
-        info!("sp: {:?}", sp.headers);
         if let Some(attachment) = extract_attachment(sp, id, &[idx]) {
             // Filter out inline attachements, they're flattened into the body of the message.
             if attachment.disposition == DispositionType::Attachment {
shared/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "shared"
-version = "0.0.119"
+version = "0.0.120"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -1,5 +1,5 @@
 [package]
-version = "0.0.119"
+version = "0.0.120"
 name = "letterbox"
 repository = "https://github.com/seed-rs/seed-quickstart"
 authors = ["Bill Thiede <git@xinu.tv>"]