Compare commits

...

58 Commits

Author SHA1 Message Date
86805f38e3 Load slurp config from toml file 2024-09-21 12:52:08 -07:00
62b17bd6a6 Bumping version to 0.0.29 2024-09-20 08:56:58 -07:00
c0bac99d5a server: add slurp config for zsa blog 2024-09-20 08:56:45 -07:00
3b69c5e74b Bumping version to 0.0.28 2024-09-19 17:06:03 -07:00
539fd469cc server: create index when missing 2024-09-19 17:05:47 -07:00
442688c35c web: lint 2024-09-19 16:54:18 -07:00
da27f02237 Bumping version to 0.0.27 2024-09-19 16:52:35 -07:00
9460e354b7 server: cargo sqlx prepare 2024-09-19 16:52:26 -07:00
6bab128ed9 Bumping version to 0.0.26 2024-09-19 16:33:50 -07:00
3856b4ca5a server: try different cacher url 2024-09-19 16:33:40 -07:00
bef39eefa5 Bumping version to 0.0.25 2024-09-19 16:08:20 -07:00
b0366c7b4d server: try non-https to see if that works 2024-09-19 16:07:59 -07:00
ca02d84d63 Bumping version to 0.0.24 2024-09-19 16:01:55 -07:00
461d5de886 server: change internal git url 2024-09-19 16:01:41 -07:00
f8134dad7a Bumping version to 0.0.23 2024-09-19 15:53:56 -07:00
30f510bb03 server: WIP tantivy, cache slurps, use shared::compute_color, 2024-09-19 15:53:09 -07:00
e7cbf9cc45 shared: remove debug logging 2024-09-19 13:54:47 -07:00
5108213af5 web: use shared compute_color 2024-09-19 13:49:24 -07:00
d148f625ac shared: add compute_color 2024-09-19 13:48:56 -07:00
a9b8f5a88f Bumping version to 0.0.22 2024-09-16 20:00:16 -07:00
539b584d9b web: fix broken build 2024-09-16 20:00:06 -07:00
2f8d83fc4b Bumping version to 0.0.21 2024-09-16 19:52:28 -07:00
86ee1257fa web: better progress bar 2024-09-16 19:52:20 -07:00
03f1035e0e Bumping version to 0.0.20 2024-09-12 22:38:18 -07:00
bd578191a8 web: add scroll to top button and squelch some debug logging 2024-09-12 22:37:58 -07:00
d4fc2e2ef1 Bumping version to 0.0.19 2024-09-12 15:41:01 -07:00
cde30de81c web: explicitly set progress to zero when not in thread/news view 2024-09-12 15:40:42 -07:00
96be74e3ee Bumping version to 0.0.18 2024-09-12 15:32:30 -07:00
b78d34b27e web: disable bulma styling for .number 2024-09-12 15:32:18 -07:00
b4b64c33a6 Bumping version to 0.0.17 2024-09-12 10:07:00 -07:00
47b1875022 server: tweak cloudflare and prusa slurp config 2024-09-12 10:06:46 -07:00
b06cbd1381 Bumping version to 0.0.16 2024-09-12 10:03:26 -07:00
9e35f8ca6c web: fix <em> looking like a button 2024-09-12 10:01:58 -07:00
8eaefde67d Bumping version to 0.0.15 2024-09-12 09:28:14 -07:00
d5a3324837 server: slurp config for prusa blog and squelch some info logging 2024-09-12 09:27:57 -07:00
f5c90d8770 Bumping version to 0.0.14 2024-09-11 11:46:04 -07:00
825a125a62 web: redox specific styling 2024-09-11 11:45:53 -07:00
da7cf37dae Bumping version to 0.0.13 2024-09-11 11:41:27 -07:00
1985ae1f49 server: add slurp configs for facebook and redox 2024-09-11 11:41:09 -07:00
91eb3019f9 Bumping version to 0.0.12 2024-09-09 20:31:07 -07:00
66e8e00a9b web: remove dead code 2024-09-09 20:21:51 -07:00
4b8923d852 web: more accurate reading progress bar 2024-09-09 20:21:13 -07:00
baba720749 Bumping version to 0.0.11 2024-09-02 13:36:18 -07:00
1ec22599cc web: make pre blocks look like code blocks in email 2024-09-02 13:35:58 -07:00
c69017bc36 Bumping version to 0.0.10 2024-09-02 13:19:11 -07:00
48bf57fbbe web: more pleasant color scheme for code blocks in email 2024-09-02 13:18:49 -07:00
3491856784 Bumping version to 0.0.9 2024-09-01 16:17:35 -07:00
f887c15b46 web: address lint 2024-09-01 16:17:27 -07:00
7786f850d1 Bumping version to 0.0.8 2024-09-01 16:16:09 -07:00
cad778734e web: rename Msg::Reload->Refresh and create proper Reload 2024-09-01 16:15:38 -07:00
1210f7038a Bumping version to 0.0.7 2024-09-01 16:09:14 -07:00
f9ab7284a3 web: remove obsolete Makefile 2024-09-01 16:09:04 -07:00
100865c923 server: use same html cleanup idiom in nm as we do in newreader 2024-09-01 16:08:25 -07:00
b8c1710a83 dev: watch for git commits and rebuild on change 2024-09-01 16:07:22 -07:00
215b8cd41d shared: ignore dirty, if git is present we're developing
When developing, the dirty flag can get out of sync between client and server if you're
only doing development in one.
2024-09-01 15:57:02 -07:00
487d7084c3 Bumping version to 0.0.6 2024-09-01 15:48:41 -07:00
b1e761b26f web: don't show progress bar until 400px have scrolled 2024-09-01 15:48:11 -07:00
3efe90ca21 Update release makefile 2024-09-01 15:40:19 -07:00
28 changed files with 1993 additions and 301 deletions

1571
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
.PHONEY: commit
commit:
# Bump the crate versions (the script also commits the bump) and push the result.
# Declared phony so a stray file named `release` can never mask the target.
.PHONY: release
release:
	bash scripts/update-crate-version.sh
	git push
all: commit
all: release

4
dev.sh
View File

@@ -1,7 +1,7 @@
cd -- "$( dirname -- "${BASH_SOURCE[0]}" )"
tmux new-session -d -s letterbox-dev
tmux rename-window web
tmux send-keys "cd web; trunk serve -w ../shared -w ../notmuch -w ./" C-m
tmux send-keys "cd web; trunk serve -w ../.git -w ../shared -w ../notmuch -w ./" C-m
tmux new-window -n server
tmux send-keys "cd server; cargo watch -c -x run -w ../shared -w ../notmuch -w ./" C-m
tmux send-keys "cd server; cargo watch -c -x run -w ../.git -w ../shared -w ../notmuch -w ./" C-m
tmux attach -d -t letterbox-dev

View File

@@ -1,6 +1,6 @@
[package]
name = "notmuch"
version = "0.0.5"
version = "0.0.29"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@@ -1,6 +1,6 @@
[package]
name = "procmail2notmuch"
version = "0.0.5"
version = "0.0.29"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@@ -2,4 +2,4 @@
set -e -x
cargo-set-version set-version --bump patch
VERSION="$(awk -F\" '/^version/ {print $2}' server/Cargo.toml)"
git commit */Cargo.toml -m "Bumping version to ${VERSION:?}"
git commit Cargo.lock */Cargo.toml -m "Bumping version to ${VERSION:?}"

View File

@@ -0,0 +1,62 @@
{
"db_name": "PostgreSQL",
"query": "SELECT\n site,\n title,\n summary,\n link,\n date,\n is_read,\n uid,\n id\nFROM post\n",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "site",
"type_info": "Text"
},
{
"ordinal": 1,
"name": "title",
"type_info": "Text"
},
{
"ordinal": 2,
"name": "summary",
"type_info": "Text"
},
{
"ordinal": 3,
"name": "link",
"type_info": "Text"
},
{
"ordinal": 4,
"name": "date",
"type_info": "Timestamp"
},
{
"ordinal": 5,
"name": "is_read",
"type_info": "Bool"
},
{
"ordinal": 6,
"name": "uid",
"type_info": "Text"
},
{
"ordinal": 7,
"name": "id",
"type_info": "Int4"
}
],
"parameters": {
"Left": []
},
"nullable": [
true,
true,
true,
true,
true,
true,
false,
false
]
},
"hash": "1b2244c9b9b64a1395d8d266f5df5352242bbe5efe481b0852e1c1d4b40584a7"
}

View File

@@ -1,6 +1,6 @@
[package]
name = "server"
version = "0.0.5"
version = "0.0.29"
edition = "2021"
default-run = "server"
@@ -13,6 +13,7 @@ async-graphql = { version = "6.0.11", features = ["log"] }
async-graphql-rocket = "6.0.11"
async-trait = "0.1.81"
build-info = "0.0.38"
cacher = {git = "http://git-private.h.xinu.tv/wathiede/cacher.git"}
css-inline = "0.13.0"
glog = "0.1.0"
html-escape = "0.2.13"
@@ -31,6 +32,7 @@ serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.87"
shared = { path = "../shared" }
sqlx = { version = "0.7.4", features = ["postgres", "runtime-tokio", "time"] }
tantivy = "0.22.0"
thiserror = "1.0.37"
tokio = "1.26.0"
url = "2.5.2"

View File

@@ -1,6 +1,8 @@
[release]
address = "0.0.0.0"
port = 9345
newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
newsreader_tantivy_db_path = "../target/database/newsreader"
[debug]
address = "0.0.0.0"
@@ -8,3 +10,45 @@ port = 9345
# Uncomment to make it production like.
#log_level = "critical"
newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
newsreader_tantivy_db_path = "../target/database/newsreader"
slurp_cache_path = "/net/nasx/x/letterbox/slurp"
[debug.slurp_site_selectors]
"atmeta.com" = [
"div.entry-content"
]
"blog.prusa3d.com" = [
"article.content .post-block"
]
"blog.cloudflare.com" = [
".author-lists .author-name-tooltip",
".post-full-content"
]
"blog.zsa.io" = [
"section.blog-article"
]
"engineering.fb.com" = [
"article"
]
"hackaday.com" = [
"div.entry-featured-image",
"div.entry-content"
]
"mitchellh.com" = [
"div.w-full"
]
"natwelch.com" = [
"article div.prose"
]
"slashdot.org" = [
"span.story-byline",
"div.p"
]
"www.redox-os.org" = [
"div.content"
]
"www.smbc-comics.com" = [
"img#cc-comic",
"div#aftercomic img"
]

10
server/sql/all-posts.sql Normal file
View File

@@ -0,0 +1,10 @@
SELECT
site,
title,
summary,
link,
date,
is_read,
uid,
id
FROM post

View File

@@ -18,18 +18,14 @@ use rocket::{
Response, State,
};
use rocket_cors::{AllowedHeaders, AllowedOrigins};
use serde::Deserialize;
use server::{
config::Config,
error::ServerError,
graphql::{Attachment, GraphqlSchema, Mutation, QueryRoot},
nm::{attachment_bytes, cid_attachment_bytes},
};
use sqlx::postgres::PgPool;
#[derive(Deserialize)]
struct Config {
newsreader_database_url: String,
}
use tantivy::{Index, IndexWriter};
#[get("/refresh")]
async fn refresh(nm: &State<Notmuch>) -> Result<Json<String>, Debug<NotmuchError>> {
@@ -170,6 +166,126 @@ fn graphiql() -> content::RawHtml<String> {
content::RawHtml(GraphiQLSource::build().endpoint("/api/graphql").finish())
}
/// Rocket handler for `POST /create-news-db` (relative to the mount point).
///
/// Delegates the actual work to [`create_news_db_impl`] and, on success, answers with a
/// one-line message naming the configured `newsreader_tantivy_db_path`.
///
/// # Errors
/// Any [`ServerError`] from `create_news_db_impl` is returned wrapped in `Debug`.
#[rocket::post("/create-news-db")]
fn create_news_db(config: &State<Config>) -> Result<String, Debug<ServerError>> {
    let config: &Config = config.inner();
    create_news_db_impl(config)?;

    let db_path = &config.newsreader_tantivy_db_path;
    Ok(format!("DB created in {db_path}\n"))
}
/// Creates a fresh, empty tantivy index in `config.newsreader_tantivy_db_path`.
///
/// Anything already stored at that path is deleted first, then the directory is recreated and
/// an index with the newsreader schema is created inside it:
///
/// | field     | options           |
/// |-----------|-------------------|
/// | `site`    | `STRING | STORED` |
/// | `title`   | `TEXT | STORED`   |
/// | `summary` | `TEXT`            |
/// | `link`    | `STRING | STORED` |
/// | `date`    | `FAST` (date)     |
/// | `is_read` | `FAST` (bool)     |
/// | `uid`     | `STRING | STORED` |
/// | `id`      | `FAST` (i64)      |
///
/// # Errors
/// Returns a [`ServerError`] if the old directory cannot be removed (for any reason other than
/// it not existing), the new directory cannot be created, or tantivy fails to create the index.
fn create_news_db_impl(config: &Config) -> Result<(), ServerError> {
    use tantivy::schema::*;

    let db_path = &config.newsreader_tantivy_db_path;

    // A missing directory is the expected state on first start-up (this function is the
    // fallback when opening the index fails), so "not found" must not abort index creation.
    match std::fs::remove_dir_all(db_path) {
        Ok(()) => {}
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
        Err(err) => return Err(ServerError::from(err)),
    }
    std::fs::create_dir_all(db_path).map_err(ServerError::from)?;

    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("site", STRING | STORED);
    schema_builder.add_text_field("title", TEXT | STORED);
    schema_builder.add_text_field("summary", TEXT);
    schema_builder.add_text_field("link", STRING | STORED);
    schema_builder.add_date_field("date", FAST);
    schema_builder.add_bool_field("is_read", FAST);
    schema_builder.add_text_field("uid", STRING | STORED);
    schema_builder.add_i64_field("id", FAST);
    let schema = schema_builder.build();

    Index::create_in_dir(db_path, schema).map_err(ServerError::from)?;
    Ok(())
}
/// Rocket handler for `POST /reindex-news-db` (relative to the mount point).
///
/// Opens the tantivy index at `config.newsreader_tantivy_db_path`, loads every row returned by
/// `sql/all-posts.sql` from Postgres, and for each row deletes any indexed document with the
/// same `uid` before adding the row as a new document. All changes are committed once at the
/// end; progress is logged every 10,000 rows.
///
/// # Errors
/// Returns a [`ServerError`] (wrapped in `Debug`) when the index cannot be opened or written,
/// a schema field is missing, the SQL query fails, or a row has a NULL value in one of the
/// nullable columns (`site`, `title`, `summary`, `link`, `date`, `is_read`). In the NULL case
/// nothing is committed.
#[rocket::post("/reindex-news-db")]
async fn reindex_news_db(
    pool: &State<PgPool>,
    config: &State<Config>,
) -> Result<String, Debug<ServerError>> {
    use tantivy::{doc, Term};

    let start_time = std::time::Instant::now();
    let pool: &PgPool = pool;

    let index =
        Index::open_in_dir(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
    // 50_000_000 is the memory budget (in bytes) handed to the tantivy index writer.
    let mut index_writer = index.writer(50_000_000).map_err(ServerError::from)?;

    let schema = index.schema();
    let site = schema.get_field("site").map_err(ServerError::from)?;
    let title = schema.get_field("title").map_err(ServerError::from)?;
    let summary = schema.get_field("summary").map_err(ServerError::from)?;
    let link = schema.get_field("link").map_err(ServerError::from)?;
    let date = schema.get_field("date").map_err(ServerError::from)?;
    let is_read = schema.get_field("is_read").map_err(ServerError::from)?;
    let uid = schema.get_field("uid").map_err(ServerError::from)?;
    let id = schema.get_field("id").map_err(ServerError::from)?;

    let rows = sqlx::query_file!("sql/all-posts.sql")
        .fetch_all(pool)
        .await
        .map_err(ServerError::from)?;

    let total = rows.len();
    for (i, r) in rows.into_iter().enumerate() {
        if i % 10_000 == 0 {
            info!(
                "{i}/{total} processed, elapsed {:.2}s",
                start_time.elapsed().as_secs_f32()
            );
        }

        // Every column except `uid` and `id` is nullable in the database. Report a NULL as an
        // error that names the offending post instead of panicking inside the handler.
        let post_uid = r.uid;
        let missing = |column: &str| {
            ServerError::StringError(format!("post '{post_uid}' has NULL {column}"))
        };
        let post_site = r.site.ok_or_else(|| missing("site"))?;
        let post_title = r.title.ok_or_else(|| missing("title"))?;
        // TODO: clean and extract text from HTML
        let post_summary = r.summary.ok_or_else(|| missing("summary"))?;
        let post_link = r.link.ok_or_else(|| missing("link"))?;
        let post_date = r.date.ok_or_else(|| missing("date"))?;
        let post_is_read = r.is_read.ok_or_else(|| missing("is_read"))?;

        // Drop any previously indexed copy of this post so reindexing never duplicates it.
        index_writer.delete_term(Term::from_field_text(uid, &post_uid));
        index_writer
            .add_document(doc!(
                site => post_site,
                title => post_title,
                summary => post_summary,
                link => post_link,
                date => tantivy::DateTime::from_primitive(post_date),
                is_read => post_is_read,
                uid => post_uid,
                id => i64::from(r.id),
            ))
            .map_err(ServerError::from)?;
    }

    index_writer.commit().map_err(ServerError::from)?;

    info!("took {:.2}s to reindex", start_time.elapsed().as_secs_f32());
    Ok(format!(
        "DB reindexed in {}\n",
        config.newsreader_tantivy_db_path
    ))
}
/// Rocket handler for `GET /search-news-db` (relative to the mount point).
///
/// Runs a fixed query against the managed tantivy index, using `site`, `title` and `summary`
/// as the default search fields, and returns the JSON rendering of up to ten matching
/// documents joined by single spaces.
///
/// NOTE(review): the query text is hard-coded to "grapheme"; this looks like a work-in-progress
/// debugging endpoint rather than a general search API.
///
/// # Errors
/// Returns a [`ServerError`] (wrapped in `Debug`) if a schema field is missing, the query does
/// not parse, the search fails, or a matching document cannot be loaded.
#[rocket::get("/search-news-db")]
fn search_news_db(
    index: &State<tantivy::Index>,
    reader: &State<tantivy::IndexReader>,
) -> Result<String, Debug<ServerError>> {
    use tantivy::{collector::TopDocs, query::QueryParser, Document, TantivyDocument};

    let searcher = reader.searcher();
    let schema = index.schema();

    // Resolve the default search fields in the same order as they are declared here.
    let default_fields = ["site", "title", "summary"]
        .into_iter()
        .map(|name| schema.get_field(name).map_err(ServerError::from))
        .collect::<Result<Vec<_>, ServerError>>()?;

    let parser = QueryParser::for_index(index.inner(), default_fields);
    let query = parser.parse_query("grapheme").map_err(ServerError::from)?;

    let hits = searcher
        .search(&query, &TopDocs::with_limit(10))
        .map_err(ServerError::from)?;
    info!("search found {} docs", hits.len());

    let rendered = hits
        .into_iter()
        .map(|(_score, address)| -> Result<String, ServerError> {
            let found: TantivyDocument = searcher.doc(address).map_err(ServerError::from)?;
            Ok(found.to_json(&schema))
        })
        .collect::<Result<Vec<String>, ServerError>>()?;

    Ok(rendered.join(" "))
}
#[rocket::get("/graphql?<query..>")]
async fn graphql_query(schema: &State<GraphqlSchema>, query: GraphQLQuery) -> GraphQLResponse {
query.execute(schema.inner()).await
@@ -183,7 +299,6 @@ async fn graphql_request(
request.execute(schema.inner()).await
}
#[rocket::main]
async fn main() -> Result<(), Box<dyn Error>> {
glog::new()
@@ -213,6 +328,9 @@ async fn main() -> Result<(), Box<dyn Error>> {
.mount(
shared::urls::MOUNT_POINT,
routes![
create_news_db,
reindex_news_db,
search_news_db,
original,
refresh,
show_pretty,
@@ -229,14 +347,33 @@ async fn main() -> Result<(), Box<dyn Error>> {
.attach(AdHoc::config::<Config>());
let config: Config = rkt.figment().extract()?;
info!("Config:\n{config:#?}");
if !std::fs::exists(&config.slurp_cache_path)? {
info!("Creating slurp cache @ '{}'", &config.slurp_cache_path);
std::fs::create_dir_all(&config.slurp_cache_path)?;
}
let pool = PgPool::connect(&config.newsreader_database_url).await?;
let tantivy_newsreader_index = match Index::open_in_dir(&config.newsreader_tantivy_db_path) {
Ok(idx) => idx,
Err(_) => {
create_news_db_impl(&config)?;
Index::open_in_dir(&config.newsreader_tantivy_db_path)?
}
};
let tantivy_newsreader_reader = tantivy_newsreader_index.reader()?;
let schema = Schema::build(QueryRoot, Mutation, EmptySubscription)
.data(Notmuch::default())
.data(config)
.data(pool.clone())
.extension(async_graphql::extensions::Logger)
.finish();
let rkt = rkt.manage(schema).manage(pool).manage(Notmuch::default());
let rkt = rkt
.manage(schema)
.manage(pool)
.manage(Notmuch::default())
.manage(tantivy_newsreader_index)
.manage(tantivy_newsreader_reader);
//.manage(Notmuch::with_config("../notmuch/testdata/notmuch.config"))
rkt.launch().await?;

23
server/src/config.rs Normal file
View File

@@ -0,0 +1,23 @@
use std::{collections::HashMap, fmt::Display, str::FromStr};
use scraper::Selector;
use serde::{de, Deserialize, Deserializer};
#[derive(Debug)]
pub struct DeSelector(pub Selector);
impl<'de> Deserialize<'de> for DeSelector {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Ok(DeSelector(Selector::parse(&s).map_err(de::Error::custom)?))
}
}
/// Server configuration, deserialized from the Rocket/figment configuration at start-up.
///
/// NOTE(review): no field has a serde default, so every key must be present in whichever
/// configuration profile is active — confirm that each profile provides the slurp settings.
#[derive(Debug, Deserialize)]
pub struct Config {
/// Connection URL for the newsreader Postgres database (passed to `PgPool::connect`).
pub newsreader_database_url: String,
/// Directory that holds the tantivy index for newsreader posts.
pub newsreader_tantivy_db_path: String,
/// Directory used by the filesystem cache for slurped pages; created at start-up if missing.
pub slurp_cache_path: String,
/// Selectors to extract per site. The key is matched as a substring of the link's host; the
/// value lists the CSS selectors whose matches make up the slurped content.
pub slurp_site_selectors: HashMap<String, Vec<DeSelector>>,
}

View File

@@ -1,6 +1,8 @@
use std::{convert::Infallible, str::Utf8Error, string::FromUtf8Error};
use mailparse::MailParseError;
use tantivy::TantivyError;
use tantivy::query::QueryParserError;
use thiserror::Error;
use crate::TransformError;
@@ -29,6 +31,10 @@ pub enum ServerError {
StringError(String),
#[error("invalid url: {0}")]
UrlParseError(#[from] url::ParseError),
#[error("tantivy error: {0}")]
TantivyError(#[from] TantivyError),
#[error("tantivy query parse error: {0}")]
QueryParseError(#[from] QueryParserError),
#[error("impossible: {0}")]
InfaillibleError(#[from] Infallible),
}

View File

@@ -8,7 +8,7 @@ use notmuch::Notmuch;
use serde::{Deserialize, Serialize};
use sqlx::postgres::PgPool;
use crate::{newsreader, nm, Query};
use crate::{config::Config, newsreader, nm, Query};
/// # Number of seconds since the Epoch
pub type UnixTime = isize;
@@ -384,6 +384,7 @@ impl QueryRoot {
async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result<Thread, Error> {
let nm = ctx.data_unchecked::<Notmuch>();
let pool = ctx.data_unchecked::<PgPool>();
let config = ctx.data_unchecked::<Config>();
let debug_content_tree = ctx
.look_ahead()
.field("messages")
@@ -392,7 +393,7 @@ impl QueryRoot {
.exists();
// TODO: look at thread_id and conditionally load newsreader
if newsreader::is_newsreader_thread(&thread_id) {
Ok(newsreader::thread(pool, thread_id).await?)
Ok(newsreader::thread(config, pool, thread_id).await?)
} else {
Ok(nm::thread(nm, thread_id, debug_content_tree).await?)
}

View File

@@ -1,11 +1,13 @@
pub mod config;
pub mod error;
pub mod graphql;
pub mod newsreader;
pub mod nm;
use std::{collections::HashMap, convert::Infallible, str::FromStr};
use std::{collections::HashMap, convert::Infallible, str::FromStr, sync::Arc};
use async_trait::async_trait;
use cacher::{Cacher, FilesystemCacher};
use css_inline::{CSSInliner, InlineError, InlineOptions};
use linkify::{LinkFinder, LinkKind};
use log::{error, info, warn};
@@ -16,9 +18,13 @@ use lol_html::{
use maplit::{hashmap, hashset};
use scraper::{Html, Selector};
use thiserror::Error;
use tokio::sync::Mutex;
use url::Url;
use crate::newsreader::{extract_thread_id, is_newsreader_thread};
use crate::{
config::DeSelector,
newsreader::{extract_thread_id, is_newsreader_thread},
};
// TODO: figure out how to use Cow
#[async_trait]
@@ -109,16 +115,17 @@ impl Transformer for InlineStyle {
include_str!("custom.css"),
);
let inline_opts = InlineOptions {
inline_style_tags: false,
inline_style_tags: true,
keep_style_tags: false,
keep_link_tags: false,
keep_link_tags: true,
base_url: None,
load_remote_stylesheets: false,
load_remote_stylesheets: true,
extra_css: Some(css.into()),
preallocate_node_capacity: 32,
..InlineOptions::default()
};
//info!("HTML:\n{html}");
Ok(match CSSInliner::new(inline_opts).inline(&html) {
Ok(inlined_html) => inlined_html,
Err(err) => {
@@ -141,7 +148,6 @@ impl Transformer for FrameImages {
RewriteStrSettings {
element_content_handlers: vec![
element!("img[data-src]", |el| {
info!("found image with data-src {el:?}");
let src = el
.get_attribute("data-src")
.unwrap_or("https://placehold.co/600x400".to_string());
@@ -150,7 +156,6 @@ impl Transformer for FrameImages {
Ok(())
}),
element!("img[data-cfsrc]", |el| {
info!("found image with data-cfsrc {el:?}");
let src = el
.get_attribute("data-cfsrc")
.unwrap_or("https://placehold.co/600x400".to_string());
@@ -159,7 +164,6 @@ impl Transformer for FrameImages {
Ok(())
}),
element!("img[alt], img[title]", |el| {
info!("found image with alt or title {el:?}");
let src = el
.get_attribute("src")
.unwrap_or("https://placehold.co/600x400".to_string());
@@ -214,12 +218,13 @@ impl Transformer for AddOutlink {
}
}
struct SlurpContents {
site_selectors: HashMap<String, Vec<Selector>>,
struct SlurpContents<'h> {
cacher: Arc<Mutex<FilesystemCacher>>,
site_selectors: &'h HashMap<String, Vec<DeSelector>>,
}
impl SlurpContents {
fn get_selectors(&self, link: &Url) -> Option<&[Selector]> {
impl<'h> SlurpContents<'h> {
fn get_selectors(&self, link: &Url) -> Option<&[DeSelector]> {
for (host, selector) in self.site_selectors.iter() {
if link.host_str().map(|h| h.contains(host)).unwrap_or(false) {
return Some(&selector);
@@ -230,7 +235,7 @@ impl SlurpContents {
}
#[async_trait]
impl Transformer for SlurpContents {
impl<'h> Transformer for SlurpContents<'h> {
fn should_run(&self, link: &Option<Url>, _: &str) -> bool {
if let Some(link) = link {
return self.get_selectors(link).is_some();
@@ -244,19 +249,26 @@ impl Transformer for SlurpContents {
let Some(selectors) = self.get_selectors(&link) else {
return Ok(html.to_string());
};
let body = reqwest::get(link.as_str()).await?.text().await?;
let mut cacher = self.cacher.lock().await;
let body = if let Some(body) = cacher.get(link.as_str()) {
info!("cache hit for {link}");
String::from_utf8_lossy(&body).to_string()
} else {
let body = reqwest::get(link.as_str()).await?.text().await?;
cacher.set(link.as_str(), body.as_bytes());
body
};
let doc = Html::parse_document(&body);
let mut results = Vec::new();
for selector in selectors {
if let Some(frag) = doc.select(&selector).next() {
for frag in doc.select(&selector.0) {
results.push(frag.html())
} else {
warn!("couldn't find '{:?}' in {}", selector, link);
return Ok(html.to_string());
// TODO: figure out how to warn if there were no hits
//warn!("couldn't find '{:?}' in {}", selector, link);
}
}
Ok(results.join("<br><br>"))
Ok(results.join(""))
}
}
@@ -295,7 +307,7 @@ pub fn sanitize_html(
) -> Result<String, TransformError> {
let inline_opts = InlineOptions {
inline_style_tags: true,
keep_style_tags: false,
keep_style_tags: true,
keep_link_tags: false,
base_url: None,
load_remote_stylesheets: false,
@@ -338,6 +350,30 @@ pub fn sanitize_html(
el.set_attribute("src", &src)?;
Ok(())
}),
// Add https to href with //<domain name>
element!("link[href]", |el| {
info!("found link[href] {el:?}");
let mut href = el.get_attribute("href").expect("href was required");
if href.starts_with("//") {
warn!("adding https to {href}");
href.insert_str(0, "https:");
}
el.set_attribute("href", &href)?;
Ok(())
}),
// Add https to src with //<domain name>
element!("style[src]", |el| {
let mut src = el.get_attribute("src").expect("src was required");
if src.starts_with("//") {
src.insert_str(0, "https:");
}
el.set_attribute("src", &src)?;
Ok(())
}),
];

View File

@@ -1,24 +1,26 @@
use std::hash::{DefaultHasher, Hash, Hasher};
use std::sync::Arc;
use cacher::FilesystemCacher;
use log::info;
use maplit::hashmap;
use scraper::Selector;
use shared::compute_color;
use sqlx::postgres::PgPool;
use tokio::sync::Mutex;
use url::Url;
use crate::Query;
const TAG_PREFIX: &'static str = "News/";
const THREAD_PREFIX: &'static str = "news:";
use crate::{
compute_offset_limit,
config::Config,
error::ServerError,
graphql::{NewsPost, Tag, Thread, ThreadSummary},
AddOutlink, EscapeHtml, FrameImages, InlineStyle, SanitizeHtml, SlurpContents, StripHtml,
Transformer,
AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml, SlurpContents,
StripHtml, Transformer,
};
const TAG_PREFIX: &'static str = "News/";
const THREAD_PREFIX: &'static str = "news:";
pub fn is_newsreader_search(query: &str) -> bool {
query.contains(TAG_PREFIX)
}
@@ -128,11 +130,9 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
let tags = tags
.into_iter()
.map(|tag| {
let mut hasher = DefaultHasher::new();
tag.site.hash(&mut hasher);
let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
let hex = compute_color(&name);
Tag {
name,
fg_color: "white".to_string(),
@@ -144,7 +144,11 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
Ok(tags)
}
pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerError> {
pub async fn thread(
config: &Config,
pool: &PgPool,
thread_id: String,
) -> Result<Thread, ServerError> {
let id = thread_id
.strip_prefix(THREAD_PREFIX)
.expect("news thread doesn't start with '{THREAD_PREFIX}'")
@@ -173,39 +177,20 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
// TODO: add site specific cleanups. For example:
// * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div>
// * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
let cacher = Arc::new(Mutex::new(FilesystemCacher::new(&config.slurp_cache_path)?));
let body_tranformers: Vec<Box<dyn Transformer>> = vec![
Box::new(SlurpContents {
site_selectors: hashmap![
"blog.cloudflare.com".to_string() => vec![
Selector::parse(".author-lists").unwrap(),
Selector::parse(".post-full-content").unwrap()
],
"hackaday.com".to_string() => vec![
Selector::parse("div.entry-featured-image").unwrap(),
Selector::parse("div.entry-content").unwrap()
],
"mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()],
"natwelch.com".to_string() => vec![
Selector::parse("article div.prose").unwrap(),
],
"slashdot.org".to_string() => vec![
Selector::parse("span.story-byline").unwrap(),
Selector::parse("div.p").unwrap(),
],
"www.smbc-comics.com".to_string() => vec![
Selector::parse("img#cc-comic").unwrap(),
Selector::parse("div#aftercomic img").unwrap(),
],
],
cacher,
site_selectors: &config.slurp_site_selectors,
}),
Box::new(FrameImages),
Box::new(AddOutlink),
Box::new(EscapeHtml),
Box::new(InlineStyle),
Box::new(SanitizeHtml {
cid_prefix: "",
base_url: &link,
}),
Box::new(InlineStyle),
];
for t in body_tranformers.iter() {
if t.should_run(&link, &body) {

View File

@@ -17,7 +17,7 @@ use crate::{
Attachment, Body, DispositionType, Email, EmailThread, Header, Html, Message, PlainText,
Tag, Thread, ThreadSummary, UnhandledContentType,
},
linkify_html, sanitize_html,
linkify_html, InlineStyle, SanitizeHtml, Transformer,
};
const TEXT_PLAIN: &'static str = "text/plain";
@@ -169,17 +169,29 @@ pub async fn thread(
};
Body::Html(Html {
html: format!(
r#"<p class="view-part-text-plain">{}</p>"#,
// Trim newlines to prevent excessive white space at the beginning/end of
// presenation. Leave tabs and spaces incase plain text attempts to center a
// header on the first line.
sanitize_html(
&linkify_html(&text.trim_matches('\n')),
&cid_prefix,
&base_url
)?
),
html: {
let body_tranformers: Vec<Box<dyn Transformer>> = vec![
Box::new(InlineStyle),
Box::new(SanitizeHtml {
cid_prefix: &cid_prefix,
base_url: &base_url,
}),
];
let mut html = linkify_html(&text.trim_matches('\n'));
for t in body_tranformers.iter() {
if t.should_run(&None, &html) {
html = t.transform(&None, &html).await?;
}
}
format!(
r#"<p class="view-part-text-plain">{}</p>"#,
// Trim newlines to prevent excessive white space at the beginning/end of
// presentation. Leave tabs and spaces in case plain text attempts to center a
// header on the first line.
html
)
},
content_tree: if debug_content_tree {
render_content_type_tree(&m)
} else {
@@ -187,8 +199,27 @@ pub async fn thread(
},
})
}
Body::Html(Html { html, content_tree }) => Body::Html(Html {
html: sanitize_html(&html, &cid_prefix, &base_url)?,
Body::Html(Html {
mut html,
content_tree,
}) => Body::Html(Html {
html: {
let body_tranformers: Vec<Box<dyn Transformer>> = vec![
// TODO: this breaks things like emails from calendar
//Box::new(InlineStyle),
Box::new(SanitizeHtml {
cid_prefix: &cid_prefix,
base_url: &base_url,
}),
];
for t in body_tranformers.iter() {
if t.should_run(&None, &html) {
html = t.transform(&None, &html).await?;
}
}
html
},
content_tree: if debug_content_tree {
render_content_type_tree(&m)
} else {

View File

@@ -1,6 +1,6 @@
[package]
name = "shared"
version = "0.0.5"
version = "0.0.29"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@@ -1,5 +1,7 @@
use std::hash::{DefaultHasher, Hash, Hasher};
use build_info::{BuildInfo, VersionControl};
use notmuch::SearchSummary;
use build_info::{VersionControl,BuildInfo};
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug)]
@@ -34,16 +36,12 @@ pub mod urls {
}
}
}
pub fn build_version(bi:fn()->&'static BuildInfo) -> String {
pub fn build_version(bi: fn() -> &'static BuildInfo) -> String {
fn commit(git: &Option<VersionControl>) -> String {
let Some(VersionControl::Git(git)) = git else {
return String::new();
};
let mut s = vec!["-".to_string(), git.commit_short_id.clone()];
if git.dirty {
s.push(".+".to_string());
}
if let Some(branch) = &git.branch {
s.push(format!(" ({branch})"));
}
@@ -53,3 +51,8 @@ pub fn build_version(bi:fn()->&'static BuildInfo) -> String {
format!("v{}{}", bi.crate_info.version, commit(&bi.version_control)).to_string()
}
/// Derives a CSS hex color (`#rrggbb`) from `data`.
///
/// The string is hashed with [`DefaultHasher`] and the low 24 bits of the hash become the
/// color, so equal inputs always map to the same color within one build.
pub fn compute_color(data: &str) -> String {
    // Keep only the low 24 bits: one byte each for red, green and blue.
    const RGB_MASK: u64 = 0x00ff_ffff;

    let mut state = DefaultHasher::new();
    data.hash(&mut state);
    let rgb = state.finish() & RGB_MASK;
    format!("#{rgb:06x}")
}

View File

@@ -1,5 +1,5 @@
[package]
version = "0.0.5"
version = "0.0.29"
name = "letterbox"
repository = "https://github.com/seed-rs/seed-quickstart"
authors = ["Bill Thiede <git@xinu.tv>"]
@@ -41,7 +41,9 @@ wasm-opt = ['-Os']
version = "0.3.58"
features = [
"Clipboard",
"DomRect",
"Element",
"MediaQueryList",
"Navigator",
"Window"
"Window",
]

View File

@@ -1,8 +0,0 @@
.PHONY: all
APP=letterbox
# Build in release mode and push to minio for serving.
all:
trunk build --release
mc mirror m/$(APP)/ /tmp/$(APP)-$(shell date +%s)
mc mirror --overwrite --remove dist/ m/$(APP)/

View File

@@ -1,9 +1,10 @@
use std::collections::HashSet;
use graphql_client::GraphQLQuery;
use log::{error, info, warn};
use log::{debug, error, info, warn};
use seed::{prelude::*, *};
use thiserror::Error;
use web_sys::HtmlElement;
use crate::{
api,
@@ -39,9 +40,7 @@ pub fn init(url: Url, orders: &mut impl Orders<Msg>) -> Model {
// 'notmuch new' on the server periodically?
orders.stream(streams::interval(30_000, || Msg::RefreshStart));
orders.subscribe(on_url_changed);
orders.stream(streams::window_event(Ev::Scroll, |_| {
compute_scroll_ratio()
}));
orders.stream(streams::window_event(Ev::Scroll, |_| Msg::WindowScrolled));
build_info::build_info!(fn bi);
Model {
@@ -50,6 +49,7 @@ pub fn init(url: Url, orders: &mut impl Orders<Msg>) -> Model {
refreshing_state: RefreshingState::None,
tags: None,
read_completion_ratio: 0.,
content_el: ElRef::<HtmlElement>::default(),
versions: Version {
client: version,
server: None,
@@ -57,23 +57,6 @@ pub fn init(url: Url, orders: &mut impl Orders<Msg>) -> Model {
}
}
fn compute_scroll_ratio() -> Msg {
// TODO: compute completion based on contents of the post, not the overall body (which includes
// the tags at the end on mobile/tablet).
let body = document().body().expect("body");
let sh = body.scroll_height() as f64;
let window = window();
let ih = window
.inner_height()
.expect("window height")
.unchecked_into::<js_sys::Number>()
.value_of();
let scroll_y = window.scroll_y().expect("scroll Y");
let end = sh - ih;
let ratio = scroll_y / end;
Msg::SetProgress(ratio)
}
fn on_url_changed(uc: subs::UrlChanged) -> Msg {
let mut url = uc.0;
info!(
@@ -136,10 +119,16 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
} else {
RefreshingState::None
};
orders.perform_cmd(async move { Msg::Reload });
orders.perform_cmd(async move { Msg::Refresh });
}
Msg::Refresh => {
orders.perform_cmd(async move { on_url_changed(subs::UrlChanged(Url::current())) });
}
Msg::Reload => {
orders.perform_cmd(async move { on_url_changed(subs::UrlChanged(Url::current())) });
window()
.location()
.reload()
.expect("failed to reload window");
}
Msg::OnResize => (),
@@ -343,6 +332,8 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
selected_threads,
};
orders.send_msg(Msg::UpdateServerVersion(data.version));
// Generate signal so progress bar is reset
orders.send_msg(Msg::WindowScrolled);
}
Msg::ShowThreadRequest { thread_id } => {
@@ -393,6 +384,8 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
}
}
orders.send_msg(Msg::UpdateServerVersion(data.version));
// Generate signal so progress bar is reset
orders.send_msg(Msg::WindowScrolled);
}
Msg::ShowThreadResult(bad) => {
error!("show_thread_query error: {bad:#?}");
@@ -513,6 +506,42 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
.expect("failed to copy to clipboard");
});
}
Msg::WindowScrolled => {
if let Some(el) = model.content_el.get() {
let ih = window()
.inner_height()
.expect("window height")
.unchecked_into::<js_sys::Number>()
.value_of();
let r = el.get_bounding_client_rect();
if r.height() < ih {
// The whole content fits in the window, no scrollbar
orders.send_msg(Msg::SetProgress(0.));
return;
}
let end: f64 = r.height() - ih;
if end < 0. {
orders.send_msg(Msg::SetProgress(0.));
return;
}
// Flip Y, normally it's 0-point when the top of the content hits the top of the
// screen and goes negative from there.
let y = -r.y();
let ratio: f64 = (y / end).max(0.);
debug!(
"WindowScrolled ih {ih} end {end} ratio {ratio:.02} {}x{} @ {},{}",
r.width(),
r.height(),
r.x(),
r.y()
);
orders.send_msg(Msg::SetProgress(ratio));
} else {
orders.send_msg(Msg::SetProgress(0.));
}
}
Msg::SetProgress(ratio) => {
model.read_completion_ratio = ratio;
}
@@ -535,6 +564,7 @@ pub struct Model {
pub refreshing_state: RefreshingState,
pub tags: Option<Vec<Tag>>,
pub read_completion_ratio: f64,
pub content_el: ElRef<HtmlElement>,
pub versions: Version,
}
@@ -588,6 +618,8 @@ pub enum RefreshingState {
pub enum Msg {
Noop,
// Tell the client to refresh its state
Refresh,
// Tell the client to reload whole page from server
Reload,
// Window has changed size
OnResize,
@@ -637,6 +669,7 @@ pub enum Msg {
CopyToClipboard(String),
WindowScrolled,
SetProgress(f64),
UpdateServerVersion(String),
}

View File

@@ -16,11 +16,11 @@ pub(super) fn view(model: &Model) -> Node<Msg> {
Context::ThreadResult {
thread: ShowThreadQueryThread::EmailThread(thread),
open_messages,
} => view::thread(thread, open_messages, show_icon_text),
} => view::thread(thread, open_messages, show_icon_text, &model.content_el),
Context::ThreadResult {
thread: ShowThreadQueryThread::NewsPost(post),
..
} => view::news_post(post, show_icon_text),
} => view::news_post(post, show_icon_text, &model.content_el),
Context::SearchResult {
query,
results,

View File

@@ -19,11 +19,11 @@ pub(super) fn view(model: &Model) -> Node<Msg> {
Context::ThreadResult {
thread: ShowThreadQueryThread::EmailThread(thread),
open_messages,
} => view::thread(thread, open_messages, show_icon_text),
} => view::thread(thread, open_messages, show_icon_text, &model.content_el),
Context::ThreadResult {
thread: ShowThreadQueryThread::NewsPost(post),
..
} => view::news_post(post, show_icon_text),
} => view::news_post(post, show_icon_text, &model.content_el),
Context::SearchResult {
query,
results,

View File

@@ -1,14 +1,13 @@
use std::{
collections::{hash_map::DefaultHasher, HashSet},
hash::{Hash, Hasher},
};
use std::collections::HashSet;
use chrono::{DateTime, Datelike, Duration, Local, Utc};
use human_format::{Formatter, Scales};
use itertools::Itertools;
use log::{error, info};
use log::{debug, error, info};
use seed::{prelude::*, *};
use seed_hooks::{state_access::CloneState, topo, use_state};
use shared::compute_color;
use web_sys::HtmlElement;
use crate::{
api::urls,
@@ -28,12 +27,6 @@ fn set_title(title: &str) {
seed::document().set_title(&format!("lb: {}", title));
}
/// Derives a CSS hex colour (`#rrggbb`) from `data`.
///
/// The string is hashed with the standard library's `DefaultHasher` and the low 24 bits of
/// the hash are rendered as six zero-padded lowercase hex digits.
fn compute_color(data: &str) -> String {
    let mut state = DefaultHasher::new();
    data.hash(&mut state);
    // Keep only the low 24 bits: one byte each for red, green and blue.
    let rgb = state.finish() & 0x00ff_ffff;
    format!("#{rgb:06x}")
}
fn tags_chiclet(tags: &[String], is_mobile: bool) -> impl Iterator<Item = Node<Msg>> + '_ {
tags.iter().map(move |tag| {
let hex = compute_color(tag);
@@ -795,6 +788,7 @@ fn thread(
thread: &ShowThreadQueryThreadOnEmailThread,
open_messages: &HashSet<String>,
show_icon_text: bool,
content_el: &ElRef<HtmlElement>,
) -> Node<Msg> {
// TODO(wathiede): show per-message subject if it changes significantly from top-level subject
let subject = if thread.subject.is_empty() {
@@ -867,7 +861,7 @@ fn thread(
],
],
],
messages,
div![el_ref(content_el), messages, click_to_top()],
/* TODO(wathiede): plumb in orignal id
a![
attrs! {At::Href=>api::original(&thread_node.0.as_ref().expect("message missing").id)},
@@ -1074,7 +1068,11 @@ pub fn tags(model: &Model) -> Node<Msg> {
]
]
}
fn news_post(post: &ShowThreadQueryThreadOnNewsPost, show_icon_text: bool) -> Node<Msg> {
fn news_post(
post: &ShowThreadQueryThreadOnNewsPost,
show_icon_text: bool,
content_el: &ElRef<HtmlElement>,
) -> Node<Msg> {
// TODO(wathiede): show per-message subject if it changes significantly from top-level subject
let subject = &post.title;
set_title(subject);
@@ -1114,6 +1112,7 @@ fn news_post(post: &ShowThreadQueryThreadOnNewsPost, show_icon_text: bool) -> No
div![C!["header"], render_news_post_header(&post)],
div![
C!["body", "news-post", format!("site-{}", post.slug)],
el_ref(content_el),
raw![&post.body]
]
],
@@ -1123,7 +1122,7 @@ fn news_post(post: &ShowThreadQueryThreadOnNewsPost, show_icon_text: bool) -> No
"Original"
],
*/
ev(Ev::Scroll, |e| info!("scroll event {e:?}"))
click_to_top(),
]
}
fn render_news_post_header(post: &ShowThreadQueryThreadOnNewsPost) -> Node<Msg> {
@@ -1190,7 +1189,7 @@ fn reading_progress(ratio: f64) -> Node<Msg> {
"progress",
"is-success",
"is-small",
IF!(percent<5. => "is-invisible")
IF!(percent<1. => "is-invisible")
],
attrs! {
At::Value=>percent,
@@ -1200,7 +1199,7 @@ fn reading_progress(ratio: f64) -> Node<Msg> {
]
}
pub fn versions(versions: &crate::state::Version) -> Node<Msg> {
info!("versions {versions:?}");
debug!("versions {versions:?}");
aside![
C!["tags-menu", "menu"],
p![C!["menu-label"], "Versions"],
@@ -1216,3 +1215,14 @@ pub fn versions(versions: &crate::state::Version) -> Node<Msg> {
])
]
}
/// Builds a small "Top" button that scrolls the window back to position (0, 0) when clicked.
///
/// # Panics
///
/// The click handler unwraps `web_sys::window()`, so it panics if no window is available.
fn click_to_top() -> Node<Msg> {
    button![
        C!["button", "is-danger", "is-small"],
        span!["Top"],
        span![C!["icon"], i![C!["fas", "fa-arrow-turn-up"]]],
        // The handler produces no message; it only moves the viewport.
        ev(Ev::Click, |_| {
            let win = web_sys::window().unwrap();
            win.scroll_to_with_x_and_y(0., 0.);
        }),
    ]
}

View File

@@ -14,11 +14,11 @@ pub(super) fn view(model: &Model) -> Node<Msg> {
Context::ThreadResult {
thread: ShowThreadQueryThread::EmailThread(thread),
open_messages,
} => view::thread(thread, open_messages, show_icon_text),
} => view::thread(thread, open_messages, show_icon_text, &model.content_el),
Context::ThreadResult {
thread: ShowThreadQueryThread::NewsPost(post),
..
} => view::news_post(post, show_icon_text),
} => view::news_post(post, show_icon_text, &model.content_el),
Context::SearchResult {
query,
results,

View File

@@ -2,6 +2,28 @@
color: var(--color-text) !important;
}
/* Emphasised text inside a news post body: no border, italic, and margin/padding taken
   from the parent. The !important flags make these declarations win over competing
   non-important rules. */
.body.news-post em {
border: 0 !important;
font-style: italic;
margin: inherit !important;
padding: inherit !important;
}
/* Elements with class "number" inside a news post body take every listed box, layout and
   text property from their parent.
   NOTE(review): presumably this neutralises a `.number` rule from the CSS framework that
   would otherwise restyle post content using that class name; confirm. */
.body.news-post .number {
align-items: inherit;
background-color: inherit;
border-radius: inherit;
display: inherit;
font-size: inherit;
height: inherit;
justify-content: inherit;
margin-right: inherit;
min-width: inherit;
padding: inherit;
text-align: inherit;
vertical-align: inherit;
}
.body.news-post.site-saturday-morning-breakfast-cereal {
display: flex;
align-items: center;
@@ -18,11 +40,16 @@
padding-left: 1em;
}
.body.news-post em {
margin: inherit !important;
padding: inherit !important;
font-weight: inherit !important;
border: inherit !important;
display: inline !important;
color: inherit !important;
.body.news-post.site-news-on-redox-your-next-gen-os h1,
.body.news-post.site-news-on-redox-your-next-gen-os h2,
.body.news-post.site-news-on-redox-your-next-gen-os h3,
.body.news-post.site-news-on-redox-your-next-gen-os h4,
.body.news-post.site-news-on-redox-your-next-gen-os h5 {
color: var(--color-text) !important;
}
/* Inline code and preformatted blocks inside a mail body use the theme's text colour and
   secondary background colour, both supplied through CSS custom properties. */
.body.mail code,
.body.mail pre {
color: var(--color-text);
background-color: var(--color-bg-secondary);
}

View File

@@ -345,7 +345,12 @@ display: none;
}
/* Reading-progress bar: square corners, pinned to the top edge of the viewport
   (position: fixed; top: 0) so it stays visible while scrolling. The high z-index is
   presumably there to keep it above the rest of the page; confirm against other
   stacked elements. */
progress.read-progress {
border-radius: 0;
position: fixed;
top: 0;
z-index: 999;
}
/* Height of the reading-progress bar when it also carries the "is-small" class. */
progress.read-progress.is-small {
height: .25rem;
}