Compare commits
27 Commits
96be74e3ee
...
slurp-toml
| Author | SHA1 | Date | |
|---|---|---|---|
| 86805f38e3 | |||
| 62b17bd6a6 | |||
| c0bac99d5a | |||
| 3b69c5e74b | |||
| 539fd469cc | |||
| 442688c35c | |||
| da27f02237 | |||
| 9460e354b7 | |||
| 6bab128ed9 | |||
| 3856b4ca5a | |||
| bef39eefa5 | |||
| b0366c7b4d | |||
| ca02d84d63 | |||
| 461d5de886 | |||
| f8134dad7a | |||
| 30f510bb03 | |||
| e7cbf9cc45 | |||
| 5108213af5 | |||
| d148f625ac | |||
| a9b8f5a88f | |||
| 539b584d9b | |||
| 2f8d83fc4b | |||
| 86ee1257fa | |||
| 03f1035e0e | |||
| bd578191a8 | |||
| d4fc2e2ef1 | |||
| cde30de81c |
1263
Cargo.lock
generated
1263
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "notmuch"
|
||||
version = "0.0.18"
|
||||
version = "0.0.29"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "procmail2notmuch"
|
||||
version = "0.0.18"
|
||||
version = "0.0.29"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
62
server/.sqlx/query-1b2244c9b9b64a1395d8d266f5df5352242bbe5efe481b0852e1c1d4b40584a7.json
generated
Normal file
62
server/.sqlx/query-1b2244c9b9b64a1395d8d266f5df5352242bbe5efe481b0852e1c1d4b40584a7.json
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"db_name": "PostgreSQL",
|
||||
"query": "SELECT\n site,\n title,\n summary,\n link,\n date,\n is_read,\n uid,\n id\nFROM post\n",
|
||||
"describe": {
|
||||
"columns": [
|
||||
{
|
||||
"ordinal": 0,
|
||||
"name": "site",
|
||||
"type_info": "Text"
|
||||
},
|
||||
{
|
||||
"ordinal": 1,
|
||||
"name": "title",
|
||||
"type_info": "Text"
|
||||
},
|
||||
{
|
||||
"ordinal": 2,
|
||||
"name": "summary",
|
||||
"type_info": "Text"
|
||||
},
|
||||
{
|
||||
"ordinal": 3,
|
||||
"name": "link",
|
||||
"type_info": "Text"
|
||||
},
|
||||
{
|
||||
"ordinal": 4,
|
||||
"name": "date",
|
||||
"type_info": "Timestamp"
|
||||
},
|
||||
{
|
||||
"ordinal": 5,
|
||||
"name": "is_read",
|
||||
"type_info": "Bool"
|
||||
},
|
||||
{
|
||||
"ordinal": 6,
|
||||
"name": "uid",
|
||||
"type_info": "Text"
|
||||
},
|
||||
{
|
||||
"ordinal": 7,
|
||||
"name": "id",
|
||||
"type_info": "Int4"
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"Left": []
|
||||
},
|
||||
"nullable": [
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
false,
|
||||
false
|
||||
]
|
||||
},
|
||||
"hash": "1b2244c9b9b64a1395d8d266f5df5352242bbe5efe481b0852e1c1d4b40584a7"
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "server"
|
||||
version = "0.0.18"
|
||||
version = "0.0.29"
|
||||
edition = "2021"
|
||||
default-run = "server"
|
||||
|
||||
@@ -13,6 +13,7 @@ async-graphql = { version = "6.0.11", features = ["log"] }
|
||||
async-graphql-rocket = "6.0.11"
|
||||
async-trait = "0.1.81"
|
||||
build-info = "0.0.38"
|
||||
cacher = {git = "http://git-private.h.xinu.tv/wathiede/cacher.git"}
|
||||
css-inline = "0.13.0"
|
||||
glog = "0.1.0"
|
||||
html-escape = "0.2.13"
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
[release]
|
||||
address = "0.0.0.0"
|
||||
port = 9345
|
||||
newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
|
||||
newsreader_tantivy_db_path = "../target/database/newsreader"
|
||||
|
||||
[debug]
|
||||
address = "0.0.0.0"
|
||||
@@ -8,3 +10,45 @@ port = 9345
|
||||
# Uncomment to make it production like.
|
||||
#log_level = "critical"
|
||||
newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
|
||||
newsreader_tantivy_db_path = "../target/database/newsreader"
|
||||
slurp_cache_path = "/net/nasx/x/letterbox/slurp"
|
||||
|
||||
[debug.slurp_site_selectors]
|
||||
"atmeta.com" = [
|
||||
"div.entry-content"
|
||||
]
|
||||
"blog.prusa3d.com" = [
|
||||
"article.content .post-block"
|
||||
]
|
||||
"blog.cloudflare.com" = [
|
||||
".author-lists .author-name-tooltip",
|
||||
".post-full-content"
|
||||
]
|
||||
"blog.zsa.io" = [
|
||||
"section.blog-article"
|
||||
]
|
||||
"engineering.fb.com" = [
|
||||
"article"
|
||||
]
|
||||
"hackaday.com" = [
|
||||
"div.entry-featured-image",
|
||||
"div.entry-content"
|
||||
]
|
||||
"mitchellh.com" = [
|
||||
"div.w-full"
|
||||
]
|
||||
"natwelch.com" = [
|
||||
"article div.prose"
|
||||
]
|
||||
"slashdot.org" = [
|
||||
"span.story-byline",
|
||||
"div.p"
|
||||
]
|
||||
"www.redox-os.org" = [
|
||||
"div.content"
|
||||
]
|
||||
"www.smbc-comics.com" = [
|
||||
"img#cc-comic",
|
||||
"div#aftercomic img"
|
||||
]
|
||||
|
||||
|
||||
10
server/sql/all-posts.sql
Normal file
10
server/sql/all-posts.sql
Normal file
@@ -0,0 +1,10 @@
|
||||
SELECT
|
||||
site,
|
||||
title,
|
||||
summary,
|
||||
link,
|
||||
date,
|
||||
is_read,
|
||||
uid,
|
||||
id
|
||||
FROM post
|
||||
@@ -18,18 +18,14 @@ use rocket::{
|
||||
Response, State,
|
||||
};
|
||||
use rocket_cors::{AllowedHeaders, AllowedOrigins};
|
||||
use serde::Deserialize;
|
||||
use server::{
|
||||
config::Config,
|
||||
error::ServerError,
|
||||
graphql::{Attachment, GraphqlSchema, Mutation, QueryRoot},
|
||||
nm::{attachment_bytes, cid_attachment_bytes},
|
||||
};
|
||||
use sqlx::postgres::PgPool;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct Config {
|
||||
newsreader_database_url: String,
|
||||
}
|
||||
use tantivy::{Index, IndexWriter};
|
||||
|
||||
#[get("/refresh")]
|
||||
async fn refresh(nm: &State<Notmuch>) -> Result<Json<String>, Debug<NotmuchError>> {
|
||||
@@ -170,6 +166,126 @@ fn graphiql() -> content::RawHtml<String> {
|
||||
content::RawHtml(GraphiQLSource::build().endpoint("/api/graphql").finish())
|
||||
}
|
||||
|
||||
#[rocket::post("/create-news-db")]
|
||||
fn create_news_db(config: &State<Config>) -> Result<String, Debug<ServerError>> {
|
||||
create_news_db_impl(config)?;
|
||||
Ok(format!(
|
||||
"DB created in {}\n",
|
||||
config.newsreader_tantivy_db_path
|
||||
))
|
||||
}
|
||||
fn create_news_db_impl(config: &Config) -> Result<(), ServerError> {
|
||||
std::fs::remove_dir_all(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
|
||||
std::fs::create_dir_all(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
|
||||
use tantivy::schema::*;
|
||||
let mut schema_builder = Schema::builder();
|
||||
schema_builder.add_text_field("site", STRING | STORED);
|
||||
schema_builder.add_text_field("title", TEXT | STORED);
|
||||
schema_builder.add_text_field("summary", TEXT);
|
||||
schema_builder.add_text_field("link", STRING | STORED);
|
||||
schema_builder.add_date_field("date", FAST);
|
||||
schema_builder.add_bool_field("is_read", FAST);
|
||||
schema_builder.add_text_field("uid", STRING | STORED);
|
||||
schema_builder.add_i64_field("id", FAST);
|
||||
|
||||
let schema = schema_builder.build();
|
||||
Index::create_in_dir(&config.newsreader_tantivy_db_path, schema).map_err(ServerError::from)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[rocket::post("/reindex-news-db")]
|
||||
async fn reindex_news_db(
|
||||
pool: &State<PgPool>,
|
||||
config: &State<Config>,
|
||||
) -> Result<String, Debug<ServerError>> {
|
||||
use tantivy::{doc, Term};
|
||||
|
||||
let start_time = std::time::Instant::now();
|
||||
let pool: &PgPool = pool;
|
||||
|
||||
let index =
|
||||
Index::open_in_dir(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
|
||||
let mut index_writer = index.writer(50_000_000).map_err(ServerError::from)?;
|
||||
let schema = index.schema();
|
||||
let site = schema.get_field("site").map_err(ServerError::from)?;
|
||||
let title = schema.get_field("title").map_err(ServerError::from)?;
|
||||
let summary = schema.get_field("summary").map_err(ServerError::from)?;
|
||||
let link = schema.get_field("link").map_err(ServerError::from)?;
|
||||
let date = schema.get_field("date").map_err(ServerError::from)?;
|
||||
let is_read = schema.get_field("is_read").map_err(ServerError::from)?;
|
||||
let uid = schema.get_field("uid").map_err(ServerError::from)?;
|
||||
let id = schema.get_field("id").map_err(ServerError::from)?;
|
||||
|
||||
let rows = sqlx::query_file!("sql/all-posts.sql")
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(ServerError::from)?;
|
||||
|
||||
let total = rows.len();
|
||||
for (i, r) in rows.into_iter().enumerate() {
|
||||
if i % 10_000 == 0 {
|
||||
info!(
|
||||
"{i}/{total} processed, elapsed {:.2}s",
|
||||
start_time.elapsed().as_secs_f32()
|
||||
);
|
||||
}
|
||||
let id_term = Term::from_field_text(uid, &r.uid);
|
||||
index_writer.delete_term(id_term);
|
||||
index_writer
|
||||
.add_document(doc!(
|
||||
site => r.site.expect("UNKOWN_SITE"),
|
||||
title => r.title.expect("UNKOWN_TITLE"),
|
||||
// TODO: clean and extract text from HTML
|
||||
summary => r.summary.expect("UNKNOWN_SUMMARY"),
|
||||
link => r.link.expect("link"),
|
||||
date => tantivy::DateTime::from_primitive(r.date.expect("date")),
|
||||
is_read => r.is_read.expect("is_read"),
|
||||
uid => r.uid,
|
||||
id => r.id as i64,
|
||||
))
|
||||
.map_err(ServerError::from)?;
|
||||
}
|
||||
|
||||
index_writer.commit().map_err(ServerError::from)?;
|
||||
|
||||
info!("took {:.2}s to reindex", start_time.elapsed().as_secs_f32());
|
||||
Ok(format!(
|
||||
"DB openned in {}\n",
|
||||
config.newsreader_tantivy_db_path
|
||||
))
|
||||
}
|
||||
|
||||
#[rocket::get("/search-news-db")]
|
||||
fn search_news_db(
|
||||
index: &State<tantivy::Index>,
|
||||
reader: &State<tantivy::IndexReader>,
|
||||
) -> Result<String, Debug<ServerError>> {
|
||||
use tantivy::{collector::TopDocs, query::QueryParser, Document, TantivyDocument};
|
||||
|
||||
let searcher = reader.searcher();
|
||||
let schema = index.schema();
|
||||
let site = schema.get_field("site").map_err(ServerError::from)?;
|
||||
let title = schema.get_field("title").map_err(ServerError::from)?;
|
||||
let summary = schema.get_field("summary").map_err(ServerError::from)?;
|
||||
let query_parser = QueryParser::for_index(&index, vec![site, title, summary]);
|
||||
|
||||
let query = query_parser
|
||||
.parse_query("grapheme")
|
||||
.map_err(ServerError::from)?;
|
||||
let top_docs = searcher
|
||||
.search(&query, &TopDocs::with_limit(10))
|
||||
.map_err(ServerError::from)?;
|
||||
let mut results = vec![];
|
||||
info!("search found {} docs", top_docs.len());
|
||||
for (_score, doc_address) in top_docs {
|
||||
let retrieved_doc: TantivyDocument =
|
||||
searcher.doc(doc_address).map_err(ServerError::from)?;
|
||||
results.push(format!("{}", retrieved_doc.to_json(&schema)));
|
||||
}
|
||||
|
||||
Ok(format!("{}", results.join(" ")))
|
||||
}
|
||||
|
||||
#[rocket::get("/graphql?<query..>")]
|
||||
async fn graphql_query(schema: &State<GraphqlSchema>, query: GraphQLQuery) -> GraphQLResponse {
|
||||
query.execute(schema.inner()).await
|
||||
@@ -183,7 +299,6 @@ async fn graphql_request(
|
||||
request.execute(schema.inner()).await
|
||||
}
|
||||
|
||||
|
||||
#[rocket::main]
|
||||
async fn main() -> Result<(), Box<dyn Error>> {
|
||||
glog::new()
|
||||
@@ -213,6 +328,9 @@ async fn main() -> Result<(), Box<dyn Error>> {
|
||||
.mount(
|
||||
shared::urls::MOUNT_POINT,
|
||||
routes![
|
||||
create_news_db,
|
||||
reindex_news_db,
|
||||
search_news_db,
|
||||
original,
|
||||
refresh,
|
||||
show_pretty,
|
||||
@@ -229,14 +347,33 @@ async fn main() -> Result<(), Box<dyn Error>> {
|
||||
.attach(AdHoc::config::<Config>());
|
||||
|
||||
let config: Config = rkt.figment().extract()?;
|
||||
info!("Config:\n{config:#?}");
|
||||
if !std::fs::exists(&config.slurp_cache_path)? {
|
||||
info!("Creating slurp cache @ '{}'", &config.slurp_cache_path);
|
||||
std::fs::create_dir_all(&config.slurp_cache_path)?;
|
||||
}
|
||||
let pool = PgPool::connect(&config.newsreader_database_url).await?;
|
||||
let tantivy_newsreader_index = match Index::open_in_dir(&config.newsreader_tantivy_db_path) {
|
||||
Ok(idx) => idx,
|
||||
Err(_) => {
|
||||
create_news_db_impl(&config)?;
|
||||
Index::open_in_dir(&config.newsreader_tantivy_db_path)?
|
||||
}
|
||||
};
|
||||
let tantivy_newsreader_reader = tantivy_newsreader_index.reader()?;
|
||||
let schema = Schema::build(QueryRoot, Mutation, EmptySubscription)
|
||||
.data(Notmuch::default())
|
||||
.data(config)
|
||||
.data(pool.clone())
|
||||
.extension(async_graphql::extensions::Logger)
|
||||
.finish();
|
||||
|
||||
let rkt = rkt.manage(schema).manage(pool).manage(Notmuch::default());
|
||||
let rkt = rkt
|
||||
.manage(schema)
|
||||
.manage(pool)
|
||||
.manage(Notmuch::default())
|
||||
.manage(tantivy_newsreader_index)
|
||||
.manage(tantivy_newsreader_reader);
|
||||
//.manage(Notmuch::with_config("../notmuch/testdata/notmuch.config"))
|
||||
|
||||
rkt.launch().await?;
|
||||
|
||||
23
server/src/config.rs
Normal file
23
server/src/config.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
use std::{collections::HashMap, fmt::Display, str::FromStr};
|
||||
|
||||
use scraper::Selector;
|
||||
use serde::{de, Deserialize, Deserializer};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DeSelector(pub Selector);
|
||||
impl<'de> Deserialize<'de> for DeSelector {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let s = String::deserialize(deserializer)?;
|
||||
Ok(DeSelector(Selector::parse(&s).map_err(de::Error::custom)?))
|
||||
}
|
||||
}
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct Config {
|
||||
pub newsreader_database_url: String,
|
||||
pub newsreader_tantivy_db_path: String,
|
||||
pub slurp_cache_path: String,
|
||||
pub slurp_site_selectors: HashMap<String, Vec<DeSelector>>,
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
use std::{convert::Infallible, str::Utf8Error, string::FromUtf8Error};
|
||||
|
||||
use mailparse::MailParseError;
|
||||
use tantivy::TantivyError;
|
||||
use tantivy::query::QueryParserError;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::TransformError;
|
||||
@@ -29,6 +31,10 @@ pub enum ServerError {
|
||||
StringError(String),
|
||||
#[error("invalid url: {0}")]
|
||||
UrlParseError(#[from] url::ParseError),
|
||||
#[error("tantivy error: {0}")]
|
||||
TantivyError(#[from] TantivyError),
|
||||
#[error("tantivy query parse error: {0}")]
|
||||
QueryParseError(#[from] QueryParserError),
|
||||
#[error("impossible: {0}")]
|
||||
InfaillibleError(#[from] Infallible),
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ use notmuch::Notmuch;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::postgres::PgPool;
|
||||
|
||||
use crate::{newsreader, nm, Query};
|
||||
use crate::{config::Config, newsreader, nm, Query};
|
||||
|
||||
/// # Number of seconds since the Epoch
|
||||
pub type UnixTime = isize;
|
||||
@@ -384,6 +384,7 @@ impl QueryRoot {
|
||||
async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result<Thread, Error> {
|
||||
let nm = ctx.data_unchecked::<Notmuch>();
|
||||
let pool = ctx.data_unchecked::<PgPool>();
|
||||
let config = ctx.data_unchecked::<Config>();
|
||||
let debug_content_tree = ctx
|
||||
.look_ahead()
|
||||
.field("messages")
|
||||
@@ -392,7 +393,7 @@ impl QueryRoot {
|
||||
.exists();
|
||||
// TODO: look at thread_id and conditionally load newsreader
|
||||
if newsreader::is_newsreader_thread(&thread_id) {
|
||||
Ok(newsreader::thread(pool, thread_id).await?)
|
||||
Ok(newsreader::thread(config, pool, thread_id).await?)
|
||||
} else {
|
||||
Ok(nm::thread(nm, thread_id, debug_content_tree).await?)
|
||||
}
|
||||
|
||||
@@ -1,14 +1,16 @@
|
||||
pub mod config;
|
||||
pub mod error;
|
||||
pub mod graphql;
|
||||
pub mod newsreader;
|
||||
pub mod nm;
|
||||
|
||||
use std::{collections::HashMap, convert::Infallible, str::FromStr};
|
||||
use std::{collections::HashMap, convert::Infallible, str::FromStr, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use cacher::{Cacher, FilesystemCacher};
|
||||
use css_inline::{CSSInliner, InlineError, InlineOptions};
|
||||
use linkify::{LinkFinder, LinkKind};
|
||||
use log::{error, warn};
|
||||
use log::{error, info, warn};
|
||||
use lol_html::{
|
||||
element, errors::RewritingError, html_content::ContentType, rewrite_str, text,
|
||||
RewriteStrSettings,
|
||||
@@ -16,9 +18,13 @@ use lol_html::{
|
||||
use maplit::{hashmap, hashset};
|
||||
use scraper::{Html, Selector};
|
||||
use thiserror::Error;
|
||||
use tokio::sync::Mutex;
|
||||
use url::Url;
|
||||
|
||||
use crate::newsreader::{extract_thread_id, is_newsreader_thread};
|
||||
use crate::{
|
||||
config::DeSelector,
|
||||
newsreader::{extract_thread_id, is_newsreader_thread},
|
||||
};
|
||||
|
||||
// TODO: figure out how to use Cow
|
||||
#[async_trait]
|
||||
@@ -109,16 +115,17 @@ impl Transformer for InlineStyle {
|
||||
include_str!("custom.css"),
|
||||
);
|
||||
let inline_opts = InlineOptions {
|
||||
inline_style_tags: false,
|
||||
inline_style_tags: true,
|
||||
keep_style_tags: false,
|
||||
keep_link_tags: false,
|
||||
keep_link_tags: true,
|
||||
base_url: None,
|
||||
load_remote_stylesheets: false,
|
||||
load_remote_stylesheets: true,
|
||||
extra_css: Some(css.into()),
|
||||
preallocate_node_capacity: 32,
|
||||
..InlineOptions::default()
|
||||
};
|
||||
|
||||
//info!("HTML:\n{html}");
|
||||
Ok(match CSSInliner::new(inline_opts).inline(&html) {
|
||||
Ok(inlined_html) => inlined_html,
|
||||
Err(err) => {
|
||||
@@ -211,12 +218,13 @@ impl Transformer for AddOutlink {
|
||||
}
|
||||
}
|
||||
|
||||
struct SlurpContents {
|
||||
site_selectors: HashMap<String, Vec<Selector>>,
|
||||
struct SlurpContents<'h> {
|
||||
cacher: Arc<Mutex<FilesystemCacher>>,
|
||||
site_selectors: &'h HashMap<String, Vec<DeSelector>>,
|
||||
}
|
||||
|
||||
impl SlurpContents {
|
||||
fn get_selectors(&self, link: &Url) -> Option<&[Selector]> {
|
||||
impl<'h> SlurpContents<'h> {
|
||||
fn get_selectors(&self, link: &Url) -> Option<&[DeSelector]> {
|
||||
for (host, selector) in self.site_selectors.iter() {
|
||||
if link.host_str().map(|h| h.contains(host)).unwrap_or(false) {
|
||||
return Some(&selector);
|
||||
@@ -227,7 +235,7 @@ impl SlurpContents {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Transformer for SlurpContents {
|
||||
impl<'h> Transformer for SlurpContents<'h> {
|
||||
fn should_run(&self, link: &Option<Url>, _: &str) -> bool {
|
||||
if let Some(link) = link {
|
||||
return self.get_selectors(link).is_some();
|
||||
@@ -241,19 +249,26 @@ impl Transformer for SlurpContents {
|
||||
let Some(selectors) = self.get_selectors(&link) else {
|
||||
return Ok(html.to_string());
|
||||
};
|
||||
let body = reqwest::get(link.as_str()).await?.text().await?;
|
||||
let mut cacher = self.cacher.lock().await;
|
||||
let body = if let Some(body) = cacher.get(link.as_str()) {
|
||||
info!("cache hit for {link}");
|
||||
String::from_utf8_lossy(&body).to_string()
|
||||
} else {
|
||||
let body = reqwest::get(link.as_str()).await?.text().await?;
|
||||
cacher.set(link.as_str(), body.as_bytes());
|
||||
body
|
||||
};
|
||||
let doc = Html::parse_document(&body);
|
||||
|
||||
let mut results = Vec::new();
|
||||
for selector in selectors {
|
||||
if let Some(frag) = doc.select(&selector).next() {
|
||||
for frag in doc.select(&selector.0) {
|
||||
results.push(frag.html())
|
||||
} else {
|
||||
warn!("couldn't find '{:?}' in {}", selector, link);
|
||||
return Ok(html.to_string());
|
||||
// TODO: figure out how to warn if there were no hits
|
||||
//warn!("couldn't find '{:?}' in {}", selector, link);
|
||||
}
|
||||
}
|
||||
Ok(results.join("<br><br>"))
|
||||
Ok(results.join(""))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -292,7 +307,7 @@ pub fn sanitize_html(
|
||||
) -> Result<String, TransformError> {
|
||||
let inline_opts = InlineOptions {
|
||||
inline_style_tags: true,
|
||||
keep_style_tags: false,
|
||||
keep_style_tags: true,
|
||||
keep_link_tags: false,
|
||||
base_url: None,
|
||||
load_remote_stylesheets: false,
|
||||
@@ -335,6 +350,30 @@ pub fn sanitize_html(
|
||||
|
||||
el.set_attribute("src", &src)?;
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
// Add https to href with //<domain name>
|
||||
element!("link[href]", |el| {
|
||||
info!("found link[href] {el:?}");
|
||||
let mut href = el.get_attribute("href").expect("href was required");
|
||||
if href.starts_with("//") {
|
||||
warn!("adding https to {href}");
|
||||
href.insert_str(0, "https:");
|
||||
}
|
||||
|
||||
el.set_attribute("href", &href)?;
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
// Add https to src with //<domain name>
|
||||
element!("style[src]", |el| {
|
||||
let mut src = el.get_attribute("src").expect("src was required");
|
||||
if src.starts_with("//") {
|
||||
src.insert_str(0, "https:");
|
||||
}
|
||||
|
||||
el.set_attribute("src", &src)?;
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
];
|
||||
|
||||
@@ -1,24 +1,26 @@
|
||||
use std::hash::{DefaultHasher, Hash, Hasher};
|
||||
use std::sync::Arc;
|
||||
|
||||
use cacher::FilesystemCacher;
|
||||
use log::info;
|
||||
use maplit::hashmap;
|
||||
use scraper::Selector;
|
||||
use shared::compute_color;
|
||||
use sqlx::postgres::PgPool;
|
||||
use tokio::sync::Mutex;
|
||||
use url::Url;
|
||||
|
||||
use crate::Query;
|
||||
|
||||
const TAG_PREFIX: &'static str = "News/";
|
||||
const THREAD_PREFIX: &'static str = "news:";
|
||||
|
||||
use crate::{
|
||||
compute_offset_limit,
|
||||
config::Config,
|
||||
error::ServerError,
|
||||
graphql::{NewsPost, Tag, Thread, ThreadSummary},
|
||||
AddOutlink, EscapeHtml, FrameImages, InlineStyle, SanitizeHtml, SlurpContents, StripHtml,
|
||||
Transformer,
|
||||
AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml, SlurpContents,
|
||||
StripHtml, Transformer,
|
||||
};
|
||||
|
||||
const TAG_PREFIX: &'static str = "News/";
|
||||
const THREAD_PREFIX: &'static str = "news:";
|
||||
|
||||
pub fn is_newsreader_search(query: &str) -> bool {
|
||||
query.contains(TAG_PREFIX)
|
||||
}
|
||||
@@ -128,11 +130,9 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
|
||||
let tags = tags
|
||||
.into_iter()
|
||||
.map(|tag| {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
tag.site.hash(&mut hasher);
|
||||
let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
|
||||
let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
|
||||
let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
|
||||
let hex = compute_color(&name);
|
||||
Tag {
|
||||
name,
|
||||
fg_color: "white".to_string(),
|
||||
@@ -144,7 +144,11 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
|
||||
Ok(tags)
|
||||
}
|
||||
|
||||
pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerError> {
|
||||
pub async fn thread(
|
||||
config: &Config,
|
||||
pool: &PgPool,
|
||||
thread_id: String,
|
||||
) -> Result<Thread, ServerError> {
|
||||
let id = thread_id
|
||||
.strip_prefix(THREAD_PREFIX)
|
||||
.expect("news thread doesn't start with '{THREAD_PREFIX}'")
|
||||
@@ -173,42 +177,11 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
|
||||
// TODO: add site specific cleanups. For example:
|
||||
// * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div>
|
||||
// * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
|
||||
let cacher = Arc::new(Mutex::new(FilesystemCacher::new(&config.slurp_cache_path)?));
|
||||
let body_tranformers: Vec<Box<dyn Transformer>> = vec![
|
||||
Box::new(SlurpContents {
|
||||
site_selectors: hashmap![
|
||||
"atmeta.com".to_string() => vec![
|
||||
Selector::parse("div.entry-content").unwrap(),
|
||||
],
|
||||
"blog.prusa3d.com".to_string() => vec![
|
||||
Selector::parse("article.content .post-block").unwrap(),
|
||||
],
|
||||
"blog.cloudflare.com".to_string() => vec![
|
||||
Selector::parse(".author-lists .author-name-tooltip").unwrap(),
|
||||
Selector::parse(".post-full-content").unwrap()
|
||||
],
|
||||
"engineering.fb.com".to_string() => vec![
|
||||
Selector::parse("article").unwrap(),
|
||||
],
|
||||
"hackaday.com".to_string() => vec![
|
||||
Selector::parse("div.entry-featured-image").unwrap(),
|
||||
Selector::parse("div.entry-content").unwrap()
|
||||
],
|
||||
"mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()],
|
||||
"natwelch.com".to_string() => vec![
|
||||
Selector::parse("article div.prose").unwrap(),
|
||||
],
|
||||
"slashdot.org".to_string() => vec![
|
||||
Selector::parse("span.story-byline").unwrap(),
|
||||
Selector::parse("div.p").unwrap(),
|
||||
],
|
||||
"www.redox-os.org".to_string() => vec![
|
||||
Selector::parse("div.content").unwrap(),
|
||||
],
|
||||
"www.smbc-comics.com".to_string() => vec![
|
||||
Selector::parse("img#cc-comic").unwrap(),
|
||||
Selector::parse("div#aftercomic img").unwrap(),
|
||||
],
|
||||
],
|
||||
cacher,
|
||||
site_selectors: &config.slurp_site_selectors,
|
||||
}),
|
||||
Box::new(FrameImages),
|
||||
Box::new(AddOutlink),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "shared"
|
||||
version = "0.0.18"
|
||||
version = "0.0.29"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use std::hash::{DefaultHasher, Hash, Hasher};
|
||||
|
||||
use build_info::{BuildInfo, VersionControl};
|
||||
use notmuch::SearchSummary;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -49,3 +51,8 @@ pub fn build_version(bi: fn() -> &'static BuildInfo) -> String {
|
||||
|
||||
format!("v{}{}", bi.crate_info.version, commit(&bi.version_control)).to_string()
|
||||
}
|
||||
pub fn compute_color(data: &str) -> String {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
data.hash(&mut hasher);
|
||||
format!("#{:06x}", hasher.finish() % (1 << 24))
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
version = "0.0.18"
|
||||
version = "0.0.29"
|
||||
name = "letterbox"
|
||||
repository = "https://github.com/seed-rs/seed-quickstart"
|
||||
authors = ["Bill Thiede <git@xinu.tv>"]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use graphql_client::GraphQLQuery;
|
||||
use log::{error, info, warn};
|
||||
use log::{debug, error, info, warn};
|
||||
use seed::{prelude::*, *};
|
||||
use thiserror::Error;
|
||||
use web_sys::HtmlElement;
|
||||
@@ -332,6 +332,8 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
|
||||
selected_threads,
|
||||
};
|
||||
orders.send_msg(Msg::UpdateServerVersion(data.version));
|
||||
// Generate signal so progress bar is reset
|
||||
orders.send_msg(Msg::WindowScrolled);
|
||||
}
|
||||
|
||||
Msg::ShowThreadRequest { thread_id } => {
|
||||
@@ -382,6 +384,8 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
|
||||
}
|
||||
}
|
||||
orders.send_msg(Msg::UpdateServerVersion(data.version));
|
||||
// Generate signal so progress bar is reset
|
||||
orders.send_msg(Msg::WindowScrolled);
|
||||
}
|
||||
Msg::ShowThreadResult(bad) => {
|
||||
error!("show_thread_query error: {bad:#?}");
|
||||
@@ -511,16 +515,31 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
|
||||
.value_of();
|
||||
|
||||
let r = el.get_bounding_client_rect();
|
||||
info!(
|
||||
"window scrolled {}x{}@{},{}",
|
||||
if r.height() < ih {
|
||||
// The whole content fits in the window, no scrollbar
|
||||
orders.send_msg(Msg::SetProgress(0.));
|
||||
return;
|
||||
}
|
||||
let end: f64 = r.height() - ih;
|
||||
if end < 0. {
|
||||
orders.send_msg(Msg::SetProgress(0.));
|
||||
return;
|
||||
}
|
||||
// Flip Y, normally it's 0-point when the top of the content hits the top of the
|
||||
// screen and goes negative from there.
|
||||
let y = -r.y();
|
||||
let ratio: f64 = (y / end).max(0.);
|
||||
debug!(
|
||||
"WindowScrolled ih {ih} end {end} ratio {ratio:.02} {}x{} @ {},{}",
|
||||
r.width(),
|
||||
r.height(),
|
||||
r.x(),
|
||||
r.y(),
|
||||
r.y()
|
||||
);
|
||||
let end = r.height() - ih;
|
||||
let y = -r.y();
|
||||
orders.send_msg(Msg::SetProgress((y / end).max(0.)));
|
||||
|
||||
orders.send_msg(Msg::SetProgress(ratio));
|
||||
} else {
|
||||
orders.send_msg(Msg::SetProgress(0.));
|
||||
}
|
||||
}
|
||||
Msg::SetProgress(ratio) => {
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
use std::{
|
||||
collections::{hash_map::DefaultHasher, HashSet},
|
||||
hash::{Hash, Hasher},
|
||||
};
|
||||
use std::collections::HashSet;
|
||||
|
||||
use chrono::{DateTime, Datelike, Duration, Local, Utc};
|
||||
use human_format::{Formatter, Scales};
|
||||
@@ -9,6 +6,7 @@ use itertools::Itertools;
|
||||
use log::{debug, error, info};
|
||||
use seed::{prelude::*, *};
|
||||
use seed_hooks::{state_access::CloneState, topo, use_state};
|
||||
use shared::compute_color;
|
||||
use web_sys::HtmlElement;
|
||||
|
||||
use crate::{
|
||||
@@ -29,12 +27,6 @@ fn set_title(title: &str) {
|
||||
seed::document().set_title(&format!("lb: {}", title));
|
||||
}
|
||||
|
||||
fn compute_color(data: &str) -> String {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
data.hash(&mut hasher);
|
||||
format!("#{:06x}", hasher.finish() % (1 << 24))
|
||||
}
|
||||
|
||||
fn tags_chiclet(tags: &[String], is_mobile: bool) -> impl Iterator<Item = Node<Msg>> + '_ {
|
||||
tags.iter().map(move |tag| {
|
||||
let hex = compute_color(tag);
|
||||
@@ -869,12 +861,13 @@ fn thread(
|
||||
],
|
||||
],
|
||||
],
|
||||
div![el_ref(content_el), messages] /* TODO(wathiede): plumb in orignal id
|
||||
a![
|
||||
attrs! {At::Href=>api::original(&thread_node.0.as_ref().expect("message missing").id)},
|
||||
"Original"
|
||||
],
|
||||
*/
|
||||
div![el_ref(content_el), messages, click_to_top()],
|
||||
/* TODO(wathiede): plumb in orignal id
|
||||
a![
|
||||
attrs! {At::Href=>api::original(&thread_node.0.as_ref().expect("message missing").id)},
|
||||
"Original"
|
||||
],
|
||||
*/
|
||||
]
|
||||
}
|
||||
|
||||
@@ -1129,7 +1122,7 @@ fn news_post(
|
||||
"Original"
|
||||
],
|
||||
*/
|
||||
ev(Ev::Scroll, |e| info!("scroll event {e:?}"))
|
||||
click_to_top(),
|
||||
]
|
||||
}
|
||||
fn render_news_post_header(post: &ShowThreadQueryThreadOnNewsPost) -> Node<Msg> {
|
||||
@@ -1222,3 +1215,14 @@ pub fn versions(versions: &crate::state::Version) -> Node<Msg> {
|
||||
])
|
||||
]
|
||||
}
|
||||
|
||||
fn click_to_top() -> Node<Msg> {
|
||||
button![
|
||||
C!["button", "is-danger", "is-small"],
|
||||
span!["Top"],
|
||||
span![C!["icon"], i![C!["fas", "fa-arrow-turn-up"]]],
|
||||
ev(Ev::Click, move |_| web_sys::window()
|
||||
.unwrap()
|
||||
.scroll_to_with_x_and_y(0., 0.))
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user