Compare commits
No commits in common. "f8134dad7a646a6e9131fd8a5563411f8b20268c" and "e7cbf9cc45e4e478be78a0b481ee3ceaa525c086" have entirely different histories.
f8134dad7a...e7cbf9cc45
Cargo.lock (generated): 1263 changed lines. File diff suppressed because it is too large.
@@ -1,6 +1,6 @@
 [package]
 name = "notmuch"
-version = "0.0.23"
+version = "0.0.22"
 edition = "2021"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -1,6 +1,6 @@
 [package]
 name = "procmail2notmuch"
-version = "0.0.23"
+version = "0.0.22"
 edition = "2021"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -1,6 +1,6 @@
 [package]
 name = "server"
-version = "0.0.23"
+version = "0.0.22"
 edition = "2021"
 default-run = "server"

@@ -13,7 +13,6 @@ async-graphql = { version = "6.0.11", features = ["log"] }
 async-graphql-rocket = "6.0.11"
 async-trait = "0.1.81"
 build-info = "0.0.38"
-cacher = {git = "https://git-private.z.xinu.tv/wathiede/cacher"}
 css-inline = "0.13.0"
 glog = "0.1.0"
 html-escape = "0.2.13"
@@ -1,8 +1,6 @@
 [release]
 address = "0.0.0.0"
 port = 9345
 newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
-newsreader_tantivy_db_path = "../target/database/newsreader"
-
 [debug]
 address = "0.0.0.0"
@@ -10,5 +8,3 @@ port = 9345
 # Uncomment to make it production like.
 #log_level = "critical"
 newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
-newsreader_tantivy_db_path = "../target/database/newsreader"
-slurp_cache_path = "/net/nasx/x/letterbox/slurp"
@@ -1,10 +0,0 @@
-SELECT
-    site,
-    title,
-    summary,
-    link,
-    date,
-    is_read,
-    uid,
-    id
-FROM post
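Note: the deleted SQL file above feeds the compile-time checked sqlx::query_file! call in the reindex handler removed later in this diff. A minimal sketch of that pattern (column names come from the SELECT list above; building it needs a reachable DATABASE_URL or sqlx offline data, so treat it as illustrative only):

```rust
use sqlx::postgres::PgPool;

// sqlx::query_file! parses the SQL file at compile time and generates an anonymous
// record type with one field per selected column (site, title, ..., id).
async fn all_posts(pool: &PgPool) -> Result<(), sqlx::Error> {
    let rows = sqlx::query_file!("sql/all-posts.sql")
        .fetch_all(pool)
        .await?;
    for row in rows {
        println!("{} {:?}", row.id, row.title);
    }
    Ok(())
}
```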
@@ -18,14 +18,18 @@ use rocket::{
     Response, State,
 };
 use rocket_cors::{AllowedHeaders, AllowedOrigins};
+use serde::Deserialize;
 use server::{
-    config::Config,
     error::ServerError,
     graphql::{Attachment, GraphqlSchema, Mutation, QueryRoot},
     nm::{attachment_bytes, cid_attachment_bytes},
 };
 use sqlx::postgres::PgPool;
-use tantivy::{Index, IndexWriter};

+#[derive(Deserialize)]
+struct Config {
+    newsreader_database_url: String,
+}
+
 #[get("/refresh")]
 async fn refresh(nm: &State<Notmuch>) -> Result<Json<String>, Debug<NotmuchError>> {
@@ -166,122 +170,6 @@ fn graphiql() -> content::RawHtml<String> {
     content::RawHtml(GraphiQLSource::build().endpoint("/api/graphql").finish())
 }

-#[rocket::post("/create-news-db")]
-fn create_news_db(config: &State<Config>) -> Result<String, Debug<ServerError>> {
-    std::fs::remove_dir_all(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
-    std::fs::create_dir_all(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
-    use tantivy::schema::*;
-    let mut schema_builder = Schema::builder();
-    schema_builder.add_text_field("site", STRING | STORED);
-    schema_builder.add_text_field("title", TEXT | STORED);
-    schema_builder.add_text_field("summary", TEXT);
-    schema_builder.add_text_field("link", STRING | STORED);
-    schema_builder.add_date_field("date", FAST);
-    schema_builder.add_bool_field("is_read", FAST);
-    schema_builder.add_text_field("uid", STRING | STORED);
-    schema_builder.add_i64_field("id", FAST);
-
-    let schema = schema_builder.build();
-    Index::create_in_dir(&config.newsreader_tantivy_db_path, schema).map_err(ServerError::from)?;
-    Ok(format!(
-        "DB created in {}\n",
-        config.newsreader_tantivy_db_path
-    ))
-}
-
-#[rocket::post("/reindex-news-db")]
-async fn reindex_news_db(
-    pool: &State<PgPool>,
-    config: &State<Config>,
-) -> Result<String, Debug<ServerError>> {
-    use tantivy::{doc, Term};
-
-    let start_time = std::time::Instant::now();
-    let pool: &PgPool = pool;
-
-    let index =
-        Index::open_in_dir(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
-    let mut index_writer = index.writer(50_000_000).map_err(ServerError::from)?;
-    let schema = index.schema();
-    let site = schema.get_field("site").map_err(ServerError::from)?;
-    let title = schema.get_field("title").map_err(ServerError::from)?;
-    let summary = schema.get_field("summary").map_err(ServerError::from)?;
-    let link = schema.get_field("link").map_err(ServerError::from)?;
-    let date = schema.get_field("date").map_err(ServerError::from)?;
-    let is_read = schema.get_field("is_read").map_err(ServerError::from)?;
-    let uid = schema.get_field("uid").map_err(ServerError::from)?;
-    let id = schema.get_field("id").map_err(ServerError::from)?;
-
-    let rows = sqlx::query_file!("sql/all-posts.sql")
-        .fetch_all(pool)
-        .await
-        .map_err(ServerError::from)?;
-
-    let total = rows.len();
-    for (i, r) in rows.into_iter().enumerate() {
-        if i % 10_000 == 0 {
-            info!(
-                "{i}/{total} processed, elapsed {:.2}s",
-                start_time.elapsed().as_secs_f32()
-            );
-        }
-        let id_term = Term::from_field_text(uid, &r.uid);
-        index_writer.delete_term(id_term);
-        index_writer
-            .add_document(doc!(
-                site => r.site.expect("UNKOWN_SITE"),
-                title => r.title.expect("UNKOWN_TITLE"),
-                // TODO: clean and extract text from HTML
-                summary => r.summary.expect("UNKNOWN_SUMMARY"),
-                link => r.link.expect("link"),
-                date => tantivy::DateTime::from_primitive(r.date.expect("date")),
-                is_read => r.is_read.expect("is_read"),
-                uid => r.uid,
-                id => r.id as i64,
-            ))
-            .map_err(ServerError::from)?;
-    }
-
-    index_writer.commit().map_err(ServerError::from)?;
-
-    info!("took {:.2}s to reindex", start_time.elapsed().as_secs_f32());
-    Ok(format!(
-        "DB openned in {}\n",
-        config.newsreader_tantivy_db_path
-    ))
-}
-
-#[rocket::get("/search-news-db")]
-fn search_news_db(
-    index: &State<tantivy::Index>,
-    reader: &State<tantivy::IndexReader>,
-) -> Result<String, Debug<ServerError>> {
-    use tantivy::{collector::TopDocs, query::QueryParser, Document, TantivyDocument};
-
-    let searcher = reader.searcher();
-    let schema = index.schema();
-    let site = schema.get_field("site").map_err(ServerError::from)?;
-    let title = schema.get_field("title").map_err(ServerError::from)?;
-    let summary = schema.get_field("summary").map_err(ServerError::from)?;
-    let query_parser = QueryParser::for_index(&index, vec![site, title, summary]);
-
-    let query = query_parser
-        .parse_query("grapheme")
-        .map_err(ServerError::from)?;
-    let top_docs = searcher
-        .search(&query, &TopDocs::with_limit(10))
-        .map_err(ServerError::from)?;
-    let mut results = vec![];
-    info!("search found {} docs", top_docs.len());
-    for (_score, doc_address) in top_docs {
-        let retrieved_doc: TantivyDocument =
-            searcher.doc(doc_address).map_err(ServerError::from)?;
-        results.push(format!("{}", retrieved_doc.to_json(&schema)));
-    }
-
-    Ok(format!("{}", results.join(" ")))
-}
-
 #[rocket::get("/graphql?<query..>")]
 async fn graphql_query(schema: &State<GraphqlSchema>, query: GraphQLQuery) -> GraphQLResponse {
     query.execute(schema.inner()).await
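Note: the three handlers removed above follow tantivy's usual build-schema / index / search cycle. A condensed, self-contained sketch of that cycle, using an in-RAM index and the same field names and calls as the removed code (the retrieval API assumes a tantivy version that exposes TantivyDocument, as the removed code does):

```rust
use tantivy::{
    collector::TopDocs,
    doc,
    query::QueryParser,
    schema::{Schema, FAST, STORED, STRING, TEXT},
    Document, Index, TantivyDocument,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Schema mirrors (a subset of) the fields create_news_db declared.
    let mut schema_builder = Schema::builder();
    let site = schema_builder.add_text_field("site", STRING | STORED);
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let summary = schema_builder.add_text_field("summary", TEXT);
    let id = schema_builder.add_i64_field("id", FAST);
    let schema = schema_builder.build();

    // In-RAM index for the sketch; the removed handler used Index::create_in_dir.
    let index = Index::create_in_ram(schema.clone());
    let mut index_writer = index.writer(50_000_000)?;
    index_writer.add_document(doc!(
        site => "example.com",
        title => "Grapheme clusters in Rust",
        summary => "Notes on segmenting text into grapheme clusters",
        id => 1i64,
    ))?;
    index_writer.commit()?;

    // Search, as in the removed search_news_db handler.
    let reader = index.reader()?;
    let searcher = reader.searcher();
    let query_parser = QueryParser::for_index(&index, vec![site, title, summary]);
    let query = query_parser.parse_query("grapheme")?;
    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
    for (_score, doc_address) in top_docs {
        let retrieved: TantivyDocument = searcher.doc(doc_address)?;
        println!("{}", retrieved.to_json(&schema));
    }
    Ok(())
}
```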
@@ -295,6 +183,7 @@ async fn graphql_request(
     request.execute(schema.inner()).await
 }

+
 #[rocket::main]
 async fn main() -> Result<(), Box<dyn Error>> {
     glog::new()
@@ -324,9 +213,6 @@ async fn main() -> Result<(), Box<dyn Error>> {
         .mount(
             shared::urls::MOUNT_POINT,
             routes![
-                create_news_db,
-                reindex_news_db,
-                search_news_db,
                 original,
                 refresh,
                 show_pretty,
@@ -343,26 +229,14 @@ async fn main() -> Result<(), Box<dyn Error>> {
         .attach(AdHoc::config::<Config>());

     let config: Config = rkt.figment().extract()?;
-    if !std::fs::exists(&config.slurp_cache_path)? {
-        info!("Creating slurp cache @ '{}'", &config.slurp_cache_path);
-        std::fs::create_dir_all(&config.slurp_cache_path)?;
-    }
     let pool = PgPool::connect(&config.newsreader_database_url).await?;
-    let tantivy_newsreader_index = Index::open_in_dir(&config.newsreader_tantivy_db_path)?;
-    let tantivy_newsreader_reader = tantivy_newsreader_index.reader()?;
     let schema = Schema::build(QueryRoot, Mutation, EmptySubscription)
         .data(Notmuch::default())
-        .data(config)
         .data(pool.clone())
         .extension(async_graphql::extensions::Logger)
         .finish();
-
-    let rkt = rkt
-        .manage(schema)
-        .manage(pool)
-        .manage(Notmuch::default())
-        .manage(tantivy_newsreader_index)
-        .manage(tantivy_newsreader_reader);
+    let rkt = rkt.manage(schema).manage(pool).manage(Notmuch::default());
+    //.manage(Notmuch::with_config("../notmuch/testdata/notmuch.config"))

     rkt.launch().await?;

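Note: the tail of main() above relies on two Rocket 0.5 mechanisms, AdHoc::config::<Config>() plus figment().extract() for typed configuration, and .manage(...) for state handed to request guards. A minimal stand-alone sketch of that wiring (route and struct are examples, not the app's real ones; at run time it expects newsreader_database_url in Rocket.toml or a ROCKET_ environment variable):

```rust
use rocket::{fairing::AdHoc, get, routes, State};
use serde::Deserialize;

#[derive(Deserialize)]
struct Config {
    newsreader_database_url: String,
}

// A &State<Config> guard resolves because AdHoc::config() put Config into managed state.
#[get("/db-url")]
fn db_url(config: &State<Config>) -> String {
    config.newsreader_database_url.clone()
}

#[rocket::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let rkt = rocket::build()
        .mount("/", routes![db_url])
        .attach(AdHoc::config::<Config>());

    // The same figment can also be extracted by hand, as main() does above.
    let config: Config = rkt.figment().extract()?;
    println!("configured db: {}", config.newsreader_database_url);

    rkt.launch().await?;
    Ok(())
}
```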
@@ -1,7 +0,0 @@
-use serde::Deserialize;
-#[derive(Deserialize)]
-pub struct Config {
-    pub newsreader_database_url: String,
-    pub newsreader_tantivy_db_path: String,
-    pub slurp_cache_path: String,
-}
@@ -1,8 +1,6 @@
 use std::{convert::Infallible, str::Utf8Error, string::FromUtf8Error};

 use mailparse::MailParseError;
-use tantivy::TantivyError;
-use tantivy::query::QueryParserError;
 use thiserror::Error;

 use crate::TransformError;
@@ -31,10 +29,6 @@ pub enum ServerError {
     StringError(String),
     #[error("invalid url: {0}")]
     UrlParseError(#[from] url::ParseError),
-    #[error("tantivy error: {0}")]
-    TantivyError(#[from] TantivyError),
-    #[error("tantivy query parse error: {0}")]
-    QueryParseError(#[from] QueryParserError),
     #[error("impossible: {0}")]
     InfaillibleError(#[from] Infallible),
 }
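Note: the two variants removed above used thiserror's #[from], which is what allows the handlers to convert tantivy errors via `?` or an explicit ServerError::from, as the removed code does. A tiny illustration of what #[from] generates, using a std error type as a stand-in for the tantivy ones:

```rust
use thiserror::Error;

#[derive(Debug, Error)]
enum ExampleError {
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
}

// Because of #[from], `?` converts std::io::Error into ExampleError automatically.
fn read_config(path: &str) -> Result<String, ExampleError> {
    Ok(std::fs::read_to_string(path)?)
}

fn main() {
    match read_config("/does/not/exist") {
        Ok(contents) => println!("{contents}"),
        Err(err) => eprintln!("{err}"),
    }
}
```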
@@ -8,7 +8,7 @@ use notmuch::Notmuch;
 use serde::{Deserialize, Serialize};
 use sqlx::postgres::PgPool;

-use crate::{config::Config, newsreader, nm, Query};
+use crate::{newsreader, nm, Query};

 /// # Number of seconds since the Epoch
 pub type UnixTime = isize;
@@ -384,7 +384,6 @@ impl QueryRoot {
     async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result<Thread, Error> {
         let nm = ctx.data_unchecked::<Notmuch>();
         let pool = ctx.data_unchecked::<PgPool>();
-        let config = ctx.data_unchecked::<Config>();
         let debug_content_tree = ctx
             .look_ahead()
             .field("messages")
@@ -393,7 +392,7 @@ impl QueryRoot {
             .exists();
         // TODO: look at thread_id and conditionally load newsreader
         if newsreader::is_newsreader_thread(&thread_id) {
-            Ok(newsreader::thread(config, pool, thread_id).await?)
+            Ok(newsreader::thread(pool, thread_id).await?)
         } else {
             Ok(nm::thread(nm, thread_id, debug_content_tree).await?)
         }
@@ -1,16 +1,14 @@
-pub mod config;
 pub mod error;
 pub mod graphql;
 pub mod newsreader;
 pub mod nm;

-use std::{collections::HashMap, convert::Infallible, str::FromStr, sync::Arc};
+use std::{collections::HashMap, convert::Infallible, str::FromStr};

 use async_trait::async_trait;
-use cacher::{Cacher, FilesystemCacher};
 use css_inline::{CSSInliner, InlineError, InlineOptions};
 use linkify::{LinkFinder, LinkKind};
-use log::{error, info, warn};
+use log::{error, warn};
 use lol_html::{
     element, errors::RewritingError, html_content::ContentType, rewrite_str, text,
     RewriteStrSettings,
@@ -18,7 +16,6 @@ use lol_html::{
 use maplit::{hashmap, hashset};
 use scraper::{Html, Selector};
 use thiserror::Error;
-use tokio::sync::Mutex;
 use url::Url;

 use crate::newsreader::{extract_thread_id, is_newsreader_thread};
@@ -112,17 +109,16 @@ impl Transformer for InlineStyle {
             include_str!("custom.css"),
         );
         let inline_opts = InlineOptions {
-            inline_style_tags: true,
+            inline_style_tags: false,
             keep_style_tags: false,
-            keep_link_tags: true,
+            keep_link_tags: false,
             base_url: None,
-            load_remote_stylesheets: true,
+            load_remote_stylesheets: false,
             extra_css: Some(css.into()),
             preallocate_node_capacity: 32,
             ..InlineOptions::default()
         };

         //info!("HTML:\n{html}");
         Ok(match CSSInliner::new(inline_opts).inline(&html) {
             Ok(inlined_html) => inlined_html,
             Err(err) => {
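Note: the flags flipped above are css-inline's InlineOptions toggles. A small, self-contained sketch using only the options and calls already visible in this diff (css-inline 0.13-era API; the HTML is a made-up example):

```rust
use css_inline::{CSSInliner, InlineOptions};

fn main() -> Result<(), css_inline::InlineError> {
    let html = r#"<html><head><style>p { color: red; }</style></head><body><p>hi</p></body></html>"#;

    let opts = InlineOptions {
        inline_style_tags: true,        // move <style> rules onto style="" attributes
        keep_style_tags: false,         // drop the original <style> blocks afterwards
        keep_link_tags: false,
        load_remote_stylesheets: false, // never fetch external CSS for untrusted content
        extra_css: Some("p { font-family: sans-serif; }".into()),
        ..InlineOptions::default()
    };

    let inlined = CSSInliner::new(opts).inline(html)?;
    println!("{inlined}");
    Ok(())
}
```

The incoming side of this hunk sets inline_style_tags to false, which leaves embedded style blocks for the browser instead of folding them into attributes.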
@@ -216,7 +212,6 @@ impl Transformer for AddOutlink {
 }

 struct SlurpContents {
-    cacher: Arc<Mutex<FilesystemCacher>>,
     site_selectors: HashMap<String, Vec<Selector>>,
 }

@@ -246,26 +241,19 @@ impl Transformer for SlurpContents {
         let Some(selectors) = self.get_selectors(&link) else {
             return Ok(html.to_string());
         };
-        let mut cacher = self.cacher.lock().await;
-        let body = if let Some(body) = cacher.get(link.as_str()) {
-            info!("cache hit for {link}");
-            String::from_utf8_lossy(&body).to_string()
-        } else {
-            let body = reqwest::get(link.as_str()).await?.text().await?;
-            cacher.set(link.as_str(), body.as_bytes());
-            body
-        };
+        let body = reqwest::get(link.as_str()).await?.text().await?;
         let doc = Html::parse_document(&body);

         let mut results = Vec::new();
         for selector in selectors {
-            for frag in doc.select(&selector) {
+            if let Some(frag) = doc.select(&selector).next() {
                 results.push(frag.html())
-                // TODO: figure out how to warn if there were no hits
-                //warn!("couldn't find '{:?}' in {}", selector, link);
+            } else {
+                warn!("couldn't find '{:?}' in {}", selector, link);
+                return Ok(html.to_string());
             }
         }
-        Ok(results.join(""))
+        Ok(results.join("<br><br>"))
     }
 }
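Note: the lines removed above gave SlurpContents a read-through cache in front of reqwest, keyed by URL, using the private cacher crate's get/set calls. A generic sketch of that pattern with an in-memory map standing in for FilesystemCacher (a hypothetical stand-in; nothing about the real crate is assumed beyond the get and set calls shown in the diff):

```rust
use std::collections::HashMap;

// Stand-in for FilesystemCacher: look up by key, otherwise fetch and remember.
struct MemoryCacher {
    entries: HashMap<String, Vec<u8>>,
}

impl MemoryCacher {
    fn get(&self, key: &str) -> Option<&Vec<u8>> {
        self.entries.get(key)
    }
    fn set(&mut self, key: &str, value: &[u8]) {
        self.entries.insert(key.to_string(), value.to_vec());
    }
}

fn fetch(url: &str) -> Vec<u8> {
    // Placeholder for the real `reqwest::get(url).await?.text().await?` call.
    format!("<html>fetched {url}</html>").into_bytes()
}

fn slurp(cacher: &mut MemoryCacher, url: &str) -> String {
    if let Some(body) = cacher.get(url) {
        // Cache hit: skip the network round trip, as the removed code logged.
        return String::from_utf8_lossy(body).to_string();
    }
    let body = fetch(url);
    cacher.set(url, &body);
    String::from_utf8_lossy(&body).to_string()
}

fn main() {
    let mut cacher = MemoryCacher { entries: HashMap::new() };
    println!("{}", slurp(&mut cacher, "https://example.com/post"));
    // The second call is served from the cache.
    println!("{}", slurp(&mut cacher, "https://example.com/post"));
}
```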
@@ -304,7 +292,7 @@ pub fn sanitize_html(
 ) -> Result<String, TransformError> {
     let inline_opts = InlineOptions {
         inline_style_tags: true,
-        keep_style_tags: true,
+        keep_style_tags: false,
         keep_link_tags: false,
         base_url: None,
         load_remote_stylesheets: false,
@@ -347,30 +335,6 @@ pub fn sanitize_html(

             el.set_attribute("src", &src)?;

             Ok(())
         }),
-        // Add https to href with //<domain name>
-        element!("link[href]", |el| {
-            info!("found link[href] {el:?}");
-            let mut href = el.get_attribute("href").expect("href was required");
-            if href.starts_with("//") {
-                warn!("adding https to {href}");
-                href.insert_str(0, "https:");
-            }
-
-            el.set_attribute("href", &href)?;
-
-            Ok(())
-        }),
-        // Add https to src with //<domain name>
-        element!("style[src]", |el| {
-            let mut src = el.get_attribute("src").expect("src was required");
-            if src.starts_with("//") {
-                src.insert_str(0, "https:");
-            }
-
-            el.set_attribute("src", &src)?;
-
-            Ok(())
-        }),
         ];

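Note: the handlers removed above are lol_html element! rewriters that upgrade protocol-relative URLs. A self-contained sketch of the same rewrite_str pattern for an href attribute, using only calls that appear elsewhere in this diff (the input HTML is an example):

```rust
use lol_html::{element, rewrite_str, RewriteStrSettings};

fn main() -> Result<(), lol_html::errors::RewritingError> {
    let html = r#"<link rel="stylesheet" href="//cdn.example.com/style.css">"#;

    let output = rewrite_str(
        html,
        RewriteStrSettings {
            element_content_handlers: vec![
                // Same shape as the removed handlers: prefix protocol-relative URLs with https:.
                element!("link[href]", |el| {
                    let mut href = el.get_attribute("href").expect("href was required");
                    if href.starts_with("//") {
                        href.insert_str(0, "https:");
                    }
                    el.set_attribute("href", &href)?;
                    Ok(())
                }),
            ],
            ..RewriteStrSettings::default()
        },
    )?;

    println!("{output}");
    Ok(())
}
```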
@@ -1,26 +1,24 @@
-use std::sync::Arc;
+use std::hash::{DefaultHasher, Hash, Hasher};

-use cacher::FilesystemCacher;
 use log::info;
 use maplit::hashmap;
 use scraper::Selector;
-use shared::compute_color;
 use sqlx::postgres::PgPool;
-use tokio::sync::Mutex;
 use url::Url;

-use crate::{
-    compute_offset_limit,
-    config::Config,
-    error::ServerError,
-    graphql::{NewsPost, Tag, Thread, ThreadSummary},
-    AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml, SlurpContents,
-    StripHtml, Transformer,
-};
+use crate::Query;

 const TAG_PREFIX: &'static str = "News/";
 const THREAD_PREFIX: &'static str = "news:";

+use crate::{
+    compute_offset_limit,
+    error::ServerError,
+    graphql::{NewsPost, Tag, Thread, ThreadSummary},
+    AddOutlink, EscapeHtml, FrameImages, InlineStyle, SanitizeHtml, SlurpContents, StripHtml,
+    Transformer,
+};
+
 pub fn is_newsreader_search(query: &str) -> bool {
     query.contains(TAG_PREFIX)
 }
@@ -130,9 +128,11 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
     let tags = tags
         .into_iter()
         .map(|tag| {
+            let mut hasher = DefaultHasher::new();
+            tag.site.hash(&mut hasher);
+            let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
             let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
             let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
-            let hex = compute_color(&name);
             Tag {
                 name,
                 fg_color: "white".to_string(),
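Note: the lines added above derive a tag color by hashing the site name, where the removed line delegated to shared::compute_color. A runnable, std-only sketch of that hash-to-hex mapping (the tag names are examples):

```rust
use std::hash::{DefaultHasher, Hash, Hasher};

// Map an arbitrary tag name to a stable 24-bit hex color, as the added code does.
fn color_for(name: &str) -> String {
    let mut hasher = DefaultHasher::new();
    name.hash(&mut hasher);
    format!("#{:06x}", hasher.finish() % (1 << 24))
}

fn main() {
    for site in ["News/lwn.net", "News/grafana.com"] {
        println!("{site} -> {}", color_for(site));
    }
}
```

DefaultHasher's algorithm is not guaranteed to stay the same across Rust releases, so the colors can shift between toolchains; compute_color presumably addresses that, but its implementation is not part of this diff.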
@@ -144,11 +144,7 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
     Ok(tags)
 }

-pub async fn thread(
-    config: &Config,
-    pool: &PgPool,
-    thread_id: String,
-) -> Result<Thread, ServerError> {
+pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerError> {
     let id = thread_id
         .strip_prefix(THREAD_PREFIX)
         .expect("news thread doesn't start with '{THREAD_PREFIX}'")
@@ -177,10 +173,8 @@ pub async fn thread(
     // TODO: add site specific cleanups. For example:
     // * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div>
     // * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
-    let cacher = Arc::new(Mutex::new(FilesystemCacher::new(&config.slurp_cache_path)?));
     let body_tranformers: Vec<Box<dyn Transformer>> = vec![
         Box::new(SlurpContents {
-            cacher,
             site_selectors: hashmap![
                 "atmeta.com".to_string() => vec![
                     Selector::parse("div.entry-content").unwrap(),
@@ -1,6 +1,6 @@
 [package]
 name = "shared"
-version = "0.0.23"
+version = "0.0.22"
 edition = "2021"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -1,5 +1,5 @@
 [package]
-version = "0.0.23"
+version = "0.0.22"
 name = "letterbox"
 repository = "https://github.com/seed-rs/seed-quickstart"
 authors = ["Bill Thiede <git@xinu.tv>"]