server: WIP tantivy, cache slurps, use shared::compute_color,
This commit is contained in:
parent
e7cbf9cc45
commit
30f510bb03
1253
Cargo.lock
generated
1253
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -13,6 +13,7 @@ async-graphql = { version = "6.0.11", features = ["log"] }
|
|||||||
async-graphql-rocket = "6.0.11"
|
async-graphql-rocket = "6.0.11"
|
||||||
async-trait = "0.1.81"
|
async-trait = "0.1.81"
|
||||||
build-info = "0.0.38"
|
build-info = "0.0.38"
|
||||||
|
cacher = {git = "https://git-private.z.xinu.tv/wathiede/cacher"}
|
||||||
css-inline = "0.13.0"
|
css-inline = "0.13.0"
|
||||||
glog = "0.1.0"
|
glog = "0.1.0"
|
||||||
html-escape = "0.2.13"
|
html-escape = "0.2.13"
|
||||||
|
|||||||
@ -1,6 +1,8 @@
|
|||||||
[release]
|
[release]
|
||||||
address = "0.0.0.0"
|
address = "0.0.0.0"
|
||||||
port = 9345
|
port = 9345
|
||||||
|
newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
|
||||||
|
newsreader_tantivy_db_path = "../target/database/newsreader"
|
||||||
|
|
||||||
[debug]
|
[debug]
|
||||||
address = "0.0.0.0"
|
address = "0.0.0.0"
|
||||||
@ -8,3 +10,5 @@ port = 9345
|
|||||||
# Uncomment to make it production like.
|
# Uncomment to make it production like.
|
||||||
#log_level = "critical"
|
#log_level = "critical"
|
||||||
newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
|
newsreader_database_url = "postgres://newsreader@nixos-07.h.xinu.tv/newsreader"
|
||||||
|
newsreader_tantivy_db_path = "../target/database/newsreader"
|
||||||
|
slurp_cache_path = "/net/nasx/x/letterbox/slurp"
|
||||||
|
|||||||
10
server/sql/all-posts.sql
Normal file
10
server/sql/all-posts.sql
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
SELECT
|
||||||
|
site,
|
||||||
|
title,
|
||||||
|
summary,
|
||||||
|
link,
|
||||||
|
date,
|
||||||
|
is_read,
|
||||||
|
uid,
|
||||||
|
id
|
||||||
|
FROM post
|
||||||
@ -18,18 +18,14 @@ use rocket::{
|
|||||||
Response, State,
|
Response, State,
|
||||||
};
|
};
|
||||||
use rocket_cors::{AllowedHeaders, AllowedOrigins};
|
use rocket_cors::{AllowedHeaders, AllowedOrigins};
|
||||||
use serde::Deserialize;
|
|
||||||
use server::{
|
use server::{
|
||||||
|
config::Config,
|
||||||
error::ServerError,
|
error::ServerError,
|
||||||
graphql::{Attachment, GraphqlSchema, Mutation, QueryRoot},
|
graphql::{Attachment, GraphqlSchema, Mutation, QueryRoot},
|
||||||
nm::{attachment_bytes, cid_attachment_bytes},
|
nm::{attachment_bytes, cid_attachment_bytes},
|
||||||
};
|
};
|
||||||
use sqlx::postgres::PgPool;
|
use sqlx::postgres::PgPool;
|
||||||
|
use tantivy::{Index, IndexWriter};
|
||||||
#[derive(Deserialize)]
|
|
||||||
struct Config {
|
|
||||||
newsreader_database_url: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[get("/refresh")]
|
#[get("/refresh")]
|
||||||
async fn refresh(nm: &State<Notmuch>) -> Result<Json<String>, Debug<NotmuchError>> {
|
async fn refresh(nm: &State<Notmuch>) -> Result<Json<String>, Debug<NotmuchError>> {
|
||||||
@ -170,6 +166,122 @@ fn graphiql() -> content::RawHtml<String> {
|
|||||||
content::RawHtml(GraphiQLSource::build().endpoint("/api/graphql").finish())
|
content::RawHtml(GraphiQLSource::build().endpoint("/api/graphql").finish())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[rocket::post("/create-news-db")]
|
||||||
|
fn create_news_db(config: &State<Config>) -> Result<String, Debug<ServerError>> {
|
||||||
|
std::fs::remove_dir_all(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
|
||||||
|
std::fs::create_dir_all(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
|
||||||
|
use tantivy::schema::*;
|
||||||
|
let mut schema_builder = Schema::builder();
|
||||||
|
schema_builder.add_text_field("site", STRING | STORED);
|
||||||
|
schema_builder.add_text_field("title", TEXT | STORED);
|
||||||
|
schema_builder.add_text_field("summary", TEXT);
|
||||||
|
schema_builder.add_text_field("link", STRING | STORED);
|
||||||
|
schema_builder.add_date_field("date", FAST);
|
||||||
|
schema_builder.add_bool_field("is_read", FAST);
|
||||||
|
schema_builder.add_text_field("uid", STRING | STORED);
|
||||||
|
schema_builder.add_i64_field("id", FAST);
|
||||||
|
|
||||||
|
let schema = schema_builder.build();
|
||||||
|
Index::create_in_dir(&config.newsreader_tantivy_db_path, schema).map_err(ServerError::from)?;
|
||||||
|
Ok(format!(
|
||||||
|
"DB created in {}\n",
|
||||||
|
config.newsreader_tantivy_db_path
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[rocket::post("/reindex-news-db")]
|
||||||
|
async fn reindex_news_db(
|
||||||
|
pool: &State<PgPool>,
|
||||||
|
config: &State<Config>,
|
||||||
|
) -> Result<String, Debug<ServerError>> {
|
||||||
|
use tantivy::{doc, Term};
|
||||||
|
|
||||||
|
let start_time = std::time::Instant::now();
|
||||||
|
let pool: &PgPool = pool;
|
||||||
|
|
||||||
|
let index =
|
||||||
|
Index::open_in_dir(&config.newsreader_tantivy_db_path).map_err(ServerError::from)?;
|
||||||
|
let mut index_writer = index.writer(50_000_000).map_err(ServerError::from)?;
|
||||||
|
let schema = index.schema();
|
||||||
|
let site = schema.get_field("site").map_err(ServerError::from)?;
|
||||||
|
let title = schema.get_field("title").map_err(ServerError::from)?;
|
||||||
|
let summary = schema.get_field("summary").map_err(ServerError::from)?;
|
||||||
|
let link = schema.get_field("link").map_err(ServerError::from)?;
|
||||||
|
let date = schema.get_field("date").map_err(ServerError::from)?;
|
||||||
|
let is_read = schema.get_field("is_read").map_err(ServerError::from)?;
|
||||||
|
let uid = schema.get_field("uid").map_err(ServerError::from)?;
|
||||||
|
let id = schema.get_field("id").map_err(ServerError::from)?;
|
||||||
|
|
||||||
|
let rows = sqlx::query_file!("sql/all-posts.sql")
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await
|
||||||
|
.map_err(ServerError::from)?;
|
||||||
|
|
||||||
|
let total = rows.len();
|
||||||
|
for (i, r) in rows.into_iter().enumerate() {
|
||||||
|
if i % 10_000 == 0 {
|
||||||
|
info!(
|
||||||
|
"{i}/{total} processed, elapsed {:.2}s",
|
||||||
|
start_time.elapsed().as_secs_f32()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let id_term = Term::from_field_text(uid, &r.uid);
|
||||||
|
index_writer.delete_term(id_term);
|
||||||
|
index_writer
|
||||||
|
.add_document(doc!(
|
||||||
|
site => r.site.expect("UNKOWN_SITE"),
|
||||||
|
title => r.title.expect("UNKOWN_TITLE"),
|
||||||
|
// TODO: clean and extract text from HTML
|
||||||
|
summary => r.summary.expect("UNKNOWN_SUMMARY"),
|
||||||
|
link => r.link.expect("link"),
|
||||||
|
date => tantivy::DateTime::from_primitive(r.date.expect("date")),
|
||||||
|
is_read => r.is_read.expect("is_read"),
|
||||||
|
uid => r.uid,
|
||||||
|
id => r.id as i64,
|
||||||
|
))
|
||||||
|
.map_err(ServerError::from)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
index_writer.commit().map_err(ServerError::from)?;
|
||||||
|
|
||||||
|
info!("took {:.2}s to reindex", start_time.elapsed().as_secs_f32());
|
||||||
|
Ok(format!(
|
||||||
|
"DB openned in {}\n",
|
||||||
|
config.newsreader_tantivy_db_path
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[rocket::get("/search-news-db")]
|
||||||
|
fn search_news_db(
|
||||||
|
index: &State<tantivy::Index>,
|
||||||
|
reader: &State<tantivy::IndexReader>,
|
||||||
|
) -> Result<String, Debug<ServerError>> {
|
||||||
|
use tantivy::{collector::TopDocs, query::QueryParser, Document, TantivyDocument};
|
||||||
|
|
||||||
|
let searcher = reader.searcher();
|
||||||
|
let schema = index.schema();
|
||||||
|
let site = schema.get_field("site").map_err(ServerError::from)?;
|
||||||
|
let title = schema.get_field("title").map_err(ServerError::from)?;
|
||||||
|
let summary = schema.get_field("summary").map_err(ServerError::from)?;
|
||||||
|
let query_parser = QueryParser::for_index(&index, vec![site, title, summary]);
|
||||||
|
|
||||||
|
let query = query_parser
|
||||||
|
.parse_query("grapheme")
|
||||||
|
.map_err(ServerError::from)?;
|
||||||
|
let top_docs = searcher
|
||||||
|
.search(&query, &TopDocs::with_limit(10))
|
||||||
|
.map_err(ServerError::from)?;
|
||||||
|
let mut results = vec![];
|
||||||
|
info!("search found {} docs", top_docs.len());
|
||||||
|
for (_score, doc_address) in top_docs {
|
||||||
|
let retrieved_doc: TantivyDocument =
|
||||||
|
searcher.doc(doc_address).map_err(ServerError::from)?;
|
||||||
|
results.push(format!("{}", retrieved_doc.to_json(&schema)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(format!("{}", results.join(" ")))
|
||||||
|
}
|
||||||
|
|
||||||
#[rocket::get("/graphql?<query..>")]
|
#[rocket::get("/graphql?<query..>")]
|
||||||
async fn graphql_query(schema: &State<GraphqlSchema>, query: GraphQLQuery) -> GraphQLResponse {
|
async fn graphql_query(schema: &State<GraphqlSchema>, query: GraphQLQuery) -> GraphQLResponse {
|
||||||
query.execute(schema.inner()).await
|
query.execute(schema.inner()).await
|
||||||
@ -183,7 +295,6 @@ async fn graphql_request(
|
|||||||
request.execute(schema.inner()).await
|
request.execute(schema.inner()).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[rocket::main]
|
#[rocket::main]
|
||||||
async fn main() -> Result<(), Box<dyn Error>> {
|
async fn main() -> Result<(), Box<dyn Error>> {
|
||||||
glog::new()
|
glog::new()
|
||||||
@ -213,6 +324,9 @@ async fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
.mount(
|
.mount(
|
||||||
shared::urls::MOUNT_POINT,
|
shared::urls::MOUNT_POINT,
|
||||||
routes![
|
routes![
|
||||||
|
create_news_db,
|
||||||
|
reindex_news_db,
|
||||||
|
search_news_db,
|
||||||
original,
|
original,
|
||||||
refresh,
|
refresh,
|
||||||
show_pretty,
|
show_pretty,
|
||||||
@ -229,14 +343,26 @@ async fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
.attach(AdHoc::config::<Config>());
|
.attach(AdHoc::config::<Config>());
|
||||||
|
|
||||||
let config: Config = rkt.figment().extract()?;
|
let config: Config = rkt.figment().extract()?;
|
||||||
|
if !std::fs::exists(&config.slurp_cache_path)? {
|
||||||
|
info!("Creating slurp cache @ '{}'", &config.slurp_cache_path);
|
||||||
|
std::fs::create_dir_all(&config.slurp_cache_path)?;
|
||||||
|
}
|
||||||
let pool = PgPool::connect(&config.newsreader_database_url).await?;
|
let pool = PgPool::connect(&config.newsreader_database_url).await?;
|
||||||
|
let tantivy_newsreader_index = Index::open_in_dir(&config.newsreader_tantivy_db_path)?;
|
||||||
|
let tantivy_newsreader_reader = tantivy_newsreader_index.reader()?;
|
||||||
let schema = Schema::build(QueryRoot, Mutation, EmptySubscription)
|
let schema = Schema::build(QueryRoot, Mutation, EmptySubscription)
|
||||||
.data(Notmuch::default())
|
.data(Notmuch::default())
|
||||||
|
.data(config)
|
||||||
.data(pool.clone())
|
.data(pool.clone())
|
||||||
.extension(async_graphql::extensions::Logger)
|
.extension(async_graphql::extensions::Logger)
|
||||||
.finish();
|
.finish();
|
||||||
|
|
||||||
let rkt = rkt.manage(schema).manage(pool).manage(Notmuch::default());
|
let rkt = rkt
|
||||||
|
.manage(schema)
|
||||||
|
.manage(pool)
|
||||||
|
.manage(Notmuch::default())
|
||||||
|
.manage(tantivy_newsreader_index)
|
||||||
|
.manage(tantivy_newsreader_reader);
|
||||||
//.manage(Notmuch::with_config("../notmuch/testdata/notmuch.config"))
|
//.manage(Notmuch::with_config("../notmuch/testdata/notmuch.config"))
|
||||||
|
|
||||||
rkt.launch().await?;
|
rkt.launch().await?;
|
||||||
|
|||||||
7
server/src/config.rs
Normal file
7
server/src/config.rs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
use serde::Deserialize;
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct Config {
|
||||||
|
pub newsreader_database_url: String,
|
||||||
|
pub newsreader_tantivy_db_path: String,
|
||||||
|
pub slurp_cache_path: String,
|
||||||
|
}
|
||||||
@ -1,6 +1,8 @@
|
|||||||
use std::{convert::Infallible, str::Utf8Error, string::FromUtf8Error};
|
use std::{convert::Infallible, str::Utf8Error, string::FromUtf8Error};
|
||||||
|
|
||||||
use mailparse::MailParseError;
|
use mailparse::MailParseError;
|
||||||
|
use tantivy::TantivyError;
|
||||||
|
use tantivy::query::QueryParserError;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use crate::TransformError;
|
use crate::TransformError;
|
||||||
@ -29,6 +31,10 @@ pub enum ServerError {
|
|||||||
StringError(String),
|
StringError(String),
|
||||||
#[error("invalid url: {0}")]
|
#[error("invalid url: {0}")]
|
||||||
UrlParseError(#[from] url::ParseError),
|
UrlParseError(#[from] url::ParseError),
|
||||||
|
#[error("tantivy error: {0}")]
|
||||||
|
TantivyError(#[from] TantivyError),
|
||||||
|
#[error("tantivy query parse error: {0}")]
|
||||||
|
QueryParseError(#[from] QueryParserError),
|
||||||
#[error("impossible: {0}")]
|
#[error("impossible: {0}")]
|
||||||
InfaillibleError(#[from] Infallible),
|
InfaillibleError(#[from] Infallible),
|
||||||
}
|
}
|
||||||
|
|||||||
@ -8,7 +8,7 @@ use notmuch::Notmuch;
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use sqlx::postgres::PgPool;
|
use sqlx::postgres::PgPool;
|
||||||
|
|
||||||
use crate::{newsreader, nm, Query};
|
use crate::{config::Config, newsreader, nm, Query};
|
||||||
|
|
||||||
/// # Number of seconds since the Epoch
|
/// # Number of seconds since the Epoch
|
||||||
pub type UnixTime = isize;
|
pub type UnixTime = isize;
|
||||||
@ -384,6 +384,7 @@ impl QueryRoot {
|
|||||||
async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result<Thread, Error> {
|
async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result<Thread, Error> {
|
||||||
let nm = ctx.data_unchecked::<Notmuch>();
|
let nm = ctx.data_unchecked::<Notmuch>();
|
||||||
let pool = ctx.data_unchecked::<PgPool>();
|
let pool = ctx.data_unchecked::<PgPool>();
|
||||||
|
let config = ctx.data_unchecked::<Config>();
|
||||||
let debug_content_tree = ctx
|
let debug_content_tree = ctx
|
||||||
.look_ahead()
|
.look_ahead()
|
||||||
.field("messages")
|
.field("messages")
|
||||||
@ -392,7 +393,7 @@ impl QueryRoot {
|
|||||||
.exists();
|
.exists();
|
||||||
// TODO: look at thread_id and conditionally load newsreader
|
// TODO: look at thread_id and conditionally load newsreader
|
||||||
if newsreader::is_newsreader_thread(&thread_id) {
|
if newsreader::is_newsreader_thread(&thread_id) {
|
||||||
Ok(newsreader::thread(pool, thread_id).await?)
|
Ok(newsreader::thread(config, pool, thread_id).await?)
|
||||||
} else {
|
} else {
|
||||||
Ok(nm::thread(nm, thread_id, debug_content_tree).await?)
|
Ok(nm::thread(nm, thread_id, debug_content_tree).await?)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,14 +1,16 @@
|
|||||||
|
pub mod config;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod graphql;
|
pub mod graphql;
|
||||||
pub mod newsreader;
|
pub mod newsreader;
|
||||||
pub mod nm;
|
pub mod nm;
|
||||||
|
|
||||||
use std::{collections::HashMap, convert::Infallible, str::FromStr};
|
use std::{collections::HashMap, convert::Infallible, str::FromStr, sync::Arc};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
use cacher::{Cacher, FilesystemCacher};
|
||||||
use css_inline::{CSSInliner, InlineError, InlineOptions};
|
use css_inline::{CSSInliner, InlineError, InlineOptions};
|
||||||
use linkify::{LinkFinder, LinkKind};
|
use linkify::{LinkFinder, LinkKind};
|
||||||
use log::{error, warn};
|
use log::{error, info, warn};
|
||||||
use lol_html::{
|
use lol_html::{
|
||||||
element, errors::RewritingError, html_content::ContentType, rewrite_str, text,
|
element, errors::RewritingError, html_content::ContentType, rewrite_str, text,
|
||||||
RewriteStrSettings,
|
RewriteStrSettings,
|
||||||
@ -16,6 +18,7 @@ use lol_html::{
|
|||||||
use maplit::{hashmap, hashset};
|
use maplit::{hashmap, hashset};
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use crate::newsreader::{extract_thread_id, is_newsreader_thread};
|
use crate::newsreader::{extract_thread_id, is_newsreader_thread};
|
||||||
@ -109,16 +112,17 @@ impl Transformer for InlineStyle {
|
|||||||
include_str!("custom.css"),
|
include_str!("custom.css"),
|
||||||
);
|
);
|
||||||
let inline_opts = InlineOptions {
|
let inline_opts = InlineOptions {
|
||||||
inline_style_tags: false,
|
inline_style_tags: true,
|
||||||
keep_style_tags: false,
|
keep_style_tags: false,
|
||||||
keep_link_tags: false,
|
keep_link_tags: true,
|
||||||
base_url: None,
|
base_url: None,
|
||||||
load_remote_stylesheets: false,
|
load_remote_stylesheets: true,
|
||||||
extra_css: Some(css.into()),
|
extra_css: Some(css.into()),
|
||||||
preallocate_node_capacity: 32,
|
preallocate_node_capacity: 32,
|
||||||
..InlineOptions::default()
|
..InlineOptions::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//info!("HTML:\n{html}");
|
||||||
Ok(match CSSInliner::new(inline_opts).inline(&html) {
|
Ok(match CSSInliner::new(inline_opts).inline(&html) {
|
||||||
Ok(inlined_html) => inlined_html,
|
Ok(inlined_html) => inlined_html,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
@ -212,6 +216,7 @@ impl Transformer for AddOutlink {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct SlurpContents {
|
struct SlurpContents {
|
||||||
|
cacher: Arc<Mutex<FilesystemCacher>>,
|
||||||
site_selectors: HashMap<String, Vec<Selector>>,
|
site_selectors: HashMap<String, Vec<Selector>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -241,19 +246,26 @@ impl Transformer for SlurpContents {
|
|||||||
let Some(selectors) = self.get_selectors(&link) else {
|
let Some(selectors) = self.get_selectors(&link) else {
|
||||||
return Ok(html.to_string());
|
return Ok(html.to_string());
|
||||||
};
|
};
|
||||||
|
let mut cacher = self.cacher.lock().await;
|
||||||
|
let body = if let Some(body) = cacher.get(link.as_str()) {
|
||||||
|
info!("cache hit for {link}");
|
||||||
|
String::from_utf8_lossy(&body).to_string()
|
||||||
|
} else {
|
||||||
let body = reqwest::get(link.as_str()).await?.text().await?;
|
let body = reqwest::get(link.as_str()).await?.text().await?;
|
||||||
|
cacher.set(link.as_str(), body.as_bytes());
|
||||||
|
body
|
||||||
|
};
|
||||||
let doc = Html::parse_document(&body);
|
let doc = Html::parse_document(&body);
|
||||||
|
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
for selector in selectors {
|
for selector in selectors {
|
||||||
if let Some(frag) = doc.select(&selector).next() {
|
for frag in doc.select(&selector) {
|
||||||
results.push(frag.html())
|
results.push(frag.html())
|
||||||
} else {
|
// TODO: figure out how to warn if there were no hits
|
||||||
warn!("couldn't find '{:?}' in {}", selector, link);
|
//warn!("couldn't find '{:?}' in {}", selector, link);
|
||||||
return Ok(html.to_string());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(results.join("<br><br>"))
|
Ok(results.join(""))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -292,7 +304,7 @@ pub fn sanitize_html(
|
|||||||
) -> Result<String, TransformError> {
|
) -> Result<String, TransformError> {
|
||||||
let inline_opts = InlineOptions {
|
let inline_opts = InlineOptions {
|
||||||
inline_style_tags: true,
|
inline_style_tags: true,
|
||||||
keep_style_tags: false,
|
keep_style_tags: true,
|
||||||
keep_link_tags: false,
|
keep_link_tags: false,
|
||||||
base_url: None,
|
base_url: None,
|
||||||
load_remote_stylesheets: false,
|
load_remote_stylesheets: false,
|
||||||
@ -335,6 +347,30 @@ pub fn sanitize_html(
|
|||||||
|
|
||||||
el.set_attribute("src", &src)?;
|
el.set_attribute("src", &src)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}),
|
||||||
|
// Add https to href with //<domain name>
|
||||||
|
element!("link[href]", |el| {
|
||||||
|
info!("found link[href] {el:?}");
|
||||||
|
let mut href = el.get_attribute("href").expect("href was required");
|
||||||
|
if href.starts_with("//") {
|
||||||
|
warn!("adding https to {href}");
|
||||||
|
href.insert_str(0, "https:");
|
||||||
|
}
|
||||||
|
|
||||||
|
el.set_attribute("href", &href)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}),
|
||||||
|
// Add https to src with //<domain name>
|
||||||
|
element!("style[src]", |el| {
|
||||||
|
let mut src = el.get_attribute("src").expect("src was required");
|
||||||
|
if src.starts_with("//") {
|
||||||
|
src.insert_str(0, "https:");
|
||||||
|
}
|
||||||
|
|
||||||
|
el.set_attribute("src", &src)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}),
|
}),
|
||||||
];
|
];
|
||||||
|
|||||||
@ -1,24 +1,26 @@
|
|||||||
use std::hash::{DefaultHasher, Hash, Hasher};
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use cacher::FilesystemCacher;
|
||||||
use log::info;
|
use log::info;
|
||||||
use maplit::hashmap;
|
use maplit::hashmap;
|
||||||
use scraper::Selector;
|
use scraper::Selector;
|
||||||
|
use shared::compute_color;
|
||||||
use sqlx::postgres::PgPool;
|
use sqlx::postgres::PgPool;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use crate::Query;
|
|
||||||
|
|
||||||
const TAG_PREFIX: &'static str = "News/";
|
|
||||||
const THREAD_PREFIX: &'static str = "news:";
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
compute_offset_limit,
|
compute_offset_limit,
|
||||||
|
config::Config,
|
||||||
error::ServerError,
|
error::ServerError,
|
||||||
graphql::{NewsPost, Tag, Thread, ThreadSummary},
|
graphql::{NewsPost, Tag, Thread, ThreadSummary},
|
||||||
AddOutlink, EscapeHtml, FrameImages, InlineStyle, SanitizeHtml, SlurpContents, StripHtml,
|
AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml, SlurpContents,
|
||||||
Transformer,
|
StripHtml, Transformer,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const TAG_PREFIX: &'static str = "News/";
|
||||||
|
const THREAD_PREFIX: &'static str = "news:";
|
||||||
|
|
||||||
pub fn is_newsreader_search(query: &str) -> bool {
|
pub fn is_newsreader_search(query: &str) -> bool {
|
||||||
query.contains(TAG_PREFIX)
|
query.contains(TAG_PREFIX)
|
||||||
}
|
}
|
||||||
@ -128,11 +130,9 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
|
|||||||
let tags = tags
|
let tags = tags
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|tag| {
|
.map(|tag| {
|
||||||
let mut hasher = DefaultHasher::new();
|
|
||||||
tag.site.hash(&mut hasher);
|
|
||||||
let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
|
|
||||||
let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
|
let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
|
||||||
let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
|
let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
|
||||||
|
let hex = compute_color(&name);
|
||||||
Tag {
|
Tag {
|
||||||
name,
|
name,
|
||||||
fg_color: "white".to_string(),
|
fg_color: "white".to_string(),
|
||||||
@ -144,7 +144,11 @@ pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, Server
|
|||||||
Ok(tags)
|
Ok(tags)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerError> {
|
pub async fn thread(
|
||||||
|
config: &Config,
|
||||||
|
pool: &PgPool,
|
||||||
|
thread_id: String,
|
||||||
|
) -> Result<Thread, ServerError> {
|
||||||
let id = thread_id
|
let id = thread_id
|
||||||
.strip_prefix(THREAD_PREFIX)
|
.strip_prefix(THREAD_PREFIX)
|
||||||
.expect("news thread doesn't start with '{THREAD_PREFIX}'")
|
.expect("news thread doesn't start with '{THREAD_PREFIX}'")
|
||||||
@ -173,8 +177,10 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
|
|||||||
// TODO: add site specific cleanups. For example:
|
// TODO: add site specific cleanups. For example:
|
||||||
// * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div>
|
// * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div>
|
||||||
// * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
|
// * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
|
||||||
|
let cacher = Arc::new(Mutex::new(FilesystemCacher::new(&config.slurp_cache_path)?));
|
||||||
let body_tranformers: Vec<Box<dyn Transformer>> = vec![
|
let body_tranformers: Vec<Box<dyn Transformer>> = vec![
|
||||||
Box::new(SlurpContents {
|
Box::new(SlurpContents {
|
||||||
|
cacher,
|
||||||
site_selectors: hashmap![
|
site_selectors: hashmap![
|
||||||
"atmeta.com".to_string() => vec![
|
"atmeta.com".to_string() => vec![
|
||||||
Selector::parse("div.entry-content").unwrap(),
|
Selector::parse("div.entry-content").unwrap(),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user