server: WIP tantivy, cache slurps, use shared::compute_color

This commit is contained in:
2024-09-19 15:53:09 -07:00
parent e7cbf9cc45
commit 30f510bb03
10 changed files with 1341 additions and 177 deletions

View File

@@ -1,14 +1,16 @@
pub mod config;
pub mod error;
pub mod graphql;
pub mod newsreader;
pub mod nm;
use std::{collections::HashMap, convert::Infallible, str::FromStr};
use std::{collections::HashMap, convert::Infallible, str::FromStr, sync::Arc};
use async_trait::async_trait;
use cacher::{Cacher, FilesystemCacher};
use css_inline::{CSSInliner, InlineError, InlineOptions};
use linkify::{LinkFinder, LinkKind};
use log::{error, warn};
use log::{error, info, warn};
use lol_html::{
element, errors::RewritingError, html_content::ContentType, rewrite_str, text,
RewriteStrSettings,
@@ -16,6 +18,7 @@ use lol_html::{
use maplit::{hashmap, hashset};
use scraper::{Html, Selector};
use thiserror::Error;
use tokio::sync::Mutex;
use url::Url;
use crate::newsreader::{extract_thread_id, is_newsreader_thread};
@@ -109,16 +112,17 @@ impl Transformer for InlineStyle {
include_str!("custom.css"),
);
let inline_opts = InlineOptions {
inline_style_tags: false,
inline_style_tags: true,
keep_style_tags: false,
keep_link_tags: false,
keep_link_tags: true,
base_url: None,
load_remote_stylesheets: false,
load_remote_stylesheets: true,
extra_css: Some(css.into()),
preallocate_node_capacity: 32,
..InlineOptions::default()
};
//info!("HTML:\n{html}");
Ok(match CSSInliner::new(inline_opts).inline(&html) {
Ok(inlined_html) => inlined_html,
Err(err) => {
@@ -212,6 +216,7 @@ impl Transformer for AddOutlink {
}
/// State for the `Transformer` implementation that fetches the page behind a
/// link and keeps only the fragments matching the configured selectors.
struct SlurpContents {
/// Cache of fetched page bodies, shared behind an async mutex. The transform
/// looks bodies up and stores them using the link URL string as the key.
cacher: Arc<Mutex<FilesystemCacher>>,
/// CSS selectors to extract, grouped under a string key.
/// NOTE(review): presumably keyed by site/host and consulted by
/// `get_selectors` — confirm against that method.
site_selectors: HashMap<String, Vec<Selector>>,
}
@@ -241,19 +246,26 @@ impl Transformer for SlurpContents {
let Some(selectors) = self.get_selectors(&link) else {
return Ok(html.to_string());
};
let body = reqwest::get(link.as_str()).await?.text().await?;
let mut cacher = self.cacher.lock().await;
let body = if let Some(body) = cacher.get(link.as_str()) {
info!("cache hit for {link}");
String::from_utf8_lossy(&body).to_string()
} else {
let body = reqwest::get(link.as_str()).await?.text().await?;
cacher.set(link.as_str(), body.as_bytes());
body
};
let doc = Html::parse_document(&body);
let mut results = Vec::new();
for selector in selectors {
if let Some(frag) = doc.select(&selector).next() {
for frag in doc.select(&selector) {
results.push(frag.html())
} else {
warn!("couldn't find '{:?}' in {}", selector, link);
return Ok(html.to_string());
// TODO: figure out how to warn if there were no hits
//warn!("couldn't find '{:?}' in {}", selector, link);
}
}
Ok(results.join("<br><br>"))
Ok(results.join(""))
}
}
@@ -292,7 +304,7 @@ pub fn sanitize_html(
) -> Result<String, TransformError> {
let inline_opts = InlineOptions {
inline_style_tags: true,
keep_style_tags: false,
keep_style_tags: true,
keep_link_tags: false,
base_url: None,
load_remote_stylesheets: false,
@@ -335,6 +347,30 @@ pub fn sanitize_html(
el.set_attribute("src", &src)?;
Ok(())
}),
// Add https to href with //<domain name>
element!("link[href]", |el| {
info!("found link[href] {el:?}");
let mut href = el.get_attribute("href").expect("href was required");
if href.starts_with("//") {
warn!("adding https to {href}");
href.insert_str(0, "https:");
}
el.set_attribute("href", &href)?;
Ok(())
}),
// Add https to src with //<domain name>
element!("style[src]", |el| {
let mut src = el.get_attribute("src").expect("src was required");
if src.starts_with("//") {
src.insert_str(0, "https:");
}
el.set_attribute("src", &src)?;
Ok(())
}),
];