server: WIP tantivy, cache slurps, use shared::compute_color,
This commit is contained in:
@@ -1,14 +1,16 @@
|
||||
pub mod config;
|
||||
pub mod error;
|
||||
pub mod graphql;
|
||||
pub mod newsreader;
|
||||
pub mod nm;
|
||||
|
||||
use std::{collections::HashMap, convert::Infallible, str::FromStr};
|
||||
use std::{collections::HashMap, convert::Infallible, str::FromStr, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use cacher::{Cacher, FilesystemCacher};
|
||||
use css_inline::{CSSInliner, InlineError, InlineOptions};
|
||||
use linkify::{LinkFinder, LinkKind};
|
||||
use log::{error, warn};
|
||||
use log::{error, info, warn};
|
||||
use lol_html::{
|
||||
element, errors::RewritingError, html_content::ContentType, rewrite_str, text,
|
||||
RewriteStrSettings,
|
||||
@@ -16,6 +18,7 @@ use lol_html::{
|
||||
use maplit::{hashmap, hashset};
|
||||
use scraper::{Html, Selector};
|
||||
use thiserror::Error;
|
||||
use tokio::sync::Mutex;
|
||||
use url::Url;
|
||||
|
||||
use crate::newsreader::{extract_thread_id, is_newsreader_thread};
|
||||
@@ -109,16 +112,17 @@ impl Transformer for InlineStyle {
|
||||
include_str!("custom.css"),
|
||||
);
|
||||
let inline_opts = InlineOptions {
|
||||
inline_style_tags: false,
|
||||
inline_style_tags: true,
|
||||
keep_style_tags: false,
|
||||
keep_link_tags: false,
|
||||
keep_link_tags: true,
|
||||
base_url: None,
|
||||
load_remote_stylesheets: false,
|
||||
load_remote_stylesheets: true,
|
||||
extra_css: Some(css.into()),
|
||||
preallocate_node_capacity: 32,
|
||||
..InlineOptions::default()
|
||||
};
|
||||
|
||||
//info!("HTML:\n{html}");
|
||||
Ok(match CSSInliner::new(inline_opts).inline(&html) {
|
||||
Ok(inlined_html) => inlined_html,
|
||||
Err(err) => {
|
||||
@@ -212,6 +216,7 @@ impl Transformer for AddOutlink {
|
||||
}
|
||||
|
||||
struct SlurpContents {
|
||||
cacher: Arc<Mutex<FilesystemCacher>>,
|
||||
site_selectors: HashMap<String, Vec<Selector>>,
|
||||
}
|
||||
|
||||
@@ -241,19 +246,26 @@ impl Transformer for SlurpContents {
|
||||
let Some(selectors) = self.get_selectors(&link) else {
|
||||
return Ok(html.to_string());
|
||||
};
|
||||
let body = reqwest::get(link.as_str()).await?.text().await?;
|
||||
let mut cacher = self.cacher.lock().await;
|
||||
let body = if let Some(body) = cacher.get(link.as_str()) {
|
||||
info!("cache hit for {link}");
|
||||
String::from_utf8_lossy(&body).to_string()
|
||||
} else {
|
||||
let body = reqwest::get(link.as_str()).await?.text().await?;
|
||||
cacher.set(link.as_str(), body.as_bytes());
|
||||
body
|
||||
};
|
||||
let doc = Html::parse_document(&body);
|
||||
|
||||
let mut results = Vec::new();
|
||||
for selector in selectors {
|
||||
if let Some(frag) = doc.select(&selector).next() {
|
||||
for frag in doc.select(&selector) {
|
||||
results.push(frag.html())
|
||||
} else {
|
||||
warn!("couldn't find '{:?}' in {}", selector, link);
|
||||
return Ok(html.to_string());
|
||||
// TODO: figure out how to warn if there were no hits
|
||||
//warn!("couldn't find '{:?}' in {}", selector, link);
|
||||
}
|
||||
}
|
||||
Ok(results.join("<br><br>"))
|
||||
Ok(results.join(""))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -292,7 +304,7 @@ pub fn sanitize_html(
|
||||
) -> Result<String, TransformError> {
|
||||
let inline_opts = InlineOptions {
|
||||
inline_style_tags: true,
|
||||
keep_style_tags: false,
|
||||
keep_style_tags: true,
|
||||
keep_link_tags: false,
|
||||
base_url: None,
|
||||
load_remote_stylesheets: false,
|
||||
@@ -335,6 +347,30 @@ pub fn sanitize_html(
|
||||
|
||||
el.set_attribute("src", &src)?;
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
// Add https to href with //<domain name>
|
||||
element!("link[href]", |el| {
|
||||
info!("found link[href] {el:?}");
|
||||
let mut href = el.get_attribute("href").expect("href was required");
|
||||
if href.starts_with("//") {
|
||||
warn!("adding https to {href}");
|
||||
href.insert_str(0, "https:");
|
||||
}
|
||||
|
||||
el.set_attribute("href", &href)?;
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
// Add https to src with //<domain name>
|
||||
element!("style[src]", |el| {
|
||||
let mut src = el.get_attribute("src").expect("src was required");
|
||||
if src.starts_with("//") {
|
||||
src.insert_str(0, "https:");
|
||||
}
|
||||
|
||||
el.set_attribute("src", &src)?;
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
];
|
||||
|
||||
Reference in New Issue
Block a user