865 lines
26 KiB
Rust
865 lines
26 KiB
Rust
pub mod config;
|
|
pub mod error;
|
|
pub mod graphql;
|
|
pub mod mail;
|
|
pub mod newsreader;
|
|
pub mod nm;
|
|
#[cfg(feature = "tantivy")]
|
|
pub mod tantivy;
|
|
|
|
use std::{collections::HashMap, convert::Infallible, fmt, str::FromStr, sync::Arc};
|
|
|
|
use async_trait::async_trait;
|
|
use cacher::{Cacher, FilesystemCacher};
|
|
use css_inline::{CSSInliner, InlineError, InlineOptions};
|
|
use linkify::{LinkFinder, LinkKind};
|
|
use log::{debug, error, info, warn};
|
|
use lol_html::{
|
|
element, errors::RewritingError, html_content::ContentType, rewrite_str, text,
|
|
RewriteStrSettings,
|
|
};
|
|
use maplit::{hashmap, hashset};
|
|
use regex::Regex;
|
|
use scraper::{Html, Selector};
|
|
use sqlx::types::time::PrimitiveDateTime;
|
|
use thiserror::Error;
|
|
use url::Url;
|
|
|
|
use crate::{
|
|
error::ServerError,
|
|
graphql::{Corpus, ThreadSummary},
|
|
newsreader::is_newsreader_thread,
|
|
nm::is_notmuch_thread_or_id,
|
|
};
|
|
|
|
/// Tag prefix used to present newsreader sites as notmuch-style tags ("News/<site>").
// `'static` is implied on const string slices; the explicit lifetime was redundant.
const NEWSREADER_TAG_PREFIX: &str = "News/";
/// Prefix that marks a thread id as belonging to the newsreader corpus.
const NEWSREADER_THREAD_PREFIX: &str = "news:";
|
|
|
|
// TODO: figure out how to use Cow
/// One step in an HTML post-processing pipeline.
///
/// Implementations receive the (optional) source URL of the content and the
/// HTML text, and return a transformed copy of the HTML.
#[async_trait]
trait Transformer: Send + Sync {
    /// Cheap pre-check: return `false` to skip `transform` entirely.
    /// The default implementation always runs the transform.
    fn should_run(&self, _addr: &Option<Url>, _html: &str) -> bool {
        true
    }
    // TODO: should html be something like `html_escape` uses:
    // <S: ?Sized + AsRef<str>>(text: &S) -> Cow<str>
    /// Produce a transformed copy of `html`. `addr` is the URL the content
    /// came from (when known); transformers may use it for link resolution.
    async fn transform(&self, addr: &Option<Url>, html: &str) -> Result<String, TransformError>;
}
|
|
|
|
// TODO: how would we make this more generic to allow good implementations of Transformer outside
// of this module?
/// Errors that can occur while running a [`Transformer`] pipeline step.
#[derive(Error, Debug)]
pub enum TransformError {
    /// HTML rewriting via `lol_html` failed.
    #[error("lol-html rewrite error: {0}")]
    RewritingError(#[from] RewritingError),
    /// CSS inlining via `css-inline` failed.
    #[error("css inline error: {0}")]
    InlineError(#[from] InlineError),
    /// Fetching a remote resource over HTTP failed.
    #[error("failed to fetch url error: {0}")]
    ReqwestError(#[from] reqwest::Error),
    /// A fetched document could not be parsed as HTML.
    #[error("failed to parse HTML: {0}")]
    HtmlParsingError(String),
}
|
|
|
|
/// Transformer wrapper around the module-level [`sanitize_html`] function.
struct SanitizeHtml<'a> {
    // Prefix substituted for `cid:` image references (MIME multipart parts).
    cid_prefix: &'a str,
    // Base URL used to absolutize relative links/images, when known.
    base_url: &'a Option<Url>,
}
|
|
|
|
#[async_trait]
|
|
impl<'a> Transformer for SanitizeHtml<'a> {
|
|
async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
|
|
Ok(sanitize_html(html, self.cid_prefix, self.base_url)?)
|
|
}
|
|
}
|
|
|
|
/// Decodes HTML entities (e.g. `&amp;` -> `&`) into literal characters.
/// NOTE(review): despite the name, the transform *decodes* entities rather
/// than escaping them — consider renaming in a follow-up.
struct EscapeHtml;
|
|
|
|
#[async_trait]
|
|
impl Transformer for EscapeHtml {
|
|
fn should_run(&self, _: &Option<Url>, html: &str) -> bool {
|
|
html.contains("&")
|
|
}
|
|
async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
|
|
Ok(html_escape::decode_html_entities(html).to_string())
|
|
}
|
|
}
|
|
|
|
/// Removes all markup, keeping only text content with whitespace runs
/// collapsed to single spaces.
struct StripHtml;
|
|
|
|
#[async_trait]
|
|
impl Transformer for StripHtml {
|
|
fn should_run(&self, _: &Option<Url>, html: &str) -> bool {
|
|
// Lame test
|
|
html.contains("<")
|
|
}
|
|
async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
|
|
let mut text = String::new();
|
|
let element_content_handlers = vec![text!("*", |t| {
|
|
text += t.as_str();
|
|
Ok(())
|
|
})];
|
|
let _ = rewrite_str(
|
|
html,
|
|
RewriteStrSettings {
|
|
element_content_handlers,
|
|
..RewriteStrSettings::default()
|
|
},
|
|
)?;
|
|
let re = Regex::new(r"\s+").expect("failed to parse regex");
|
|
let text = re.replace_all(&text, " ").to_string();
|
|
|
|
Ok(text)
|
|
}
|
|
}
|
|
|
|
/// Inlines stylesheets — including remote ones — resolving relative
/// references against a base URL.
struct InlineRemoteStyle<'a> {
    // Base URL used to resolve relative stylesheet references.
    base_url: &'a Option<Url>,
}
|
|
|
|
#[async_trait]
|
|
impl<'a> Transformer for InlineRemoteStyle<'a> {
|
|
async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
|
|
//info!("HTML:\n{html}");
|
|
Ok(
|
|
match CSSInliner::options()
|
|
.base_url(self.base_url.clone())
|
|
.build()
|
|
.inline(&html)
|
|
{
|
|
Ok(inlined_html) => inlined_html,
|
|
Err(err) => {
|
|
error!("failed to inline remote CSS: {err}");
|
|
html.to_string()
|
|
}
|
|
},
|
|
)
|
|
}
|
|
}
|
|
/// Inlines `<style>` tags plus the bundled default stylesheet into element
/// `style` attributes; no base URL is used.
struct InlineStyle;
|
|
|
|
#[async_trait]
|
|
impl Transformer for InlineStyle {
|
|
async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
|
|
let css = concat!(
|
|
"/* chrome-default.css */\n",
|
|
include_str!("chrome-default.css"),
|
|
//"\n/* mvp.css */\n",
|
|
//include_str!("mvp.css"),
|
|
//"\n/* Xinu Specific overrides */\n",
|
|
//include_str!("custom.css"),
|
|
);
|
|
let inline_opts = InlineOptions {
|
|
inline_style_tags: true,
|
|
keep_style_tags: false,
|
|
keep_link_tags: true,
|
|
base_url: None,
|
|
load_remote_stylesheets: true,
|
|
extra_css: Some(css.into()),
|
|
preallocate_node_capacity: 32,
|
|
..InlineOptions::default()
|
|
};
|
|
|
|
//info!("HTML:\n{html}");
|
|
Ok(match CSSInliner::new(inline_opts).inline(&html) {
|
|
Ok(inlined_html) => inlined_html,
|
|
Err(err) => {
|
|
error!("failed to inline CSS: {err}");
|
|
html.to_string()
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Process images will extract any alt or title tags on images and place them as labels below said
/// image. It also handles data-src and data-cfsrc attributes (lazy-load /
/// Cloudflare deferred images) by promoting them to a real `src`.
struct FrameImages;
|
|
|
|
#[async_trait]
|
|
impl Transformer for FrameImages {
|
|
async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
|
|
Ok(rewrite_str(
|
|
html,
|
|
RewriteStrSettings {
|
|
element_content_handlers: vec![
|
|
element!("img[data-src]", |el| {
|
|
let src = el
|
|
.get_attribute("data-src")
|
|
.unwrap_or("https://placehold.co/600x400".to_string());
|
|
el.set_attribute("src", &src)?;
|
|
|
|
Ok(())
|
|
}),
|
|
element!("img[data-cfsrc]", |el| {
|
|
let src = el
|
|
.get_attribute("data-cfsrc")
|
|
.unwrap_or("https://placehold.co/600x400".to_string());
|
|
el.set_attribute("src", &src)?;
|
|
|
|
Ok(())
|
|
}),
|
|
element!("img[alt], img[title]", |el| {
|
|
let src = el
|
|
.get_attribute("src")
|
|
.unwrap_or("https://placehold.co/600x400".to_string());
|
|
let alt = el.get_attribute("alt");
|
|
let title = el.get_attribute("title");
|
|
let mut frags =
|
|
vec!["<figure>".to_string(), format!(r#"<img src="{src}">"#)];
|
|
alt.map(|t| {
|
|
if !t.is_empty() {
|
|
frags.push(format!("<figcaption>Alt: {t}</figcaption>"))
|
|
}
|
|
});
|
|
title.map(|t| {
|
|
if !t.is_empty() {
|
|
frags.push(format!("<figcaption>Title: {t}</figcaption>"))
|
|
}
|
|
});
|
|
frags.push("</figure>".to_string());
|
|
el.replace(&frags.join("\n"), ContentType::Html);
|
|
|
|
Ok(())
|
|
}),
|
|
],
|
|
..RewriteStrSettings::default()
|
|
},
|
|
)?)
|
|
}
|
|
}
|
|
/// Appends a "View on site" link pointing back at the content's source URL.
struct AddOutlink;
|
|
|
|
#[async_trait]
impl Transformer for AddOutlink {
    /// Run only for http(s) links whose URL text is not already present in
    /// the HTML (avoids duplicating an existing outlink).
    fn should_run(&self, link: &Option<Url>, html: &str) -> bool {
        if let Some(link) = link {
            link.scheme().starts_with("http") && !html.contains(link.as_str())
        } else {
            false
        }
    }
    /// Appends a "View on site" anchor after the content; a missing link
    /// passes the HTML through unchanged.
    async fn transform(&self, link: &Option<Url>, html: &str) -> Result<String, TransformError> {
        if let Some(link) = link {
            Ok(format!(
                r#"
{html}
<div><a href="{}">View on site</a></div>
"#,
                link
            ))
        } else {
            Ok(html.to_string())
        }
    }
}
|
|
|
|
/// Fetches the full article body for a link and extracts configured CSS
/// selector fragments, with optional CSS inlining.
struct SlurpContents<'c> {
    // Filesystem cache for fetched page bodies, keyed by URL.
    cacher: &'c FilesystemCacher,
    // When true, inline CSS into the fetched document before extraction.
    inline_css: bool,
    // Map of host substring -> selectors to extract for that site.
    site_selectors: HashMap<String, Vec<Selector>>,
}
|
|
|
|
impl<'c> SlurpContents<'c> {
|
|
fn get_selectors(&self, link: &Url) -> Option<&[Selector]> {
|
|
for (host, selector) in self.site_selectors.iter() {
|
|
if link.host_str().map(|h| h.contains(host)).unwrap_or(false) {
|
|
return Some(&selector);
|
|
}
|
|
}
|
|
None
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl<'c> Transformer for SlurpContents<'c> {
|
|
fn should_run(&self, link: &Option<Url>, html: &str) -> bool {
|
|
let mut will_slurp = false;
|
|
if let Some(link) = link {
|
|
will_slurp = self.get_selectors(link).is_some();
|
|
}
|
|
if !will_slurp && self.inline_css {
|
|
return InlineStyle {}.should_run(link, html);
|
|
}
|
|
will_slurp
|
|
}
|
|
async fn transform(&self, link: &Option<Url>, html: &str) -> Result<String, TransformError> {
|
|
if let Some(test_link) = link {
|
|
// If SlurpContents is configured for inline CSS, but no
|
|
// configuration found for this site, use the local InlineStyle
|
|
// transform.
|
|
if self.inline_css && self.get_selectors(test_link).is_none() {
|
|
debug!("local inline CSS for {link:?}");
|
|
return InlineStyle {}.transform(link, html).await;
|
|
}
|
|
}
|
|
let Some(link) = link else {
|
|
return Ok(html.to_string());
|
|
};
|
|
let Some(selectors) = self.get_selectors(&link) else {
|
|
return Ok(html.to_string());
|
|
};
|
|
let cacher = self.cacher;
|
|
let body = if let Some(body) = cacher.get(link.as_str()) {
|
|
String::from_utf8_lossy(&body).to_string()
|
|
} else {
|
|
let body = reqwest::get(link.as_str()).await?.text().await?;
|
|
cacher.set(link.as_str(), body.as_bytes());
|
|
body
|
|
};
|
|
let body = Arc::new(body);
|
|
let base_url = Some(link.clone());
|
|
let body = if self.inline_css {
|
|
debug!("inlining CSS for {link}");
|
|
let inner_body = Arc::clone(&body);
|
|
let res = tokio::task::spawn_blocking(move || {
|
|
let css = concat!(
|
|
"/* chrome-default.css */\n",
|
|
include_str!("chrome-default.css"),
|
|
"\n/* vars.css */\n",
|
|
include_str!("../../web/static/vars.css"),
|
|
//"\n/* Xinu Specific overrides */\n",
|
|
//include_str!("custom.css"),
|
|
);
|
|
let res = CSSInliner::options()
|
|
.base_url(base_url)
|
|
.extra_css(Some(std::borrow::Cow::Borrowed(css)))
|
|
.build()
|
|
.inline(&inner_body);
|
|
|
|
match res {
|
|
Ok(inlined_html) => inlined_html,
|
|
Err(err) => {
|
|
error!("failed to inline remote CSS: {err}");
|
|
Arc::into_inner(inner_body).expect("failed to take body out of Arc")
|
|
}
|
|
}
|
|
})
|
|
.await;
|
|
match res {
|
|
Ok(inlined_html) => inlined_html,
|
|
Err(err) => {
|
|
error!("failed to spawn inline remote CSS: {err}");
|
|
Arc::into_inner(body).expect("failed to take body out of Arc")
|
|
}
|
|
}
|
|
} else {
|
|
debug!("using body as-is for {link:?}");
|
|
Arc::into_inner(body).expect("failed to take body out of Arc")
|
|
};
|
|
|
|
let doc = Html::parse_document(&body);
|
|
|
|
let mut results = Vec::new();
|
|
for selector in selectors {
|
|
for frag in doc.select(&selector) {
|
|
results.push(frag.html())
|
|
// TODO: figure out how to warn if there were no hits
|
|
//warn!("couldn't find '{:?}' in {}", selector, link);
|
|
}
|
|
}
|
|
Ok(results.join("<br>"))
|
|
}
|
|
}
|
|
|
|
pub fn linkify_html(text: &str) -> String {
|
|
let mut finder = LinkFinder::new();
|
|
let finder = finder.url_must_have_scheme(false).kinds(&[LinkKind::Url]);
|
|
let mut parts = Vec::new();
|
|
for span in finder.spans(text) {
|
|
// TODO(wathiede): use Cow<str>?
|
|
match span.kind() {
|
|
// Text as-is
|
|
None => parts.push(span.as_str().to_string()),
|
|
// Wrap in anchor tag
|
|
Some(LinkKind::Url) => {
|
|
let text = span.as_str();
|
|
let schema = if text.starts_with("http") {
|
|
""
|
|
} else {
|
|
"http://"
|
|
};
|
|
let a = format!(r#"<a href="{schema}{0}">{0}</a>"#, text);
|
|
parts.push(a);
|
|
}
|
|
_ => todo!("unhandled kind: {:?}", span.kind().unwrap()),
|
|
}
|
|
}
|
|
parts.join("")
|
|
}
|
|
|
|
// html contains the content to be cleaned, and cid_prefix is used to resolve mixed part image
// referrences
/// Sanitizes untrusted HTML for display.
///
/// Pipeline: (1) inline `<style>` blocks into element `style` attributes
/// (no network fetches), (2) rewrite links and images — open links in a new
/// tab, resolve `cid:` references via `cid_prefix`, upgrade http to https,
/// and absolutize relative URLs when `base_url` is given — then (3) clean
/// with ammonia using the allow-lists below.
pub fn sanitize_html(
    html: &str,
    cid_prefix: &str,
    base_url: &Option<Url>,
) -> Result<String, TransformError> {
    // Local-only inlining: no remote stylesheet fetches, no extra CSS.
    let inline_opts = InlineOptions {
        inline_style_tags: true,
        keep_style_tags: true,
        keep_link_tags: false,
        base_url: None,
        load_remote_stylesheets: false,
        extra_css: None,
        preallocate_node_capacity: 32,
        ..InlineOptions::default()
    };

    // Inlining failure is non-fatal: log and continue with the original.
    let html = match CSSInliner::new(inline_opts).inline(&html) {
        Ok(inlined_html) => inlined_html,
        Err(err) => {
            error!("failed to inline CSS: {err}");
            html.to_string()
        }
    };
    let mut element_content_handlers = vec![
        // Open links in new tab
        element!("a[href]", |el| {
            el.set_attribute("target", "_blank").unwrap();

            Ok(())
        }),
        // Replace mixed part CID images with URL
        element!("img[src]", |el| {
            let src = el
                .get_attribute("src")
                .expect("src was required")
                .replace("cid:", cid_prefix);

            el.set_attribute("src", &src)?;

            Ok(())
        }),
        // Only secure image URLs
        // NOTE(review): replaces "http:" anywhere in the URL string, not just
        // the scheme prefix — confirm this is intended.
        element!("img[src]", |el| {
            let src = el
                .get_attribute("src")
                .expect("src was required")
                .replace("http:", "https:");

            el.set_attribute("src", &src)?;

            Ok(())
        }),
        // Add https to href with //<domain name>
        element!("link[href]", |el| {
            info!("found link[href] {el:?}");
            let mut href = el.get_attribute("href").expect("href was required");
            if href.starts_with("//") {
                warn!("adding https to {href}");
                href.insert_str(0, "https:");
            }

            el.set_attribute("href", &href)?;

            Ok(())
        }),
        // Add https to src with //<domain name>
        // NOTE(review): <style> elements have no src attribute in HTML — was
        // <script> or <link> intended here? Confirm.
        element!("style[src]", |el| {
            let mut src = el.get_attribute("src").expect("src was required");
            if src.starts_with("//") {
                src.insert_str(0, "https:");
            }

            el.set_attribute("src", &src)?;

            Ok(())
        }),
    ];
    if let Some(base_url) = base_url {
        element_content_handlers.extend(vec![
            // Make links with relative URLs absolute
            element!("a[href]", |el| {
                if let Some(Ok(href)) = el.get_attribute("href").map(|href| base_url.join(&href)) {
                    el.set_attribute("href", &href.as_str()).unwrap();
                }

                Ok(())
            }),
            // Make images with relative srcs absolute
            element!("img[src]", |el| {
                if let Some(Ok(src)) = el.get_attribute("src").map(|src| base_url.join(&src)) {
                    el.set_attribute("src", &src.as_str()).unwrap();
                }

                Ok(())
            }),
        ]);
    }
    let html = rewrite_str(
        &html,
        RewriteStrSettings {
            element_content_handlers,
            ..RewriteStrSettings::default()
        },
    )?;
    // Default's don't allow style, but we want to preserve that.
    // TODO: remove 'class' if rendering mails moves to a two phase process where abstract message
    // types are collected, santized, and then grouped together as one big HTML doc
    // Attributes allowed on every tag.
    let attributes = hashset![
        "align", "bgcolor", "class", "color", "height", "lang", "title", "width", "style",
    ];

    // Tag allow-list. Entries marked "wathiede" are additions beyond
    // ammonia's defaults. ("kbd" appears twice; the hashset dedups it.)
    let tags = hashset![
        "a",
        "abbr",
        "acronym",
        "area",
        "article",
        "aside",
        "b",
        "bdi",
        "bdo",
        "blockquote",
        "br",
        "caption",
        "center",
        "cite",
        "code",
        "col",
        "colgroup",
        "data",
        "dd",
        "del",
        "details",
        "dfn",
        "div",
        "dl",
        "dt",
        "em",
        "figcaption",
        "figure",
        "footer",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "header",
        "hgroup",
        "hr",
        "i",
        "iframe", // wathiede
        "img",
        "ins",
        "kbd",
        "kbd",
        "li",
        "map",
        "mark",
        "nav",
        "noscript", // wathiede
        "ol",
        "p",
        "pre",
        "q",
        "rp",
        "rt",
        "rtc",
        "ruby",
        "s",
        "samp",
        "small",
        "span",
        "strike",
        "strong",
        "sub",
        "summary",
        "sup",
        "table",
        "tbody",
        "td",
        "th",
        "thead",
        "time",
        "title", // wathiede
        "tr",
        "tt",
        "u",
        "ul",
        "var",
        "wbr",
    ];
    // Per-tag attribute allow-list (in addition to `attributes` above).
    let tag_attributes = hashmap![
        "a" => hashset![
            "href", "hreflang", "target",
        ],
        "bdo" => hashset![
            "dir"
        ],
        "blockquote" => hashset![
            "cite"
        ],
        "col" => hashset![
            "align", "char", "charoff", "span"
        ],
        "colgroup" => hashset![
            "align", "char", "charoff", "span"
        ],
        "del" => hashset![
            "cite", "datetime"
        ],
        "hr" => hashset![
            "align", "size", "width"
        ],
        "iframe" => hashset![
            "src", "allow", "allowfullscreen"
        ],
        "img" => hashset![
            "align", "alt", "height", "src", "width"
        ],
        "ins" => hashset![
            "cite", "datetime"
        ],
        "ol" => hashset![
            "start"
        ],
        "q" => hashset![
            "cite"
        ],
        "table" => hashset![
            "align", "border", "cellpadding", "cellspacing", "char", "charoff", "summary",
        ],
        "tbody" => hashset![
            "align", "char", "charoff"
        ],
        "td" => hashset![
            "align", "char", "charoff", "colspan", "headers", "rowspan"
        ],
        "tfoot" => hashset![
            "align", "char", "charoff"
        ],
        "th" => hashset![
            "align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
        ],
        "thead" => hashset![
            "align", "char", "charoff"
        ],
        "tr" => hashset![
            "align", "char", "charoff"
        ],
    ];

    // Final pass: ammonia strips anything not on the allow-lists above.
    let html = ammonia::Builder::default()
        .tags(tags)
        .tag_attributes(tag_attributes)
        .generic_attributes(attributes)
        .clean(&html)
        .to_string();

    Ok(html)
}
|
|
|
|
/// Translates GraphQL-style cursor pagination arguments (`after`/`before`/
/// `first`/`last`) into an `(offset, limit)` pair, panicking on nonsensical
/// combinations.
fn compute_offset_limit(
    after: Option<i32>,
    before: Option<i32>,
    first: Option<i32>,
    last: Option<i32>,
) -> (i32, i32) {
    const DEFAULT_PAGE_SIZE: i32 = 100;
    match (after, before, first, last) {
        // Reasonable defaults
        (None, None, None, None) => (0, DEFAULT_PAGE_SIZE),
        (None, None, Some(first), None) => (0, first),
        (Some(after), None, None, None) => (after + 1, DEFAULT_PAGE_SIZE),
        (Some(after), None, Some(first), None) => (after + 1, first),
        // Page backwards from `before`, clamping the offset at zero.
        (None, Some(before), None, None) => {
            ((before - DEFAULT_PAGE_SIZE).max(0), DEFAULT_PAGE_SIZE)
        }
        (None, Some(before), None, Some(last)) => ((before - last).max(0), last),
        // Everything below is a contradictory combination of cursors.
        (None, None, None, Some(_)) => {
            panic!("specifying last and no before doesn't make sense")
        }
        (None, None, Some(_), Some(_)) => {
            panic!("specifying first and last doesn't make sense")
        }
        (None, Some(_), Some(_), _) => {
            panic!("specifying before and first doesn't make sense")
        }
        (Some(_), Some(_), _, _) => {
            panic!("specifying after and before doesn't make sense")
        }
        (Some(_), None, None, Some(_)) => {
            panic!("specifying after and last doesn't make sense")
        }
        (Some(_), None, Some(_), Some(_)) => {
            panic!("specifying after, first and last doesn't make sense")
        }
    }
}
|
|
|
|
/// Parsed representation of a free-form search query string.
#[derive(Debug, Default)]
pub struct Query {
    // `is:unread` was present.
    pub unread_only: bool,
    // Values of `tag:<value>` terms.
    pub tags: Vec<String>,
    // Thread/message ids recognized by the newsreader or notmuch backends.
    pub uids: Vec<String>,
    // Unrecognized words, passed through to the backend query.
    pub remainder: Vec<String>,
    // Search the notmuch (mail) corpus.
    pub is_notmuch: bool,
    // Search the newsreader corpus.
    pub is_newsreader: bool,
    // Search the tantivy full-text index.
    pub is_tantivy: bool,
    // Explicit `corpus:<name>` restriction, when given and parseable.
    pub corpus: Option<Corpus>,
}
|
|
|
|
impl fmt::Display for Query {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
|
|
if self.unread_only {
|
|
write!(f, "is:unread ")?;
|
|
}
|
|
for tag in &self.tags {
|
|
write!(f, "tag:{tag} ")?;
|
|
}
|
|
for uid in &self.uids {
|
|
write!(f, "id:{uid} ")?;
|
|
}
|
|
if self.is_notmuch {
|
|
write!(f, "is:mail ")?;
|
|
}
|
|
if self.is_newsreader {
|
|
write!(f, "is:newsreader ")?;
|
|
}
|
|
if self.is_newsreader {
|
|
write!(f, "is:news ")?;
|
|
}
|
|
match self.corpus {
|
|
Some(c) => write!(f, "corpus:{c:?}")?,
|
|
_ => (),
|
|
}
|
|
for rem in &self.remainder {
|
|
write!(f, "{rem} ")?;
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl Query {
|
|
// Converts the internal state of Query to something suitable for notmuch queries. Removes and
|
|
// letterbox specific '<key>:<value' tags
|
|
fn to_notmuch(&self) -> String {
|
|
let mut parts = Vec::new();
|
|
if !self.is_notmuch {
|
|
return String::new();
|
|
}
|
|
|
|
if self.unread_only {
|
|
parts.push("is:unread".to_string());
|
|
}
|
|
for tag in &self.tags {
|
|
parts.push(format!("tag:{tag}"));
|
|
}
|
|
for uid in &self.uids {
|
|
parts.push(uid.clone());
|
|
}
|
|
parts.extend(self.remainder.clone());
|
|
parts.join(" ")
|
|
}
|
|
}
|
|
|
|
impl FromStr for Query {
|
|
type Err = Infallible;
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
let mut unread_only = false;
|
|
let mut tags = Vec::new();
|
|
let mut uids = Vec::new();
|
|
let mut remainder = Vec::new();
|
|
let mut is_notmuch = false;
|
|
let mut is_newsreader = false;
|
|
let mut is_tantivy = false;
|
|
let mut corpus = None;
|
|
for word in s.split_whitespace() {
|
|
if word == "is:unread" {
|
|
unread_only = true
|
|
} else if word.starts_with("tag:") {
|
|
tags.push(word["tag:".len()..].to_string());
|
|
|
|
/*
|
|
} else if word.starts_with("tag:") {
|
|
// Any tag that doesn't match site_prefix should explicitly set the site to something not in the
|
|
// database
|
|
site = Some(NON_EXISTENT_SITE_NAME.to_string());
|
|
*/
|
|
} else if word.starts_with("corpus:") {
|
|
let c = word["corpus:".len()..].to_string();
|
|
corpus = c.parse::<Corpus>().map(|c| Some(c)).unwrap_or_else(|e| {
|
|
warn!("Error parsing corpus '{c}': {e:?}");
|
|
None
|
|
});
|
|
} else if is_newsreader_thread(word) {
|
|
uids.push(word.to_string());
|
|
} else if is_notmuch_thread_or_id(word) {
|
|
uids.push(word.to_string());
|
|
} else if word == "is:mail" || word == "is:email" || word == "is:notmuch" {
|
|
is_notmuch = true;
|
|
} else if word == "is:news" {
|
|
is_newsreader = true;
|
|
} else if word == "is:newsreader" {
|
|
is_newsreader = true;
|
|
} else {
|
|
remainder.push(word.to_string());
|
|
}
|
|
}
|
|
// If we don't see any explicit filters for a corpus, flip them all on
|
|
if corpus.is_none() && !(is_notmuch || is_tantivy || is_newsreader) {
|
|
is_notmuch = true;
|
|
is_newsreader = true;
|
|
is_tantivy = true;
|
|
}
|
|
Ok(Query {
|
|
unread_only,
|
|
tags,
|
|
uids,
|
|
remainder,
|
|
is_notmuch,
|
|
is_newsreader,
|
|
is_tantivy,
|
|
corpus,
|
|
})
|
|
}
|
|
}
|
|
/// Raw database row from which a [`ThreadSummary`] is built.
pub struct ThreadSummaryRecord {
    // Site/feed name; None falls back to a placeholder downstream.
    pub site: Option<String>,
    // Post date; currently required downstream (conversion panics when None).
    pub date: Option<PrimitiveDateTime>,
    // Read state; None is treated as read.
    pub is_read: Option<bool>,
    // Post title; None falls back to a placeholder downstream.
    pub title: Option<String>,
    // Post/thread unique id (without the "news:" prefix).
    pub uid: String,
    // Author name; falls back to the site name downstream.
    pub name: Option<String>,
    pub corpus: Corpus,
}
|
|
|
|
async fn thread_summary_from_row(r: ThreadSummaryRecord) -> ThreadSummary {
|
|
let site = r.site.unwrap_or("UNKOWN TAG".to_string());
|
|
let mut tags = vec![format!("{NEWSREADER_TAG_PREFIX}{site}")];
|
|
if !r.is_read.unwrap_or(true) {
|
|
tags.push("unread".to_string());
|
|
};
|
|
let mut title = r.title.unwrap_or("NO TITLE".to_string());
|
|
title = clean_title(&title).await.expect("failed to clean title");
|
|
ThreadSummary {
|
|
thread: format!("{NEWSREADER_THREAD_PREFIX}{}", r.uid),
|
|
timestamp: r
|
|
.date
|
|
.expect("post missing date")
|
|
.assume_utc()
|
|
.unix_timestamp() as isize,
|
|
date_relative: format!("{:?}", r.date),
|
|
//date_relative: "TODO date_relative".to_string(),
|
|
matched: 0,
|
|
total: 1,
|
|
authors: r.name.unwrap_or_else(|| site.clone()),
|
|
subject: title,
|
|
tags,
|
|
corpus: r.corpus,
|
|
}
|
|
}
|
|
async fn clean_title(title: &str) -> Result<String, ServerError> {
|
|
// Make title HTML so html parsers work
|
|
let mut title = format!("<html>{title}</html>");
|
|
let title_tranformers: Vec<Box<dyn Transformer>> =
|
|
vec![Box::new(EscapeHtml), Box::new(StripHtml)];
|
|
// Make title HTML so html parsers work
|
|
title = format!("<html>{title}</html>");
|
|
for t in title_tranformers.iter() {
|
|
if t.should_run(&None, &title) {
|
|
title = t.transform(&None, &title).await?;
|
|
}
|
|
}
|
|
Ok(title)
|
|
}
|