// letterbox/server/src/newsreader.rs
use std::hash::{DefaultHasher, Hash, Hasher};

use log::info;
use sqlx::postgres::PgPool;
use url::Url;

use crate::{
    compute_offset_limit,
    error::ServerError,
    graphql::{Body, Email, Html, Message, Tag, Thread, ThreadSummary},
    AddOutlink, EscapeHtml, InlineStyle, Query, SanitizeHtml, StripHtml, Transformer,
};

const TAG_PREFIX: &str = "News/";
const THREAD_PREFIX: &str = "news:";
/// Returns true if the query targets a newsreader tag (e.g. "tag:News/<site>").
pub fn is_newsreader_search(query: &str) -> bool {
    query.contains(TAG_PREFIX)
}

/// Returns true if the query names a newsreader thread (e.g. "news:<uid>").
pub fn is_newsreader_thread(query: &str) -> bool {
    query.starts_with(THREAD_PREFIX)
}

/// Strips the "news:" prefix. Callers must check `is_newsreader_thread` first.
pub fn extract_thread_id(query: &str) -> &str {
    &query[THREAD_PREFIX.len()..]
}

/// Strips the "News/" prefix. Callers must check `is_newsreader_search` first.
pub fn extract_site(tag: &str) -> &str {
    &tag[TAG_PREFIX.len()..]
}

/// Builds a full search query for a site, e.g. "lwn" -> "tag:News/lwn".
pub fn make_news_tag(site: &str) -> String {
    format!("tag:{TAG_PREFIX}{site}")
}
pub async fn count(pool: &PgPool, query: &Query) -> Result<usize, ServerError> {
    if !query.remainder.is_empty() {
        // TODO: handle full-text search against all sites; for now, return
        // early if search words are specified.
        return Ok(0);
    }
    let row = sqlx::query_file!("sql/count.sql", query.tag, query.unread_only)
        .fetch_one(pool)
        .await?;
    Ok(row.count.unwrap_or(0).try_into().unwrap_or(0))
}
pub async fn search(
    pool: &PgPool,
    after: Option<i32>,
    before: Option<i32>,
    first: Option<i32>,
    last: Option<i32>,
    query: &Query,
) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
    info!("search({after:?} {before:?} {first:?} {last:?} {query:?})");
    if !query.remainder.is_empty() {
        // TODO: handle full-text search against all sites; for now, return
        // early if search words are specified.
        return Ok(Vec::new());
    }
    let (offset, mut limit) = compute_offset_limit(after, before, first, last);
    if before.is_none() {
        // When searching forward, the +1 is to see if there are more pages of
        // data available. Searching backwards implies there are more pages
        // forward, because the value represented by `before` is on the next
        // page.
        limit += 1;
    }
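    // Worked example, assuming compute_offset_limit maps after=4, first=10 to
    // (offset=5, limit=10) -- an assumption about its contract, not verified
    // here: the bump above yields limit=11, so an 11th row in the result set
    // tells the caller that another page of results exists.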
    let site = query.tag.as_ref().map(|t| extract_site(t).to_string());
    info!(
        "search offset {offset} limit {limit} site {site:?} unread_only {}",
        query.unread_only
    );
    // TODO: further limit results to include query.remainder if set
    let rows = sqlx::query_file!(
        "sql/threads.sql",
        site,
        query.unread_only,
        offset as i64,
        limit as i64
    )
    .fetch_all(pool)
    .await?;
    Ok(rows
        .into_iter()
        .enumerate()
        .map(|(i, r)| {
            let site = r.site.unwrap_or_else(|| "UNKNOWN TAG".to_string());
            let mut tags = vec![format!("{TAG_PREFIX}{site}")];
            if !r.is_read.unwrap_or(true) {
                tags.push("unread".to_string());
            }
            let title = clean_title(&r.title.unwrap_or_else(|| "NO TITLE".to_string()))
                .expect("failed to clean title");
            (
                i as i32 + offset,
                ThreadSummary {
                    thread: format!("{THREAD_PREFIX}{}", r.uid),
                    timestamp: r
                        .date
                        .expect("post missing date")
                        .assume_utc()
                        .unix_timestamp() as isize,
                    date_relative: "TODO date_relative".to_string(),
                    matched: 0,
                    total: 1,
                    authors: r.name.unwrap_or_else(|| site.clone()),
                    subject: title,
                    tags,
                },
            )
        })
        .collect())
}
pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, ServerError> {
    // TODO: optimize query by using needs_unread
    let tags = sqlx::query_file!("sql/tags.sql").fetch_all(pool).await?;
    let tags = tags
        .into_iter()
        .map(|tag| {
            // Derive a background color from the site name by hashing it down
            // to 24 bits of RGB. Note DefaultHasher's output is not guaranteed
            // to be stable across Rust releases, so colors may shift after a
            // toolchain upgrade.
            let mut hasher = DefaultHasher::new();
            tag.site.hash(&mut hasher);
            let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
            let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
            let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
            Tag {
                name,
                fg_color: "white".to_string(),
                bg_color: hex,
                unread,
            }
        })
        .collect();
    Ok(tags)
}
pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerError> {
    let id = thread_id
        .strip_prefix(THREAD_PREFIX)
        .expect("news thread ID must start with the 'news:' prefix")
        .to_string();
    let r = sqlx::query_file!("sql/thread.sql", id)
        .fetch_one(pool)
        .await?;
    let site = r.site.unwrap_or_else(|| "NO TAG".to_string());
    let mut tags = vec![format!("{TAG_PREFIX}{site}")];
    // Mark unread threads; note the negation, matching the logic in `search`.
    if !r.is_read.unwrap_or(true) {
        tags.push("unread".to_string());
    }
    let default_homepage = "http://no-homepage";
    let homepage = Url::parse(
        &r.homepage
            .filter(|h| !h.is_empty())
            .unwrap_or_else(|| default_homepage.to_string()),
    )?;
    let link = r
        .link
        .as_ref()
        .map(|h| {
            if h.is_empty() {
                default_homepage.to_string()
            } else {
                h.to_string()
            }
        })
        .and_then(|h| Url::parse(&h).ok());
    let addr = r.link.as_ref().map(|link| {
        if link.contains('@') {
            // Mail-style links are already addresses.
            link.clone()
        } else if let Ok(url) = homepage.join(link) {
            // Resolve relative links against the site homepage, e.g. "/post/1"
            // joined onto "https://example.com/" gives
            // "https://example.com/post/1".
            url.to_string()
        } else {
            link.clone()
        }
    });
    let mut body = r.summary.unwrap_or_else(|| "NO SUMMARY".to_string());
    // TODO: add site-specific cleanups. For example:
    // * Grafana emits <div class="image-wrapp"><img class="lazyload"><img src="/media/..."></div>
    // * Some sites appear to be HTML-encoded; unencode them, e.g. imperialviolent
    let body_transformers: Vec<Box<dyn Transformer>> = vec![
        Box::new(AddOutlink(link.clone())),
        Box::new(EscapeHtml),
        Box::new(InlineStyle),
        Box::new(SanitizeHtml {
            cid_prefix: "",
            base_url: &link,
        }),
    ];
    for t in body_transformers.iter() {
        if t.should_run(&body) {
            body = t.transform(&body)?;
        }
    }
    let body = Body::Html(Html {
        html: body,
        content_tree: "".to_string(),
    });
    let title = clean_title(&r.title.unwrap_or_else(|| "NO TITLE".to_string()))?;
    let from = Some(Email {
        name: r.name,
        addr,
    });
    Ok(Thread {
        thread_id,
        subject: title.clone(),
        messages: vec![Message {
            id,
            from,
            to: Vec::new(),
            cc: Vec::new(),
            subject: Some(title),
            timestamp: Some(
                r.date
                    .expect("post missing date")
                    .assume_utc()
                    .unix_timestamp(),
            ),
            headers: Vec::new(),
            body,
            path: "".to_string(),
            attachments: Vec::new(),
            tags,
        }],
    })
}
pub async fn set_read_status(
    pool: &PgPool,
    query: &str,
    unread: bool,
) -> Result<bool, ServerError> {
    let query: Query = query.parse()?;
    sqlx::query_file!("sql/set_unread.sql", !unread, query.uid)
        .execute(pool)
        .await?;
    Ok(true)
}
fn clean_title(title: &str) -> Result<String, ServerError> {
    // Wrap the title in <html> once so HTML parsers accept it as a document.
    let mut title = format!("<html>{title}</html>");
    let title_transformers: Vec<Box<dyn Transformer>> =
        vec![Box::new(EscapeHtml), Box::new(StripHtml)];
    for t in title_transformers.iter() {
        if t.should_run(&title) {
            title = t.transform(&title)?;
        }
    }
    Ok(title)
}