use std::sync::Arc;

use cacher::FilesystemCacher;
use log::info;
use maplit::hashmap;
use scraper::Selector;
use shared::compute_color;
use sqlx::postgres::PgPool;
use tokio::sync::Mutex;
use tracing::instrument;
use url::Url;

use crate::{
    clean_title, compute_offset_limit,
    config::Config,
    error::ServerError,
    graphql::{Corpus, NewsPost, Tag, Thread, ThreadSummary},
    thread_summary_from_row, AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml,
    SlurpContents, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX,
    NEWSREADER_THREAD_PREFIX,
};

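/// Returns true when the query targets the newsreader corpus, either via the
/// `is_newsreader` flag or an explicit `Corpus::Newsreader` corpus.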
pub fn is_newsreader_query(query: &Query) -> bool {
    query.is_newsreader || query.corpus == Some(Corpus::Newsreader)
}

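/// Returns true when the given id carries the newsreader thread prefix.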
pub fn is_newsreader_thread(query: &str) -> bool {
    query.starts_with(NEWSREADER_THREAD_PREFIX)
}

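/// Strips the newsreader thread prefix from `query` if present; otherwise
/// returns the input unchanged.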
pub fn extract_thread_id(query: &str) -> &str {
    query
        .strip_prefix(NEWSREADER_THREAD_PREFIX)
        .unwrap_or(query)
}

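/// Returns the site portion of a newsreader tag. Assumes the tag starts with
/// `NEWSREADER_TAG_PREFIX`; callers are expected to check that first.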
pub fn extract_site(tag: &str) -> &str {
    &tag[NEWSREADER_TAG_PREFIX.len()..]
}

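/// Builds the full `tag:` query term for a site: "tag:" followed by the
/// newsreader tag prefix and the site name.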
pub fn make_news_tag(tag: &str) -> String {
    format!("tag:{NEWSREADER_TAG_PREFIX}{tag}")
}

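/// Returns the site named by the first newsreader tag in `tags`, if any.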
fn site_from_tags(tags: &[String]) -> Option<String> {
    for t in tags {
        if t.starts_with(NEWSREADER_TAG_PREFIX) {
            return Some(extract_site(t).to_string());
        }
    }
    None
}

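/// Counts the newsreader threads matching `query`. Returns 0 for queries the
/// newsreader backend can't handle (non-newsreader queries, or tag filters
/// that aren't site tags).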
#[instrument(name = "newsreader::count", skip_all, fields(query=%query))]
|
|
pub async fn count(pool: &PgPool, query: &Query) -> Result<usize, ServerError> {
|
|
if !is_newsreader_query(query) {
|
|
return Ok(0);
|
|
}
|
|
let site = site_from_tags(&query.tags);
|
|
if !query.tags.is_empty() && site.is_none() {
|
|
// Newsreader can only handle all sites read/unread queries, anything with a non-site tag
|
|
// isn't supported
|
|
return Ok(0);
|
|
}
|
|
|
|
let search_term = query.remainder.join(" ");
|
|
let search_term = search_term.trim();
|
|
let search_term = if search_term.is_empty() {
|
|
None
|
|
} else {
|
|
Some(search_term)
|
|
};
|
|
// TODO: add support for looking for search_term in title and site
|
|
let row = sqlx::query_file!("sql/count.sql", site, query.unread_only, search_term)
|
|
.fetch_one(pool)
|
|
.await?;
|
|
Ok(row.count.unwrap_or(0).try_into().unwrap_or(0))
|
|
}
|
|
|
|
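/// Returns a page of (cursor offset, thread summary) pairs for a newsreader
/// query, paginated via the `after`/`before`/`first`/`last` arguments.
/// Non-newsreader queries and unsupported tag filters yield an empty result.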
#[instrument(name = "newsreader::search", skip_all, fields(query=%query))]
|
|
pub async fn search(
|
|
pool: &PgPool,
|
|
after: Option<i32>,
|
|
before: Option<i32>,
|
|
first: Option<i32>,
|
|
last: Option<i32>,
|
|
query: &Query,
|
|
) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
|
|
info!("search({after:?} {before:?} {first:?} {last:?} {query:?}");
|
|
if !is_newsreader_query(query) {
|
|
return Ok(Vec::new());
|
|
}
|
|
let site = site_from_tags(&query.tags);
|
|
if !query.tags.is_empty() && site.is_none() {
|
|
// Newsreader can only handle all sites read/unread queries, anything with a non-site tag
|
|
// isn't supported
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
let (offset, mut limit) = compute_offset_limit(after, before, first, last);
|
|
if before.is_none() {
|
|
// When searching forward, the +1 is to see if there are more pages of data available.
|
|
// Searching backwards implies there's more pages forward, because the value represented by
|
|
// `before` is on the next page.
|
|
limit = limit + 1;
|
|
}
|
|
|
|
info!(
|
|
"search offset {offset} limit {limit} site {site:?} unread_only {}",
|
|
query.unread_only
|
|
);
|
|
let search_term = query.remainder.join(" ");
|
|
let search_term = search_term.trim();
|
|
let search_term = if search_term.is_empty() {
|
|
None
|
|
} else {
|
|
Some(search_term)
|
|
};
|
|
|
|
// TODO: add support for looking for search_term in title and site
|
|
let rows = sqlx::query_file!(
|
|
"sql/threads.sql",
|
|
site,
|
|
query.unread_only,
|
|
offset as i64,
|
|
limit as i64,
|
|
search_term
|
|
)
|
|
.fetch_all(pool)
|
|
.await?;
|
|
let mut res = Vec::new();
|
|
for (i, r) in rows.into_iter().enumerate() {
|
|
res.push((
|
|
i as i32 + offset,
|
|
thread_summary_from_row(ThreadSummaryRecord {
|
|
site: r.site,
|
|
date: r.date,
|
|
is_read: r.is_read,
|
|
title: r.title,
|
|
uid: r.uid,
|
|
name: r.name,
|
|
corpus: Corpus::Newsreader,
|
|
})
|
|
.await,
|
|
));
|
|
}
|
|
Ok(res)
|
|
}
|
|
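/// Lists one Tag per site known to the newsreader, with its unread count and
/// a background color derived from the tag name.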
#[instrument(name = "newsreader::tags", skip_all, fields(needs_unread=%_needs_unread))]
|
|
pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, ServerError> {
|
|
// TODO: optimize query by using needs_unread
|
|
let tags = sqlx::query_file!("sql/tags.sql").fetch_all(pool).await?;
|
|
let tags = tags
|
|
.into_iter()
|
|
.map(|tag| {
|
|
let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
|
|
let name = format!(
|
|
"{NEWSREADER_TAG_PREFIX}{}",
|
|
tag.site.expect("tag must have site")
|
|
);
|
|
let hex = compute_color(&name);
|
|
Tag {
|
|
name,
|
|
fg_color: "white".to_string(),
|
|
bg_color: hex,
|
|
unread,
|
|
}
|
|
})
|
|
.collect();
|
|
Ok(tags)
|
|
}
|
|
|
|
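/// Loads a single newsreader post by thread id and renders it as a Thread.
/// The stored summary is run through the body transformer pipeline (content
/// slurping, image framing, outlinks, HTML escaping/sanitizing, inline styles)
/// before being returned.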
#[instrument(name = "newsreader::thread", skip_all, fields(thread_id=%thread_id))]
|
|
pub async fn thread(
|
|
config: &Config,
|
|
pool: &PgPool,
|
|
thread_id: String,
|
|
) -> Result<Thread, ServerError> {
|
|
let id = thread_id
|
|
.strip_prefix(NEWSREADER_THREAD_PREFIX)
|
|
.expect("news thread doesn't start with '{NEWSREADER_THREAD_PREFIX}'")
|
|
.to_string();
|
|
|
|
let r = sqlx::query_file!("sql/thread.sql", id)
|
|
.fetch_one(pool)
|
|
.await?;
|
|
|
|
let slug = r.site.unwrap_or("no-slug".to_string());
|
|
let site = r.name.unwrap_or("NO SITE".to_string());
|
|
let default_homepage = "http://no-homepage";
|
|
let link = &r
|
|
.link
|
|
.as_ref()
|
|
.map(|h| {
|
|
if h.is_empty() {
|
|
default_homepage.to_string()
|
|
} else {
|
|
h.to_string()
|
|
}
|
|
})
|
|
.map(|h| Url::parse(&h).ok())
|
|
.flatten();
|
|
let mut body = r.summary.unwrap_or("NO SUMMARY".to_string());
|
|
let cacher = Arc::new(Mutex::new(FilesystemCacher::new(&config.slurp_cache_path)?));
|
|
let body_tranformers: Vec<Box<dyn Transformer>> = vec![
|
|
Box::new(SlurpContents {
|
|
cacher,
|
|
site_selectors: hashmap![
|
|
"atmeta.com".to_string() => vec![
|
|
Selector::parse("div.entry-content").unwrap(),
|
|
],
|
|
"blog.prusa3d.com".to_string() => vec![
|
|
Selector::parse("article.content .post-block").unwrap(),
|
|
],
|
|
"blog.cloudflare.com".to_string() => vec![
|
|
Selector::parse(".author-lists .author-name-tooltip").unwrap(),
|
|
Selector::parse(".post-full-content").unwrap()
|
|
],
|
|
"blog.zsa.io".to_string() => vec![
|
|
Selector::parse("section.blog-article").unwrap(),
|
|
],
|
|
"engineering.fb.com".to_string() => vec![
|
|
Selector::parse("article").unwrap(),
|
|
],
|
|
"grafana.com".to_string() => vec![
|
|
Selector::parse(".blog-content").unwrap(),
|
|
],
|
|
"hackaday.com".to_string() => vec![
|
|
Selector::parse("div.entry-featured-image").unwrap(),
|
|
Selector::parse("div.entry-content").unwrap()
|
|
],
|
|
"ingowald.blog".to_string() => vec![
|
|
Selector::parse("article").unwrap(),
|
|
],
|
|
"mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()],
|
|
"natwelch.com".to_string() => vec![
|
|
Selector::parse("article div.prose").unwrap(),
|
|
],
|
|
"rustacean-station.org".to_string() => vec![
|
|
Selector::parse("article").unwrap(),
|
|
],
|
|
"slashdot.org".to_string() => vec![
|
|
Selector::parse("span.story-byline").unwrap(),
|
|
Selector::parse("div.p").unwrap(),
|
|
],
|
|
"www.redox-os.org".to_string() => vec![
|
|
Selector::parse("div.content").unwrap(),
|
|
],
|
|
"www.smbc-comics.com".to_string() => vec![
|
|
Selector::parse("img#cc-comic").unwrap(),
|
|
Selector::parse("div#aftercomic img").unwrap(),
|
|
],
|
|
],
|
|
}),
|
|
Box::new(FrameImages),
|
|
Box::new(AddOutlink),
|
|
Box::new(EscapeHtml),
|
|
Box::new(SanitizeHtml {
|
|
cid_prefix: "",
|
|
base_url: &link,
|
|
}),
|
|
Box::new(InlineStyle),
|
|
];
|
|
for t in body_tranformers.iter() {
|
|
if t.should_run(&link, &body) {
|
|
body = t.transform(&link, &body).await?;
|
|
}
|
|
}
|
|
let title = clean_title(&r.title.unwrap_or("NO TITLE".to_string())).await?;
|
|
let is_read = r.is_read.unwrap_or(false);
|
|
let timestamp = r
|
|
.date
|
|
.expect("post missing date")
|
|
.assume_utc()
|
|
.unix_timestamp();
|
|
Ok(Thread::News(NewsPost {
|
|
thread_id,
|
|
is_read,
|
|
slug,
|
|
site,
|
|
title,
|
|
body,
|
|
url: link
|
|
.as_ref()
|
|
.map(|url| url.to_string())
|
|
.unwrap_or("NO URL".to_string()),
|
|
timestamp,
|
|
}))
|
|
}
|
|
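/// Updates the read/unread state of every newsreader thread listed in
/// `query.uids`, issuing one statement per uid (see the TODO about batching).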
#[instrument(name = "newsreader::set_read_status", skip_all, fields(query=%query,unread=%unread))]
|
|
pub async fn set_read_status<'ctx>(
|
|
pool: &PgPool,
|
|
query: &Query,
|
|
unread: bool,
|
|
) -> Result<bool, ServerError> {
|
|
// TODO: make single query when query.uids.len() > 1
|
|
let uids: Vec<_> = query
|
|
.uids
|
|
.iter()
|
|
.filter(|uid| is_newsreader_thread(uid))
|
|
.map(
|
|
|uid| extract_thread_id(uid), // TODO strip prefix
|
|
)
|
|
.collect();
|
|
for uid in uids {
|
|
sqlx::query_file!("sql/set_unread.sql", !unread, uid)
|
|
.execute(pool)
|
|
.await?;
|
|
}
|
|
Ok(true)
|
|
}
|