use std::hash::{DefaultHasher, Hash, Hasher};

use log::info;
use maplit::hashmap;
use scraper::Selector;
use sqlx::postgres::PgPool;
use url::Url;

use crate::{
    compute_offset_limit,
    error::ServerError,
    graphql::{NewsPost, Tag, Thread, ThreadSummary},
    AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml, SlurpContents,
    StripHtml, Transformer,
};

const TAG_PREFIX: &str = "News/";
const THREAD_PREFIX: &str = "news:";

pub fn is_newsreader_search(query: &str) -> bool {
    query.contains(TAG_PREFIX)
}

pub fn is_newsreader_thread(query: &str) -> bool {
    query.starts_with(THREAD_PREFIX)
}

pub fn extract_thread_id(query: &str) -> &str {
    &query[THREAD_PREFIX.len()..]
}

pub fn extract_site(tag: &str) -> &str {
    &tag[TAG_PREFIX.len()..]
}

pub fn make_news_tag(tag: &str) -> String {
    format!("tag:{TAG_PREFIX}{tag}")
}

pub async fn count(pool: &PgPool, query: &Query) -> Result<usize, ServerError> {
    if !query.remainder.is_empty() {
        // TODO: handle full text search against all sites; for now, return early if
        // search words are specified.
        return Ok(0);
    }
    let row = sqlx::query_file!("sql/count.sql", query.tag, query.unread_only)
        .fetch_one(pool)
        .await?;
    Ok(row.count.unwrap_or(0).try_into().unwrap_or(0))
}
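/// Look up a page of thread summaries matching `query`.
///
/// `after`/`before`/`first`/`last` follow the GraphQL connection pagination
/// convention and are reduced to a row offset and limit by
/// `compute_offset_limit`. When paging forward, one extra row is fetched so
/// the caller can tell whether another page exists; each summary is returned
/// paired with its absolute offset into the result set.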
pub async fn search(
    pool: &PgPool,
    after: Option<i32>,
    before: Option<i32>,
    first: Option<i32>,
    last: Option<i32>,
    query: &Query,
) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
    info!("search({after:?} {before:?} {first:?} {last:?} {query:?})");
    if !query.remainder.is_empty() {
        // TODO: handle full text search against all sites; for now, return early if
        // search words are specified.
        return Ok(Vec::new());
    }
    let (offset, mut limit) = compute_offset_limit(after, before, first, last);
    if before.is_none() {
        // When searching forward, the +1 is to see if there are more pages of data
        // available. Searching backwards implies there are more pages forward, because
        // the value represented by `before` is on the next page.
        limit += 1;
    }
    let site = query.tag.as_ref().map(|t| extract_site(t).to_string());
    info!(
        "search offset {offset} limit {limit} site {site:?} unread_only {}",
        query.unread_only
    );
    // TODO: further limit results to include query.remainder if set
    let rows = sqlx::query_file!(
        "sql/threads.sql",
        site,
        query.unread_only,
        offset as i64,
        limit as i64
    )
    .fetch_all(pool)
    .await?;

    let mut res = Vec::new();
    for (i, r) in rows.into_iter().enumerate() {
        let site = r.site.unwrap_or_else(|| "UNKNOWN TAG".to_string());
        let mut tags = vec![format!("{TAG_PREFIX}{site}")];
        if !r.is_read.unwrap_or(true) {
            tags.push("unread".to_string());
        }
        let title = r.title.unwrap_or_else(|| "NO TITLE".to_string());
        let title = clean_title(&title).await.expect("failed to clean title");
        res.push((
            i as i32 + offset,
            ThreadSummary {
                thread: format!("{THREAD_PREFIX}{}", r.uid),
                timestamp: r
                    .date
                    .expect("post missing date")
                    .assume_utc()
                    .unix_timestamp() as isize,
                date_relative: "TODO date_relative".to_string(),
                matched: 0,
                total: 1,
                authors: r.name.unwrap_or_else(|| site.clone()),
                subject: title,
                tags,
            },
        ));
    }
    Ok(res)
}

pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, ServerError> {
    // TODO: optimize the query by using _needs_unread
    let tags = sqlx::query_file!("sql/tags.sql").fetch_all(pool).await?;
    let tags = tags
        .into_iter()
        .map(|tag| {
            // Hash the site name down to a stable 24-bit RGB background color.
            let mut hasher = DefaultHasher::new();
            tag.site.hash(&mut hasher);
            let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
            let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
            let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
            Tag {
                name,
                fg_color: "white".to_string(),
                bg_color: hex,
                unread,
            }
        })
        .collect();
    Ok(tags)
}
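/// Load a single news post and prepare its HTML body for display.
///
/// The stored summary is run through a pipeline of `Transformer`s; each one
/// is consulted via `should_run` against the post's link and current body
/// before it rewrites the HTML.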
pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerError> {
    let id = thread_id
        .strip_prefix(THREAD_PREFIX)
        .unwrap_or_else(|| panic!("news thread doesn't start with '{THREAD_PREFIX}'"))
        .to_string();
    let r = sqlx::query_file!("sql/thread.sql", id)
        .fetch_one(pool)
        .await?;
    let slug = r.site.unwrap_or_else(|| "no-slug".to_string());
    let site = r.name.unwrap_or_else(|| "NO SITE".to_string());
    let default_homepage = "http://no-homepage";
    // Substitute a placeholder homepage when the post's link is empty, then
    // parse whatever we ended up with; `link` is `None` if parsing fails.
    let link = r
        .link
        .as_ref()
        .map(|h| {
            if h.is_empty() {
                default_homepage.to_string()
            } else {
                h.to_string()
            }
        })
        .and_then(|h| Url::parse(&h).ok());
    let mut body = r.summary.unwrap_or_else(|| "NO SUMMARY".to_string());

    // TODO: add site specific cleanups. For example:
    //  * Grafana does
    //  * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
    let body_transformers: Vec<Box<dyn Transformer>> = vec![
        Box::new(SlurpContents {
            site_selectors: hashmap![
                "hackaday.com".to_string() => vec![
                    Selector::parse("div.entry-featured-image").unwrap(),
                    Selector::parse("div.entry-content").unwrap(),
                ],
                "mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()],
                "natwelch.com".to_string() => vec![
                    Selector::parse("article div.prose").unwrap(),
                ],
                "slashdot.org".to_string() => vec![
                    Selector::parse("span.story-byline").unwrap(),
                    Selector::parse("div.p").unwrap(),
                ],
                "www.smbc-comics.com".to_string() => vec![
                    Selector::parse("img#cc-comic").unwrap(),
                    Selector::parse("div#aftercomic img").unwrap(),
                ],
            ],
        }),
        Box::new(FrameImages),
        Box::new(AddOutlink),
        Box::new(EscapeHtml),
        Box::new(InlineStyle),
        Box::new(SanitizeHtml {
            cid_prefix: "",
            base_url: &link,
        }),
    ];
    for t in body_transformers.iter() {
        if t.should_run(&link, &body) {
            body = t.transform(&link, &body).await?;
        }
    }

    let title = clean_title(&r.title.unwrap_or_else(|| "NO TITLE".to_string())).await?;
    let is_read = r.is_read.unwrap_or(false);
    let timestamp = r
        .date
        .expect("post missing date")
        .assume_utc()
        .unix_timestamp();
    Ok(Thread::News(NewsPost {
        thread_id,
        is_read,
        slug,
        site,
        title,
        body,
        url: link
            .as_ref()
            .map(|url| url.to_string())
            .unwrap_or_else(|| "NO URL".to_string()),
        timestamp,
    }))
}

pub async fn set_read_status(
    pool: &PgPool,
    query: &str,
    unread: bool,
) -> Result<bool, ServerError> {
    let query: Query = query.parse()?;
    // The SQL statement takes the "is read" state, so invert the requested
    // unread flag.
    sqlx::query_file!("sql/set_unread.sql", !unread, query.uid)
        .execute(pool)
        .await?;
    Ok(true)
}

async fn clean_title(title: &str) -> Result<String, ServerError> {
    // Make the title an HTML document so the HTML parsers work.
    let mut title = format!("<html>{title}</html>");
    let title_transformers: Vec<Box<dyn Transformer>> =
        vec![Box::new(EscapeHtml), Box::new(StripHtml)];
    for t in title_transformers.iter() {
        if t.should_run(&None, &title) {
            title = t.transform(&None, &title).await?;
        }
    }
    Ok(title)
}
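// A minimal sanity-check sketch for the pure string helpers above. These
// tests are an illustrative addition: they assume only the `TAG_PREFIX` and
// `THREAD_PREFIX` constants defined in this module and need no database.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn thread_queries_round_trip() {
        assert!(is_newsreader_thread("news:abc123"));
        assert!(!is_newsreader_thread("tag:News/hackaday.com"));
        assert_eq!(extract_thread_id("news:abc123"), "abc123");
    }

    #[test]
    fn tag_queries_round_trip() {
        let tag = make_news_tag("hackaday.com");
        assert_eq!(tag, "tag:News/hackaday.com");
        assert!(is_newsreader_search(&tag));
        assert_eq!(extract_site("News/hackaday.com"), "hackaday.com");
    }
}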