// letterbox/server/src/newsreader.rs
use std::hash::{DefaultHasher, Hash, Hasher};

use log::info;
use sqlx::postgres::PgPool;
use url::Url;

use crate::{
    compute_offset_limit,
    error::ServerError,
    graphql::{Body, Email, Html, Message, Tag, Thread, ThreadSummary},
    AddOutlink, EscapeHtml, InlineStyle, Query, SanitizeHtml, StripHtml, Transformer,
};

const TAG_PREFIX: &str = "News/";
const THREAD_PREFIX: &str = "news:";
/// Returns true if the query targets a newsreader tag (e.g. "tag:News/<site>").
pub fn is_newsreader_search(query: &str) -> bool {
    query.contains(TAG_PREFIX)
}

/// Returns true if the query names a newsreader thread (e.g. "news:<uid>").
pub fn is_newsreader_thread(query: &str) -> bool {
    query.starts_with(THREAD_PREFIX)
}

/// Strips the "news:" prefix. Callers must check `is_newsreader_thread` first.
pub fn extract_thread_id(query: &str) -> &str {
    &query[THREAD_PREFIX.len()..]
}

/// Strips the "News/" prefix. Callers must check `is_newsreader_search` first.
pub fn extract_site(tag: &str) -> &str {
    &tag[TAG_PREFIX.len()..]
}

/// Builds a full search query for a site, e.g. "lwn" -> "tag:News/lwn".
pub fn make_news_tag(site: &str) -> String {
    format!("tag:{TAG_PREFIX}{site}")
}
pub async fn count(pool: &PgPool, query: &Query) -> Result<usize, ServerError> {
    if !query.remainder.is_empty() {
        // TODO: handle full-text search against all sites; for now, return
        // early if search words are specified.
        return Ok(0);
    }
    let row = sqlx::query_file!("sql/count.sql", query.tag, query.unread_only)
        .fetch_one(pool)
        .await?;
    Ok(row.count.unwrap_or(0).try_into().unwrap_or(0))
}
pub async fn search(
    pool: &PgPool,
    after: Option<i32>,
    before: Option<i32>,
    first: Option<i32>,
    last: Option<i32>,
    query: &Query,
) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
    info!("search({after:?} {before:?} {first:?} {last:?} {query:?})");
    if !query.remainder.is_empty() {
        // TODO: handle full-text search against all sites; for now, return
        // early if search words are specified.
        return Ok(Vec::new());
    }
    let (offset, mut limit) = compute_offset_limit(after, before, first, last);
    if before.is_none() {
        // When searching forward, the +1 is to see if there are more pages of
        // data available. Searching backwards implies there are more pages
        // forward, because the value represented by `before` is on the next
        // page.
        limit += 1;
    }
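    // Worked example, assuming compute_offset_limit maps after=4, first=10 to
    // (offset=5, limit=10) -- an assumption about its contract, not verified
    // here: the bump above yields limit=11, so an 11th row in the result set
    // tells the caller that another page of results exists.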
    let site = query.tag.as_ref().map(|t| extract_site(t).to_string());
    info!(
        "search offset {offset} limit {limit} site {site:?} unread_only {}",
        query.unread_only
    );
    // TODO: further limit results to include query.remainder if set
    let rows = sqlx::query_file!(
        "sql/threads.sql",
        site,
        query.unread_only,
        offset as i64,
        limit as i64
    )
    .fetch_all(pool)
    .await?;
    Ok(rows
        .into_iter()
        .enumerate()
        .map(|(i, r)| {
            let site = r.site.unwrap_or_else(|| "UNKNOWN TAG".to_string());
            let mut tags = vec![format!("{TAG_PREFIX}{site}")];
            if !r.is_read.unwrap_or(true) {
                tags.push("unread".to_string());
            }
            let title = clean_title(&r.title.unwrap_or_else(|| "NO TITLE".to_string()))
                .expect("failed to clean title");
            (
                i as i32 + offset,
                ThreadSummary {
                    thread: format!("{THREAD_PREFIX}{}", r.uid),
                    timestamp: r
                        .date
                        .expect("post missing date")
                        .assume_utc()
                        .unix_timestamp() as isize,
                    date_relative: "TODO date_relative".to_string(),
                    matched: 0,
                    total: 1,
                    authors: r.name.unwrap_or_else(|| site.clone()),
                    subject: title,
                    tags,
                },
            )
        })
        .collect())
}
pub async fn tags(pool: &PgPool, _needs_unread: bool) -> Result<Vec<Tag>, ServerError> {
    // TODO: optimize query by using needs_unread
    let tags = sqlx::query_file!("sql/tags.sql").fetch_all(pool).await?;
    let tags = tags
        .into_iter()
        .map(|tag| {
            // Derive a background color from the site name by hashing it down
            // to 24 bits of RGB. Note DefaultHasher's output is not guaranteed
            // to be stable across Rust releases, so colors may shift after a
            // toolchain upgrade.
            let mut hasher = DefaultHasher::new();
            tag.site.hash(&mut hasher);
            let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
            let unread = tag.unread.unwrap_or(0).try_into().unwrap_or(0);
            let name = format!("{TAG_PREFIX}{}", tag.site.expect("tag must have site"));
            Tag {
                name,
                fg_color: "white".to_string(),
                bg_color: hex,
                unread,
            }
        })
        .collect();
    Ok(tags)
}
pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerError> {
    let id = thread_id
        .strip_prefix(THREAD_PREFIX)
        .expect("news thread ID must start with the 'news:' prefix")
        .to_string();
    let r = sqlx::query_file!("sql/thread.sql", id)
        .fetch_one(pool)
        .await?;
    let site = r.site.unwrap_or_else(|| "NO TAG".to_string());
    let mut tags = vec![format!("{TAG_PREFIX}{site}")];
    // Mark unread threads; note the negation, matching the logic in `search`.
    if !r.is_read.unwrap_or(true) {
        tags.push("unread".to_string());
    }
    let default_homepage = "http://no-homepage";
    let homepage = Url::parse(
        &r.homepage
            .filter(|h| !h.is_empty())
            .unwrap_or_else(|| default_homepage.to_string()),
    )?;
    let link = r
        .link
        .as_ref()
        .map(|h| {
            if h.is_empty() {
                default_homepage.to_string()
            } else {
                h.to_string()
            }
        })
        .and_then(|h| Url::parse(&h).ok());
    let addr = r.link.as_ref().map(|link| {
        if link.contains('@') {
            // Mail-style links are already addresses.
            link.clone()
        } else if let Ok(url) = homepage.join(link) {
            // Resolve relative links against the site homepage, e.g. "/post/1"
            // joined onto "https://example.com/" gives
            // "https://example.com/post/1".
            url.to_string()
        } else {
            link.clone()
        }
    });
    let mut body = r.summary.unwrap_or_else(|| "NO SUMMARY".to_string());
    // TODO: add site-specific cleanups. For example:
    // * Grafana emits <div class="image-wrapp"><img class="lazyload"><img src="/media/..."></div>
    // * Some sites appear to be HTML-encoded; unencode them, e.g. imperialviolent
    let body_transformers: Vec<Box<dyn Transformer>> = vec![
        Box::new(AddOutlink(link.clone())),
        Box::new(EscapeHtml),
        Box::new(InlineStyle),
        Box::new(SanitizeHtml {
            cid_prefix: "",
            base_url: &link,
        }),
    ];
    for t in body_transformers.iter() {
        if t.should_run(&body) {
            body = t.transform(&body)?;
        }
    }
    let body = Body::Html(Html {
        html: body,
        content_tree: "".to_string(),
    });
    let title = clean_title(&r.title.unwrap_or_else(|| "NO TITLE".to_string()))?;
    let from = Some(Email {
        name: r.name,
        addr,
    });
    Ok(Thread {
        thread_id,
        subject: title.clone(),
        messages: vec![Message {
            id,
            from,
            to: Vec::new(),
            cc: Vec::new(),
            subject: Some(title),
            timestamp: Some(
                r.date
                    .expect("post missing date")
                    .assume_utc()
                    .unix_timestamp(),
            ),
            headers: Vec::new(),
            body,
            path: "".to_string(),
            attachments: Vec::new(),
            tags,
        }],
    })
}
pub async fn set_read_status(
    pool: &PgPool,
    query: &str,
    unread: bool,
) -> Result<bool, ServerError> {
    let query: Query = query.parse()?;
    sqlx::query_file!("sql/set_unread.sql", !unread, query.uid)
        .execute(pool)
        .await?;
    Ok(true)
}
fn clean_title(title: &str) -> Result<String, ServerError> {
    // Wrap the title in <html> once so HTML parsers accept it as a document.
    let mut title = format!("<html>{title}</html>");
    let title_transformers: Vec<Box<dyn Transformer>> =
        vec![Box::new(EscapeHtml), Box::new(StripHtml)];
    for t in title_transformers.iter() {
        if t.should_run(&title) {
            title = t.transform(&title)?;
        }
    }
    Ok(title)
}