From 51154044cc0885622926b798b84b729313947fbc Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Thu, 19 Dec 2024 10:40:18 -0800 Subject: [PATCH] WIP --- Cargo.lock | 113 +++++++++++- server/Cargo.toml | 2 + ...0241217021645_create-search-table.down.sql | 2 + ...0241218010438_create-email-tables.down.sql | 24 +++ .../20241218010438_create-email-tables.up.sql | 174 ++++++++++++++++++ server/src/bin/email2db.rs | 114 ++++++++++++ server/src/lib.rs | 1 + server/src/mail.rs | 81 ++++++++ 8 files changed, 509 insertions(+), 2 deletions(-) create mode 100644 server/migrations/20241218010438_create-email-tables.down.sql create mode 100644 server/migrations/20241218010438_create-email-tables.up.sql create mode 100644 server/src/bin/email2db.rs create mode 100644 server/src/mail.rs diff --git a/Cargo.lock b/Cargo.lock index db76f7f..89968a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,6 +94,55 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = 
"anstyle-wincon" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys 0.59.0", +] + [[package]] name = "anyhow" version = "1.0.94" @@ -732,6 +781,46 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "clap" +version = "4.5.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim 0.11.1", +] + +[[package]] +name = "clap_derive" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.90", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + [[package]] name = "cloudabi" version = "0.0.3" @@ -741,6 +830,12 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + [[package]] name = "combine" version = "4.6.7" @@ -2648,6 +2743,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + 
[[package]] name = "itertools" version = "0.10.5" @@ -3545,9 +3646,9 @@ dependencies = [ [[package]] name = "ordered-float" -version = "4.5.0" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c65ee1f9701bf938026630b455d5315f490640234259037edb259798b3bcf85e" +checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" dependencies = [ "num-traits", ] @@ -5061,6 +5162,8 @@ dependencies = [ "build-info", "build-info-build", "cacher", + "chrono", + "clap", "css-inline", "html-escape", "linkify", @@ -6685,6 +6788,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "uuid" version = "1.11.0" diff --git a/server/Cargo.toml b/server/Cargo.toml index df16657..07a356d 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -14,6 +14,8 @@ async-graphql-rocket = "6.0.11" async-trait = "0.1.81" build-info = "0.0.38" cacher = {git = "http://git-private.h.xinu.tv/wathiede/cacher.git"} +chrono = "0.4.39" +clap = { version = "4.5.23", features = ["derive"] } css-inline = "0.13.0" html-escape = "0.2.13" linkify = "0.10.0" diff --git a/server/migrations/20241217021645_create-search-table.down.sql b/server/migrations/20241217021645_create-search-table.down.sql index 0870a92..3eea805 100644 --- a/server/migrations/20241217021645_create-search-table.down.sql +++ b/server/migrations/20241217021645_create-search-table.down.sql @@ -1 +1,3 @@ DROP INDEX IF EXISTS post_summary_idx; +DROP INDEX IF EXISTS post_site_idx; +DROP INDEX IF EXISTS post_title_idx; diff --git a/server/migrations/20241218010438_create-email-tables.down.sql b/server/migrations/20241218010438_create-email-tables.down.sql new 
file mode 100644 index 0000000..47adf42 --- /dev/null +++ b/server/migrations/20241218010438_create-email-tables.down.sql @@ -0,0 +1,24 @@ +BEGIN; + +ALTER TABLE IF EXISTS public."Email" DROP CONSTRAINT IF EXISTS email_avatar_fkey; +ALTER TABLE IF EXISTS public."EmailDisplayName" DROP CONSTRAINT IF EXISTS email_id_fk; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_to_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_cc_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_from_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_header_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_file_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_body_id_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_thread_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_tag_fkey; + +DROP TABLE IF EXISTS public."Email"; +DROP TABLE IF EXISTS public."EmailDisplayName"; +DROP TABLE IF EXISTS public."Message"; +DROP TABLE IF EXISTS public."Header"; +DROP TABLE IF EXISTS public."File"; +DROP TABLE IF EXISTS public."Avatar"; +DROP TABLE IF EXISTS public."Body"; +DROP TABLE IF EXISTS public."Thread"; +DROP TABLE IF EXISTS public."Tag"; + +END; diff --git a/server/migrations/20241218010438_create-email-tables.up.sql b/server/migrations/20241218010438_create-email-tables.up.sql new file mode 100644 index 0000000..b25d408 --- /dev/null +++ b/server/migrations/20241218010438_create-email-tables.up.sql @@ -0,0 +1,174 @@ +-- This script was generated by the ERD tool in pgAdmin 4. +-- Please log an issue at https://github.com/pgadmin-org/pgadmin4/issues/new/choose if you find any bugs, including reproduction steps. 
+BEGIN; + +ALTER TABLE IF EXISTS public."Email" DROP CONSTRAINT IF EXISTS email_avatar_fkey; +ALTER TABLE IF EXISTS public."EmailDisplayName" DROP CONSTRAINT IF EXISTS email_id_fk; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_to_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_cc_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_from_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_header_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_file_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_body_id_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_thread_fkey; +ALTER TABLE IF EXISTS public."Message" DROP CONSTRAINT IF EXISTS message_tag_fkey; + +CREATE TABLE IF NOT EXISTS public."Email" +( + id integer NOT NULL GENERATED ALWAYS AS IDENTITY, + address text NOT NULL, + avatar_id integer, + PRIMARY KEY (id), + CONSTRAINT avatar_id UNIQUE (avatar_id) +); + +CREATE TABLE IF NOT EXISTS public."EmailDisplayName" +( + id integer NOT NULL GENERATED ALWAYS AS IDENTITY, + email_id integer NOT NULL, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS public."Message" +( + id integer NOT NULL GENERATED ALWAYS AS IDENTITY, + subject text, + "from" integer, + "to" integer, + cc integer, + header_id integer, + hash text NOT NULL, + file_id integer NOT NULL, + date timestamp with time zone NOT NULL, + unread boolean NOT NULL, + body_id integer NOT NULL, + thread_id integer NOT NULL, + tag_id integer, + CONSTRAINT body_id UNIQUE (body_id) +); + +CREATE TABLE IF NOT EXISTS public."Header" +( + id integer NOT NULL GENERATED ALWAYS AS IDENTITY, + key text NOT NULL, + value text NOT NULL, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS public."File" +( + id integer NOT NULL GENERATED ALWAYS AS IDENTITY, + path text NOT NULL, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT 
EXISTS public."Avatar" +( + id integer NOT NULL GENERATED ALWAYS AS IDENTITY, + url text NOT NULL, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS public."Body" +( + id integer NOT NULL GENERATED ALWAYS AS IDENTITY, + text text NOT NULL, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS public."Thread" +( + id integer NOT NULL GENERATED ALWAYS AS IDENTITY, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS public."Tag" +( + id integer NOT NULL GENERATED ALWAYS AS IDENTITY, + name text NOT NULL, + display text, + fg_color integer, + bg_color integer, + PRIMARY KEY (id) +); + +ALTER TABLE IF EXISTS public."Email" + ADD CONSTRAINT email_avatar_fkey FOREIGN KEY (avatar_id) + REFERENCES public."Avatar" (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID; + + +ALTER TABLE IF EXISTS public."EmailDisplayName" + ADD CONSTRAINT email_id_fk FOREIGN KEY (email_id) + REFERENCES public."Email" (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID; + + +ALTER TABLE IF EXISTS public."Message" + ADD CONSTRAINT message_to_fkey FOREIGN KEY ("to") + REFERENCES public."Email" (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID; + + +ALTER TABLE IF EXISTS public."Message" + ADD CONSTRAINT message_cc_fkey FOREIGN KEY (cc) + REFERENCES public."Email" (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID; + + +ALTER TABLE IF EXISTS public."Message" + ADD CONSTRAINT message_from_fkey FOREIGN KEY ("from") + REFERENCES public."Email" (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID; + + +ALTER TABLE IF EXISTS public."Message" + ADD CONSTRAINT message_header_fkey FOREIGN KEY (header_id) + REFERENCES public."Header" (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID; + + +ALTER TABLE IF EXISTS public."Message" + ADD CONSTRAINT message_file_fkey FOREIGN KEY (file_id) + REFERENCES public."File" (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + 
NOT VALID; + + +ALTER TABLE IF EXISTS public."Message" + ADD CONSTRAINT message_body_id_fkey FOREIGN KEY (body_id) + REFERENCES public."Body" (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID; + + +ALTER TABLE IF EXISTS public."Message" + ADD CONSTRAINT message_thread_fkey FOREIGN KEY (thread_id) + REFERENCES public."Thread" (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID; + + +ALTER TABLE IF EXISTS public."Message" + ADD CONSTRAINT message_tag_fkey FOREIGN KEY (tag_id) + REFERENCES public."Tag" (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID; + +END; diff --git a/server/src/bin/email2db.rs b/server/src/bin/email2db.rs new file mode 100644 index 0000000..5fb11ec --- /dev/null +++ b/server/src/bin/email2db.rs @@ -0,0 +1,114 @@ +use chrono::NaiveDateTime; +use clap::Parser; +use mailparse::{addrparse_header, dateparse, parse_mail, MailHeaderMap, ParsedMail}; +use server::mail::read_mail_to_db; +use sqlx::postgres::PgPool; + +/// Add certain emails as posts in newsfeed app. 
+#[derive(Parser, Debug)]
+#[command(author, version, about, long_about = None)]
+struct Args {
+    /// DB URL, something like postgres://newsreader@nixos-07.h.xinu.tv/newsreader
+    #[arg(short, long)]
+    db_url: String,
+    /// path to parse
+    path: String,
+}
+
+/// Connects to the database given by `--db-url` and passes the mail file at `path` to
+/// `read_mail_to_db`.
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    let args = Args::parse();
+    let pool = PgPool::connect(&args.db_url).await?;
+    // `read_mail_to_db` is an `async fn`: the future has to be awaited before `?` can
+    // look at its `Result`.
+    read_mail_to_db(&pool, &args.path).await?;
+    Ok(())
+}
+
+/// Stub for inserting one row into the `post` table; it currently always panics via
+/// `todo!`. The commented-out body below is the draft implementation.
+///
+/// NOTE(review): not called from `main` in this file.
+async fn add_post(
+    pool: &PgPool,
+    site: &str,
+    title: &str,
+    summary: &str,
+    link: &str,
+    date: i64,
+    uid: &str,
+    feed_url: &str,
+) -> Result<(), server::mail::MailError> {
+    todo!("add_post")
+    /*
+    // site               | text
+    // title              | text
+    // summary            | text
+    // link               | text
+    // date               | timestamp without time zone
+    // is_read            | boolean
+    // uid                | text
+    // clean_site         | text
+    // clean_title        | text
+    // clean_summary      | text
+    // clean_summary_text | text
+    // feed_url           | text
+    let date = NaiveDateTime::from_timestamp_millis(date * 1000);
+
+    sqlx::query!(
+        r#"
+    INSERT INTO post (
+        site,
+        title,
+        summary,
+        link,
+        date,
+        is_read,
+        uid,
+        clean_title,
+        clean_summary,
+        clean_summary_text,
+        feed_url
+    )
+    VALUES ( $1, $2, $3, $4, $5, false, $6, $2, $3, '', $7 )
+        "#,
+        site,
+        title,
+        summary,
+        link,
+        date,
+        uid,
+        feed_url
+    )
+    .execute(pool)
+    .await?;
+    Ok(())
+    */
+}
+
+/// Looks up the `feed` row with the given `slug` and returns its `id`; when no row exists,
+/// inserts one with empty `homepage` and `selector` and returns the new `id`.
+///
+/// NOTE(review): the `i32` return type assumes `feed.id` is a Postgres `integer`; use
+/// `i64` if the column is `bigint`.
+/// NOTE(review): not called from `main` in this file.
+async fn find_feed(
+    pool: &PgPool,
+    name: &str,
+    slug: &str,
+    url: &str,
+) -> Result<i32, server::mail::MailError> {
+    let existing = sqlx::query!(
+        r#"
+SELECT id
+FROM feed
+WHERE slug = $1
+        "#,
+        slug
+    )
+    .fetch_one(pool)
+    .await;
+
+    match existing {
+        Ok(rec) => Ok(rec.id),
+        // No feed with this slug yet: create it and hand back the generated id.
+        Err(sqlx::Error::RowNotFound) => {
+            let rec = sqlx::query!(
+                r#"
+INSERT INTO feed ( name, slug, url, homepage, selector )
+VALUES ( $1, $2, $3, '', '' )
+RETURNING id
+                "#,
+                name,
+                slug,
+                url
+            )
+            .fetch_one(pool)
+            .await?;
+            Ok(rec.id)
+        }
+        Err(e) => Err(e.into()),
+    }
+}
diff --git a/server/src/lib.rs b/server/src/lib.rs
index 4c4056e..a85f077 100644
--- 
a/server/src/lib.rs
+++ b/server/src/lib.rs
@@ -1,6 +1,7 @@
 pub mod config;
 pub mod error;
 pub mod graphql;
+pub mod mail;
 pub mod newsreader;
 pub mod nm;
 #[cfg(feature = "tantivy")]
diff --git a/server/src/mail.rs b/server/src/mail.rs
new file mode 100644
index 0000000..ff04cca
--- /dev/null
+++ b/server/src/mail.rs
@@ -0,0 +1,143 @@
+use std::{fs::File, io, io::Read};
+
+use mailparse::{
+    addrparse_header, dateparse, parse_mail, MailHeaderMap, MailParseError, ParsedMail,
+};
+use sqlx::postgres::PgPool;
+use thiserror::Error;
+
+/// Errors that can occur while reading a mail file and storing it in the database.
+#[derive(Error, Debug)]
+pub enum MailError {
+    #[error("missing from header")]
+    MissingFrom,
+    #[error("missing from header display name")]
+    MissingFromDisplayName,
+    #[error("missing subject header")]
+    MissingSubject,
+    #[error("missing html part")]
+    MissingHtmlPart,
+    #[error("missing message ID")]
+    MissingMessageId,
+    #[error("missing date")]
+    MissingDate,
+    #[error("DB error {0}")]
+    SqlxError(#[from] sqlx::Error),
+    #[error("IO error {0}")]
+    IOError(#[from] std::io::Error),
+    #[error("mail parse error {0}")]
+    MailParseError(#[from] MailParseError),
+}
+
+/// Reads the mail file at `path`, parses it and hands its first `text/html` part to
+/// `add_post`.
+///
+/// The sender's display name is used as the feed name, its lowercased form with spaces
+/// replaced by `-` as the slug, and the sender's address as the feed URL. The
+/// `Message-ID` header is passed as both the post link and the post uid.
+///
+/// # Errors
+///
+/// Returns the matching `MailError::Missing*` variant when the subject, from, message ID
+/// or date header, the sender display name, or an HTML part is absent; I/O, mail parsing
+/// and database errors are converted through the `#[from]` variants.
+pub async fn read_mail_to_db(pool: &PgPool, path: &str) -> Result<(), MailError> {
+    // NOTE(review): blocking file I/O inside an `async fn`; consider `tokio::fs` or
+    // `spawn_blocking` if this is ever called from a shared runtime.
+    let mut file = File::open(path)?;
+    let mut buffer = Vec::new();
+    file.read_to_end(&mut buffer)?;
+    let m = parse_mail(&buffer)?;
+
+    let subject = m
+        .headers
+        .get_first_value("subject")
+        .ok_or(MailError::MissingSubject)?;
+
+    let from = addrparse_header(
+        m.headers
+            .get_first_header("from")
+            .ok_or(MailError::MissingFrom)?,
+    )?;
+    let from = from.extract_single_info().ok_or(MailError::MissingFrom)?;
+    let name = from.display_name.ok_or(MailError::MissingFromDisplayName)?;
+    let slug = name.to_lowercase().replace(' ', "-");
+    let url = from.addr;
+    let message_id = m
+        .headers
+        .get_first_value("Message-ID")
+        .ok_or(MailError::MissingMessageId)?;
+    let feed_id = find_feed(pool, &name, &slug, &url).await?;
+    let date = dateparse(
+        &m.headers
+            .get_first_value("Date")
+            .ok_or(MailError::MissingDate)?,
+    )?;
+
+    println!("Feed: {feed_id} Subject: {}", subject);
+
+    let html = first_html(&m).ok_or(MailError::MissingHtmlPart)?;
+    let body = html.get_body()?;
+    // The message ID doubles as the post's link and its uid.
+    add_post(pool, &slug, &subject, &body, &message_id, date, &message_id, &url).await?;
+    Ok(())
+}
+
+/// Returns the first part yielded by `m.parts()` whose MIME type is exactly `text/html`,
+/// or `None` when there is no such part.
+fn first_html<'m>(m: &'m ParsedMail<'m>) -> Option<&'m ParsedMail<'m>> {
+    m.parts().find(|part| part.ctype.mimetype == "text/html")
+}
+
+/// Looks up the `feed` row with the given `slug` and returns its `id`; when no row exists,
+/// inserts one with empty `homepage` and `selector` and returns the new `id`.
+///
+/// NOTE(review): the `i32` return type assumes `feed.id` is a Postgres `integer`; use
+/// `i64` if the column is `bigint`.
+async fn find_feed(pool: &PgPool, name: &str, slug: &str, url: &str) -> Result<i32, MailError> {
+    let existing = sqlx::query!(
+        r#"
+SELECT id
+FROM feed
+WHERE slug = $1
+        "#,
+        slug
+    )
+    .fetch_one(pool)
+    .await;
+
+    match existing {
+        Ok(rec) => Ok(rec.id),
+        Err(sqlx::Error::RowNotFound) => {
+            let rec = sqlx::query!(
+                r#"
+INSERT INTO feed ( name, slug, url, homepage, selector )
+VALUES ( $1, $2, $3, '', '' )
+RETURNING id
+                "#,
+                name,
+                slug,
+                url
+            )
+            .fetch_one(pool)
+            .await?;
+            Ok(rec.id)
+        }
+        Err(e) => Err(e.into()),
+    }
+}
+
+/// Placeholder for inserting one row into the `post` table; always panics via `todo!`
+/// until the insert is implemented.
+async fn add_post(
+    pool: &PgPool,
+    site: &str,
+    title: &str,
+    summary: &str,
+    link: &str,
+    date: i64,
+    uid: &str,
+    feed_url: &str,
+) -> Result<(), MailError> {
+    todo!("add_post")
+}