This commit is contained in:
2024-12-19 10:40:18 -08:00
parent 06c5cb6cbf
commit 51154044cc
8 changed files with 509 additions and 2 deletions

114
server/src/bin/email2db.rs Normal file
View File

@@ -0,0 +1,114 @@
use chrono::NaiveDateTime;
use clap::Parser;
use mailparse::{addrparse_header, dateparse, parse_mail, MailHeaderMap, ParsedMail};
use server::mail::read_mail_to_db;
use sqlx::postgres::PgPool;
// Command-line arguments for the email2db binary, parsed via clap's derive API.
// NOTE: clap's derive macro turns the `///` doc comments below into help text,
// so editing them changes what `--help` prints.
/// Add certain emails as posts in newsfeed app.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
/// DB URL, something like postgres://newsreader@nixos-07.h.xinu.tv/newsreader
// Exposed as `-d` / `--db-url` because of `short, long`; passed to `PgPool::connect` in `main`.
#[arg(short, long)]
db_url: String,
/// path to parse
// No `#[arg]` attribute, so clap treats this as a positional argument.
path: String,
}
/// Entry point: parses the command line, connects to Postgres using
/// `--db-url`, and imports the mail file at `path` via `read_mail_to_db`.
///
/// # Errors
///
/// Returns an error if the database connection cannot be established or if
/// `read_mail_to_db` fails.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let args = Args::parse();
    let pool = PgPool::connect(&args.db_url).await?;
    // `read_mail_to_db` is an `async fn`: the future must be awaited before
    // `?` can look at the `Result` it produces.
    read_mail_to_db(&pool, &args.path).await?;
    Ok(())
}
/// Placeholder for inserting a single row into the `post` table.
///
/// The body is currently just `todo!("add_post")`, so calling this function
/// panics; no database access happens. The intended implementation is kept
/// below in a block comment.
///
/// Parameters mirror the columns named in the disabled `INSERT` statement:
/// `site`, `title`, `summary`, `link`, `date`, `uid` and `feed_url`.
/// NOTE(review): the disabled code multiplies `date` by 1000 before calling
/// `NaiveDateTime::from_timestamp_millis`, which suggests `date` is expressed
/// in seconds since the Unix epoch — confirm against the caller.
async fn add_post(
pool: &PgPool,
site: &str,
title: &str,
summary: &str,
link: &str,
date: i64,
uid: &str,
feed_url: &str,
) -> Result<(), MailError> {
// Unconditionally panics until the implementation below is enabled.
todo!("add_post")
// Disabled implementation. It lists the `post` columns, then inserts a row
// with `is_read` = false, `clean_summary_text` = '' and with `clean_title` /
// `clean_summary` reusing the `$2` / `$3` bindings; `clean_site` is not set.
/*
// site | text
// title | text
// summary | text
// link | text
// date | timestamp without time zone
// is_read | boolean
// uid | text
// clean_site | text
// clean_title | text
// clean_summary | text
// clean_summary_text | text
// feed_url | text
let date = NaiveDateTime::from_timestamp_millis(date * 1000);
sqlx::query!(
r#"
INSERT INTO post (
site,
title,
summary,
link,
date,
is_read,
uid,
clean_title,
clean_summary,
clean_summary_text,
feed_url
)
VALUES ( $1, $2, $3, $4, $5, false, $6, $2, $3, '', $7 )
"#,
site,
title,
summary,
link,
date,
uid,
feed_url
)
.execute(pool)
.await?;
Ok(())
*/
}
/// Looks up the id of the feed whose `slug` matches, creating the feed when
/// no such row exists.
///
/// When the lookup reports `RowNotFound`, a new `feed` row is inserted with
/// the given `name`, `slug` and `url` (and empty `homepage` / `selector`),
/// and the id returned by the database is used.
///
/// # Errors
///
/// Any other database error, from either the lookup or the insert, is
/// converted into `MailError` and returned.
async fn find_feed(pool: &PgPool, name: &str, slug: &str, url: &str) -> Result<i32, MailError> {
    let lookup = sqlx::query!(
        r#"
SELECT id
FROM feed
WHERE slug = $1
"#,
        slug
    )
    .fetch_one(pool)
    .await;

    match lookup {
        // The feed already exists: reuse its id.
        Ok(existing) => Ok(existing.id),
        // No feed with this slug yet: create one and hand back the new id.
        Err(sqlx::Error::RowNotFound) => {
            let created = sqlx::query!(
                r#"
INSERT INTO feed ( name, slug, url, homepage, selector )
VALUES ( $1, $2, $3, '', '' )
RETURNING id
"#,
                name,
                slug,
                url
            )
            .fetch_one(pool)
            .await?;
            Ok(created.id)
        }
        Err(other) => Err(other.into()),
    }
}

View File

@@ -1,6 +1,7 @@
pub mod config;
pub mod error;
pub mod graphql;
pub mod mail;
pub mod newsreader;
pub mod nm;
#[cfg(feature = "tantivy")]

81
server/src/mail.rs Normal file
View File

@@ -0,0 +1,81 @@
use std::{fs::File, io, io::Read};
use mailparse::{
addrparse_header, dateparse, parse_mail, MailHeaderMap, MailParseError, ParsedMail,
};
use sqlx::postgres::PgPool;
use thiserror::Error;
/// Errors that can occur while reading a mail file and storing it in the
/// database.
///
/// The `Missing*` variants describe a required piece of the message that was
/// absent; the remaining variants wrap errors from the database, the
/// filesystem and the mail parser (each convertible with `?` via `#[from]`).
#[derive(Error, Debug)]
pub enum MailError {
/// The message has no usable `From` header.
#[error("missing from header")]
MissingFrom,
/// The `From` address carries no display name.
#[error("missing from header display name")]
MissingFromDisplayName,
/// The message has no `Subject` header.
#[error("missing subject header")]
MissingSubject,
/// No `text/html` part was found in the message.
#[error("missing html part")]
MissingHtmlPart,
/// The message has no `Message-ID` header.
#[error("missing message ID")]
MissingMessageId,
/// The message has no `Date` header.
#[error("missing date")]
MissingDate,
/// A database operation failed.
#[error("DB error {0}")]
SqlxError(#[from] sqlx::Error),
/// An I/O operation failed.
#[error("IO error {0}")]
IOError(#[from] std::io::Error),
/// The `mailparse` crate reported an error.
#[error("mail parse error {0}")]
MailParseError(#[from] MailParseError),
}
/// Reads the mail file at `path`, parses it, and stores its HTML part as a
/// post belonging to a feed derived from the sender.
///
/// The feed is identified by a slug built from the `From` display name
/// (lower-cased, spaces replaced with `-`); the sender address is used as the
/// feed URL. The `Message-ID` header is passed to `add_post` as both the link
/// and the uid of the post.
///
/// All required headers and the HTML part are validated *before* the database
/// is touched, so a message that is rejected never leaves a newly created
/// feed row behind.
///
/// # Errors
///
/// * `MailError::IOError` if the file cannot be opened or read.
/// * `MailError::MailParseError` if the message, its `From` header, its
///   `Date` header or its HTML body cannot be parsed.
/// * `MailError::MissingSubject`, `MissingFrom`, `MissingFromDisplayName`,
///   `MissingMessageId`, `MissingDate` or `MissingHtmlPart` if the
///   corresponding piece of the message is absent.
/// * Whatever `find_feed` or `add_post` return on failure.
pub async fn read_mail_to_db(pool: &PgPool, path: &str) -> Result<(), MailError> {
    let mut file = File::open(path)?;
    let mut buffer = Vec::new();
    file.read_to_end(&mut buffer)?;
    let m = parse_mail(&buffer)?;

    let subject = m
        .headers
        .get_first_value("subject")
        .ok_or(MailError::MissingSubject)?;

    let from = addrparse_header(
        m.headers
            .get_first_header("from")
            .ok_or(MailError::MissingFrom)?,
    )?
    .extract_single_info()
    .ok_or(MailError::MissingFrom)?;
    let name = from.display_name.ok_or(MailError::MissingFromDisplayName)?;
    let slug = name.to_lowercase().replace(' ', "-");
    let url = from.addr;

    let message_id = m
        .headers
        .get_first_value("Message-ID")
        .ok_or(MailError::MissingMessageId)?;

    let date = dateparse(
        &m.headers
            .get_first_value("Date")
            .ok_or(MailError::MissingDate)?,
    )?;

    let body = first_html(&m)
        .ok_or(MailError::MissingHtmlPart)?
        .get_body()?;

    // Only now that the message is known to be complete do we hit the
    // database; `find_feed` may insert a feed row as a side effect.
    let feed_id = find_feed(pool, &name, &slug, &url).await?;
    println!("Feed: {feed_id} Subject: {}", subject);

    // The Message-ID serves as both the post's link and its uid.
    add_post(
        pool,
        &slug,
        &subject,
        &body,
        &message_id,
        date,
        &message_id,
        &url,
    )
    .await?;
    Ok(())
}
/// Returns the first part yielded by `m.parts()` whose MIME type is exactly
/// `text/html`, or `None` when no part matches.
fn first_html<'m>(m: &'m ParsedMail<'m>) -> Option<&'m ParsedMail<'m>> {
    m.parts().find(|part| part.ctype.mimetype == "text/html")
}