//! Below is the file `devel/schemata` from the notmuch source tree. //! //! This file describes the schemata used for notmuch's structured output //! format (currently JSON and S-Expressions). //! //! []'s indicate lists. List items can be marked with a '?', meaning //! they are optional; or a '*', meaning there can be zero or more of that //! item. {}'s indicate an object that maps from field identifiers to //! values. An object field marked '?' is optional. |'s indicate //! alternates (e.g., int|string means something can be an int or a //! string). //! //! For S-Expression output, lists are printed delimited by () instead of //! []. Objects are printed as p-lists, i.e. lists where the keys and values //! are interleaved. Keys are printed as keywords (symbols preceded by a //! colon), e.g. (:id "123" :time 54321 :from "foobar"). Null is printed as //! nil, true as t and false as nil. //! //! This is version 2 of the structured output format. //! //! Version history //! --------------- //! //! v1 //! - First versioned schema release. //! - Added part.content-length and part.content-transfer-encoding fields. //! //! v2 //! - Added the thread_summary.query field. //! //! Common non-terminals //! -------------------- //! //! # Number of seconds since the Epoch //! `unix_time = int` //! //! # Thread ID, sans "thread:" //! `threadid = string` //! //! # Message ID, sans "id:" //! `messageid = string` //! //! notmuch show schema //! ------------------- //! //! # A top-level set of threads (do_show) //! # Returned by notmuch show without a --part argument //! `thread_set = [thread*]` //! //! # Top-level messages in a thread (show_messages) //! `thread = [thread_node*]` //! //! # A message and its replies (show_messages) //! ```text //! thread_node = [ //! message|null, # null if not matched and not --entire-thread //! [thread_node*] # children of message //! ] //! ``` //! //! # A message (format_part_sprinter) //! ```text //! message = { //! # (format_message_sprinter) //! id: messageid, //! match: bool, //! filename: string, //! timestamp: unix_time, # date header as unix time //! date_relative: string, # user-friendly timestamp //! tags: [string*], //! //! headers: headers, //! body?: [part] # omitted if --body=false //! } //! ``` //! //! # A MIME part (format_part_sprinter) //! ```text //! part = { //! id: int|string, # part id (currently DFS part number) //! //! encstatus?: encstatus, //! sigstatus?: sigstatus, //! //! content-type: string, //! content-id?: string, //! # if content-type starts with "multipart/": //! content: [part*], //! # if content-type is "message/rfc822": //! content: [{headers: headers, body: [part]}], //! # otherwise (leaf parts): //! filename?: string, //! content-charset?: string, //! # A leaf part's body content is optional, but may be included if //! # it can be correctly encoded as a string. Consumers should use //! # this in preference to fetching the part content separately. //! content?: string, //! # If a leaf part's body content is not included, the length of //! # the encoded content (in bytes) may be given instead. //! content-length?: int, //! # If a leaf part's body content is not included, its transfer encoding //! # may be given. Using this and the encoded content length, it is //! # possible for the consumer to estimate the decoded content length. //! content-transfer-encoding?: string //! } //! ``` //! //! # The headers of a message or part (format_headers_sprinter with reply = FALSE) //! ```text //! headers = { //! Subject: string, //! From: string, //! To?: string, //! Cc?: string, //! Bcc?: string, //! Reply-To?: string, //! Date: string //! } //! ``` //! //! # Encryption status (format_part_sprinter) //! `encstatus = [{status: "good"|"bad"}]` //! //! # Signature status (format_part_sigstatus_sprinter) //! `sigstatus = [signature*]` //! //! ```text //! signature = { //! # (signature_status_to_string) //! status: "none"|"good"|"bad"|"error"|"unknown", //! # if status is "good": //! fingerprint?: string, //! created?: unix_time, //! expires?: unix_time, //! userid?: string //! # if status is not "good": //! keyid?: string //! # if the signature has errors: //! errors?: int //! } //! ``` //! //! notmuch search schema //! --------------------- //! //! # --output=summary //! `search_summary = [thread_summary*]` //! //! # --output=threads //! `search_threads = [threadid*]` //! //! # --output=messages //! `search_messages = [messageid*]` //! //! # --output=files //! `search_files = [string*]` //! //! # --output=tags //! `search_tags = [string*]` //! //! ```text //! thread_summary = { //! thread: threadid, //! timestamp: unix_time, //! date_relative: string, # user-friendly timestamp //! matched: int, # number of matched messages //! total: int, # total messages in thread //! authors: string, # comma-separated names with | between //! # matched and unmatched //! subject: string, //! tags: [string*], //! //! # Two stable query strings identifying exactly the matched and //! # unmatched messages currently in this thread. The messages //! # matched by these queries will not change even if more messages //! # arrive in the thread. If there are no matched or unmatched //! # messages, the corresponding query will be null (there is no //! # query that matches nothing). (Added in schema version 2.) //! query: [string|null, string|null], //! } //! ``` //! //! notmuch reply schema //! -------------------- //! //! ```text //! reply = { //! # The headers of the constructed reply //! reply-headers: reply_headers, //! //! # As in the show format (format_part_sprinter) //! original: message //! } //! ``` //! //! # Reply headers (format_headers_sprinter with reply = TRUE) //! ```text //! reply_headers = { //! Subject: string, //! From: string, //! To?: string, //! Cc?: string, //! Bcc?: string, //! In-reply-to: string, //! References: string //! } //! ``` use std::{ ffi::OsStr, io::{self}, path::{Path, PathBuf}, process::Command, }; use log::info; use serde::{Deserialize, Serialize}; /// # Number of seconds since the Epoch pub type UnixTime = isize; /// # Thread ID, sans "thread:" pub type ThreadId = String; /// # Message ID, sans "id:" pub type MessageId = String; /// A top-level set of threads (do_show) /// Returned by notmuch show without a --part argument #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct ThreadSet(pub Vec); /// Top-level messages in a thread (show_messages) #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct Thread(pub Vec); /// A message and its replies (show_messages) #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct ThreadNode( pub Option, // null if not matched and not --entire-thread pub Vec, // children of message ); /// A message (format_part_sprinter) #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct Message { pub id: MessageId, pub r#match: bool, pub excluded: bool, pub filename: Vec, pub timestamp: UnixTime, // date header as unix time pub date_relative: String, // user-friendly timestamp pub tags: Vec, pub headers: Headers, #[serde(skip_serializing_if = "Option::is_none")] pub body: Option>, // omitted if --body=false } /// The headers of a message or part (format_headers_sprinter with reply = FALSE) #[derive(Serialize, Deserialize, Debug, PartialEq, Default)] #[serde(rename_all = "PascalCase")] pub struct Headers { pub subject: String, pub from: String, #[serde(skip_serializing_if = "Option::is_none")] pub to: Option, #[serde(skip_serializing_if = "Option::is_none")] pub cc: Option, #[serde(skip_serializing_if = "Option::is_none")] pub bcc: Option, #[serde(skip_serializing_if = "Option::is_none")] pub reply_to: Option, pub date: String, } #[derive(Serialize, Deserialize, Debug, PartialEq)] #[serde(untagged)] pub enum IntOrString { Int(isize), String(String), } impl Default for IntOrString { fn default() -> Self { IntOrString::Int(0) } } impl From for IntOrString { fn from(i: isize) -> Self { IntOrString::Int(i) } } #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct Rfc822 { pub headers: Headers, pub body: Vec, } #[derive(Serialize, Deserialize, Debug, PartialEq)] #[serde(untagged)] pub enum Content { /// if content-type starts with "multipart/": Multipart(Vec), /// if content-type is "message/rfc822": Rfc822(Vec), /// otherwise (leaf parts): Leaf { #[serde(skip_serializing_if = "Option::is_none")] filename: Option, #[serde(skip_serializing_if = "Option::is_none")] content_charset: Option, /// A leaf part's body content is optional, but may be included if /// it can be correctly encoded as a string. Consumers should use /// this in preference to fetching the part content separately. #[serde(skip_serializing_if = "Option::is_none")] content: Option, // If a leaf part's body content is not included, the length of // the encoded content (in bytes) may be given instead. #[serde(skip_serializing_if = "Option::is_none")] content_length: Option, // If a leaf part's body content is not included, its transfer encoding // may be given. Using this and the encoded content length, it is // possible for the consumer to estimate the decoded content length. #[serde(skip_serializing_if = "Option::is_none")] content_transfer_encoding: Option, }, // TODO(wathiede): flatten Leaf variant to replace this. String(String), } impl Default for Content { fn default() -> Self { Content::Leaf { filename: None, content_charset: None, content: None, content_length: None, content_transfer_encoding: None, } } } /// A MIME part #[derive(Serialize, Deserialize, Debug, PartialEq, Default)] pub struct Part { pub id: IntOrString, // part id (currently DFS part number) #[serde(skip_serializing_if = "Option::is_none")] pub encstatus: Option, #[serde(skip_serializing_if = "Option::is_none")] pub sigstatus: Option, #[serde(rename = "content-type")] pub content_type: String, #[serde(skip_serializing_if = "Option::is_none")] #[serde(rename = "content-disposition")] pub content_disposition: Option, #[serde(skip_serializing_if = "Option::is_none")] #[serde(rename = "content-transfer-encoding")] pub content_transfer_encoding: Option, #[serde(skip_serializing_if = "Option::is_none")] #[serde(rename = "content-length")] pub content_length: Option, #[serde(skip_serializing_if = "Option::is_none")] #[serde(rename = "content-id")] pub content_id: Option, #[serde(skip_serializing_if = "Option::is_none")] pub content: Option, #[serde(skip_serializing_if = "Option::is_none")] pub filename: Option, } /// `encstatus = [{status: "good"|"bad"}]` pub type EncStatus = String; /// # Signature status (format_part_sigstatus_sprinter) #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct SigStatus(pub Vec); #[derive(Serialize, Deserialize, Debug, PartialEq)] pub enum Signature { /// (signature_status_to_string) Good { #[serde(skip_serializing_if = "Option::is_none")] fingerprint: Option, #[serde(skip_serializing_if = "Option::is_none")] created: Option, #[serde(skip_serializing_if = "Option::is_none")] expires: Option, #[serde(skip_serializing_if = "Option::is_none")] userid: Option, }, None { #[serde(skip_serializing_if = "Option::is_none")] keyid: Option, }, Bad { #[serde(skip_serializing_if = "Option::is_none")] keyid: Option, }, Unknown { #[serde(skip_serializing_if = "Option::is_none")] keyid: Option, }, Error { #[serde(skip_serializing_if = "Option::is_none")] keyid: Option, #[serde(skip_serializing_if = "Option::is_none")] errors: Option, }, } #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct SearchSummary(pub Vec); #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct SearchThreads(pub Vec); #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct SearchMessages(pub Vec); #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct SearchFiles(pub Vec); #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct SearchTags(pub Vec); #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct ThreadSummary { pub thread: ThreadId, pub timestamp: UnixTime, /// user-friendly timestamp pub date_relative: String, /// number of matched messages pub matched: isize, /// total messages in thread pub total: isize, /// comma-separated names with | between matched and unmatched pub authors: String, pub subject: String, pub tags: Vec, /// Two stable query strings identifying exactly the matched and unmatched messages currently /// in this thread. The messages matched by these queries will not change even if more /// messages arrive in the thread. If there are no matched or unmatched messages, the /// corresponding query will be null (there is no query that matches nothing). (Added in /// schema version 2.) pub query: (Option, Option), } // TODO(wathiede): notmuch reply schema #[derive(thiserror::Error, Debug)] pub enum NotmuchError { #[error("notmuch execution error")] Notmuch(#[from] io::Error), #[error("json decoding error")] SerdeJson(#[from] serde_json::Error), #[error("failed to parse bytes as str")] Utf8Error(#[from] std::str::Utf8Error), #[error("failed to parse bytes as String")] StringUtf8Error(#[from] std::string::FromUtf8Error), #[error("failed to parse str as int")] ParseIntError(#[from] std::num::ParseIntError), } #[derive(Default)] pub struct Notmuch { config_path: Option, } impl Notmuch { pub fn with_config>(config_path: P) -> Notmuch { Notmuch { config_path: Some(config_path.as_ref().into()), } } pub fn new(&self) -> Result, NotmuchError> { self.run_notmuch(["new"]) } pub fn no_args(&self) -> Result, NotmuchError> { self.run_notmuch(std::iter::empty::<&str>()) } pub fn tags_for_query(&self, query: &str) -> Result, NotmuchError> { let res = self.run_notmuch(["search", "--format=json", "--output=tags", query])?; Ok(serde_json::from_slice(&res)?) } pub fn tags(&self) -> Result, NotmuchError> { self.tags_for_query("*") } pub fn tag_add(&self, tag: &str, search_term: &str) -> Result<(), NotmuchError> { self.run_notmuch(["tag", &format!("+{tag}"), search_term])?; Ok(()) } pub fn tag_remove(&self, tag: &str, search_term: &str) -> Result<(), NotmuchError> { self.run_notmuch(["tag", &format!("-{tag}"), search_term])?; Ok(()) } pub fn search( &self, query: &str, offset: usize, limit: usize, ) -> Result { let query = if query.is_empty() { "*" } else { query }; let res = self.run_notmuch([ "search", "--format=json", &format!("--offset={offset}"), &format!("--limit={limit}"), query, ])?; Ok(serde_json::from_slice(&res)?) } pub fn count(&self, query: &str) -> Result { // TODO: compare speed of notmuch count for * w/ and w/o --output=threads let res = self.run_notmuch(["count", "--output=threads", query])?; // Strip '\n' from res. let s = std::str::from_utf8(&res[..res.len() - 1])?; Ok(s.parse()?) } pub fn show(&self, query: &str) -> Result { let slice = self.run_notmuch([ "show", "--include-html=true", "--entire-thread=false", "--format=json", query, ])?; // Notmuch returns JSON with invalid unicode. So we lossy convert it to a string here and // use that for parsing in rust. let s = String::from_utf8_lossy(&slice); let mut deserializer = serde_json::Deserializer::from_str(&s); deserializer.disable_recursion_limit(); let val = serde::de::Deserialize::deserialize(&mut deserializer)?; deserializer.end()?; Ok(val) } pub fn show_part(&self, query: &str, part: usize) -> Result { let slice = self.run_notmuch([ "show", "--include-html=true", "--entire-thread=true", "--format=json", &format!("--part={}", part), query, ])?; // Notmuch returns JSON with invalid unicode. So we lossy convert it to a string here and // use that for parsing in rust. let s = String::from_utf8_lossy(&slice); let mut deserializer = serde_json::Deserializer::from_str(&s); deserializer.disable_recursion_limit(); let val = serde::de::Deserialize::deserialize(&mut deserializer)?; deserializer.end()?; Ok(val) } pub fn show_original(&self, id: &MessageId) -> Result, NotmuchError> { self.show_original_part(id, 0) } pub fn show_original_part(&self, id: &MessageId, part: usize) -> Result, NotmuchError> { let res = self.run_notmuch(["show", "--part", &part.to_string(), id])?; Ok(res) } pub fn message_ids(&self, query: &str) -> Result, NotmuchError> { let res = self.run_notmuch(["search", "--output=messages", "--format=json", query])?; Ok(serde_json::from_slice(&res)?) } pub fn files(&self, query: &str) -> Result, NotmuchError> { let res = self.run_notmuch(["search", "--output=files", "--format=json", query])?; Ok(serde_json::from_slice(&res)?) } fn run_notmuch(&self, args: I) -> Result, NotmuchError> where I: IntoIterator, S: AsRef, { let mut cmd = Command::new("notmuch"); if let Some(config_path) = &self.config_path { cmd.arg("--config").arg(config_path); } cmd.args(args); info!("{:?}", &cmd); let out = cmd.output()?; Ok(out.stdout) } } #[cfg(test)] mod tests { use super::*; #[test] #[ignore] // skip because notmuch config is relative to $HOME fn new() -> Result<(), NotmuchError> { let nm = Notmuch::with_config("testdata/notmuch.config"); nm.new()?; let output = nm.no_args()?; let s = String::from_utf8_lossy(&output); assert!( s.contains("Notmuch is configured and appears to have a database. Excellent!"), "output:\n```\n{}```", s ); Ok(()) } #[test] #[ignore] // skip because notmuch config is relative to $HOME fn search() -> Result<(), NotmuchError> { let nm = Notmuch::with_config("testdata/notmuch.config"); nm.new()?; let res = nm.search("goof", 0, 100)?; assert_eq!(res.0.len(), 1); Ok(()) } #[test] #[ignore] // skip because notmuch config is relative to $HOME fn show() -> Result<(), NotmuchError> { let nm = Notmuch::with_config("testdata/notmuch.config"); nm.new()?; let res = nm.show("goof")?; assert_eq!(res.0.len(), 1); Ok(()) } #[test] #[ignore] // skip because notmuch config is relative to $HOME fn count() -> Result<(), NotmuchError> { let nm = Notmuch::with_config("testdata/notmuch.config"); nm.new()?; let c = nm.count("*")?; assert_eq!(c, 14); Ok(()) } #[test] fn thread_set_serde() { let ts = ThreadSet(vec![Thread(vec![ThreadNode( Some(Message { id: "4khpM7BF.1187467196.1017920.wathiede.xinu@localhost".to_string(), r#match: true, excluded: false, filename: vec!["/file/path/email.txt".to_string()], timestamp: 1187467196, date_relative: "2007-08-18".to_string(), tags: vec!["inbox".to_string()], body: Some(vec![Part { id: 1.into(), content_type: "multipart/mixed".to_string(), content: Some(Content::Multipart(vec![ Part { id: 2.into(), content_type: "text/plain".to_string(), content_disposition: Some("inline".to_string()), content: Some(Content::String("Spam detection software".to_string())), ..Default::default() }, Part { id: 3.into(), content_type: "message/rfc822".to_string(), content_disposition: Some("inline".to_string()), content: Some(Content::Rfc822(vec![Rfc822 { headers: Headers { subject: "Re: Registration goof".to_string(), from: "\"Bill Thiede\" ".to_string(), to: Some( "jimpark@med.umich.edu, registration@a2ultimate.org" .to_string(), ), date: "Sat, 18 Aug 2007 15:59:56 -0400".to_string(), ..Default::default() }, body: vec![Part { id: 4.into(), content_type: "text/plain".to_string(), content: Some(Content::String("Hello".to_string())), ..Default::default() }], }])), ..Default::default() }, ])), ..Default::default() }]), headers: Headers { subject: "Re: Registration goof".to_string(), from: "\"Bill Thiede\" ".to_string(), to: Some("jimpark@med.umich.edu, registration@a2ultimate.org".to_string()), date: "Sat, 18 Aug 2007 15:59:56 -0400".to_string(), ..Default::default() }, }), vec![], )])]); let s = serde_json::to_string_pretty(&ts).expect("failed to encode"); println!("{}", s); let got = serde_json::from_str(include_str!("../testdata/thread_set.json")) .expect("failed to decode"); use pretty_assertions::assert_eq; assert_eq!(ts, got); } }