Move notmuch json types into separate crate.

Add starter testdata and notmuch config.
2021-10-25 17:33:51 -07:00
parent 8f2e14049c
commit 6e2f243f55
15 changed files with 5330 additions and 0 deletions
--- a/notmuch/src/lib.rs
+++ b/notmuch/src/lib.rs
@@ -0,0 +1,391 @@
+//! Below is the file `devel/schemata` from the notmuch source tree.
+//!
+//! This file describes the schemata used for notmuch's structured output
+//! format (currently JSON and S-Expressions).
+//!
+//! []'s indicate lists.  List items can be marked with a '?', meaning
+//! they are optional; or a '*', meaning there can be zero or more of that
+//! item.  {}'s indicate an object that maps from field identifiers to
+//! values.  An object field marked '?' is optional.  |'s indicate
+//! alternates (e.g., int|string means something can be an int or a
+//! string).
+//!
+//! For S-Expression output, lists are printed delimited by () instead of
+//! []. Objects are printed as p-lists, i.e. lists where the keys and values
+//! are interleaved. Keys are printed as keywords (symbols preceded by a
+//! colon), e.g. (:id "123" :time 54321 :from "foobar"). Null is printed as
+//! nil, true as t and false as nil.
+//!
+//! This is version 2 of the structured output format.
+//!
+//! Version history
+//! ---------------
+//!
+//! v1
+//! - First versioned schema release.
+//! - Added part.content-length and part.content-transfer-encoding fields.
+//!
+//! v2
+//! - Added the thread_summary.query field.
+//!
+//! Common non-terminals
+//! --------------------
+//!
+//! # Number of seconds since the Epoch
+//! `unix_time = int`
+//!
+//! # Thread ID, sans "thread:"
+//! `threadid = string`
+//!
+//! # Message ID, sans "id:"
+//! `messageid = string`
+//!
+//! notmuch show schema
+//! -------------------
+//!
+//! # A top-level set of threads (do_show)
+//! # Returned by notmuch show without a --part argument
+//! `thread_set = [thread*]`
+//!
+//! # Top-level messages in a thread (show_messages)
+//! `thread = [thread_node*]`
+//!
+//! # A message and its replies (show_messages)
+//! ```text
+//! thread_node = [
+//!     message|null,             # null if not matched and not --entire-thread
+//!     [thread_node*]            # children of message
+//! ]
+//! ```
+//!
+//! # A message (format_part_sprinter)
+//! ```text
+//! message = {
+//!     # (format_message_sprinter)
+//!     id:             messageid,
+//!     match:          bool,
+//!     filename:       string,
+//!     timestamp:      unix_time, # date header as unix time
+//!     date_relative:  string,   # user-friendly timestamp
+//!     tags:           [string*],
+//!
+//!     headers:        headers,
+//!     body?:          [part]    # omitted if --body=false
+//! }
+//! ```
+//!
+//! # A MIME part (format_part_sprinter)
+//! ```text
+//! part = {
+//!     id:             int|string, # part id (currently DFS part number)
+//!
+//!     encstatus?:     encstatus,
+//!     sigstatus?:     sigstatus,
+//!
+//!     content-type:   string,
+//!     content-id?:    string,
+//!     # if content-type starts with "multipart/":
+//!     content:        [part*],
+//!     # if content-type is "message/rfc822":
+//!     content:        [{headers: headers, body: [part]}],
+//!     # otherwise (leaf parts):
+//!     filename?:      string,
+//!     content-charset?: string,
+//!     # A leaf part's body content is optional, but may be included if
+//!     # it can be correctly encoded as a string.  Consumers should use
+//!     # this in preference to fetching the part content separately.
+//!     content?:       string,
+//!     # If a leaf part's body content is not included, the length of
+//!     # the encoded content (in bytes) may be given instead.
+//!     content-length?: int,
+//!     # If a leaf part's body content is not included, its transfer encoding
+//!     # may be given.  Using this and the encoded content length, it is
+//!     # possible for the consumer to estimate the decoded content length.
+//!     content-transfer-encoding?: string
+//! }
+//! ```
+//!
+//! # The headers of a message or part (format_headers_sprinter with reply = FALSE)
+//! ```text
+//! headers = {
+//!     Subject:        string,
+//!     From:           string,
+//!     To?:            string,
+//!     Cc?:            string,
+//!     Bcc?:           string,
+//!     Reply-To?:      string,
+//!     Date:           string
+//! }
+//! ```
+//!
+//! # Encryption status (format_part_sprinter)
+//! `encstatus = [{status: "good"|"bad"}]`
+//!
+//! # Signature status (format_part_sigstatus_sprinter)
+//! `sigstatus = [signature*]`
+//!
+//! ```text
+//! signature = {
+//!     # (signature_status_to_string)
+//!     status:         "none"|"good"|"bad"|"error"|"unknown",
+//!     # if status is "good":
+//!     fingerprint?:   string,
+//!     created?:       unix_time,
+//!     expires?:       unix_time,
+//!     userid?:        string
+//!     # if status is not "good":
+//!     keyid?:         string
+//!     # if the signature has errors:
+//!     errors?:        int
+//! }
+//! ```
+//!
+//! notmuch search schema
+//! ---------------------
+//!
+//! # --output=summary
+//! `search_summary = [thread_summary*]`
+//!
+//! # --output=threads
+//! `search_threads = [threadid*]`
+//!
+//! # --output=messages
+//! `search_messages = [messageid*]`
+//!
+//! # --output=files
+//! `search_files = [string*]`
+//!
+//! # --output=tags
+//! `search_tags = [string*]`
+//!
+//! ```text
+//! thread_summary = {
+//!     thread:         threadid,
+//!     timestamp:      unix_time,
+//!     date_relative:  string,   # user-friendly timestamp
+//!     matched:        int,      # number of matched messages
+//!     total:          int,      # total messages in thread
+//!     authors:        string,   # comma-separated names with | between
+//!                               # matched and unmatched
+//!     subject:        string,
+//!     tags:           [string*],
+//!
+//!     # Two stable query strings identifying exactly the matched and
+//!     # unmatched messages currently in this thread.  The messages
+//!     # matched by these queries will not change even if more messages
+//!     # arrive in the thread.  If there are no matched or unmatched
+//!     # messages, the corresponding query will be null (there is no
+//!     # query that matches nothing).  (Added in schema version 2.)
+//!     query:          [string|null, string|null],
+//! }
+//! ```
+//!
+//! notmuch reply schema
+//! --------------------
+//!
+//! ```text
+//! reply = {
+//!     # The headers of the constructed reply
+//!     reply-headers: reply_headers,
+//!
+//!     # As in the show format (format_part_sprinter)
+//!     original: message
+//! }
+//! ```
+//!
+//! # Reply headers (format_headers_sprinter with reply = TRUE)
+//! ```text
+//! reply_headers = {
+//!     Subject:        string,
+//!     From:           string,
+//!     To?:            string,
+//!     Cc?:            string,
+//!     Bcc?:           string,
+//!     In-reply-to:    string,
+//!     References:     string
+//! }
+//! ```
+
+use std::{ffi::OsStr, process::Command};
+
+use serde::Deserialize;
+
+/// Number of seconds since the Epoch (`unix_time` in the schema). NOTE(review): `isize` is pointer-width; confirm 32-bit targets are not a concern.
+pub type UnixTime = isize;
+
+/// Thread ID, sans the "thread:" prefix (`threadid` in the schema).
+pub type ThreadId = String;
+
+/// Message ID, sans the "id:" prefix (`messageid` in the schema).
+pub type MessageId = String;
+
+/// A top-level set of threads (`thread_set = [thread*]` in the schema, produced by do_show).
+/// Returned by `notmuch show` without a `--part` argument.
+#[derive(Deserialize, Debug)]
+pub struct ThreadSet(pub Vec<Thread>);
+
+/// Top-level messages in a thread (`thread = [thread_node*]` in the schema, produced by show_messages).
+#[derive(Deserialize, Debug)]
+pub struct Thread(pub Vec<ThreadNode>);
+
+/// A message and its replies (show_messages); the schema describes it as the list `[message|null, [thread_node*]]`.
+#[derive(Deserialize, Debug)]
+pub struct ThreadNode {
+    pub message: Option<Message>, // `None` for JSON null: not matched and not --entire-thread
+    pub children: Vec<ThreadNode>, // replies to `message`, each carrying its own subtree
+}
+/// A message (format_part_sprinter / format_message_sprinter).
+#[derive(Deserialize, Debug)]
+pub struct Message {
+    pub id: MessageId, // message ID, sans "id:"
+    pub r#match: bool, // raw identifier because `match` is a Rust keyword
+    pub excluded: bool, // NOTE(review): not listed in the schema text at the top of this file — confirm its source
+    pub filename: Vec<String>, // NOTE(review): the schema text above lists a single string; presumably a newer schema — confirm
+    pub timestamp: UnixTime,   // date header as unix time
+    pub date_relative: String, // user-friendly timestamp
+    pub tags: Vec<String>,
+
+    pub headers: Headers,
+    pub body: Option<Vec<Part>>, // omitted if --body=false
+}
+
+/// The headers of a message or part (format_headers_sprinter with reply = FALSE).
+///
+/// The schema spells the header keys capitalised (`Subject`, `From`, `To`,
+/// `Cc`, `Bcc`, `Reply-To`, `Date`), so the snake_case Rust fields are mapped
+/// onto those names; without the mapping serde looks for lowercase keys and
+/// rejects the object with a missing-field error.
+#[derive(Deserialize, Debug)]
+#[serde(rename_all = "PascalCase")]
+pub struct Headers {
+    pub subject: String,
+    pub from: String,
+    pub to: Option<String>,
+    pub cc: Option<String>,
+    pub bcc: Option<String>,
+    // `PascalCase` alone would produce `ReplyTo`; the schema key is hyphenated.
+    #[serde(rename = "Reply-To")]
+    pub reply_to: Option<String>,
+    pub date: String,
+}
+
+/// A value the schema declares as `int|string` (used for a part's `id`).
+///
+/// The enum is untagged so that a bare JSON number or a bare JSON string is
+/// accepted directly; serde tries the variants in declaration order, so a
+/// number becomes `Int` and anything else that is a string becomes `String`.
+#[derive(Deserialize, Debug)]
+#[serde(untagged)]
+pub enum IntOrString {
+    Int(isize),
+    String(String),
+}
+
+#[derive(Deserialize, Debug)]
+pub struct Rfc822 { // one `{headers, body}` entry of a "message/rfc822" part's `content` list
+    pub headers: Headers, // headers of the embedded message
+    pub body: Vec<Part>, // MIME parts of the embedded message
+}
+#[derive(Deserialize, Debug)]
+#[serde(tag = "type")] // NOTE(review): the schema text above shows no `type` key on parts — confirm this tag before use
+pub enum Content { // body of a MIME part; not referenced by `Part` yet (its `content` field is commented out)
+    /// If content-type starts with "multipart/": the child parts.
+    Multipart(Vec<Part>),
+    /// If content-type is "message/rfc822": the embedded message entries.
+    Rfc822(Vec<Rfc822>),
+    /// Otherwise (leaf parts). NOTE(review): schema keys are hyphenated (e.g. `content-charset`) while these fields are snake_case — confirm renames.
+    Leaf {
+        filename: Option<String>,
+        content_charset: Option<String>,
+        /// A leaf part's body content is optional, but may be included if
+        /// it can be correctly encoded as a string.  Consumers should use
+        /// this in preference to fetching the part content separately.
+        content: Option<String>,
+        /// If a leaf part's body content is not included, the length of
+        /// the encoded content (in bytes) may be given instead.
+        content_length: Option<isize>,
+        /// If a leaf part's body content is not included, its transfer encoding
+        /// may be given.  Using this and the encoded content length, it is
+        /// possible for the consumer to estimate the decoded content length.
+        content_transfer_encoding: Option<String>,
+    },
+}
+
+/// A MIME part (format_part_sprinter).
+#[derive(Deserialize, Debug)]
+pub struct Part {
+    pub id: IntOrString, // part id (currently DFS part number)
+
+    pub encstatus: Option<EncStatus>, // optional in the schema (`encstatus?`)
+    pub sigstatus: Option<SigStatus>, // optional in the schema (`sigstatus?`)
+
+    #[serde(rename = "content-type")]
+    pub content_type: String,
+    #[serde(rename = "content-id")]
+    pub content_id: Option<String>,
+    // TODO: `pub content: Content,` is disabled for now, so this struct has no field for a part's body content.
+}
+
+/// Encryption status; schema: `encstatus = [{status: "good"|"bad"}]`. NOTE(review): that is a list of objects, not a string — confirm this alias.
+pub type EncStatus = String;
+
+/// Signature status (format_part_sigstatus_sprinter); schema: `sigstatus = [signature*]`.
+#[derive(Deserialize, Debug)]
+pub struct SigStatus(pub Vec<Signature>);
+/// One entry of a part's `sigstatus` list (signature_status_to_string).
+///
+/// The schema stores the kind of result in a lowercase `status` key
+/// (`"none"|"good"|"bad"|"error"|"unknown"`) that sits alongside the other
+/// keys of the same object.  The enum is therefore internally tagged on
+/// `status` with lowercase variant names; with serde's default (externally
+/// tagged, capitalised) representation such objects would be rejected.
+#[derive(Deserialize, Debug)]
+#[serde(tag = "status", rename_all = "lowercase")]
+pub enum Signature {
+    /// `status` is "good": the schema lists these keys for good signatures.
+    Good {
+        fingerprint: Option<String>,
+        created: Option<UnixTime>,
+        expires: Option<UnixTime>,
+        userid: Option<String>,
+    },
+    /// `status` is "none"; `keyid` is listed for every status other than "good".
+    None {
+        keyid: Option<String>,
+    },
+    /// `status` is "bad".
+    Bad {
+        keyid: Option<String>,
+    },
+    /// `status` is "unknown".
+    Unknown {
+        keyid: Option<String>,
+    },
+    /// `status` is "error"; `errors` is present if the signature has errors.
+    Error {
+        keyid: Option<String>,
+        errors: Option<isize>,
+    },
+}
+
+#[derive(Deserialize, Debug)]
+pub struct SearchSummary(pub Vec<ThreadSummary>); // `--output=summary`: search_summary = [thread_summary*]
+#[derive(Deserialize, Debug)]
+pub struct SearchThreads(pub Vec<ThreadId>); // `--output=threads`: search_threads = [threadid*]
+#[derive(Deserialize, Debug)]
+pub struct SearchMessages(pub Vec<MessageId>); // `--output=messages`: search_messages = [messageid*]
+#[derive(Deserialize, Debug)]
+pub struct SearchFiles(pub Vec<String>); // `--output=files`: search_files = [string*]
+#[derive(Deserialize, Debug)]
+pub struct SearchTags(pub Vec<String>); // `--output=tags`: search_tags = [string*]
+
+#[derive(Deserialize, Debug)]
+pub struct ThreadSummary { // one `thread_summary` entry of `notmuch search --output=summary`
+    pub thread: ThreadId,
+    pub timestamp: UnixTime,
+    pub date_relative: String, // user-friendly timestamp
+    /// Number of matched messages.
+    pub matched: isize,
+    /// Total messages in thread.
+    pub total: isize,
+    /// Comma-separated names with | between matched and unmatched.
+    pub authors: String,
+    /// The `subject` string of the summary.
+    pub subject: String,
+    pub tags: Vec<String>,
+
+    /// Two stable query strings identifying exactly the matched and unmatched messages currently
+    /// in this thread.  The messages matched by these queries will not change even if more
+    /// messages arrive in the thread.  If there are no matched or unmatched messages, the
+    /// corresponding query will be null (there is no query that matches nothing).  (Added in
+    /// schema version 2.)
+    pub query: (Option<String>, Option<String>),
+}
+
+// TODO(wathiede): notmuch reply schema
+
+/// Runs the `notmuch` executable with `args` and returns its raw stdout bytes.
+///
+/// # Errors
+///
+/// Returns the underlying [`std::io::Error`] if the process cannot be spawned
+/// or its output cannot be collected.  If `notmuch` exits unsuccessfully, an
+/// error of kind [`std::io::ErrorKind::Other`] carrying the exit status and
+/// the captured stderr is returned rather than handing back stdout from a
+/// failed run.
+pub fn run_notmuch<I, S>(args: I) -> std::io::Result<Vec<u8>>
+where
+    I: IntoIterator<Item = S>,
+    S: AsRef<OsStr>,
+{
+    // No `dbg!` here: it would write to the caller's stderr on every call.
+    let out = Command::new("notmuch").args(args).output()?;
+    if !out.status.success() {
+        return Err(std::io::Error::new(
+            std::io::ErrorKind::Other,
+            format!(
+                "notmuch exited with {}: {}",
+                out.status,
+                String::from_utf8_lossy(&out.stderr).trim_end()
+            ),
+        ));
+    }
+    Ok(out.stdout)
+}