superdeduper/src/lib.rs

514 lines
16 KiB
Rust

use std::collections::HashMap;
use std::env;
use std::ffi::OsStr;
use std::fmt;
use std::fmt::Display;
use std::fmt::Formatter;
use std::fs::File;
use std::io::BufReader;
use std::io::BufWriter;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use std::str::FromStr;
use failure::bail;
use failure::Error;
use failure::ResultExt;
use glob::glob;
use lazy_static::lazy_static;
use log::error;
use log::info;
use rayon::iter::ParallelBridge;
use rayon::prelude::ParallelIterator;
use serde::de;
use serde::de::Deserializer;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value;
/// A two-component size whose `Display` impl renders it as `{0}x{1}`.
///
/// NOTE(review): presumably `(width, height)` in pixels — confirm against callers.
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct Resolution(usize, usize);
impl Display for Resolution {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
let v = format!("{}x{}", self.0, self.1);
f.pad(&v)
}
}
fn option_from_str<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
where
T: FromStr,
T::Err: Display,
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
T::from_str(&s).map(Some).map_err(de::Error::custom)
}
fn from_str<'de, T, D>(deserializer: D) -> Result<T, D::Error>
where
T: FromStr,
T::Err: Display,
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
T::from_str(&s).map_err(de::Error::custom)
}
/// The `format` section of a [`Metadata`] record.
///
/// `bit_rate`, `duration` and `size` are expected to arrive as JSON *strings*
/// and are parsed into numbers by `from_str` during deserialization. The
/// derived `Serialize` impl writes them back as JSON numbers.
///
/// NOTE(review): presumably mirrors the `format` object printed by
/// `ffprobe -show_format -print_format json` — confirm.
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct Format {
// Parsed from a string by `from_str`.
#[serde(deserialize_with = "from_str")]
bit_rate: usize,
// Parsed from a string by `from_str`.
#[serde(deserialize_with = "from_str")]
duration: f32,
filename: String,
format_name: String,
// Parsed from a string by `from_str`.
#[serde(deserialize_with = "from_str")]
size: usize,
}
/// Free-form string key/value tags attached to a stream.
///
/// Serialized transparently as the inner map (single-field tuple struct).
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct Tags(HashMap<String, String>);
impl Tags {
fn title(&self) -> Option<String> {
self.0.get("title").map(|s| s.to_string())
}
fn language(&self) -> Option<String> {
self.0.get("language").map(|s| s.to_string())
}
}
// TODO(wathiede): make stream an enum with the tag type stored in codec_type?
/// One entry of the `streams` list of a [`Metadata`] record.
///
/// The enum is internally tagged: the variant is selected by the
/// `codec_type` field of the serialized object, whose value is one of
/// `"video"`, `"audio"`, `"subtitle"`, `"attachment"` or `"data"`.
#[derive(Clone, Deserialize, Debug, Serialize)]
#[serde(tag = "codec_type")]
pub enum Stream {
/// A stream whose `codec_type` is `"video"`.
#[serde(rename = "video")]
Video {
// Optional: a missing field becomes `None`; a present one is read as a
// string and parsed. Omitted from serialized output when `None`.
#[serde(default, deserialize_with = "option_from_str")]
#[serde(skip_serializing_if = "Option::is_none")]
bit_rate: Option<usize>,
codec_name: String,
codec_long_name: String,
coded_height: usize,
coded_width: usize,
display_aspect_ratio: String,
// A missing field falls back to `f32::default()` (0.0); a present one is
// read as a string and parsed.
#[serde(default, deserialize_with = "from_str")]
duration: f32,
height: usize,
width: usize,
tags: Option<Tags>,
},
/// A stream whose `codec_type` is `"audio"`.
#[serde(rename = "audio")]
Audio {
codec_name: String,
codec_long_name: String,
channels: usize,
channel_layout: String,
tags: Option<Tags>,
},
/// A stream whose `codec_type` is `"subtitle"`.
#[serde(rename = "subtitle")]
Subtitle {
codec_name: String,
codec_long_name: String,
tags: Option<Tags>,
},
/// A stream whose `codec_type` is `"attachment"`; no fields are retained.
#[serde(rename = "attachment")]
Attachment {},
/// A stream whose `codec_type` is `"data"`; no fields are retained.
#[serde(rename = "data")]
Data {},
}
impl Stream {
pub fn dimension(&self) -> Option<Resolution> {
None
}
}
/// Full metadata record for one media file: a `format` section plus the list
/// of its `streams`.
///
/// NOTE(review): presumably the top-level object printed by
/// `ffprobe -show_format -show_streams -print_format json` — confirm.
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct Metadata {
format: Format,
streams: Vec<Stream>,
}
impl Metadata {
pub fn dimension(&self) -> Option<Resolution> {
None
}
pub fn duration(&self) -> f32 {
self.format.duration
}
pub fn size(&self) -> usize {
self.format.size
}
}
/// Condensed description of one video stream.
///
/// Built by `MovieLibrary::compact_metadata` from a `Stream::Video`:
/// `short_name`/`long_name` come from `codec_name`/`codec_long_name`, and
/// `title`/`language` from the stream's tags.
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct VideoFormat {
short_name: String,
long_name: String,
height: usize,
width: usize,
// Left out of the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
title: Option<String>,
// Left out of the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
language: Option<String>,
}
/// Condensed description of one audio stream.
///
/// Built by `MovieLibrary::compact_metadata` from a `Stream::Audio`:
/// `short_name`/`long_name` come from `codec_name`/`codec_long_name`, and
/// `title`/`language` from the stream's tags.
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct AudioFormat {
short_name: String,
long_name: String,
channels: usize,
channel_layout: String,
// Left out of the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
title: Option<String>,
// Left out of the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
language: Option<String>,
}
/// Condensed description of one subtitle stream.
///
/// Built by `MovieLibrary::compact_metadata` from a `Stream::Subtitle`:
/// `short_name`/`long_name` come from `codec_name`/`codec_long_name`, and
/// `title`/`language` from the stream's tags.
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct SubtitleFormat {
short_name: String,
long_name: String,
// Left out of the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
title: Option<String>,
// Left out of the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
language: Option<String>,
}
/// Condensed per-file summary written by `MovieLibrary::compact_metadata`.
///
/// The scalar fields are copied from the file's `Format` section and the
/// three lists hold one entry per video, audio and subtitle stream.
///
/// `bit_rate`, `duration` and `size` are plain numbers on both sides of the
/// wire: the derived `Serialize` impl emits JSON numbers, so the derived
/// `Deserialize` impl must accept numbers too. (Routing them through the
/// string-parsing `from_str` helper, as `Format` does for its string-typed
/// input, would make this type unable to read back its own output.)
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct CompactMetadata {
    bit_rate: usize,
    duration: f32,
    filename: String,
    format_name: String,
    size: usize,
    video: Vec<VideoFormat>,
    audio: Vec<AudioFormat>,
    subtitle: Vec<SubtitleFormat>,
}
/// In-memory form of a `metadata.json` file.
///
/// Because of `#[serde(flatten)]` the file's top-level JSON object *is* the
/// map: every key maps to one [`Metadata`] record. `MovieLibrary::update_metadata`
/// writes these keys as video paths relative to the library root.
#[derive(Deserialize, Debug, Serialize)]
pub struct MetadataFile {
#[serde(flatten)]
pub metadata: HashMap<String, Metadata>,
}
/// Handle to a movie collection stored under a single root directory.
pub struct MovieLibrary {
// Root directory as given to `MovieLibrary::new`; used both to build file
// paths and as the prefix of glob patterns.
root: String,
}
/// Runs `ffprobe` on `path` and returns its JSON report as a string.
///
/// The command is logged at info level under the `json` target before it is
/// executed. `path` is handed to `ffprobe` unchanged, so a relative path is
/// resolved against the process's current working directory.
///
/// # Errors
///
/// Fails when the process cannot be spawned, when it exits unsuccessfully
/// (the error text then contains the command, exit status, stdout and
/// stderr), or when its stdout is not valid UTF-8.
fn json_metadata_for_path<P: AsRef<OsStr>>(path: P) -> Result<String, Error> {
    // TODO(wathiede): maybe add "-select_streams v"
    const FFPROBE_FLAGS: [&str; 7] = [
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_format",
        "-show_error",
        "-show_streams",
    ];
    let mut cmd = Command::new("ffprobe");
    cmd.args(&FFPROBE_FLAGS).arg(path);
    info!(target: "json", "cmd {:?}", cmd);
    let output = cmd.output()?;
    if !output.status.success() {
        bail!(
            "{:?} exit status {}:\nSTDOUT: {}\nSTDERR: {}",
            cmd,
            output.status,
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr)
        );
    }
    let report = String::from_utf8(output.stdout)?;
    Ok(report)
}
// File extensions (lowercase, without the dot) that `iter_video_files` treats
// as movies; candidates are lowercased before being compared against this list.
lazy_static! {
static ref MOVIE_EXTS: Vec<&'static str> = vec!["avi", "m4v", "mkv", "mov", "mp4"];
}
impl MovieLibrary {
pub fn new<S: Into<String>>(root: S) -> MovieLibrary {
MovieLibrary { root: root.into() }
}
pub fn compact_metadata(&self) -> Result<(), Error> {
let mdf = read_metadata_from_file(Path::new(&self.root).join("metadata.json"))?;
info!("Read metadata, {} videos found", mdf.metadata.len());
let metadata: HashMap<String, CompactMetadata> = mdf
.metadata
.into_iter()
.map(|(path, Metadata { format, streams })| (path, Metadata { format, streams }))
.map(|(path, md)| {
let video = md
.streams
.iter()
.filter_map(|s| {
if let Stream::Video {
codec_name,
codec_long_name,
height,
width,
tags,
..
} = s
{
Some(VideoFormat {
short_name: codec_name.to_string(),
long_name: codec_long_name.to_string(),
height: *height,
width: *width,
title: tags.as_ref().and_then(|t| t.title()),
language: tags.as_ref().and_then(|t| t.language()),
})
} else {
None
}
})
.collect();
let audio = md
.streams
.iter()
.filter_map(|s| {
if let Stream::Audio {
codec_name,
codec_long_name,
channels,
channel_layout,
tags,
..
} = s
{
Some(AudioFormat {
short_name: codec_name.to_string(),
long_name: codec_long_name.to_string(),
channels: *channels,
channel_layout: channel_layout.to_string(),
title: tags.as_ref().and_then(|t| t.title()),
language: tags.as_ref().and_then(|t| t.language()),
})
} else {
None
}
})
.collect();
let subtitle = md
.streams
.iter()
.filter_map(|s| {
if let Stream::Subtitle {
codec_name,
codec_long_name,
tags,
..
} = s
{
Some(SubtitleFormat {
short_name: codec_name.to_string(),
long_name: codec_long_name.to_string(),
title: tags.as_ref().and_then(|t| t.title()),
language: tags.as_ref().and_then(|t| t.language()),
})
} else {
None
}
})
.collect();
(
path,
CompactMetadata {
bit_rate: md.format.bit_rate,
duration: md.format.duration,
filename: md.format.filename,
format_name: md.format.format_name,
size: md.format.size,
video,
audio,
subtitle,
},
)
})
.collect();
let f = File::create(Path::new(&self.root).join("metadata.compact.json"))?;
let f = BufWriter::new(f);
Ok(serde_json::ser::to_writer_pretty(f, &metadata)?)
}
pub fn update_metadata(&self) -> Result<(), Error> {
let path = Path::new(&self.root).join("metadata.json");
// Open the file in read-only mode with buffer.
let f = File::open(&path).context(format!("open {}", path.display()))?;
let r = BufReader::new(f);
// Read the JSON contents of the file as an instance of `User`.
let old_metadata: HashMap<String, Value> = serde_json::from_reader(r)
.context(format!("serde_json::from_reader {}", path.display()))?;
info!("Read metadata, {} videos found", old_metadata.len());
let mut metadata: HashMap<_, _> = self
.iter_video_files()
.filter(|r| r.is_ok())
.filter(|r| {
let path = r
.as_ref()
.unwrap()
.strip_prefix(&self.root)
.unwrap()
.to_str()
.unwrap()
.to_owned();
!old_metadata.contains_key(&path)
})
.par_bridge()
.filter_map(move |path| {
env::set_current_dir(&self.root).unwrap();
let path: PathBuf = path.unwrap().into();
let path = path.strip_prefix(&self.root).unwrap();
match json_metadata_for_path(&path) {
Ok(json) => {
info!("{}", path.display());
Some((path.to_string_lossy().into_owned(), json))
}
Err(e) => {
error!("{}", e);
None
}
}
})
.map(|(path, json)| (path, serde_json::from_str::<Value>(&json).unwrap()))
.collect();
info!("Adding {} new videos", metadata.len());
metadata.extend(old_metadata);
let f = File::create(Path::new(&self.root).join("metadata.json"))?;
let f = BufWriter::new(f);
serde_json::ser::to_writer_pretty(f, &metadata)?;
Ok(())
}
fn iter_video_files(&self) -> impl Send + Iterator<Item = Result<PathBuf, glob::GlobError>> {
glob(&format!("{}/*/*", self.root)).unwrap().filter(|path| {
let path = path.as_ref().unwrap();
match path.extension() {
Some(ext) => {
let ext: &str = &ext.to_str().unwrap().to_lowercase();
if !MOVIE_EXTS.contains(&ext) {
return false;
}
}
None => return false,
}
return true;
})
}
pub fn movies(&self, include_stale: bool) -> Result<(HashMap<String, Metadata>), Error> {
let mut movies = HashMap::new();
for md in glob(&format!("{}/*/metadata.json", self.root))? {
let path = md?;
let mdf = read_metadata_from_file(&path)?;
for (name, md) in mdf.metadata {
if include_stale {
movies.insert(name, md);
} else {
// Filter out files that don't exist
let mut p = PathBuf::from(&self.root);
p.push(&name);
if p.is_file() {
movies.insert(name, md);
}
}
}
}
Ok(movies)
}
}
/// Loads and parses the metadata JSON file at `path`.
///
/// # Errors
///
/// Fails when the file cannot be opened or when its contents do not
/// deserialize into a `MetadataFile`; both errors carry the offending path
/// as context.
fn read_metadata_from_file<P: AsRef<Path>>(path: P) -> Result<MetadataFile, Error> {
    let path = path.as_ref();
    let open_context = format!("open {}", path.display());
    let file = File::open(path).context(open_context)?;
    // Buffer the reads; the parser pulls from the reader in small pieces.
    let reader = BufReader::new(file);
    let parse_context = format!("serde_json::from_reader {}", path.display());
    let parsed = serde_json::from_reader(reader).context(parse_context)?;
    Ok(parsed)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Path of the crate's `testdata` directory, derived from the
    /// `CARGO_MANIFEST_DIR` environment variable.
    fn testdata_dir() -> String {
        let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
        format!("{}/testdata", manifest_dir)
    }

    /// The checked-in `Movies/metadata.json` fixture parses and holds the
    /// expected number of records.
    #[test]
    fn test_read_full_metadata() {
        let fixture = Path::new(&testdata_dir()).join("Movies/metadata.json");
        let mdf = read_metadata_from_file(fixture).expect("failed to read metadata");
        assert_eq!(mdf.metadata.len(), 1214);
    }

    // The two tests below are currently disabled.
    /*
    #[test]
    fn test_movies() {
    let lib = MovieLibrary::new(format!("{}/Movies", testdata_dir()));
    let movies = lib.movies(true).expect("failed to get movies");
    let mut got = movies.keys().collect::<Vec<_>>();
    got.sort();
    let want = [
    "Aladdin (1992)/Aladdin.1992.720p.BRrip.x264.GAZ.YIFY.mp4",
    "Aladdin (2019)/4fe12adfdf4b4e9daa4f1366452d3431.mkv",
    "Higher Learning/Higher Learning CD1.avi",
    "Higher Learning/Higher Learning CD2.avi",
    "J0hn W1ck (2014)/J0hn W1ck (2014) m720p x264 aac.m4v",
    "J0hn W1ck (2014)/J0hn W1ck (2014) m720p x264 aac.sample.m4v",
    "Stale Sample (2019)/Stale Sample (2019) m720p x264 aac.sample.m4v",
    "The Hudsucker Proxy (1994)/54151c3b9a2a4773958f848efecefc3b.mkv",
    "The Hudsucker Proxy (1994)/The Hudsucker Proxy CD1.avi",
    "The Hudsucker Proxy (1994)/The Hudsucker Proxy CD2.avi",
    ];
    assert_eq!(got, want);
    }
    */
    /*
    #[test]
    fn test_filter_stale() {
    let lib = MovieLibrary::new(format!("{}/Movies", testdata_dir()));
    let movies = lib.movies(false).expect("failed to get movies");
    let mut got = movies.keys().collect::<Vec<_>>();
    got.sort();
    let want = [
    "Aladdin (1992)/Aladdin.1992.720p.BRrip.x264.GAZ.YIFY.mp4",
    "Aladdin (2019)/4fe12adfdf4b4e9daa4f1366452d3431.mkv",
    "Higher Learning/Higher Learning CD1.avi",
    "Higher Learning/Higher Learning CD2.avi",
    "J0hn W1ck (2014)/J0hn W1ck (2014) m720p x264 aac.m4v",
    "J0hn W1ck (2014)/J0hn W1ck (2014) m720p x264 aac.sample.m4v",
    "The Hudsucker Proxy (1994)/54151c3b9a2a4773958f848efecefc3b.mkv",
    "The Hudsucker Proxy (1994)/The Hudsucker Proxy CD1.avi",
    "The Hudsucker Proxy (1994)/The Hudsucker Proxy CD2.avi",
    ];
    assert_eq!(got, want);
    }
    */
}