use std::cmp::Ordering; use std::collections::HashMap; use std::collections::HashSet; use std::env; use std::ffi::OsStr; use std::fmt; use std::fmt::Display; use std::fmt::Formatter; use std::fs::File; use std::io::BufReader; use std::io::BufWriter; use std::path::Path; use std::path::PathBuf; use std::process::Command; use std::str::FromStr; use failure::bail; use failure::Error; use failure::ResultExt; use glob::glob; use lazy_static::lazy_static; use log::error; use log::info; use rayon::iter::ParallelBridge; use rayon::prelude::ParallelIterator; use regex::Regex; use serde::de; use serde::de::Deserializer; use serde::Deserialize; use serde::Serialize; use serde_json::Value; const FULL_METADATA_FILENAME: &str = "metadata.json"; const COMPACT_METADATA_FILENAME: &str = "metadata.compact.json"; pub fn clean_path_parent>(path: P) -> PathBuf { let path = path.as_ref(); let path = path.parent().unwrap(); let path = path.to_str().unwrap(); PathBuf::from(path) } #[derive(Clone, Deserialize, Debug, Eq, PartialEq, Serialize)] pub struct Resolution(usize, usize); impl From<(usize, usize)> for Resolution { fn from(res: (usize, usize)) -> Self { Resolution(res.0, res.1) } } impl Display for Resolution { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let v = format!("{}x{}", self.0, self.1); f.pad(&v) } } impl Ord for Resolution { fn cmp(&self, other: &Resolution) -> Ordering { let pixels = self.0.checked_mul(self.1).unwrap_or(usize::max_value()); let other_pixels = other.0.checked_mul(other.1).unwrap_or(usize::max_value()); pixels.cmp(&other_pixels) } } impl PartialOrd for Resolution { fn partial_cmp(&self, other: &Resolution) -> Option { Some(self.cmp(other)) } } fn option_from_str<'de, T, D>(deserializer: D) -> Result, D::Error> where T: FromStr, T::Err: Display, D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; T::from_str(&s).map(Some).map_err(de::Error::custom) } fn from_str<'de, T, D>(deserializer: D) -> Result where T: FromStr, T::Err: Display, D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; T::from_str(&s).map_err(de::Error::custom) } fn collapse_multidisc(names: &Vec) -> HashMap> { lazy_static! { static ref DIGIT: Regex = Regex::new("[0-9]").unwrap(); } let mut set = HashMap::new(); for name in names { let clean = DIGIT.replace_all(&name, "#").to_string(); set.entry(clean) .or_insert(Vec::new()) .push(name.to_string()); } set } pub fn is_multidisc(names: &Vec) -> bool { let set = collapse_multidisc(names); set.len() == 1 } #[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] struct Format { #[serde(deserialize_with = "from_str")] bit_rate: usize, #[serde(deserialize_with = "from_str")] duration: f32, filename: String, format_name: String, #[serde(deserialize_with = "from_str")] size: usize, } #[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] pub struct Tags(HashMap); impl Tags { fn title(&self) -> Option { self.0.get("title").map(|s| s.to_string()) } fn language(&self) -> Option { self.0.get("language").map(|s| s.to_string()) } } #[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] #[serde(tag = "codec_type")] enum Stream { #[serde(rename = "video")] Video { #[serde(default, deserialize_with = "option_from_str")] #[serde(skip_serializing_if = "Option::is_none")] bit_rate: Option, codec_name: String, codec_long_name: String, coded_height: usize, coded_width: usize, display_aspect_ratio: Option, #[serde(default, deserialize_with = "from_str")] duration: f32, height: usize, width: usize, tags: Option, }, #[serde(rename = "audio")] Audio { codec_name: String, codec_long_name: String, channels: usize, channel_layout: Option, tags: Option, }, #[serde(rename = "subtitle")] Subtitle { codec_name: String, codec_long_name: String, tags: Option, }, #[serde(rename = "attachment")] Attachment {}, #[serde(rename = "data")] Data {}, } #[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] struct Metadata { format: Format, streams: Vec, } #[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] pub struct VideoFormat { short_name: String, long_name: String, height: usize, width: usize, #[serde(skip_serializing_if = "Option::is_none")] title: Option, #[serde(skip_serializing_if = "Option::is_none")] language: Option, } #[cfg(test)] impl Default for VideoFormat { fn default() -> Self { VideoFormat { short_name: "UNNAMED_SHORT".to_string(), long_name: "UNNAMED_LONG".to_string(), height: 0, width: 0, title: None, language: None, } } } #[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] pub struct AudioFormat { short_name: String, long_name: String, channels: usize, #[serde(skip_serializing_if = "Option::is_none")] channel_layout: Option, #[serde(skip_serializing_if = "Option::is_none")] title: Option, #[serde(skip_serializing_if = "Option::is_none")] language: Option, } #[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] pub struct SubtitleFormat { short_name: String, long_name: String, #[serde(skip_serializing_if = "Option::is_none")] title: Option, #[serde(skip_serializing_if = "Option::is_none")] language: Option, } #[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] pub struct CompactMetadata { pub bit_rate: usize, pub duration: f32, filename: String, format_name: String, pub size: usize, video: Vec, audio: Vec, subtitle: Vec, } impl CompactMetadata { pub fn largest_dimension(&self) -> Option { if self.video.is_empty() { return None; } Some(self.video.iter().fold( Resolution(0, 0), |acc, VideoFormat { width, height, .. }| { if acc.0 * acc.1 < width * height { Resolution(*width, *height) } else { acc } }, )) } } #[cfg(test)] impl Default for CompactMetadata { fn default() -> Self { CompactMetadata { bit_rate: 0, duration: 0., filename: "UNSET".to_string(), format_name: "UNKNOWN".to_string(), size: 0, video: Vec::new(), audio: Vec::new(), subtitle: Vec::new(), } } } #[derive(Deserialize, Debug, PartialEq, Serialize)] pub struct MetadataFile { #[serde(flatten)] metadata: HashMap, } #[derive(PartialEq, Debug)] pub struct MovieLibrary { pub root: String, } fn json_metadata_for_path + AsRef>(path: P) -> Result { let mut cmd = Command::new("ffprobe"); cmd.args(&[ "-v", "quiet", "-print_format", "json", "-show_format", "-show_error", "-show_streams", "-i", ]) .arg(Path::new("./").join(path)); info!(target: "json", "cmd {:?}", cmd); let output = cmd.output()?; if output.status.success() { return Ok(String::from_utf8(output.stdout)?); } bail!( "{:?} exit status {}:\nSTDOUT: {}\nSTDERR: {}", cmd, output.status, String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr) ) } lazy_static! { static ref MOVIE_EXTS: Vec<&'static str> = vec!["avi", "m4v", "mkv", "mov", "mp4"]; } #[derive(Default, Debug, PartialEq)] pub struct Movie { pub files: Vec<(String, CompactMetadata)>, } impl Movie { fn min_bit_rate(&self) -> Option { if self.files.is_empty() { None } else { Some(self.files.iter().fold(usize::max_value(), |acc, (_, cmd)| { std::cmp::min(acc, cmd.bit_rate) })) } } } impl Display for Movie { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let p = &self.files.first().unwrap().0; write!(f, "{}", &p[..p.find("/").unwrap()])?; for (path, cmd) in &self.files { write!( f, " {} {}", &path[path.rfind("/").unwrap()..], cmd.largest_dimension().unwrap() )?; } Ok(()) } } #[derive(Debug, PartialEq)] pub struct Movies { movies: Vec, } impl Movies { /// Find all movies with multiple copies. The returned vec contains a tuple of (Movie to keep, /// One or more Movies to remove). The highest bit rate movie is kept. /// Movies with differing years are considered distinct movies. /// If there is a yearless movie and one or more movies with a year exist, then the yearless /// movie will be removed pub fn duplicate_candidates(&self) -> Vec<(&Movie, Vec<&Movie>)> { let date_re = Regex::new(r"\(\d{4}\)$").unwrap(); let mut movie_counter = HashMap::new(); let mut movies_without_date_counter = HashMap::new(); for m in &self.movies { let (path, _cmd) = m.files.first().unwrap(); let parent = clean_path_parent(path) .to_string_lossy() .to_ascii_lowercase() .to_string(); if date_re.is_match(&parent) { movie_counter.entry(parent).or_insert(Vec::new()).push(m); } else { movies_without_date_counter .entry(parent) .or_insert(Vec::new()) .push(m); } } let mut dupes: Vec<(&Movie, Vec<&Movie>)> = Vec::new(); for (parent, movies) in movie_counter.iter() { let dateless_parent = parent[..parent.len() - 7].to_string(); if let Some(movie) = movies_without_date_counter.remove(&dateless_parent) { let tuple = (movies[0], movie); dupes.push(tuple); } } for (_parent, mut movies) in movie_counter.into_iter() { if movies.len() > 1 { // Sort, lowest bit_rate movie first movies.sort_by(|a, b| a.min_bit_rate().cmp(&b.min_bit_rate())); // Flip order, we care about the largest. movies.reverse(); // Take the largest image, return the rest for removal. let tuple = (movies.remove(0), movies); dupes.push(tuple); } } // Sort to make this function deterministic. dupes.sort_by(|(a_keep, _), (b_keep, _)| { a_keep .files .first() .unwrap() .0 .partial_cmp(&b_keep.files.first().unwrap().0) .unwrap() }); dupes } } fn movies_from_paths_compact_metadata(mut p_cmd: HashMap) -> Movies { let multidisc = collapse_multidisc(&p_cmd.keys().map(|s| s.to_string()).collect()); let movies = multidisc .into_iter() .map(|(_hash, names)| { let mut files: Vec<(String, CompactMetadata)> = names .iter() .map(|name| (name.to_string(), p_cmd.remove(name).unwrap())) .collect(); files.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); Movie { files } }) .collect(); let mut m = Movies { movies }; m.movies.sort_by(|a, b| { a.files .first() .unwrap() .0 .partial_cmp(&b.files.first().unwrap().0) .unwrap() }); m } impl MovieLibrary { pub fn new>(root: S) -> MovieLibrary { MovieLibrary { root: root.into() } } pub fn compact_metadata(&self) -> Result<(), Error> { let path = Path::new(&self.root).join(FULL_METADATA_FILENAME); // Open the file in read-only mode with buffer. let f = File::open(&path).context(format!("open {}", path.display()))?; let r = BufReader::new(f); let mdf: MetadataFile = serde_json::from_reader(r) .context(format!("serde_json::from_reader {}", path.display()))?; info!("Read metadata, {} videos found", mdf.metadata.len()); let metadata: HashMap = mdf .metadata .into_iter() .map(|(path, Metadata { format, streams })| (path, Metadata { format, streams })) .map(|(path, md)| { let video = md .streams .iter() .filter_map(|s| { if let Stream::Video { codec_name, codec_long_name, height, width, tags, .. } = s { Some(VideoFormat { short_name: codec_name.to_string(), long_name: codec_long_name.to_string(), height: *height, width: *width, title: tags.as_ref().and_then(|t| t.title()), language: tags.as_ref().and_then(|t| t.language()), }) } else { None } }) .collect(); let audio = md .streams .iter() .filter_map(|s| { if let Stream::Audio { codec_name, codec_long_name, channels, channel_layout, tags, .. } = s { Some(AudioFormat { short_name: codec_name.to_string(), long_name: codec_long_name.to_string(), channels: *channels, channel_layout: channel_layout.clone(), title: tags.as_ref().and_then(|t| t.title()), language: tags.as_ref().and_then(|t| t.language()), }) } else { None } }) .collect(); let subtitle = md .streams .iter() .filter_map(|s| { if let Stream::Subtitle { codec_name, codec_long_name, tags, .. } = s { Some(SubtitleFormat { short_name: codec_name.to_string(), long_name: codec_long_name.to_string(), title: tags.as_ref().and_then(|t| t.title()), language: tags.as_ref().and_then(|t| t.language()), }) } else { None } }) .collect(); ( path, CompactMetadata { bit_rate: md.format.bit_rate, duration: md.format.duration, filename: md.format.filename, format_name: md.format.format_name, size: md.format.size, video, audio, subtitle, }, ) }) .collect(); let f = File::create(Path::new(&self.root).join(COMPACT_METADATA_FILENAME))?; let f = BufWriter::new(f); Ok(serde_json::ser::to_writer_pretty(f, &metadata)?) } pub fn update_metadata(&self) -> Result, Error> { let path = Path::new(&self.root).join(FULL_METADATA_FILENAME); let mut old_metadata: HashMap = match File::open(&path) { Ok(f) => { let r = BufReader::new(f); serde_json::from_reader(r)? } Err(e) => { error!("Failed to open {}: {}", path.display(), e); HashMap::new() } }; info!("Read metadata, {} videos found", old_metadata.len()); // Filter out stale metadata (where the file no longer exists). let old_metadata: HashMap = self .iter_video_files() .filter(|r| r.is_ok()) .filter_map(|r| { let path = r .as_ref() .unwrap() .strip_prefix(&self.root) .unwrap() .to_str() .unwrap() .to_owned(); match old_metadata.remove(&path) { Some(v) => Some((path, v)), None => None, } }) .collect(); info!( "After removing stale metadata, {} videos found", old_metadata.len() ); let mut metadata: HashMap<_, _> = self .iter_video_files() .filter(|r| r.is_ok()) .filter(|r| { let path = r .as_ref() .unwrap() .strip_prefix(&self.root) .unwrap() .to_str() .unwrap() .to_owned(); !old_metadata.contains_key(&path) }) .par_bridge() .filter_map(move |path| { env::set_current_dir(&self.root).unwrap(); let path: PathBuf = path.unwrap().into(); let path = path.strip_prefix(&self.root).unwrap(); match json_metadata_for_path(&path) { Ok(json) => { info!("{}", path.display()); Some((path.to_string_lossy().into_owned(), json)) } Err(e) => { error!("{}", e); None } } }) .map(|(path, json)| (path, serde_json::from_str::(&json).unwrap())) .collect(); let new_videos = metadata.keys().cloned().collect(); info!("Adding {} new videos", metadata.len()); metadata.extend(old_metadata); let f = File::create(Path::new(&self.root).join(FULL_METADATA_FILENAME))?; let f = BufWriter::new(f); serde_json::ser::to_writer_pretty(f, &metadata)?; Ok(new_videos) } fn iter_video_files(&self) -> impl Send + Iterator> { glob(&format!("{}/*/*", self.root)).unwrap().filter(|path| { let path = path.as_ref().unwrap(); match path.extension() { Some(ext) => { let ext: &str = &ext.to_str().unwrap().to_lowercase(); if !MOVIE_EXTS.contains(&ext) { return false; } } None => return false, } return true; }) } pub fn movies(&self) -> Result { let path = Path::new(&self.root).join(COMPACT_METADATA_FILENAME); let f = File::open(&path).context(format!("open {}", path.display()))?; let r = BufReader::new(f); let p_cmd: HashMap = serde_json::from_reader(r) .context(format!("serde_json::from_reader {}", path.display()))?; Ok(movies_from_paths_compact_metadata(p_cmd)) } pub fn videos(&self) -> Result, Error> { let path = Path::new(&self.root).join(COMPACT_METADATA_FILENAME); let f = File::open(&path).context(format!("open {}", path.display()))?; let r = BufReader::new(f); Ok(serde_json::from_reader(r) .context(format!("serde_json::from_reader {}", path.display()))?) } pub fn empty_dirs(&self) -> Result, Error> { let mut all_dirs = HashSet::new(); let root_len = self.root.len() + 1; // +1 for trailing slash for de in Path::new(&self.root).read_dir()? { let de = de?; if de.metadata()?.is_dir() { let path = de.path().to_string_lossy().to_string(); all_dirs.insert(path[root_len..].to_string()); } } for path in self.videos()?.keys() { let dir = match path.find("/") { Some(idx) => path[..idx].to_string(), None => path.to_string(), }; all_dirs.remove(&dir); } let mut empty_dirs: Vec<_> = all_dirs.into_iter().collect(); empty_dirs.sort(); Ok(empty_dirs) } } #[cfg(test)] mod movielibrary_test; #[cfg(test)] mod tests { use super::*; #[test] fn largest_dimension() { let md = CompactMetadata { ..Default::default() }; assert_eq!(md.largest_dimension(), None); let md = CompactMetadata { video: vec![ VideoFormat { height: 3, width: 4, ..Default::default() }, VideoFormat { width: 640, height: 480, ..Default::default() }, ], ..Default::default() }; assert_eq!(md.largest_dimension(), Some(Resolution(640, 480))); let md = CompactMetadata { video: vec![ VideoFormat { width: 640, height: 480, ..Default::default() }, VideoFormat { height: 3, width: 4, ..Default::default() }, ], ..Default::default() }; assert_eq!(md.largest_dimension(), Some(Resolution(640, 480))); } #[test] fn test_multidisc() { // Empty set is not a multidisc set. assert!(!is_multidisc(&vec![])); assert!(is_multidisc(&vec![ "Unbearable.Lightness.Of.Being Part 1.avi".to_string(), "Unbearable.Lightness.Of.Being Part 2.avi".to_string(), "Unbearable.Lightness.Of.Being Part 3.avi".to_string(), ])); assert!(!is_multidisc(&vec![ "Scent Of A Woman 1992 DvDrip[Eng]-greenbud1969.avi".to_string(), "Scent.Of.A.Woman.1992.1080p.BluRay.x264.AC3.mp4".to_string(), ])); } }