From 974d9386fb84884d617168f5ee81be8c38a3f9f3 Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Mon, 25 Nov 2019 08:17:37 -0800 Subject: [PATCH] Fix bug computing highest resolution video. Update Command::PrintDupes. --- src/lib.rs | 98 ++++++++++++++++++++++++++++++++++++++-- src/main.rs | 80 ++++++++++---------------------- src/movielibrary_test.rs | 70 +++++++++++++++++++++++++++- 3 files changed, 187 insertions(+), 61 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c44e7a7..8c8a0b4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +use std::cmp::Ordering; use std::collections::HashMap; use std::env; use std::ffi::OsStr; @@ -31,7 +32,20 @@ use serde_json::Value; const FULL_METADATA_FILENAME: &str = "metadata.json"; const COMPACT_METADATA_FILENAME: &str = "metadata.compact.json"; -#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] +pub fn clean_path_parent>(path: P) -> PathBuf { + let path = path.as_ref(); + let path = path.parent().unwrap(); + let path = path.to_str().unwrap(); + /* + // Strip year + if path.ends_with(')') { + path = &path[..path.len() - 7]; + } + */ + PathBuf::from(path) +} + +#[derive(Clone, Deserialize, Debug, Eq, PartialEq, Serialize)] pub struct Resolution(usize, usize); impl From<(usize, usize)> for Resolution { fn from(res: (usize, usize)) -> Self { @@ -46,6 +60,20 @@ impl Display for Resolution { } } +impl Ord for Resolution { + fn cmp(&self, other: &Resolution) -> Ordering { + let pixels = self.0.checked_mul(self.1).unwrap_or(usize::max_value()); + let other_pixels = other.0.checked_mul(other.1).unwrap_or(usize::max_value()); + pixels.cmp(&other_pixels) + } +} + +impl PartialOrd for Resolution { + fn partial_cmp(&self, other: &Resolution) -> Option { + Some(self.cmp(other)) + } +} + fn option_from_str<'de, T, D>(deserializer: D) -> Result, D::Error> where T: FromStr, @@ -291,8 +319,37 @@ lazy_static! { } #[derive(Default, Debug, PartialEq)] -struct Movie { - files: Vec<(String, CompactMetadata)>, +pub struct Movie { + pub files: Vec<(String, CompactMetadata)>, +} + +impl Movie { + fn min_resolution(&self) -> Option { + if self.files.is_empty() { + None + } else { + Some(self.files.iter().fold( + Resolution(usize::max_value(), usize::max_value()), + |acc, (_, cmd)| std::cmp::min(acc, cmd.largest_dimension().unwrap()), + )) + } + } +} + +impl Display for Movie { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let p = &self.files.first().unwrap().0; + write!(f, "{}", &p[..p.find("/").unwrap()])?; + for (path, cmd) in &self.files { + write!( + f, + " {} {}", + &path[path.rfind("/").unwrap()..], + cmd.largest_dimension().unwrap() + )?; + } + Ok(()) + } } #[derive(Debug, PartialEq)] @@ -300,6 +357,41 @@ pub struct Movies { movies: Vec, } +impl Movies { + /// Find all movies with multiple copies. The returned vec contains a tuple of (Movie to keep, + /// One or more Movies to remove). The highest resolution movie is kept, TODO(wathiede): with + /// higher bit rate breaking ties. + pub fn duplicate_candidates(&self) -> Vec<(&Movie, Vec<&Movie>)> { + let mut movie_counter = HashMap::new(); + for m in &self.movies { + let (path, _cmd) = m.files.first().unwrap(); + let parent = clean_path_parent(path).to_string_lossy().to_string(); + movie_counter.entry(parent).or_insert(Vec::new()).push(m); + } + let mut dupes = Vec::new(); + for (_parent, mut movies) in movie_counter.into_iter() { + if movies.len() > 1 { + // Sort, smallest movie first. + movies.sort_by(|a, b| a.min_resolution().cmp(&b.min_resolution())); + // Flip order, we care about the largest. + movies.reverse(); + // Take the largest image, return the rest for removal. + let tuple = (movies.remove(0), movies); + dupes.push(tuple); + } + } + for d in &dupes { + let (biggest, deletes) = d; + eprintln!("biggest: {}", biggest); + for (i, delete) in deletes.iter().enumerate() { + eprintln!("{}. delete: {}", i + 1, delete); + } + } + + dupes + } +} + fn movies_from_paths_compact_metadata(mut p_cmd: HashMap) -> Movies { let multidisc = collapse_multidisc(&p_cmd.keys().map(|s| s.to_string()).collect()); let movies = multidisc diff --git a/src/main.rs b/src/main.rs index cd4a54a..45b02e0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,6 @@ use std::collections::HashMap; use std::error::Error; use std::io::Write; -use std::path::Path; -use std::path::PathBuf; use std::time::Duration; use human_format::Formatter; @@ -14,26 +12,13 @@ use regex::Regex; use structopt::StructOpt; use tabwriter::TabWriter; -use superdeduper::is_multidisc; +use superdeduper::clean_path_parent; use superdeduper::CompactMetadata; use superdeduper::MovieLibrary; const MOVIE_DIR: &str = "/home/wathiede/Movies"; const TO_BE_REMOVED_DIR: &str = "/storage/media/to-be-deleted/"; -fn clean_path_parent>(path: P) -> PathBuf { - let path = path.as_ref(); - let path = path.parent().unwrap(); - let path = path.to_str().unwrap(); - /* - // Strip year - if path.ends_with(')') { - path = &path[..path.len() - 7]; - } - */ - PathBuf::from(path) -} - lazy_static! { static ref CLEAN_TITLE_CHARS: Regex = Regex::new("[^ 0-9[:alpha:]]").unwrap(); } @@ -47,55 +32,38 @@ fn normalize(path: &str) -> String { lazy_static! { static ref YEAR_SUFFIX: Regex = Regex::new(r" \d{4}$").unwrap(); } -fn print_dupes(videos: HashMap) { - let mut video_groups: HashMap> = HashMap::new(); - for (name, md) in videos.into_iter() { - let clean_name = normalize(clean_path_parent(&name).to_str().unwrap()); - let paths = video_groups.entry(clean_name).or_insert(Vec::new()); - paths.push((name.to_string(), md)); - } - let mut names = video_groups.keys().collect::>(); - names.sort(); - - for name in &names { - if YEAR_SUFFIX.is_match(&name) { - let yearless = &name[..&name.len() - 5]; - info!("is '{}' in map", yearless); - if let Some(yearless_vg) = video_groups.get(yearless) { - println!("Possible dupe between movie with year and no year:"); - println!(" {:?}", video_groups.get(name.as_str()).unwrap()); - println!(" {:?}", yearless_vg); - } - } - } +fn print_dupes(lib: &MovieLibrary) { + let videos = lib.movies().expect("couldn't get videos from library"); let mut fmtr = Formatter::new(); fmtr.with_separator(""); fmtr.with_scales(Scales::Binary()); - for name in names { - let possible_dupes = &video_groups[name]; - if possible_dupes.len() < 2 { - continue; - } - let paths: Vec = possible_dupes - .iter() - .map(|(name, _)| name.to_string()) - .collect(); - if is_multidisc(&paths) { - continue; - } - let mut file: Vec<_> = video_groups[name].iter().collect(); - file.sort_by(|(n1, _), (n2, _)| n1.partial_cmp(n2).unwrap()); - println!("{}:", name); - for (p, md) in file { + for (keep, deletes) in videos.duplicate_candidates() { + let p = &keep.files.first().unwrap().0; + println!("{}", &p[..p.find("/").unwrap()]); + println!(" Keeping:"); + for (p, md) in &keep.files { println!( - " {:>9} {:>9} {} {}", + " {:>9} {:>9} {} {}", md.largest_dimension().unwrap(), fmtr.format(md.size as f64), humantime::Duration::from(Duration::from_secs(md.duration as u64)), &p[p.rfind("/").unwrap() + 1..] ); } + println!(" Need to remove:"); + for delete in &deletes { + for (p, md) in &delete.files { + println!( + " {:>9} {:>9} {} {}", + md.largest_dimension().unwrap(), + fmtr.format(md.size as f64), + humantime::Duration::from(Duration::from_secs(md.duration as u64)), + &p[p.rfind("/").unwrap() + 1..] + ); + } + } + println!(); } } @@ -257,9 +225,7 @@ fn main() -> Result<(), Box> { } Command::PrintDupes => { let lib = MovieLibrary::new(MOVIE_DIR); - let videos = lib.videos()?; - - print_dupes(videos); + print_dupes(&lib); } Command::PrintAll => { let lib = MovieLibrary::new(MOVIE_DIR); diff --git a/src/movielibrary_test.rs b/src/movielibrary_test.rs index ee54147..3929d61 100644 --- a/src/movielibrary_test.rs +++ b/src/movielibrary_test.rs @@ -173,6 +173,74 @@ fn test_roundtrip_library() -> Result<(), Box> { let got = ml.movies().expect("failed to build movies"); assert_eq!(got.movies.len(), want.movies.len()); assert_eq!(got, want); - //assert_eq!(got, want, "Got {:#?}\nWant {:#?}", got, want); + Ok(()) +} + +fn validate_duplicates(got: Vec<(&Movie, Vec<&Movie>)>, want: Vec<(Movie, Vec)>) { + assert_eq!(got.len(), want.len()); + for (g, w) in got.iter().zip(&want) { + assert_eq!(g.0, &w.0); + assert_eq!(g.1, w.1.iter().map(|v| v).collect::>()); + } +} + +#[test] +fn test_duplicate_candidates() -> Result<(), Box> { + let movies = build_complex_movies(); + let got = movies.duplicate_candidates(); + let want = vec![( + build_movie(vec![( + "Two Movies With Multi Parts (2019)/somethingelse.mkv", + (1920, 1080), + )]), + vec![build_movie(vec![ + ( + "Two Movies With Multi Parts (2019)/abcdef123456789 part 1.mkv", + (1280, 720), + ), + ( + "Two Movies With Multi Parts (2019)/abcdef123456789 part 2.mkv", + (1280, 720), + ), + ])], + )]; + validate_duplicates(got, want); + Ok(()) +} + +#[test] +fn test_fullmetal() -> Result<(), Box> { + let mut movies = Movies { + movies: vec![ + build_movie(vec![( + "Full Metal Jacket (1987)/Full Metal Jacket.mp4", + (1280, 720), + )]), + build_movie(vec![( + "Full Metal Jacket (1987)/1776f8e2fb614a6fb77a66cde601bb45.mkv", + (1920, 1080), + )]), + ], + }; + movies.movies.sort_by(|a, b| { + a.files + .first() + .unwrap() + .0 + .partial_cmp(&b.files.first().unwrap().0) + .unwrap() + }); + let got = movies.duplicate_candidates(); + let want = vec![( + build_movie(vec![( + "Full Metal Jacket (1987)/1776f8e2fb614a6fb77a66cde601bb45.mkv", + (1920, 1080), + )]), + vec![build_movie(vec![( + "Full Metal Jacket (1987)/Full Metal Jacket.mp4", + (1280, 720), + )])], + )]; + validate_duplicates(got, want); Ok(()) }