diff --git a/src/lib.rs b/src/lib.rs index e4f8066..36c5023 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,12 +36,6 @@ pub fn clean_path_parent>(path: P) -> PathBuf { let path = path.as_ref(); let path = path.parent().unwrap(); let path = path.to_str().unwrap(); - /* - // Strip year - if path.ends_with(')') { - path = &path[..path.len() - 7]; - } - */ PathBuf::from(path) } @@ -370,20 +364,39 @@ pub struct Movies { impl Movies { /// Find all movies with multiple copies. The returned vec contains a tuple of (Movie to keep, - /// One or more Movies to remove). The highest resolution movie is kept, TODO(wathiede): with - /// higher bit rate breaking ties. + /// One or more Movies to remove). The highest bit rate movie is kept. + /// Movies with differing years are considered distinct movies. + /// If there is a yearless movie and one or more movies with a year exist, then the yearless + /// movie will be removed pub fn duplicate_candidates(&self) -> Vec<(&Movie, Vec<&Movie>)> { + let date_re = Regex::new(r"\(\d{4}\)$").unwrap(); let mut movie_counter = HashMap::new(); + let mut movies_without_date_counter = HashMap::new(); for m in &self.movies { let (path, _cmd) = m.files.first().unwrap(); let parent = clean_path_parent(path) .to_string_lossy() .to_ascii_lowercase() .to_string(); - movie_counter.entry(parent).or_insert(Vec::new()).push(m); + if date_re.is_match(&parent) { + movie_counter.entry(parent).or_insert(Vec::new()).push(m); + } else { + movies_without_date_counter + .entry(parent) + .or_insert(Vec::new()) + .push(m); + } } - let mut dupes = Vec::new(); - for (_parent, mut movies) in movie_counter.into_iter() { + let mut dupes: Vec<(&Movie, Vec<&Movie>)> = Vec::new(); + for (parent, movies) in movie_counter.iter() { + let dateless_parent = parent[..parent.len() - 7].to_string(); + if let Some(movie) = movies_without_date_counter.remove(&dateless_parent) { + let tuple = (movies[0], movie); + dupes.push(tuple); + } + } + + for (parent, mut movies) in movie_counter.into_iter() { if movies.len() > 1 { // Sort, lowest bit_rate movie first movies.sort_by(|a, b| a.min_bit_rate().cmp(&b.min_bit_rate())); @@ -392,8 +405,26 @@ impl Movies { // Take the largest image, return the rest for removal. let tuple = (movies.remove(0), movies); dupes.push(tuple); + } else { + /* + let dateless_parent = if date_re.is_match(&parent) { + parent[..parent.len() - 7].to_string() + } else { + parent.to_string() + }; + */ } } + // Sort to make this function deterministic. + dupes.sort_by(|(a_keep, _), (b_keep, _)| { + a_keep + .files + .first() + .unwrap() + .0 + .partial_cmp(&b_keep.files.first().unwrap().0) + .unwrap() + }); for d in &dupes { let (biggest, deletes) = d; eprintln!("biggest: {}", biggest); diff --git a/src/movielibrary_test.rs b/src/movielibrary_test.rs index c6edb4f..7f45d92 100644 --- a/src/movielibrary_test.rs +++ b/src/movielibrary_test.rs @@ -86,6 +86,8 @@ where fn build_complex_metadata() -> HashMap { vec![ + build_tuple("Remake (2019)/new.mkv", (1920, 1080), 10), + build_tuple("Remake (1960)/old.mkv", (1920, 1080), 1), build_tuple( "One Movie With Year (2019)/abcdef123456789.mkv", (1920, 1080), @@ -117,15 +119,25 @@ fn build_complex_metadata() -> HashMap { 5000, ), build_tuple( - "two movies different caps (2019)/abcdef123456789.mkv", + "two movies different caps (2019)/larger.mkv", (1920, 1080), 100, ), build_tuple( - "Two Movies Different Caps (2019)/abcdef123456789.mkv", + "Two Movies Different Caps (2019)/smaller.mkv", (640, 480), 1, ), + build_tuple( + "Movie with and without year (2019)/with year.mkv", + (1920, 1080), + 10, + ), + build_tuple( + "Movie with and without year/without year.mkv", + (1280, 720), + 1, + ), ] .into_iter() .collect() @@ -134,6 +146,8 @@ fn build_complex_metadata() -> HashMap { fn build_complex_movies() -> Movies { let mut m = Movies { movies: vec![ + build_movie(vec![("Remake (2019)/new.mkv", (1920, 1080), 10)]), + build_movie(vec![("Remake (1960)/old.mkv", (1920, 1080), 1)]), build_movie(vec![( "One Movie With Year (2019)/abcdef123456789.mkv", (1920, 1080), @@ -169,15 +183,25 @@ fn build_complex_movies() -> Movies { 5000, )]), build_movie(vec![( - "two movies different caps (2019)/abcdef123456789.mkv", + "two movies different caps (2019)/larger.mkv", (1920, 1080), 100, )]), build_movie(vec![( - "Two Movies Different Caps (2019)/abcdef123456789.mkv", + "Two Movies Different Caps (2019)/smaller.mkv", (640, 480), 1, )]), + build_movie(vec![( + "Movie with and without year (2019)/with year.mkv", + (1920, 1080), + 10, + )]), + build_movie(vec![( + "Movie with and without year/without year.mkv", + (1280, 720), + 1, + )]), ], }; m.movies.sort_by(|a, b| { @@ -209,12 +233,11 @@ fn test_roundtrip_library() -> Result<(), Box> { } fn validate_duplicates(got: Vec<(&Movie, Vec<&Movie>)>, want: Vec<(Movie, Vec)>) { - assert_eq!(got.len(), want.len()); - for (g, w) in got.iter().zip(&want) { - assert_eq!(g.0, &w.0, "Got:\n{:?}\nWant:\n{:?}", g, w); - let want = w.1.iter().map(|v| v).collect::>(); - assert_eq!(g.1, want, "Got:\n{:?}\nWant:\n{:?}", g, w); - } + let want: Vec<(&Movie, Vec<&Movie>)> = want + .iter() + .map(|(keep, delete)| (keep, delete.iter().map(|m| m).collect())) + .collect(); + assert_eq!(got, want); } #[test] @@ -222,6 +245,18 @@ fn test_duplicate_candidates() -> Result<(), Box> { let movies = build_complex_movies(); let got = movies.duplicate_candidates(); let want = vec![ + ( + build_movie(vec![( + "Movie with and without year (2019)/with year.mkv", + (1920, 1080), + 10, + )]), + vec![build_movie(vec![( + "Movie with and without year/without year.mkv", + (1280, 720), + 1, + )])], + ), ( build_movie(vec![( "Two Movies With Multi Parts (2019)/somethingelse.mkv", @@ -243,12 +278,12 @@ fn test_duplicate_candidates() -> Result<(), Box> { ), ( build_movie(vec![( - "two movies different caps (2019)/abcdef123456789.mkv", + "two movies different caps (2019)/larger.mkv", (1920, 1080), 100, )]), vec![build_movie(vec![( - "Two Movies Different Caps (2019)/abcdef123456789.mkv", + "Two Movies Different Caps (2019)/smaller.mkv", (640, 480), 1, )])],