From 70174e9e4920351195eba456a1651a3241d75779 Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Sun, 21 Nov 2021 16:50:18 -0800 Subject: [PATCH] Remove dashes and repeated spaces when comparing names for dupes. --- src/lib.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4c1a2ff..bc46c89 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -371,16 +371,23 @@ impl Movies { /// If there is a yearless movie and one or more movies with a year exist, then the yearless /// movie will be removed pub fn duplicate_candidates(&self) -> Vec<(&Movie, Vec<&Movie>)> { + lazy_static! { + static ref MULTIPLE_SPACES: Regex = Regex::new(r"\s+").unwrap(); + } let date_re = Regex::new(r"\(\d{4}\)$").unwrap(); let mut movie_counter = HashMap::new(); let mut movies_without_date_counter = HashMap::new(); for m in &self.movies { let (path, _cmd) = m.files.first().unwrap(); - let parent = clean_path_parent(path) - .to_string_lossy() - .to_ascii_lowercase() - .replace("-", " ") - .replace("'", "") + let parent = MULTIPLE_SPACES + .replace_all( + &clean_path_parent(path) + .to_string_lossy() + .to_ascii_lowercase() + .replace("-", " ") + .replace("'", " "), + " ", + ) .to_string(); if date_re.is_match(&parent) { movie_counter.entry(parent).or_insert(Vec::new()).push(m);