Fix bug computing highest resolution video.
Update Command::PrintDupes.
This commit is contained in:
parent
8eafec7fd2
commit
974d9386fb
98
src/lib.rs
98
src/lib.rs
@ -1,3 +1,4 @@
|
|||||||
|
use std::cmp::Ordering;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
@ -31,7 +32,20 @@ use serde_json::Value;
|
|||||||
const FULL_METADATA_FILENAME: &str = "metadata.json";
|
const FULL_METADATA_FILENAME: &str = "metadata.json";
|
||||||
const COMPACT_METADATA_FILENAME: &str = "metadata.compact.json";
|
const COMPACT_METADATA_FILENAME: &str = "metadata.compact.json";
|
||||||
|
|
||||||
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
|
/// Returns the directory that contains `path`.
///
/// The parent is returned exactly as it appears in `path`; nothing (such as a
/// trailing ` (YYYY)` year suffix) is stripped from it. The path is never
/// round-tripped through `str`, so non-UTF-8 paths are handled as well.
///
/// # Panics
///
/// Panics if `path` has no parent, i.e. it is empty or a filesystem root.
pub fn clean_path_parent<P: AsRef<Path>>(path: P) -> PathBuf {
    let path = path.as_ref();
    path.parent()
        .unwrap_or_else(|| panic!("path {:?} has no parent directory", path))
        .to_path_buf()
}
|
||||||
|
|
||||||
|
#[derive(Clone, Deserialize, Debug, Eq, PartialEq, Serialize)]
|
||||||
pub struct Resolution(usize, usize);
|
pub struct Resolution(usize, usize);
|
||||||
impl From<(usize, usize)> for Resolution {
|
impl From<(usize, usize)> for Resolution {
|
||||||
fn from(res: (usize, usize)) -> Self {
|
fn from(res: (usize, usize)) -> Self {
|
||||||
@ -46,6 +60,20 @@ impl Display for Resolution {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Ord for Resolution {
|
||||||
|
fn cmp(&self, other: &Resolution) -> Ordering {
|
||||||
|
let pixels = self.0.checked_mul(self.1).unwrap_or(usize::max_value());
|
||||||
|
let other_pixels = other.0.checked_mul(other.1).unwrap_or(usize::max_value());
|
||||||
|
pixels.cmp(&other_pixels)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for Resolution {
|
||||||
|
fn partial_cmp(&self, other: &Resolution) -> Option<Ordering> {
|
||||||
|
Some(self.cmp(other))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn option_from_str<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
|
fn option_from_str<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
|
||||||
where
|
where
|
||||||
T: FromStr,
|
T: FromStr,
|
||||||
@ -291,8 +319,37 @@ lazy_static! {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Debug, PartialEq)]
|
#[derive(Default, Debug, PartialEq)]
|
||||||
struct Movie {
|
pub struct Movie {
|
||||||
files: Vec<(String, CompactMetadata)>,
|
pub files: Vec<(String, CompactMetadata)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Movie {
|
||||||
|
fn min_resolution(&self) -> Option<Resolution> {
|
||||||
|
if self.files.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(self.files.iter().fold(
|
||||||
|
Resolution(usize::max_value(), usize::max_value()),
|
||||||
|
|acc, (_, cmd)| std::cmp::min(acc, cmd.largest_dimension().unwrap()),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for Movie {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||||
|
let p = &self.files.first().unwrap().0;
|
||||||
|
write!(f, "{}", &p[..p.find("/").unwrap()])?;
|
||||||
|
for (path, cmd) in &self.files {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
" {} {}",
|
||||||
|
&path[path.rfind("/").unwrap()..],
|
||||||
|
cmd.largest_dimension().unwrap()
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
@ -300,6 +357,41 @@ pub struct Movies {
|
|||||||
movies: Vec<Movie>,
|
movies: Vec<Movie>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Movies {
|
||||||
|
/// Find all movies with multiple copies. The returned vec contains a tuple of (Movie to keep,
|
||||||
|
/// One or more Movies to remove). The highest resolution movie is kept, TODO(wathiede): with
|
||||||
|
/// higher bit rate breaking ties.
|
||||||
|
pub fn duplicate_candidates(&self) -> Vec<(&Movie, Vec<&Movie>)> {
|
||||||
|
let mut movie_counter = HashMap::new();
|
||||||
|
for m in &self.movies {
|
||||||
|
let (path, _cmd) = m.files.first().unwrap();
|
||||||
|
let parent = clean_path_parent(path).to_string_lossy().to_string();
|
||||||
|
movie_counter.entry(parent).or_insert(Vec::new()).push(m);
|
||||||
|
}
|
||||||
|
let mut dupes = Vec::new();
|
||||||
|
for (_parent, mut movies) in movie_counter.into_iter() {
|
||||||
|
if movies.len() > 1 {
|
||||||
|
// Sort, smallest movie first.
|
||||||
|
movies.sort_by(|a, b| a.min_resolution().cmp(&b.min_resolution()));
|
||||||
|
// Flip order, we care about the largest.
|
||||||
|
movies.reverse();
|
||||||
|
// Take the largest image, return the rest for removal.
|
||||||
|
let tuple = (movies.remove(0), movies);
|
||||||
|
dupes.push(tuple);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for d in &dupes {
|
||||||
|
let (biggest, deletes) = d;
|
||||||
|
eprintln!("biggest: {}", biggest);
|
||||||
|
for (i, delete) in deletes.iter().enumerate() {
|
||||||
|
eprintln!("{}. delete: {}", i + 1, delete);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dupes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn movies_from_paths_compact_metadata(mut p_cmd: HashMap<String, CompactMetadata>) -> Movies {
|
fn movies_from_paths_compact_metadata(mut p_cmd: HashMap<String, CompactMetadata>) -> Movies {
|
||||||
let multidisc = collapse_multidisc(&p_cmd.keys().map(|s| s.to_string()).collect());
|
let multidisc = collapse_multidisc(&p_cmd.keys().map(|s| s.to_string()).collect());
|
||||||
let movies = multidisc
|
let movies = multidisc
|
||||||
|
|||||||
80
src/main.rs
80
src/main.rs
@ -1,8 +1,6 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::path::Path;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use human_format::Formatter;
|
use human_format::Formatter;
|
||||||
@ -14,26 +12,13 @@ use regex::Regex;
|
|||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
use tabwriter::TabWriter;
|
use tabwriter::TabWriter;
|
||||||
|
|
||||||
use superdeduper::is_multidisc;
|
use superdeduper::clean_path_parent;
|
||||||
use superdeduper::CompactMetadata;
|
use superdeduper::CompactMetadata;
|
||||||
use superdeduper::MovieLibrary;
|
use superdeduper::MovieLibrary;
|
||||||
|
|
||||||
const MOVIE_DIR: &str = "/home/wathiede/Movies";
|
const MOVIE_DIR: &str = "/home/wathiede/Movies";
|
||||||
const TO_BE_REMOVED_DIR: &str = "/storage/media/to-be-deleted/";
|
const TO_BE_REMOVED_DIR: &str = "/storage/media/to-be-deleted/";
|
||||||
|
|
||||||
fn clean_path_parent<P: AsRef<Path>>(path: P) -> PathBuf {
|
|
||||||
let path = path.as_ref();
|
|
||||||
let path = path.parent().unwrap();
|
|
||||||
let path = path.to_str().unwrap();
|
|
||||||
/*
|
|
||||||
// Strip year
|
|
||||||
if path.ends_with(')') {
|
|
||||||
path = &path[..path.len() - 7];
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
PathBuf::from(path)
|
|
||||||
}
|
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref CLEAN_TITLE_CHARS: Regex = Regex::new("[^ 0-9[:alpha:]]").unwrap();
|
static ref CLEAN_TITLE_CHARS: Regex = Regex::new("[^ 0-9[:alpha:]]").unwrap();
|
||||||
}
|
}
|
||||||
@ -47,55 +32,38 @@ fn normalize(path: &str) -> String {
|
|||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref YEAR_SUFFIX: Regex = Regex::new(r" \d{4}$").unwrap();
|
static ref YEAR_SUFFIX: Regex = Regex::new(r" \d{4}$").unwrap();
|
||||||
}
|
}
|
||||||
fn print_dupes(videos: HashMap<String, CompactMetadata>) {
|
|
||||||
let mut video_groups: HashMap<String, Vec<(String, CompactMetadata)>> = HashMap::new();
|
|
||||||
for (name, md) in videos.into_iter() {
|
|
||||||
let clean_name = normalize(clean_path_parent(&name).to_str().unwrap());
|
|
||||||
let paths = video_groups.entry(clean_name).or_insert(Vec::new());
|
|
||||||
paths.push((name.to_string(), md));
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut names = video_groups.keys().collect::<Vec<_>>();
|
fn print_dupes(lib: &MovieLibrary) {
|
||||||
names.sort();
|
let videos = lib.movies().expect("couldn't get videos from library");
|
||||||
|
|
||||||
for name in &names {
|
|
||||||
if YEAR_SUFFIX.is_match(&name) {
|
|
||||||
let yearless = &name[..&name.len() - 5];
|
|
||||||
info!("is '{}' in map", yearless);
|
|
||||||
if let Some(yearless_vg) = video_groups.get(yearless) {
|
|
||||||
println!("Possible dupe between movie with year and no year:");
|
|
||||||
println!(" {:?}", video_groups.get(name.as_str()).unwrap());
|
|
||||||
println!(" {:?}", yearless_vg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let mut fmtr = Formatter::new();
|
let mut fmtr = Formatter::new();
|
||||||
fmtr.with_separator("");
|
fmtr.with_separator("");
|
||||||
fmtr.with_scales(Scales::Binary());
|
fmtr.with_scales(Scales::Binary());
|
||||||
for name in names {
|
for (keep, deletes) in videos.duplicate_candidates() {
|
||||||
let possible_dupes = &video_groups[name];
|
let p = &keep.files.first().unwrap().0;
|
||||||
if possible_dupes.len() < 2 {
|
println!("{}", &p[..p.find("/").unwrap()]);
|
||||||
continue;
|
println!(" Keeping:");
|
||||||
}
|
for (p, md) in &keep.files {
|
||||||
let paths: Vec<String> = possible_dupes
|
|
||||||
.iter()
|
|
||||||
.map(|(name, _)| name.to_string())
|
|
||||||
.collect();
|
|
||||||
if is_multidisc(&paths) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let mut file: Vec<_> = video_groups[name].iter().collect();
|
|
||||||
file.sort_by(|(n1, _), (n2, _)| n1.partial_cmp(n2).unwrap());
|
|
||||||
println!("{}:", name);
|
|
||||||
for (p, md) in file {
|
|
||||||
println!(
|
println!(
|
||||||
" {:>9} {:>9} {} {}",
|
" {:>9} {:>9} {} {}",
|
||||||
md.largest_dimension().unwrap(),
|
md.largest_dimension().unwrap(),
|
||||||
fmtr.format(md.size as f64),
|
fmtr.format(md.size as f64),
|
||||||
humantime::Duration::from(Duration::from_secs(md.duration as u64)),
|
humantime::Duration::from(Duration::from_secs(md.duration as u64)),
|
||||||
&p[p.rfind("/").unwrap() + 1..]
|
&p[p.rfind("/").unwrap() + 1..]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
println!(" Need to remove:");
|
||||||
|
for delete in &deletes {
|
||||||
|
for (p, md) in &delete.files {
|
||||||
|
println!(
|
||||||
|
" {:>9} {:>9} {} {}",
|
||||||
|
md.largest_dimension().unwrap(),
|
||||||
|
fmtr.format(md.size as f64),
|
||||||
|
humantime::Duration::from(Duration::from_secs(md.duration as u64)),
|
||||||
|
&p[p.rfind("/").unwrap() + 1..]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -257,9 +225,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
}
|
}
|
||||||
Command::PrintDupes => {
|
Command::PrintDupes => {
|
||||||
let lib = MovieLibrary::new(MOVIE_DIR);
|
let lib = MovieLibrary::new(MOVIE_DIR);
|
||||||
let videos = lib.videos()?;
|
print_dupes(&lib);
|
||||||
|
|
||||||
print_dupes(videos);
|
|
||||||
}
|
}
|
||||||
Command::PrintAll => {
|
Command::PrintAll => {
|
||||||
let lib = MovieLibrary::new(MOVIE_DIR);
|
let lib = MovieLibrary::new(MOVIE_DIR);
|
||||||
|
|||||||
@ -173,6 +173,74 @@ fn test_roundtrip_library() -> Result<(), Box<dyn Error>> {
|
|||||||
let got = ml.movies().expect("failed to build movies");
|
let got = ml.movies().expect("failed to build movies");
|
||||||
assert_eq!(got.movies.len(), want.movies.len());
|
assert_eq!(got.movies.len(), want.movies.len());
|
||||||
assert_eq!(got, want);
|
assert_eq!(got, want);
|
||||||
//assert_eq!(got, want, "Got {:#?}\nWant {:#?}", got, want);
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Asserts that `got` (borrowed movies, as returned by `duplicate_candidates`)
/// matches `want` (owned movies) pair by pair: same number of entries, same
/// movie to keep, and the same list of movies to remove in the same order.
fn validate_duplicates(got: Vec<(&Movie, Vec<&Movie>)>, want: Vec<(Movie, Vec<Movie>)>) {
    assert_eq!(got.len(), want.len());
    for ((got_keep, got_remove), (want_keep, want_remove)) in got.iter().zip(&want) {
        assert_eq!(*got_keep, want_keep);
        // Borrow the expected movies so both sides are `Vec<&Movie>`.
        assert_eq!(*got_remove, want_remove.iter().collect::<Vec<_>>());
    }
}
|
||||||
|
|
||||||
|
#[test]
fn test_duplicate_candidates() -> Result<(), Box<dyn Error>> {
    // The single-file 1920x1080 copy is the one expected to be kept.
    let keep = build_movie(vec![(
        "Two Movies With Multi Parts (2019)/somethingelse.mkv",
        (1920, 1080),
    )]);
    // The two-part 1280x720 copy in the same directory is expected to be removed.
    let remove = build_movie(vec![
        (
            "Two Movies With Multi Parts (2019)/abcdef123456789 part 1.mkv",
            (1280, 720),
        ),
        (
            "Two Movies With Multi Parts (2019)/abcdef123456789 part 2.mkv",
            (1280, 720),
        ),
    ]);
    let want = vec![(keep, vec![remove])];

    let library = build_complex_movies();
    validate_duplicates(library.duplicate_candidates(), want);
    Ok(())
}
|
||||||
|
|
||||||
|
#[test]
fn test_fullmetal() -> Result<(), Box<dyn Error>> {
    // Two copies of the same film in one directory, at different resolutions.
    let low_res = || {
        build_movie(vec![(
            "Full Metal Jacket (1987)/Full Metal Jacket.mp4",
            (1280, 720),
        )])
    };
    let high_res = || {
        build_movie(vec![(
            "Full Metal Jacket (1987)/1776f8e2fb614a6fb77a66cde601bb45.mkv",
            (1920, 1080),
        )])
    };

    let mut library = Movies {
        movies: vec![low_res(), high_res()],
    };
    // Order the movies by the path of their first file before looking for dupes.
    library.movies.sort_by(|a, b| {
        let (a_path, _) = a.files.first().unwrap();
        let (b_path, _) = b.files.first().unwrap();
        a_path.cmp(b_path)
    });

    // The 1920x1080 copy must be kept and the 1280x720 copy flagged for removal.
    let want = vec![(high_res(), vec![low_res()])];
    validate_duplicates(library.duplicate_candidates(), want);
    Ok(())
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user