Compare commits

...

32 Commits

Author SHA1 Message Date
d44c4da72f Use upstream human_format 2024-04-06 16:24:21 -07:00
48d92f6b67 Better debugging when metadata is invalid.
Handle missing subtitle encoding.
2022-10-15 10:01:06 -07:00
318ce583ea Make root directory a flag. 2022-07-23 21:33:36 -07:00
3a61e15449 Latest lock. 2022-07-23 21:06:49 -07:00
c46ae525fc Just remove dupes, don't move them. 2022-07-23 21:06:22 -07:00
e16d899c14 Update tests to match intended behavior. 2022-07-23 21:06:03 -07:00
e06d2419e5 Remove extra dbg!(). 2021-12-25 09:35:39 -08:00
4b1cf6c491 Fix movie size comparison.
Use largest movie pixel size (some movies have low res video streams
    embedded).
2021-12-25 09:30:55 -08:00
70174e9e49 Remove dashes and repeated spaces when comparing names for dupes. 2021-11-21 16:50:18 -08:00
b2ef1d3d3d Prefer higher resolution files. 2021-11-12 19:20:42 -08:00
708e44053e Ignore dashes and apostrophes when finding dupes. 2021-11-12 19:06:07 -08:00
37b4e1b4b2 Use parent envrc / default.nix. 2021-11-12 18:05:56 -08:00
4ba8e3e3ee Better error reporting when ffprobe fails. 2021-02-24 15:26:21 -08:00
872c1096a6 Better error log messaging. 2021-02-24 07:12:14 -08:00
7da8639881 Use TO_BE_REMOVED_DIR inplace of static string. 2021-02-21 09:42:21 -08:00
d4c94a5a3a Add default.nix & direnv. 2021-02-21 08:30:03 -08:00
db29d662c6 Use to-be-deleted path that works on sagan and nixos-05 2021-02-21 08:29:38 -08:00
d00d49135a Print suggested empty directory cleanup and clean lint. 2020-06-07 16:38:38 -07:00
18108b5d44 Print full path when suggesting moves. 2020-06-07 16:30:47 -07:00
8af62e313b Suggest rerunning empty-dirs if dupes found. 2020-05-10 07:52:54 -07:00
334d2a5e53 Add empty-dirs verb to find movie directories with no movies. 2020-03-29 15:46:33 -07:00
6a52f3c5b1 Remove dead code. 2019-12-14 16:08:38 -08:00
0714ae6f2f Remove eprintln. 2019-12-14 16:07:35 -08:00
96819d2437 Handle dateless movies when dated movie present. 2019-12-14 10:26:54 -08:00
b3ca6e1cb3 More logging when test fails. 2019-12-09 19:36:41 -08:00
da717aeae1 Handle missing display_aspect_ratio and movies differing only in case. 2019-12-04 21:31:23 -08:00
4d0ce2cd13 Fix parsing when no channel_layout present in audio config. 2019-12-04 20:47:24 -08:00
ab716f0398 Quote mv command so paths with ' work. 2019-11-25 09:34:48 -08:00
7f00c90003 Filter out stale metadata entries. 2019-11-25 09:26:04 -08:00
a2f17ed511 Print suggested 'delete' command for removing dupes. 2019-11-25 08:53:58 -08:00
150bdfddef Use bit rate instead of resolution in making dedup decision.
Keep lower resolution movie if it is higher bit rate.
2019-11-25 08:47:04 -08:00
974d9386fb Fix bug computing highest resolution video.
Update Command::PrintDupes.
2019-11-25 08:19:21 -08:00
5 changed files with 734 additions and 365 deletions

468
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -9,7 +9,7 @@ edition = "2018"
[dependencies] [dependencies]
failure = "0.1" failure = "0.1"
glob = "0.3" glob = "0.3"
human_format = { git ="https://github.com/wathiede/human-format-rs" } human_format = "1.1.0"
humantime = "1" humantime = "1"
lazy_static = "1.4" lazy_static = "1.4"
log = "0.4" log = "0.4"

View File

@@ -1,37 +1,37 @@
use std::collections::HashMap; use std::{
use std::env; cmp::Ordering,
use std::ffi::OsStr; collections::{HashMap, HashSet},
use std::fmt; env,
use std::fmt::Display; ffi::OsStr,
use std::fmt::Formatter; fmt,
use std::fs::File; fmt::{Display, Formatter},
use std::io::BufReader; fs::File,
use std::io::BufWriter; io::{BufReader, BufWriter},
use std::path::Path; path::{Path, PathBuf},
use std::path::PathBuf; process::Command,
use std::process::Command; str::FromStr,
use std::str::FromStr; };
use failure::bail; use failure::{bail, Error, ResultExt};
use failure::Error;
use failure::ResultExt;
use glob::glob; use glob::glob;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use log::error; use log::{error, info};
use log::info; use rayon::{iter::ParallelBridge, prelude::ParallelIterator};
use rayon::iter::ParallelBridge;
use rayon::prelude::ParallelIterator;
use regex::Regex; use regex::Regex;
use serde::de; use serde::{de, de::Deserializer, Deserialize, Serialize};
use serde::de::Deserializer;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value; use serde_json::Value;
const FULL_METADATA_FILENAME: &str = "metadata.json"; const FULL_METADATA_FILENAME: &str = "metadata.json";
const COMPACT_METADATA_FILENAME: &str = "metadata.compact.json"; const COMPACT_METADATA_FILENAME: &str = "metadata.compact.json";
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] pub fn clean_path_parent<P: AsRef<Path>>(path: P) -> PathBuf {
let path = path.as_ref();
let path = path.parent().unwrap();
let path = path.to_str().unwrap();
PathBuf::from(path)
}
#[derive(Clone, Deserialize, Debug, Eq, PartialEq, Serialize)]
pub struct Resolution(usize, usize); pub struct Resolution(usize, usize);
impl From<(usize, usize)> for Resolution { impl From<(usize, usize)> for Resolution {
fn from(res: (usize, usize)) -> Self { fn from(res: (usize, usize)) -> Self {
@@ -46,6 +46,20 @@ impl Display for Resolution {
} }
} }
/// Total ordering for [`Resolution`], driven by pixel count.
///
/// Resolutions are ranked first by the product of their two fields (the
/// number of pixels). When two resolutions cover the same number of pixels
/// the individual fields are compared, first field then second, so that
/// `cmp` returns `Ordering::Equal` only for values that are also `==`.
/// This keeps `Ord` consistent with the derived `PartialEq`/`Eq`.
impl Ord for Resolution {
    fn cmp(&self, other: &Resolution) -> Ordering {
        // Saturate rather than overflow: an absurdly large resolution simply
        // ranks as "as large as possible".
        let pixels = self.0.saturating_mul(self.1);
        let other_pixels = other.0.saturating_mul(other.1);
        pixels
            .cmp(&other_pixels)
            // Tie-break on the fields so that e.g. 2x3 and 3x2 are not
            // reported as equal while comparing unequal with `==`.
            .then_with(|| self.0.cmp(&other.0))
            .then_with(|| self.1.cmp(&other.1))
    }
}

/// Partial ordering that always agrees with the total ordering above.
impl PartialOrd for Resolution {
    fn partial_cmp(&self, other: &Resolution) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
fn option_from_str<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error> fn option_from_str<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
where where
T: FromStr, T: FromStr,
@@ -121,7 +135,7 @@ enum Stream {
codec_long_name: String, codec_long_name: String,
coded_height: usize, coded_height: usize,
coded_width: usize, coded_width: usize,
display_aspect_ratio: String, display_aspect_ratio: Option<String>,
#[serde(default, deserialize_with = "from_str")] #[serde(default, deserialize_with = "from_str")]
duration: f32, duration: f32,
height: usize, height: usize,
@@ -133,13 +147,13 @@ enum Stream {
codec_name: String, codec_name: String,
codec_long_name: String, codec_long_name: String,
channels: usize, channels: usize,
channel_layout: String, channel_layout: Option<String>,
tags: Option<Tags>, tags: Option<Tags>,
}, },
#[serde(rename = "subtitle")] #[serde(rename = "subtitle")]
Subtitle { Subtitle {
codec_name: String, codec_name: Option<String>,
codec_long_name: String, codec_long_name: Option<String>,
tags: Option<Tags>, tags: Option<Tags>,
}, },
#[serde(rename = "attachment")] #[serde(rename = "attachment")]
@@ -185,7 +199,8 @@ pub struct AudioFormat {
short_name: String, short_name: String,
long_name: String, long_name: String,
channels: usize, channels: usize,
channel_layout: String, #[serde(skip_serializing_if = "Option::is_none")]
channel_layout: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
title: Option<String>, title: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
@@ -194,8 +209,8 @@ pub struct AudioFormat {
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] #[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
pub struct SubtitleFormat { pub struct SubtitleFormat {
short_name: String, short_name: Option<String>,
long_name: String, long_name: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
title: Option<String>, title: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
@@ -256,7 +271,7 @@ pub struct MetadataFile {
#[derive(PartialEq, Debug)] #[derive(PartialEq, Debug)]
pub struct MovieLibrary { pub struct MovieLibrary {
root: String, pub root: String,
} }
fn json_metadata_for_path<P: AsRef<Path> + AsRef<OsStr>>(path: P) -> Result<String, Error> { fn json_metadata_for_path<P: AsRef<Path> + AsRef<OsStr>>(path: P) -> Result<String, Error> {
@@ -273,7 +288,7 @@ fn json_metadata_for_path<P: AsRef<Path> + AsRef<OsStr>>(path: P) -> Result<Stri
]) ])
.arg(Path::new("./").join(path)); .arg(Path::new("./").join(path));
info!(target: "json", "cmd {:?}", cmd); info!(target: "json", "cmd {:?}", cmd);
let output = cmd.output()?; let output = cmd.output().context(format!("failed to run {:?}", cmd))?;
if output.status.success() { if output.status.success() {
return Ok(String::from_utf8(output.stdout)?); return Ok(String::from_utf8(output.stdout)?);
} }
@@ -291,8 +306,60 @@ lazy_static! {
} }
#[derive(Default, Debug, PartialEq)] #[derive(Default, Debug, PartialEq)]
struct Movie { pub struct Movie {
files: Vec<(String, CompactMetadata)>, pub files: Vec<(String, CompactMetadata)>,
}
impl Movie {
    /// Iterates over the pixel count (`width * height`) of every video stream
    /// in every file of this movie.
    ///
    /// The multiplication saturates instead of overflowing, matching the
    /// overflow handling used when comparing `Resolution` values.
    fn pixel_counts(&self) -> impl Iterator<Item = usize> + '_ {
        self.files
            .iter()
            .flat_map(|(_, cmd)| cmd.video.iter())
            .map(|v| v.width.saturating_mul(v.height))
    }

    /// Largest pixel count found in any video stream of any file.
    ///
    /// Returns `None` when the movie has no files. When there are files but
    /// none of them has a video stream the result is `Some(0)`.
    fn max_pixel_count(&self) -> Option<usize> {
        if self.files.is_empty() {
            None
        } else {
            Some(self.pixel_counts().max().unwrap_or(usize::min_value()))
        }
    }

    /// Smallest pixel count found in any video stream of any file.
    ///
    /// Returns `None` when the movie has no files. When there are files but
    /// none of them has a video stream the result is `Some(usize::max_value())`.
    fn min_pixel_count(&self) -> Option<usize> {
        if self.files.is_empty() {
            None
        } else {
            Some(self.pixel_counts().min().unwrap_or(usize::max_value()))
        }
    }

    /// Lowest `bit_rate` among the movie's files, or `None` when the movie
    /// has no files.
    fn min_bit_rate(&self) -> Option<usize> {
        self.files.iter().map(|(_, cmd)| cmd.bit_rate).min()
    }
}
impl Display for Movie {
/// Writes the movie as its directory name followed by one
/// ` <file> <largest dimension>` pair per file.
///
/// The directory name is everything before the first `/` of the first
/// file's path; each file is printed from its last `/` onwards (the slash
/// itself is included).
///
/// # Panics
///
/// Panics if the movie has no files, if a path contains no `/`, or if
/// `largest_dimension()` returns `None` for a file.
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
    let (first_path, _) = self.files.first().unwrap();
    let dir_end = first_path.find('/').unwrap();
    f.write_str(&first_path[..dir_end])?;

    self.files.iter().try_for_each(|(path, cmd)| {
        let name_start = path.rfind('/').unwrap();
        write!(
            f,
            " {} {}",
            &path[name_start..],
            cmd.largest_dimension().unwrap()
        )
    })
}
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@@ -300,6 +367,74 @@ pub struct Movies {
movies: Vec<Movie>, movies: Vec<Movie>,
} }
impl Movies {
/// Find all movies with multiple copies. The returned vec contains a tuple of (Movie to keep,
/// One or more Movies to remove). The highest resolution movie is kept.
/// Movies with differing years are considered distinct movies.
/// If there is a yearless movie and one or more movies with a year exist, then the yearless
/// movie will be removed
pub fn duplicate_candidates(&self) -> Vec<(&Movie, Vec<&Movie>)> {
lazy_static! {
static ref MULTIPLE_SPACES: Regex = Regex::new(r"\s+").unwrap();
}
let date_re = Regex::new(r"\(\d{4}\)$").unwrap();
let mut movie_counter = HashMap::new();
let mut movies_without_date_counter = HashMap::new();
for m in &self.movies {
let (path, _cmd) = m.files.first().unwrap();
let parent = MULTIPLE_SPACES
.replace_all(
&clean_path_parent(path)
.to_string_lossy()
.to_ascii_lowercase()
.replace("-", " ")
.replace("'", " "),
" ",
)
.to_string();
if date_re.is_match(&parent) {
movie_counter.entry(parent).or_insert(Vec::new()).push(m);
} else {
movies_without_date_counter
.entry(parent)
.or_insert(Vec::new())
.push(m);
}
}
let mut dupes: Vec<(&Movie, Vec<&Movie>)> = Vec::new();
for (parent, movies) in movie_counter.iter() {
let dateless_parent = parent[..parent.len() - 7].to_string();
if let Some(movie) = movies_without_date_counter.remove(&dateless_parent) {
let tuple = (movies[0], movie);
dupes.push(tuple);
}
}
for (_parent, mut movies) in movie_counter.into_iter() {
if movies.len() > 1 {
// Sort, lowest resolution movie first
movies.sort_by(|a, b| a.max_pixel_count().cmp(&b.max_pixel_count()));
// Flip order, we care about the largest.
movies.reverse();
// Take the largest image, return the rest for removal.
let tuple = (movies.remove(0), movies);
dupes.push(tuple);
}
}
// Sort to make this function deterministic.
dupes.sort_by(|(a_keep, _), (b_keep, _)| {
a_keep
.files
.first()
.unwrap()
.0
.partial_cmp(&b_keep.files.first().unwrap().0)
.unwrap()
});
dupes
}
}
fn movies_from_paths_compact_metadata(mut p_cmd: HashMap<String, CompactMetadata>) -> Movies { fn movies_from_paths_compact_metadata(mut p_cmd: HashMap<String, CompactMetadata>) -> Movies {
let multidisc = collapse_multidisc(&p_cmd.keys().map(|s| s.to_string()).collect()); let multidisc = collapse_multidisc(&p_cmd.keys().map(|s| s.to_string()).collect());
let movies = multidisc let movies = multidisc
@@ -336,7 +471,6 @@ impl MovieLibrary {
let f = File::open(&path).context(format!("open {}", path.display()))?; let f = File::open(&path).context(format!("open {}", path.display()))?;
let r = BufReader::new(f); let r = BufReader::new(f);
// Read the JSON contents of the file as an instance of `User`.
let mdf: MetadataFile = serde_json::from_reader(r) let mdf: MetadataFile = serde_json::from_reader(r)
.context(format!("serde_json::from_reader {}", path.display()))?; .context(format!("serde_json::from_reader {}", path.display()))?;
@@ -391,7 +525,7 @@ impl MovieLibrary {
short_name: codec_name.to_string(), short_name: codec_name.to_string(),
long_name: codec_long_name.to_string(), long_name: codec_long_name.to_string(),
channels: *channels, channels: *channels,
channel_layout: channel_layout.to_string(), channel_layout: channel_layout.clone(),
title: tags.as_ref().and_then(|t| t.title()), title: tags.as_ref().and_then(|t| t.title()),
language: tags.as_ref().and_then(|t| t.language()), language: tags.as_ref().and_then(|t| t.language()),
}) })
@@ -412,8 +546,8 @@ impl MovieLibrary {
} = s } = s
{ {
Some(SubtitleFormat { Some(SubtitleFormat {
short_name: codec_name.to_string(), short_name: codec_name.clone(),
long_name: codec_long_name.to_string(), long_name: codec_long_name.clone(),
title: tags.as_ref().and_then(|t| t.title()), title: tags.as_ref().and_then(|t| t.title()),
language: tags.as_ref().and_then(|t| t.language()), language: tags.as_ref().and_then(|t| t.language()),
}) })
@@ -446,7 +580,7 @@ impl MovieLibrary {
pub fn update_metadata(&self) -> Result<Vec<String>, Error> { pub fn update_metadata(&self) -> Result<Vec<String>, Error> {
let path = Path::new(&self.root).join(FULL_METADATA_FILENAME); let path = Path::new(&self.root).join(FULL_METADATA_FILENAME);
let old_metadata: HashMap<String, Value> = match File::open(&path) { let mut old_metadata: HashMap<String, Value> = match File::open(&path) {
Ok(f) => { Ok(f) => {
let r = BufReader::new(f); let r = BufReader::new(f);
serde_json::from_reader(r)? serde_json::from_reader(r)?
@@ -458,6 +592,29 @@ impl MovieLibrary {
}; };
info!("Read metadata, {} videos found", old_metadata.len()); info!("Read metadata, {} videos found", old_metadata.len());
// Filter out stale metadata (where the file no longer exists).
let old_metadata: HashMap<String, Value> = self
.iter_video_files()
.filter(|r| r.is_ok())
.filter_map(|r| {
let path = r
.as_ref()
.unwrap()
.strip_prefix(&self.root)
.unwrap()
.to_str()
.unwrap()
.to_owned();
match old_metadata.remove(&path) {
Some(v) => Some((path, v)),
None => None,
}
})
.collect();
info!(
"After removing stale metadata, {} videos found",
old_metadata.len()
);
let mut metadata: HashMap<_, _> = self let mut metadata: HashMap<_, _> = self
.iter_video_files() .iter_video_files()
@@ -484,11 +641,17 @@ impl MovieLibrary {
Some((path.to_string_lossy().into_owned(), json)) Some((path.to_string_lossy().into_owned(), json))
} }
Err(e) => { Err(e) => {
error!("{}", e); error!("Failed to open {}: {}", path.to_string_lossy(), e);
None None
} }
} }
}) })
.inspect(|(path, json)| {
if let Err(err) = serde_json::from_str::<Metadata>(&json) {
error!("Can't parse metadata for {}: {}", path, err);
error!("{}", json);
}
})
.map(|(path, json)| (path, serde_json::from_str::<Value>(&json).unwrap())) .map(|(path, json)| (path, serde_json::from_str::<Value>(&json).unwrap()))
.collect(); .collect();
let new_videos = metadata.keys().cloned().collect(); let new_videos = metadata.keys().cloned().collect();
@@ -527,7 +690,7 @@ impl MovieLibrary {
Ok(movies_from_paths_compact_metadata(p_cmd)) Ok(movies_from_paths_compact_metadata(p_cmd))
} }
pub fn videos(&self) -> Result<(HashMap<String, CompactMetadata>), Error> { pub fn videos(&self) -> Result<HashMap<String, CompactMetadata>, Error> {
let path = Path::new(&self.root).join(COMPACT_METADATA_FILENAME); let path = Path::new(&self.root).join(COMPACT_METADATA_FILENAME);
let f = File::open(&path).context(format!("open {}", path.display()))?; let f = File::open(&path).context(format!("open {}", path.display()))?;
let r = BufReader::new(f); let r = BufReader::new(f);
@@ -535,6 +698,28 @@ impl MovieLibrary {
Ok(serde_json::from_reader(r) Ok(serde_json::from_reader(r)
.context(format!("serde_json::from_reader {}", path.display()))?) .context(format!("serde_json::from_reader {}", path.display()))?)
} }
/// Lists the directories directly under the library root that contain no
/// known video.
///
/// Every immediate sub-directory of `self.root` is a candidate; a candidate
/// is dropped when some key returned by `self.videos()` starts with that
/// directory name (the part of the key before its first `/`). The remaining
/// names are returned sorted and relative to the root.
///
/// # Errors
///
/// Returns an error if the root directory cannot be read, if an entry's
/// metadata cannot be fetched, or if `self.videos()` fails.
pub fn empty_dirs(&self) -> Result<Vec<String>, Error> {
    let mut all_dirs = HashSet::new();
    for de in Path::new(&self.root).read_dir()? {
        let de = de?;
        if de.metadata()?.is_dir() {
            // Take the entry's own name instead of slicing `root.len() + 1`
            // bytes off the full path: that arithmetic is wrong whenever the
            // configured root already ends in a slash.
            all_dirs.insert(de.file_name().to_string_lossy().into_owned());
        }
    }

    for path in self.videos()?.keys() {
        // `split` always yields at least one item; for a key without any
        // `/` that item is the whole key.
        if let Some(dir) = path.split('/').next() {
            all_dirs.remove(dir);
        }
    }

    let mut empty_dirs: Vec<_> = all_dirs.into_iter().collect();
    empty_dirs.sort();
    Ok(empty_dirs)
}
} }
#[cfg(test)] #[cfg(test)]

View File

@@ -1,12 +1,6 @@
use std::collections::HashMap; use std::{collections::HashMap, error::Error, io::Write, path::Path, time::Duration};
use std::error::Error;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use std::time::Duration;
use human_format::Formatter; use human_format::{Formatter, Scales};
use human_format::Scales;
use humantime; use humantime;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use log::info; use log::info;
@@ -14,25 +8,10 @@ use regex::Regex;
use structopt::StructOpt; use structopt::StructOpt;
use tabwriter::TabWriter; use tabwriter::TabWriter;
use superdeduper::is_multidisc; use superdeduper::{clean_path_parent, CompactMetadata, MovieLibrary};
use superdeduper::CompactMetadata;
use superdeduper::MovieLibrary;
const MOVIE_DIR: &str = "/home/wathiede/Movies"; const MOVIE_DIR: &str = "/home/wathiede/Movies";
const TO_BE_REMOVED_DIR: &str = "/storage/media/to-be-deleted/"; const TO_BE_REMOVED_DIR: &str = "/home/wathiede/to-be-deleted/";
fn clean_path_parent<P: AsRef<Path>>(path: P) -> PathBuf {
let path = path.as_ref();
let path = path.parent().unwrap();
let path = path.to_str().unwrap();
/*
// Strip year
if path.ends_with(')') {
path = &path[..path.len() - 7];
}
*/
PathBuf::from(path)
}
lazy_static! { lazy_static! {
static ref CLEAN_TITLE_CHARS: Regex = Regex::new("[^ 0-9[:alpha:]]").unwrap(); static ref CLEAN_TITLE_CHARS: Regex = Regex::new("[^ 0-9[:alpha:]]").unwrap();
@@ -47,47 +26,18 @@ fn normalize(path: &str) -> String {
lazy_static! { lazy_static! {
static ref YEAR_SUFFIX: Regex = Regex::new(r" \d{4}$").unwrap(); static ref YEAR_SUFFIX: Regex = Regex::new(r" \d{4}$").unwrap();
} }
fn print_dupes(videos: HashMap<String, CompactMetadata>) {
let mut video_groups: HashMap<String, Vec<(String, CompactMetadata)>> = HashMap::new();
for (name, md) in videos.into_iter() {
let clean_name = normalize(clean_path_parent(&name).to_str().unwrap());
let paths = video_groups.entry(clean_name).or_insert(Vec::new());
paths.push((name.to_string(), md));
}
let mut names = video_groups.keys().collect::<Vec<_>>(); fn print_dupes(lib: &MovieLibrary) {
names.sort(); let videos = lib.movies().expect("couldn't get videos from library");
for name in &names {
if YEAR_SUFFIX.is_match(&name) {
let yearless = &name[..&name.len() - 5];
info!("is '{}' in map", yearless);
if let Some(yearless_vg) = video_groups.get(yearless) {
println!("Possible dupe between movie with year and no year:");
println!(" {:?}", video_groups.get(name.as_str()).unwrap());
println!(" {:?}", yearless_vg);
}
}
}
let mut fmtr = Formatter::new(); let mut fmtr = Formatter::new();
fmtr.with_separator(""); fmtr.with_separator("");
fmtr.with_scales(Scales::Binary()); fmtr.with_scales(Scales::Binary());
for name in names { let mut delete_paths = Vec::new();
let possible_dupes = &video_groups[name]; for (keep, deletes) in videos.duplicate_candidates() {
if possible_dupes.len() < 2 { let p = &keep.files.first().unwrap().0;
continue; println!("{}", &p[..p.find("/").unwrap()]);
} println!(" Keeping:");
let paths: Vec<String> = possible_dupes for (p, md) in &keep.files {
.iter()
.map(|(name, _)| name.to_string())
.collect();
if is_multidisc(&paths) {
continue;
}
let mut file: Vec<_> = video_groups[name].iter().collect();
file.sort_by(|(n1, _), (n2, _)| n1.partial_cmp(n2).unwrap());
println!("{}:", name);
for (p, md) in file {
println!( println!(
" {:>9} {:>9} {} {}", " {:>9} {:>9} {} {}",
md.largest_dimension().unwrap(), md.largest_dimension().unwrap(),
@@ -96,6 +46,28 @@ fn print_dupes(videos: HashMap<String, CompactMetadata>) {
&p[p.rfind("/").unwrap() + 1..] &p[p.rfind("/").unwrap() + 1..]
); );
} }
println!(" Need to remove:");
for delete in &deletes {
for (p, md) in &delete.files {
delete_paths.push(p);
println!(
" {:>9} {:>9} {} {}",
md.largest_dimension().unwrap(),
fmtr.format(md.size as f64),
humantime::Duration::from(Duration::from_secs(md.duration as u64)),
&p[p.rfind("/").unwrap() + 1..]
);
}
}
println!();
}
delete_paths.sort();
let root = Path::new(&lib.root);
for path in &delete_paths {
println!(r#"rm "{}""#, root.join(path).to_string_lossy(),);
}
if delete_paths.len() > 0 {
println!("superdeduper update-compact-metadata && superdeduper empty-dirs")
} }
} }
@@ -171,7 +143,7 @@ fn print_videos(videos: &HashMap<String, CompactMetadata>, filter: Option<&Regex
humantime::Duration::from(Duration::from_secs(md.duration as u64)), humantime::Duration::from(Duration::from_secs(md.duration as u64)),
&name[MOVIE_DIR.len() + 1..] &name[MOVIE_DIR.len() + 1..]
); );
println!("mv '{}' '{}'", name, TO_BE_REMOVED_DIR); println!("rm '{}'", name);
} }
} }
@@ -197,6 +169,8 @@ enum Command {
about = "Write full metadata files and update compact file on changes" about = "Write full metadata files and update compact file on changes"
)] )]
UpdateAndCompactMetadata, UpdateAndCompactMetadata,
#[structopt(about = "Print directories with no movie files in them")]
EmptyDirs,
} }
#[derive(StructOpt)] #[derive(StructOpt)]
@@ -205,6 +179,13 @@ enum Command {
about = "Tool for pruning extra videos in collection" about = "Tool for pruning extra videos in collection"
)] )]
struct SuperDeduper { struct SuperDeduper {
#[structopt(
short = "r",
long = "root",
help = "Root directory to store files.",
default_value = MOVIE_DIR,
)]
root: String,
#[structopt( #[structopt(
short = "v", short = "v",
help = "Sets the level of verbosity", help = "Sets the level of verbosity",
@@ -232,14 +213,14 @@ fn main() -> Result<(), Box<dyn Error>> {
match app.cmd { match app.cmd {
Command::Samples => { Command::Samples => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
let videos = lib.videos()?; let videos = lib.videos()?;
let samples_re = Regex::new(r"(?i).*sample.*").unwrap(); let samples_re = Regex::new(r"(?i).*sample.*").unwrap();
print_videos(&videos, Some(&samples_re)); print_videos(&videos, Some(&samples_re));
} }
Command::Groups => { Command::Groups => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
let videos = lib.videos()?; let videos = lib.videos()?;
let mut video_groups: HashMap<String, Vec<(String, CompactMetadata)>> = HashMap::new(); let mut video_groups: HashMap<String, Vec<(String, CompactMetadata)>> = HashMap::new();
@@ -252,35 +233,38 @@ fn main() -> Result<(), Box<dyn Error>> {
print_video_groups(&video_groups); print_video_groups(&video_groups);
} }
Command::CompactMetadata => { Command::CompactMetadata => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
lib.compact_metadata()?; lib.compact_metadata()?;
} }
Command::PrintDupes => { Command::PrintDupes => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
let videos = lib.videos()?; print_dupes(&lib);
print_dupes(videos);
} }
Command::PrintAll => { Command::PrintAll => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
let videos = lib.videos()?; let videos = lib.videos()?;
print_all(videos); print_all(videos);
} }
Command::UpdateMetadata => { Command::UpdateMetadata => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
lib.update_metadata()?; lib.update_metadata()?;
} }
Command::UpdateAndCompactMetadata => { Command::UpdateAndCompactMetadata => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
let new_videos = lib.update_metadata()?; lib.update_metadata()?;
if !new_videos.is_empty() {
info!(
"{} new videos added, recompacting metadata",
new_videos.len()
);
lib.compact_metadata()?; lib.compact_metadata()?;
} }
Command::EmptyDirs => {
let lib = MovieLibrary::new(app.root);
let dirs = lib.empty_dirs()?;
let root = Path::new(&lib.root);
if !dirs.is_empty() {
println!("Empty directories:");
for d in dirs {
println!(r#"rm -rf "{}""#, root.join(d).to_string_lossy());
}
}
} }
} }
Ok(()) Ok(())

View File

@@ -37,7 +37,7 @@ fn test_simple_library() {
short_name: "mp3".to_string(), short_name: "mp3".to_string(),
long_name: "MP3 (MPEG audio layer 3)".to_string(), long_name: "MP3 (MPEG audio layer 3)".to_string(),
channels: 2, channels: 2,
channel_layout: "stereo".to_string(), channel_layout: Some("stereo".to_string()),
title: None, title: None,
language: None, language: None,
}], }],
@@ -49,7 +49,7 @@ fn test_simple_library() {
); );
} }
fn build_tuple<R>(path: &str, res: R) -> (String, CompactMetadata) fn build_tuple<R>(path: &str, res: R, bit_rate: usize) -> (String, CompactMetadata)
where where
R: Into<Resolution>, R: Into<Resolution>,
{ {
@@ -58,7 +58,7 @@ where
path.to_string(), path.to_string(),
CompactMetadata { CompactMetadata {
filename: format!("./{}", path), filename: format!("./{}", path),
bit_rate: 1, bit_rate,
duration: 1.0, duration: 1.0,
format_name: "test_format".to_string(), format_name: "test_format".to_string(),
size: 1, size: 1,
@@ -72,43 +72,71 @@ where
) )
} }
fn build_movie<R>(paths: Vec<(&str, R)>) -> Movie fn build_movie<R>(paths: Vec<(&str, R, usize)>) -> Movie
where where
R: Into<Resolution>, R: Into<Resolution>,
{ {
Movie { Movie {
files: paths files: paths
.into_iter() .into_iter()
.map(|(path, res)| build_tuple(path, res)) .map(|(path, res, bit_rate)| build_tuple(path, res, bit_rate))
.collect(), .collect(),
} }
} }
fn build_complex_metadata() -> HashMap<String, CompactMetadata> { fn build_complex_metadata() -> HashMap<String, CompactMetadata> {
vec![ vec![
build_tuple("Remake (2019)/new.mkv", (1920, 1080), 10),
build_tuple("Remake (1960)/old.mkv", (1920, 1080), 1),
build_tuple( build_tuple(
"One Movie With Year (2019)/abcdef123456789.mkv", "One Movie With Year (2019)/abcdef123456789.mkv",
(1920, 1080), (1920, 1080),
1,
), ),
build_tuple( build_tuple(
"One Movie With Two Parts (2019)/abcdef123456789 part 1.mkv", "One Movie With Two Parts (2019)/abcdef123456789 part 1.mkv",
(1280, 720), (1280, 720),
1,
), ),
build_tuple( build_tuple(
"One Movie With Two Parts (2019)/abcdef123456789 part 2.mkv", "One Movie With Two Parts (2019)/abcdef123456789 part 2.mkv",
(1280, 720), (1280, 720),
1,
), ),
build_tuple( build_tuple(
"Two Movies With Multi Parts (2019)/abcdef123456789 part 1.mkv", "Two Movies With Multi Parts (2019)/abcdef123456789 part 1.mkv",
(1280, 720), (1280, 720),
1000,
), ),
build_tuple( build_tuple(
"Two Movies With Multi Parts (2019)/abcdef123456789 part 2.mkv", "Two Movies With Multi Parts (2019)/abcdef123456789 part 2.mkv",
(1280, 720), (1280, 720),
1000,
), ),
build_tuple( build_tuple(
"Two Movies With Multi Parts (2019)/somethingelse.mkv", "Two Movies With Multi Parts (2019)/somethingelse.mkv",
(1920, 1080), (1920, 1080),
5000,
),
build_tuple(
"two movies different caps (2019)/larger.mkv",
(1920, 1080),
100,
),
build_tuple(
"Two Movies Different Caps (2019)/smaller.mkv",
(640, 480),
1,
),
build_tuple(
"Movie with and without year (2019)/with year.mkv",
(1920, 1080),
10,
),
build_tuple(
"Movie with and without year/without year.mkv",
(1280, 720),
1,
), ),
] ]
.into_iter() .into_iter()
@@ -118,33 +146,61 @@ fn build_complex_metadata() -> HashMap<String, CompactMetadata> {
fn build_complex_movies() -> Movies { fn build_complex_movies() -> Movies {
let mut m = Movies { let mut m = Movies {
movies: vec![ movies: vec![
build_movie(vec![("Remake (2019)/new.mkv", (1920, 1080), 10)]),
build_movie(vec![("Remake (1960)/old.mkv", (1920, 1080), 1)]),
build_movie(vec![( build_movie(vec![(
"One Movie With Year (2019)/abcdef123456789.mkv", "One Movie With Year (2019)/abcdef123456789.mkv",
(1920, 1080), (1920, 1080),
1,
)]), )]),
build_movie(vec![ build_movie(vec![
( (
"One Movie With Two Parts (2019)/abcdef123456789 part 1.mkv", "One Movie With Two Parts (2019)/abcdef123456789 part 1.mkv",
(1280, 720), (1280, 720),
1,
), ),
( (
"One Movie With Two Parts (2019)/abcdef123456789 part 2.mkv", "One Movie With Two Parts (2019)/abcdef123456789 part 2.mkv",
(1280, 720), (1280, 720),
1,
), ),
]), ]),
build_movie(vec![ build_movie(vec![
( (
"Two Movies With Multi Parts (2019)/abcdef123456789 part 1.mkv", "Two Movies With Multi Parts (2019)/abcdef123456789 part 1.mkv",
(1280, 720), (1280, 720),
1000,
), ),
( (
"Two Movies With Multi Parts (2019)/abcdef123456789 part 2.mkv", "Two Movies With Multi Parts (2019)/abcdef123456789 part 2.mkv",
(1280, 720), (1280, 720),
1000,
), ),
]), ]),
build_movie(vec![( build_movie(vec![(
"Two Movies With Multi Parts (2019)/somethingelse.mkv", "Two Movies With Multi Parts (2019)/somethingelse.mkv",
(1920, 1080), (1920, 1080),
5000,
)]),
build_movie(vec![(
"two movies different caps (2019)/larger.mkv",
(1920, 1080),
100,
)]),
build_movie(vec![(
"Two Movies Different Caps (2019)/smaller.mkv",
(640, 480),
1,
)]),
build_movie(vec![(
"Movie with and without year (2019)/with year.mkv",
(1920, 1080),
10,
)]),
build_movie(vec![(
"Movie with and without year/without year.mkv",
(1280, 720),
1,
)]), )]),
], ],
}; };
@@ -173,6 +229,150 @@ fn test_roundtrip_library() -> Result<(), Box<dyn Error>> {
let got = ml.movies().expect("failed to build movies"); let got = ml.movies().expect("failed to build movies");
assert_eq!(got.movies.len(), want.movies.len()); assert_eq!(got.movies.len(), want.movies.len());
assert_eq!(got, want); assert_eq!(got, want);
//assert_eq!(got, want, "Got {:#?}\nWant {:#?}", got, want); Ok(())
}
/// Asserts that `got` (borrowed movies, as returned by
/// `duplicate_candidates`) matches `want` (owned movies built by the test).
///
/// The owned expectations are turned into the same borrowed
/// `(keep, removals)` shape before the two lists are compared.
fn validate_duplicates(got: Vec<(&Movie, Vec<&Movie>)>, want: Vec<(Movie, Vec<Movie>)>) {
    let mut want_refs: Vec<(&Movie, Vec<&Movie>)> = Vec::with_capacity(want.len());
    for (keep, delete) in &want {
        want_refs.push((keep, delete.iter().collect()));
    }
    assert_eq!(got, want_refs);
}
/// Runs `duplicate_candidates` over the complex fixture library and checks
/// the three expected (keep, remove) pairs, listed in the order the
/// function sorts them (by the kept movie's first path).
#[test]
fn test_duplicate_candidates() -> Result<(), Box<dyn Error>> {
    let library = build_complex_movies();

    // The copy with a year is kept, the yearless copy is removed.
    let year_vs_yearless = (
        build_movie(vec![(
            "Movie with and without year (2019)/with year.mkv",
            (1920, 1080),
            10,
        )]),
        vec![build_movie(vec![(
            "Movie with and without year/without year.mkv",
            (1280, 720),
            1,
        )])],
    );

    // The single higher resolution file is kept over the two-part copy.
    let single_vs_multi_part = (
        build_movie(vec![(
            "Two Movies With Multi Parts (2019)/somethingelse.mkv",
            (1920, 1080),
            5000,
        )]),
        vec![build_movie(vec![
            (
                "Two Movies With Multi Parts (2019)/abcdef123456789 part 1.mkv",
                (1280, 720),
                1000,
            ),
            (
                "Two Movies With Multi Parts (2019)/abcdef123456789 part 2.mkv",
                (1280, 720),
                1000,
            ),
        ])],
    );

    // Directory names differing only in capitalization count as one movie.
    let differing_caps = (
        build_movie(vec![(
            "two movies different caps (2019)/larger.mkv",
            (1920, 1080),
            100,
        )]),
        vec![build_movie(vec![(
            "Two Movies Different Caps (2019)/smaller.mkv",
            (640, 480),
            1,
        )])],
    );

    let expected = vec![year_vs_yearless, single_vs_multi_part, differing_caps];
    validate_duplicates(library.duplicate_candidates(), expected);
    Ok(())
}
/// Two copies of one movie in the same directory: the 1920x1080 file must be
/// kept and the 1280x720 file reported for removal.
#[test]
fn test_fullmetal() -> Result<(), Box<dyn Error>> {
    let low_res = || {
        build_movie(vec![(
            "Full Metal Jacket (1987)/Full Metal Jacket.mp4",
            (1280, 720),
            2581935,
        )])
    };
    let high_res = || {
        build_movie(vec![(
            "Full Metal Jacket (1987)/1776f8e2fb614a6fb77a66cde601bb45.mkv",
            (1920, 1080),
            5719802,
        )])
    };

    let mut movies = Movies {
        movies: vec![low_res(), high_res()],
    };
    // Order the library by each movie's first path before looking for dupes.
    movies.movies.sort_by(|a, b| {
        let a_path = &a.files.first().unwrap().0;
        let b_path = &b.files.first().unwrap().0;
        a_path.cmp(b_path)
    });

    let expected = vec![(high_res(), vec![low_res()])];
    validate_duplicates(movies.duplicate_candidates(), expected);
    Ok(())
}
#[test]
fn test_keep_higher_res_lower_bit_rate() -> Result<(), Box<dyn Error>> {
let mut movies = Movies {
movies: vec![
build_movie(vec![(
"X Men The Last Stand (2006)/X.Men.The.Last.Stand.2006.1080p.BluRay.x264.DTS-ES.PRoDJi.mkv",
(1920, 800),
11349705,
)]),
build_movie(vec![(
"X Men The Last Stand (2006)/948f08a4ba784626ac13de77b77559dd.mkv",
(1920, 1080),
6574160,
)]),
],
};
movies.movies.sort_by(|a, b| {
a.files
.first()
.unwrap()
.0
.partial_cmp(&b.files.first().unwrap().0)
.unwrap()
});
let got = movies.duplicate_candidates();
let want = vec![(
build_movie(vec![(
"X Men The Last Stand (2006)/948f08a4ba784626ac13de77b77559dd.mkv",
(1920, 1080),
6574160,
)]),
vec![
build_movie(vec![(
"X Men The Last Stand (2006)/X.Men.The.Last.Stand.2006.1080p.BluRay.x264.DTS-ES.PRoDJi.mkv",
(1920, 800),
11349705,
)])
],
)];
validate_duplicates(got, want);
Ok(()) Ok(())
} }