Compare commits

...

15 Commits

Author SHA1 Message Date
d44c4da72f Use upstream human_format 2024-04-06 16:24:21 -07:00
48d92f6b67 Better debugging when metadata is invalid.
Handle missing subtitle encoding.
2022-10-15 10:01:06 -07:00
318ce583ea Make root directory a flag. 2022-07-23 21:33:36 -07:00
3a61e15449 Latest lock. 2022-07-23 21:06:49 -07:00
c46ae525fc Just remove dupes, don't move them. 2022-07-23 21:06:22 -07:00
e16d899c14 Update tests to match intended behavior. 2022-07-23 21:06:03 -07:00
e06d2419e5 Remove extra dbg!(). 2021-12-25 09:35:39 -08:00
4b1cf6c491 Fix movie size comparison.
Use largest movie pixel size (some movies have low res video streams
    embedded).
2021-12-25 09:30:55 -08:00
70174e9e49 Remove dashes and repeated spaces when comparing names for dupes. 2021-11-21 16:50:18 -08:00
b2ef1d3d3d Prefer higher resolution files. 2021-11-12 19:20:42 -08:00
708e44053e Ignore dashes and apostrophes when finding dupes. 2021-11-12 19:06:07 -08:00
37b4e1b4b2 Use parent envrc / default.nix. 2021-11-12 18:05:56 -08:00
4ba8e3e3ee Better error reporting when ffprobe fails. 2021-02-24 15:26:21 -08:00
872c1096a6 Better error log messaging. 2021-02-24 07:12:14 -08:00
7da8639881 Use TO_BE_REMOVED_DIR inplace of static string. 2021-02-21 09:42:21 -08:00
7 changed files with 334 additions and 333 deletions

1
.envrc
View File

@@ -1 +0,0 @@
use_nix

468
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -9,7 +9,7 @@ edition = "2018"
[dependencies] [dependencies]
failure = "0.1" failure = "0.1"
glob = "0.3" glob = "0.3"
human_format = { git ="https://github.com/wathiede/human-format-rs" } human_format = "1.1.0"
humantime = "1" humantime = "1"
lazy_static = "1.4" lazy_static = "1.4"
log = "0.4" log = "0.4"

View File

@@ -1,27 +0,0 @@
# Development shell definition: a Rust toolchain plus wasm / bundling tools.
let
# Import nixpkgs from the ambient NIX_PATH with the oxalica rust-overlay applied.
# NOTE(review): the overlay is fetched from the `master` tarball with no pinned
# revision or hash, so the resulting toolchain can change between evaluations.
pkgs = import <nixpkgs> {
overlays = [
(import (builtins.fetchTarball
"https://github.com/oxalica/rust-overlay/archive/master.tar.gz"))
];
};
# Latest stable toolchain from the overlay, with the `rust-src` extension added.
rust = pkgs.rust-bin.stable.latest.rust.override {
extensions = [ "rust-src" ];
};
in with pkgs;
pkgs.mkShell rec {
name = "rust";
# Packages made available inside the shell. `rust` refers to the overlay
# toolchain bound in the `let` above; the remaining names resolve through
# `with pkgs`.
buildInputs = [
openssl
pkg-config
cargo
rust
rustfmt
rust-analyzer
wasm-pack
wasm-bindgen-cli
nodePackages.rollup
];
}

View File

@@ -1,33 +1,24 @@
use std::cmp::Ordering; use std::{
use std::collections::HashMap; cmp::Ordering,
use std::collections::HashSet; collections::{HashMap, HashSet},
use std::env; env,
use std::ffi::OsStr; ffi::OsStr,
use std::fmt; fmt,
use std::fmt::Display; fmt::{Display, Formatter},
use std::fmt::Formatter; fs::File,
use std::fs::File; io::{BufReader, BufWriter},
use std::io::BufReader; path::{Path, PathBuf},
use std::io::BufWriter; process::Command,
use std::path::Path; str::FromStr,
use std::path::PathBuf; };
use std::process::Command;
use std::str::FromStr;
use failure::bail; use failure::{bail, Error, ResultExt};
use failure::Error;
use failure::ResultExt;
use glob::glob; use glob::glob;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use log::error; use log::{error, info};
use log::info; use rayon::{iter::ParallelBridge, prelude::ParallelIterator};
use rayon::iter::ParallelBridge;
use rayon::prelude::ParallelIterator;
use regex::Regex; use regex::Regex;
use serde::de; use serde::{de, de::Deserializer, Deserialize, Serialize};
use serde::de::Deserializer;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value; use serde_json::Value;
const FULL_METADATA_FILENAME: &str = "metadata.json"; const FULL_METADATA_FILENAME: &str = "metadata.json";
@@ -161,8 +152,8 @@ enum Stream {
}, },
#[serde(rename = "subtitle")] #[serde(rename = "subtitle")]
Subtitle { Subtitle {
codec_name: String, codec_name: Option<String>,
codec_long_name: String, codec_long_name: Option<String>,
tags: Option<Tags>, tags: Option<Tags>,
}, },
#[serde(rename = "attachment")] #[serde(rename = "attachment")]
@@ -218,8 +209,8 @@ pub struct AudioFormat {
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)] #[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
pub struct SubtitleFormat { pub struct SubtitleFormat {
short_name: String, short_name: Option<String>,
long_name: String, long_name: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
title: Option<String>, title: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
@@ -297,7 +288,7 @@ fn json_metadata_for_path<P: AsRef<Path> + AsRef<OsStr>>(path: P) -> Result<Stri
]) ])
.arg(Path::new("./").join(path)); .arg(Path::new("./").join(path));
info!(target: "json", "cmd {:?}", cmd); info!(target: "json", "cmd {:?}", cmd);
let output = cmd.output()?; let output = cmd.output().context(format!("failed to run {:?}", cmd))?;
if output.status.success() { if output.status.success() {
return Ok(String::from_utf8(output.stdout)?); return Ok(String::from_utf8(output.stdout)?);
} }
@@ -320,6 +311,30 @@ pub struct Movie {
} }
impl Movie { impl Movie {
/// Largest pixel area (`width * height`) among every video stream of every file
/// belonging to this movie.
///
/// Returns `None` when the movie has no files. If files exist but none of them
/// contains a video stream, the result is `Some(0)`.
fn max_pixel_count(&self) -> Option<usize> {
    if self.files.is_empty() {
        return None;
    }
    // Flatten all video streams across all files and keep the biggest area.
    let largest = self
        .files
        .iter()
        .flat_map(|(_, cmd)| cmd.video.iter())
        .map(|v| v.width * v.height)
        .max()
        .unwrap_or(usize::min_value());
    Some(largest)
}
/// Smallest pixel area (`width * height`) among every video stream of every file
/// belonging to this movie.
///
/// Returns `None` when the movie has no files. If files exist but none of them
/// contains a video stream, the result is `Some(usize::max_value())`.
fn min_pixel_count(&self) -> Option<usize> {
    if self.files.is_empty() {
        return None;
    }
    // Flatten all video streams across all files and keep the smallest area.
    let smallest = self
        .files
        .iter()
        .flat_map(|(_, cmd)| cmd.video.iter())
        .map(|v| v.width * v.height)
        .min()
        .unwrap_or(usize::max_value());
    Some(smallest)
}
fn min_bit_rate(&self) -> Option<usize> { fn min_bit_rate(&self) -> Option<usize> {
if self.files.is_empty() { if self.files.is_empty() {
None None
@@ -354,19 +369,28 @@ pub struct Movies {
impl Movies { impl Movies {
/// Find all movies with multiple copies. The returned vec contains a tuple of (Movie to keep, /// Find all movies with multiple copies. The returned vec contains a tuple of (Movie to keep,
/// One or more Movies to remove). The highest bit rate movie is kept. /// One or more Movies to remove). The highest resolution movie is kept.
/// Movies with differing years are considered distinct movies. /// Movies with differing years are considered distinct movies.
/// If there is a yearless movie and one or more movies with a year exist, then the yearless /// If there is a yearless movie and one or more movies with a year exist, then the yearless
/// movie will be removed /// movie will be removed
pub fn duplicate_candidates(&self) -> Vec<(&Movie, Vec<&Movie>)> { pub fn duplicate_candidates(&self) -> Vec<(&Movie, Vec<&Movie>)> {
lazy_static! {
static ref MULTIPLE_SPACES: Regex = Regex::new(r"\s+").unwrap();
}
let date_re = Regex::new(r"\(\d{4}\)$").unwrap(); let date_re = Regex::new(r"\(\d{4}\)$").unwrap();
let mut movie_counter = HashMap::new(); let mut movie_counter = HashMap::new();
let mut movies_without_date_counter = HashMap::new(); let mut movies_without_date_counter = HashMap::new();
for m in &self.movies { for m in &self.movies {
let (path, _cmd) = m.files.first().unwrap(); let (path, _cmd) = m.files.first().unwrap();
let parent = clean_path_parent(path) let parent = MULTIPLE_SPACES
.to_string_lossy() .replace_all(
.to_ascii_lowercase() &clean_path_parent(path)
.to_string_lossy()
.to_ascii_lowercase()
.replace("-", " ")
.replace("'", " "),
" ",
)
.to_string(); .to_string();
if date_re.is_match(&parent) { if date_re.is_match(&parent) {
movie_counter.entry(parent).or_insert(Vec::new()).push(m); movie_counter.entry(parent).or_insert(Vec::new()).push(m);
@@ -388,8 +412,8 @@ impl Movies {
for (_parent, mut movies) in movie_counter.into_iter() { for (_parent, mut movies) in movie_counter.into_iter() {
if movies.len() > 1 { if movies.len() > 1 {
// Sort, lowest bit_rate movie first // Sort, lowest resolution movie first
movies.sort_by(|a, b| a.min_bit_rate().cmp(&b.min_bit_rate())); movies.sort_by(|a, b| a.max_pixel_count().cmp(&b.max_pixel_count()));
// Flip order, we care about the largest. // Flip order, we care about the largest.
movies.reverse(); movies.reverse();
// Take the largest image, return the rest for removal. // Take the largest image, return the rest for removal.
@@ -522,8 +546,8 @@ impl MovieLibrary {
} = s } = s
{ {
Some(SubtitleFormat { Some(SubtitleFormat {
short_name: codec_name.to_string(), short_name: codec_name.clone(),
long_name: codec_long_name.to_string(), long_name: codec_long_name.clone(),
title: tags.as_ref().and_then(|t| t.title()), title: tags.as_ref().and_then(|t| t.title()),
language: tags.as_ref().and_then(|t| t.language()), language: tags.as_ref().and_then(|t| t.language()),
}) })
@@ -617,11 +641,17 @@ impl MovieLibrary {
Some((path.to_string_lossy().into_owned(), json)) Some((path.to_string_lossy().into_owned(), json))
} }
Err(e) => { Err(e) => {
error!("{}", e); error!("Failed to open {}: {}", path.to_string_lossy(), e);
None None
} }
} }
}) })
.inspect(|(path, json)| {
if let Err(err) = serde_json::from_str::<Metadata>(&json) {
error!("Can't parse metadata for {}: {}", path, err);
error!("{}", json);
}
})
.map(|(path, json)| (path, serde_json::from_str::<Value>(&json).unwrap())) .map(|(path, json)| (path, serde_json::from_str::<Value>(&json).unwrap()))
.collect(); .collect();
let new_videos = metadata.keys().cloned().collect(); let new_videos = metadata.keys().cloned().collect();

View File

@@ -1,11 +1,6 @@
use std::collections::HashMap; use std::{collections::HashMap, error::Error, io::Write, path::Path, time::Duration};
use std::error::Error;
use std::io::Write;
use std::path::Path;
use std::time::Duration;
use human_format::Formatter; use human_format::{Formatter, Scales};
use human_format::Scales;
use humantime; use humantime;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use log::info; use log::info;
@@ -13,9 +8,7 @@ use regex::Regex;
use structopt::StructOpt; use structopt::StructOpt;
use tabwriter::TabWriter; use tabwriter::TabWriter;
use superdeduper::clean_path_parent; use superdeduper::{clean_path_parent, CompactMetadata, MovieLibrary};
use superdeduper::CompactMetadata;
use superdeduper::MovieLibrary;
const MOVIE_DIR: &str = "/home/wathiede/Movies"; const MOVIE_DIR: &str = "/home/wathiede/Movies";
const TO_BE_REMOVED_DIR: &str = "/home/wathiede/to-be-deleted/"; const TO_BE_REMOVED_DIR: &str = "/home/wathiede/to-be-deleted/";
@@ -71,10 +64,7 @@ fn print_dupes(lib: &MovieLibrary) {
delete_paths.sort(); delete_paths.sort();
let root = Path::new(&lib.root); let root = Path::new(&lib.root);
for path in &delete_paths { for path in &delete_paths {
println!( println!(r#"rm "{}""#, root.join(path).to_string_lossy(),);
r#"mv "{}" /storage/media/to-be-deleted/"#,
root.join(path).to_string_lossy()
);
} }
if delete_paths.len() > 0 { if delete_paths.len() > 0 {
println!("superdeduper update-compact-metadata && superdeduper empty-dirs") println!("superdeduper update-compact-metadata && superdeduper empty-dirs")
@@ -153,7 +143,7 @@ fn print_videos(videos: &HashMap<String, CompactMetadata>, filter: Option<&Regex
humantime::Duration::from(Duration::from_secs(md.duration as u64)), humantime::Duration::from(Duration::from_secs(md.duration as u64)),
&name[MOVIE_DIR.len() + 1..] &name[MOVIE_DIR.len() + 1..]
); );
println!("mv '{}' '{}'", name, TO_BE_REMOVED_DIR); println!("rm '{}'", name);
} }
} }
@@ -189,6 +179,13 @@ enum Command {
about = "Tool for pruning extra videos in collection" about = "Tool for pruning extra videos in collection"
)] )]
struct SuperDeduper { struct SuperDeduper {
#[structopt(
short = "r",
long = "root",
help = "Root directory to store files.",
default_value = MOVIE_DIR,
)]
root: String,
#[structopt( #[structopt(
short = "v", short = "v",
help = "Sets the level of verbosity", help = "Sets the level of verbosity",
@@ -216,14 +213,14 @@ fn main() -> Result<(), Box<dyn Error>> {
match app.cmd { match app.cmd {
Command::Samples => { Command::Samples => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
let videos = lib.videos()?; let videos = lib.videos()?;
let samples_re = Regex::new(r"(?i).*sample.*").unwrap(); let samples_re = Regex::new(r"(?i).*sample.*").unwrap();
print_videos(&videos, Some(&samples_re)); print_videos(&videos, Some(&samples_re));
} }
Command::Groups => { Command::Groups => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
let videos = lib.videos()?; let videos = lib.videos()?;
let mut video_groups: HashMap<String, Vec<(String, CompactMetadata)>> = HashMap::new(); let mut video_groups: HashMap<String, Vec<(String, CompactMetadata)>> = HashMap::new();
@@ -236,30 +233,30 @@ fn main() -> Result<(), Box<dyn Error>> {
print_video_groups(&video_groups); print_video_groups(&video_groups);
} }
Command::CompactMetadata => { Command::CompactMetadata => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
lib.compact_metadata()?; lib.compact_metadata()?;
} }
Command::PrintDupes => { Command::PrintDupes => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
print_dupes(&lib); print_dupes(&lib);
} }
Command::PrintAll => { Command::PrintAll => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
let videos = lib.videos()?; let videos = lib.videos()?;
print_all(videos); print_all(videos);
} }
Command::UpdateMetadata => { Command::UpdateMetadata => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
lib.update_metadata()?; lib.update_metadata()?;
} }
Command::UpdateAndCompactMetadata => { Command::UpdateAndCompactMetadata => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
lib.update_metadata()?; lib.update_metadata()?;
lib.compact_metadata()?; lib.compact_metadata()?;
} }
Command::EmptyDirs => { Command::EmptyDirs => {
let lib = MovieLibrary::new(MOVIE_DIR); let lib = MovieLibrary::new(app.root);
let dirs = lib.empty_dirs()?; let dirs = lib.empty_dirs()?;
let root = Path::new(&lib.root); let root = Path::new(&lib.root);
if !dirs.is_empty() { if !dirs.is_empty() {

View File

@@ -335,7 +335,7 @@ fn test_fullmetal() -> Result<(), Box<dyn Error>> {
} }
#[test] #[test]
fn test_keep_lower_res_higher_bit_rate() -> Result<(), Box<dyn Error>> { fn test_keep_higher_res_lower_bit_rate() -> Result<(), Box<dyn Error>> {
let mut movies = Movies { let mut movies = Movies {
movies: vec![ movies: vec![
build_movie(vec![( build_movie(vec![(
@@ -361,15 +361,17 @@ fn test_keep_lower_res_higher_bit_rate() -> Result<(), Box<dyn Error>> {
let got = movies.duplicate_candidates(); let got = movies.duplicate_candidates();
let want = vec![( let want = vec![(
build_movie(vec![( build_movie(vec![(
"X Men The Last Stand (2006)/X.Men.The.Last.Stand.2006.1080p.BluRay.x264.DTS-ES.PRoDJi.mkv",
(1920, 800),
11349705,
)]),
vec![build_movie(vec![(
"X Men The Last Stand (2006)/948f08a4ba784626ac13de77b77559dd.mkv", "X Men The Last Stand (2006)/948f08a4ba784626ac13de77b77559dd.mkv",
(1920, 1080), (1920, 1080),
6574160, 6574160,
)])], )]),
vec![
build_movie(vec![(
"X Men The Last Stand (2006)/X.Men.The.Last.Stand.2006.1080p.BluRay.x264.DTS-ES.PRoDJi.mkv",
(1920, 800),
11349705,
)])
],
)]; )];
validate_duplicates(got, want); validate_duplicates(got, want);
Ok(()) Ok(())