Add is_multidisc, move big tests to separate module.
Started work on a method movies() that groups multipart movies together.
This commit is contained in:
parent
d2866bace7
commit
04585e8d24
146
src/lib.rs
146
src/lib.rs
@ -1,4 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::env;
|
||||
use std::ffi::OsStr;
|
||||
use std::fmt;
|
||||
@ -21,6 +22,7 @@ use log::error;
|
||||
use log::info;
|
||||
use rayon::iter::ParallelBridge;
|
||||
use rayon::prelude::ParallelIterator;
|
||||
use regex::Regex;
|
||||
use serde::de;
|
||||
use serde::de::Deserializer;
|
||||
use serde::Deserialize;
|
||||
@ -59,7 +61,23 @@ where
|
||||
T::from_str(&s).map_err(de::Error::custom)
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
||||
/// Reports whether `names` look like the parts of one multi-disc movie.
///
/// Every ASCII digit in each name is replaced with `#`; the names form a
/// multi-disc set when they all collapse to the same pattern, e.g.
/// `Movie CD1.avi` and `Movie CD2.avi` both become `Movie CD#.avi`.
///
/// Returns `false` when fewer than two names are given: an empty list or a
/// single file is not a multi-part set.
pub fn is_multidisc(names: &[String]) -> bool {
    // TODO(wathiede): smarter version that helps with:
    //   The Hudsucker Proxy:
    //     1920x1080 4.78Gi 1h 50m 45s 54151c3b9a2a4773958f848efecefc3b.mkv
    //     720x416 736.51Mi 50m 40s The Hudsucker Proxy CD1.avi
    //     720x416 736.49Mi 1h 3s The Hudsucker Proxy CD2.avi
    if names.len() < 2 {
        return false;
    }

    // Mask digits so that names differing only in their disc/part number
    // become identical.
    let patterns: HashSet<String> = names
        .iter()
        .map(|name| {
            name.chars()
                .map(|c| if c.is_ascii_digit() { '#' } else { c })
                .collect()
        })
        .collect();
    patterns.len() == 1
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
|
||||
struct Format {
|
||||
#[serde(deserialize_with = "from_str")]
|
||||
bit_rate: usize,
|
||||
@ -71,7 +89,7 @@ struct Format {
|
||||
size: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
||||
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
|
||||
pub struct Tags(HashMap<String, String>);
|
||||
|
||||
impl Tags {
|
||||
@ -83,7 +101,7 @@ impl Tags {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
||||
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
|
||||
#[serde(tag = "codec_type")]
|
||||
enum Stream {
|
||||
#[serde(rename = "video")]
|
||||
@ -122,13 +140,13 @@ enum Stream {
|
||||
Data {},
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
||||
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
|
||||
struct Metadata {
|
||||
format: Format,
|
||||
streams: Vec<Stream>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
||||
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
|
||||
pub struct VideoFormat {
|
||||
short_name: String,
|
||||
long_name: String,
|
||||
@ -154,7 +172,7 @@ impl Default for VideoFormat {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
||||
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
|
||||
pub struct AudioFormat {
|
||||
short_name: String,
|
||||
long_name: String,
|
||||
@ -166,7 +184,7 @@ pub struct AudioFormat {
|
||||
language: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
||||
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
|
||||
pub struct SubtitleFormat {
|
||||
short_name: String,
|
||||
long_name: String,
|
||||
@ -176,7 +194,7 @@ pub struct SubtitleFormat {
|
||||
language: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
||||
#[derive(Clone, Deserialize, Debug, PartialEq, Serialize)]
|
||||
pub struct CompactMetadata {
|
||||
pub bit_rate: usize,
|
||||
pub duration: f32,
|
||||
@ -222,12 +240,13 @@ impl Default for CompactMetadata {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Serialize)]
|
||||
#[derive(Deserialize, Debug, PartialEq, Serialize)]
|
||||
pub struct MetadataFile {
|
||||
#[serde(flatten)]
|
||||
metadata: HashMap<String, Metadata>,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Debug)]
|
||||
pub struct MovieLibrary {
|
||||
root: String,
|
||||
}
|
||||
@ -263,6 +282,34 @@ lazy_static! {
|
||||
static ref MOVIE_EXTS: Vec<&'static str> = vec!["avi", "m4v", "mkv", "mov", "mp4"];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
struct Movie {
|
||||
files: Vec<(String, CompactMetadata)>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Movies {
|
||||
movies: Vec<Movie>,
|
||||
}
|
||||
|
||||
fn movies_from_paths_compact_metadata(p_cmd: HashMap<String, CompactMetadata>) -> Movies {
|
||||
// file path
|
||||
let files_to_movies: HashMap<String, Vec<(String, CompactMetadata)>> = HashMap::new();
|
||||
// TODO(wathiede):
|
||||
// - walk over every item, use something based on is_multidisc to pack multifile movies
|
||||
// together.
|
||||
// - then walk over `files` and create a Movie for each
|
||||
// - then store those Movie structs in Movies
|
||||
|
||||
let movies = p_cmd
|
||||
.into_iter()
|
||||
.map(|(p, cmd)| Movie {
|
||||
files: vec![(p, cmd)],
|
||||
})
|
||||
.collect();
|
||||
Movies { movies }
|
||||
}
|
||||
|
||||
impl MovieLibrary {
|
||||
pub fn new<S: Into<String>>(root: S) -> MovieLibrary {
|
||||
MovieLibrary { root: root.into() }
|
||||
@ -455,11 +502,18 @@ impl MovieLibrary {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn videos(
|
||||
&self,
|
||||
_include_stale: bool,
|
||||
) -> Result<(HashMap<String, CompactMetadata>), Error> {
|
||||
// TODO(wathiede): implement include_stale.
|
||||
pub fn movies(&self) -> Result<Movies, Error> {
|
||||
let path = Path::new(&self.root).join(COMPACT_METADATA_FILENAME);
|
||||
// Open the file in read-only mode with buffer.
|
||||
let f = File::open(&path).context(format!("open {}", path.display()))?;
|
||||
let r = BufReader::new(f);
|
||||
|
||||
let p_cmd: HashMap<String, CompactMetadata> = serde_json::from_reader(r)
|
||||
.context(format!("serde_json::from_reader {}", path.display()))?;
|
||||
Ok(movies_from_paths_compact_metadata(p_cmd))
|
||||
}
|
||||
|
||||
pub fn videos(&self) -> Result<(HashMap<String, CompactMetadata>), Error> {
|
||||
let path = Path::new(&self.root).join(COMPACT_METADATA_FILENAME);
|
||||
// Open the file in read-only mode with buffer.
|
||||
let f = File::open(&path).context(format!("open {}", path.display()))?;
|
||||
@ -470,15 +524,13 @@ impl MovieLibrary {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod movielibrary_test;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn testdata_dir() -> String {
|
||||
format!("{}/testdata", env::var("CARGO_MANIFEST_DIR").unwrap())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn largest_dimension() {
|
||||
let md = CompactMetadata {
|
||||
@ -521,50 +573,18 @@ mod tests {
|
||||
assert_eq!(md.largest_dimension(), Some(Resolution(640, 480)));
|
||||
}
|
||||
|
||||
/*
|
||||
#[test]
|
||||
fn test_movies() {
|
||||
let lib = MovieLibrary::new(format!("{}/Movies", testdata_dir()));
|
||||
let movies = lib.movies(true).expect("failed to get movies");
|
||||
let mut got = movies.keys().collect::<Vec<_>>();
|
||||
got.sort();
|
||||
let want = [
|
||||
"Aladdin (1992)/Aladdin.1992.720p.BRrip.x264.GAZ.YIFY.mp4",
|
||||
"Aladdin (2019)/4fe12adfdf4b4e9daa4f1366452d3431.mkv",
|
||||
"Higher Learning/Higher Learning CD1.avi",
|
||||
"Higher Learning/Higher Learning CD2.avi",
|
||||
"J0hn W1ck (2014)/J0hn W1ck (2014) m720p x264 aac.m4v",
|
||||
"J0hn W1ck (2014)/J0hn W1ck (2014) m720p x264 aac.sample.m4v",
|
||||
"Stale Sample (2019)/Stale Sample (2019) m720p x264 aac.sample.m4v",
|
||||
"The Hudsucker Proxy (1994)/54151c3b9a2a4773958f848efecefc3b.mkv",
|
||||
"The Hudsucker Proxy (1994)/The Hudsucker Proxy CD1.avi",
|
||||
"The Hudsucker Proxy (1994)/The Hudsucker Proxy CD2.avi",
|
||||
];
|
||||
|
||||
assert_eq!(got, want);
|
||||
fn test_multidisc() {
|
||||
// Empty set is not a multidisc set.
|
||||
assert!(!is_multidisc(&vec![]));
|
||||
assert!(is_multidisc(&vec![
|
||||
"Unbearable.Lightness.Of.Being Part 1.avi".to_string(),
|
||||
"Unbearable.Lightness.Of.Being Part 2.avi".to_string(),
|
||||
"Unbearable.Lightness.Of.Being Part 3.avi".to_string(),
|
||||
]));
|
||||
assert!(!is_multidisc(&vec![
|
||||
"Scent Of A Woman 1992 DvDrip[Eng]-greenbud1969.avi".to_string(),
|
||||
"Scent.Of.A.Woman.1992.1080p.BluRay.x264.AC3.mp4".to_string(),
|
||||
]));
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
#[test]
|
||||
fn test_filter_stale() {
|
||||
let lib = MovieLibrary::new(format!("{}/Movies", testdata_dir()));
|
||||
let movies = lib.movies(false).expect("failed to get movies");
|
||||
let mut got = movies.keys().collect::<Vec<_>>();
|
||||
got.sort();
|
||||
let want = [
|
||||
"Aladdin (1992)/Aladdin.1992.720p.BRrip.x264.GAZ.YIFY.mp4",
|
||||
"Aladdin (2019)/4fe12adfdf4b4e9daa4f1366452d3431.mkv",
|
||||
"Higher Learning/Higher Learning CD1.avi",
|
||||
"Higher Learning/Higher Learning CD2.avi",
|
||||
"J0hn W1ck (2014)/J0hn W1ck (2014) m720p x264 aac.m4v",
|
||||
"J0hn W1ck (2014)/J0hn W1ck (2014) m720p x264 aac.sample.m4v",
|
||||
"The Hudsucker Proxy (1994)/54151c3b9a2a4773958f848efecefc3b.mkv",
|
||||
"The Hudsucker Proxy (1994)/The Hudsucker Proxy CD1.avi",
|
||||
"The Hudsucker Proxy (1994)/The Hudsucker Proxy CD2.avi",
|
||||
];
|
||||
|
||||
assert_eq!(got, want);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
89
src/main.rs
89
src/main.rs
@ -8,11 +8,13 @@ use std::time::Duration;
|
||||
use human_format::Formatter;
|
||||
use human_format::Scales;
|
||||
use humantime;
|
||||
use lazy_static::lazy_static;
|
||||
use log::info;
|
||||
use regex::Regex;
|
||||
use structopt::StructOpt;
|
||||
use tabwriter::TabWriter;
|
||||
|
||||
use superdeduper::is_multidisc;
|
||||
use superdeduper::CompactMetadata;
|
||||
use superdeduper::MovieLibrary;
|
||||
|
||||
@ -22,14 +24,80 @@ const TO_BE_REMOVED_DIR: &str = "/storage/media/to-be-deleted/";
|
||||
/// Returns the parent directory of `path` as an owned `PathBuf`.
///
/// A trailing ` (YYYY)` year on the directory name is left in place. Paths
/// that have no parent (the empty path or a filesystem root) yield an empty
/// `PathBuf` instead of panicking.
fn clean_path_parent<P: AsRef<Path>>(path: P) -> PathBuf {
    // Stay in `Path` space: no UTF-8 round trip is needed to copy the parent.
    path.as_ref()
        .parent()
        .map(Path::to_path_buf)
        .unwrap_or_default()
}
|
||||
|
||||
fn print_dupes(videos: HashMap<String, CompactMetadata>) {}
|
||||
lazy_static! {
|
||||
static ref CLEAN_TITLE_CHARS: Regex = Regex::new("[^ 0-9[:alpha:]]").unwrap();
|
||||
}
|
||||
|
||||
fn normalize(path: &str) -> String {
|
||||
CLEAN_TITLE_CHARS
|
||||
.replace_all(&path, "")
|
||||
.to_ascii_lowercase()
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref YEAR_SUFFIX: Regex = Regex::new(r" \d{4}$").unwrap();
|
||||
}
|
||||
fn print_dupes(videos: HashMap<String, CompactMetadata>) {
|
||||
let mut video_groups: HashMap<String, Vec<(String, CompactMetadata)>> = HashMap::new();
|
||||
for (name, md) in videos.into_iter() {
|
||||
let clean_name = normalize(clean_path_parent(&name).to_str().unwrap());
|
||||
let paths = video_groups.entry(clean_name).or_insert(Vec::new());
|
||||
paths.push((name.to_string(), md));
|
||||
}
|
||||
|
||||
let mut names = video_groups.keys().collect::<Vec<_>>();
|
||||
names.sort();
|
||||
|
||||
for name in &names {
|
||||
if YEAR_SUFFIX.is_match(&name) {
|
||||
let yearless = &name[..&name.len() - 5];
|
||||
info!("is '{}' in map", yearless);
|
||||
if let Some(yearless_vg) = video_groups.get(yearless) {
|
||||
println!("Possible dupe between movie with year and no year:");
|
||||
println!(" {:?}", video_groups.get(name.as_str()).unwrap());
|
||||
println!(" {:?}", yearless_vg);
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut fmtr = Formatter::new();
|
||||
fmtr.with_separator("");
|
||||
fmtr.with_scales(Scales::Binary());
|
||||
for name in names {
|
||||
let possible_dupes = &video_groups[name];
|
||||
if possible_dupes.len() < 2 {
|
||||
continue;
|
||||
}
|
||||
let paths: Vec<String> = possible_dupes
|
||||
.iter()
|
||||
.map(|(name, _)| name.to_string())
|
||||
.collect();
|
||||
if is_multidisc(&paths) {
|
||||
continue;
|
||||
}
|
||||
let mut file: Vec<_> = video_groups[name].iter().collect();
|
||||
file.sort_by(|(n1, _), (n2, _)| n1.partial_cmp(n2).unwrap());
|
||||
println!("{}:", name);
|
||||
for (p, md) in file {
|
||||
println!(
|
||||
" {:>9} {:>9} {} {}",
|
||||
md.largest_dimension().unwrap(),
|
||||
fmtr.format(md.size as f64),
|
||||
humantime::Duration::from(Duration::from_secs(md.duration as u64)),
|
||||
&p[p.rfind("/").unwrap() + 1..]
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn print_all(videos: HashMap<String, CompactMetadata>) {
|
||||
let mut names = videos.keys().collect::<Vec<_>>();
|
||||
@ -40,7 +108,6 @@ fn print_all(videos: HashMap<String, CompactMetadata>) {
|
||||
fmtr.with_scales(Scales::Binary());
|
||||
let mut tw = TabWriter::new(vec![]);
|
||||
for name in names {
|
||||
let clean_name = clean_path_parent(&name);
|
||||
let md = &videos[name];
|
||||
write!(
|
||||
&mut tw,
|
||||
@ -57,7 +124,7 @@ fn print_all(videos: HashMap<String, CompactMetadata>) {
|
||||
println!("{}", String::from_utf8(tw.into_inner().unwrap()).unwrap());
|
||||
}
|
||||
|
||||
fn print_video_groups(video_groups: &HashMap<PathBuf, Vec<(String, CompactMetadata)>>) {
|
||||
fn print_video_groups(video_groups: &HashMap<String, Vec<(String, CompactMetadata)>>) {
|
||||
let mut names = video_groups.keys().collect::<Vec<_>>();
|
||||
names.sort();
|
||||
|
||||
@ -71,7 +138,7 @@ fn print_video_groups(video_groups: &HashMap<PathBuf, Vec<(String, CompactMetada
|
||||
}
|
||||
let mut file: Vec<_> = video_groups[name].iter().collect();
|
||||
file.sort_by(|(n1, _), (n2, _)| n1.partial_cmp(n2).unwrap());
|
||||
println!("{}:", name.display());
|
||||
println!("{}:", name);
|
||||
for (p, md) in file {
|
||||
println!(
|
||||
" {:>9} {:>9} {} {}",
|
||||
@ -166,18 +233,18 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
match app.cmd {
|
||||
Command::Samples => {
|
||||
let lib = MovieLibrary::new(MOVIE_DIR);
|
||||
let videos = lib.videos(false)?;
|
||||
let videos = lib.videos()?;
|
||||
|
||||
let samples_re = Regex::new(r"(?i).*sample.*").unwrap();
|
||||
print_videos(&videos, Some(&samples_re));
|
||||
}
|
||||
Command::Groups => {
|
||||
let lib = MovieLibrary::new(MOVIE_DIR);
|
||||
let videos = lib.videos(false)?;
|
||||
let videos = lib.videos()?;
|
||||
|
||||
let mut video_groups: HashMap<PathBuf, Vec<(String, CompactMetadata)>> = HashMap::new();
|
||||
let mut video_groups: HashMap<String, Vec<(String, CompactMetadata)>> = HashMap::new();
|
||||
for (name, md) in videos.into_iter() {
|
||||
let clean_name = clean_path_parent(&name);
|
||||
let clean_name = normalize(clean_path_parent(&name).to_str().unwrap());
|
||||
let paths = video_groups.entry(clean_name).or_insert(Vec::new());
|
||||
paths.push((name.to_string(), md));
|
||||
}
|
||||
@ -190,13 +257,13 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
}
|
||||
Command::PrintDupes => {
|
||||
let lib = MovieLibrary::new(MOVIE_DIR);
|
||||
let videos = lib.videos(false)?;
|
||||
let videos = lib.videos()?;
|
||||
|
||||
print_dupes(videos);
|
||||
}
|
||||
Command::PrintAll => {
|
||||
let lib = MovieLibrary::new(MOVIE_DIR);
|
||||
let videos = lib.videos(false)?;
|
||||
let videos = lib.videos()?;
|
||||
|
||||
print_all(videos);
|
||||
}
|
||||
|
||||
45
src/movielibrary_test.rs
Normal file
45
src/movielibrary_test.rs
Normal file
@ -0,0 +1,45 @@
|
||||
use super::*;
|
||||
|
||||
/// Returns the crate's `testdata` directory, located under the directory
/// named by the `CARGO_MANIFEST_DIR` environment variable.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not set.
fn testdata_dir() -> PathBuf {
    // `var_os` + `join` keeps the path in OS-string form, so a manifest
    // directory that is not valid UTF-8 still works.
    env::var_os("CARGO_MANIFEST_DIR")
        .map(PathBuf::from)
        .expect("CARGO_MANIFEST_DIR must be set (cargo sets it when running tests)")
        .join("testdata")
}
|
||||
|
||||
#[test]
|
||||
fn test_library() {
|
||||
let ml = MovieLibrary::new(testdata_dir().join("simple").to_str().unwrap());
|
||||
assert_eq!(
|
||||
ml.movies().expect("failed to build movies"),
|
||||
Movies {
|
||||
movies: vec![Movie {
|
||||
files: vec![(
|
||||
"One Movie With Year (2018)/abcdef123456789.mkv".to_string(),
|
||||
CompactMetadata {
|
||||
bit_rate: 100000,
|
||||
duration: 3600.0,
|
||||
filename: "./One Movie With Year (2018)/abcdef123456789.mkv".to_string(),
|
||||
format_name: "mkv".to_string(),
|
||||
size: 2000000,
|
||||
video: vec![VideoFormat {
|
||||
short_name: "mpeg4".to_string(),
|
||||
long_name: "MPEG-4 part 2".to_string(),
|
||||
height: 362,
|
||||
width: 660,
|
||||
title: None,
|
||||
language: None,
|
||||
}],
|
||||
audio: vec![AudioFormat {
|
||||
short_name: "mp3".to_string(),
|
||||
long_name: "MP3 (MPEG audio layer 3)".to_string(),
|
||||
channels: 2,
|
||||
channel_layout: "stereo".to_string(),
|
||||
title: None,
|
||||
language: None,
|
||||
}],
|
||||
subtitle: Vec::new(),
|
||||
}
|
||||
)]
|
||||
}]
|
||||
}
|
||||
);
|
||||
}
|
||||
26
testdata/simple/metadata.compact.json
vendored
Normal file
26
testdata/simple/metadata.compact.json
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
{
|
||||
"One Movie With Year (2018)/abcdef123456789.mkv": {
|
||||
"bit_rate": 100000,
|
||||
"duration": 3600.0,
|
||||
"filename": "./One Movie With Year (2018)/abcdef123456789.mkv",
|
||||
"format_name": "mkv",
|
||||
"size": 2000000,
|
||||
"video": [
|
||||
{
|
||||
"short_name": "mpeg4",
|
||||
"long_name": "MPEG-4 part 2",
|
||||
"height": 362,
|
||||
"width": 660
|
||||
}
|
||||
],
|
||||
"audio": [
|
||||
{
|
||||
"short_name": "mp3",
|
||||
"long_name": "MP3 (MPEG audio layer 3)",
|
||||
"channels": 2,
|
||||
"channel_layout": "stereo"
|
||||
}
|
||||
],
|
||||
"subtitle": []
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user