Use ffprobe to load per-file metadata.

Build single metadata.json for whole library.
Have basic load into struct, a subset of useful fields from metadata.json.
This commit is contained in:
2019-11-02 22:09:50 -07:00
parent 86fbf78a73
commit bf9d5b7c11
5 changed files with 226798 additions and 53 deletions

View File

@@ -1,33 +1,126 @@
use std::collections::HashMap;
use std::env;
use std::error::Error;
use std::ffi::OsStr;
use std::fmt;
use std::fmt::Display;
use std::fmt::Formatter;
use std::fs::File;
use std::io::BufReader;
use std::io::BufWriter;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use std::str::FromStr;
use failure::bail;
use failure::Error;
use failure::ResultExt;
use glob::glob;
use lazy_static::lazy_static;
use log::error;
use log::info;
use rayon::iter::ParallelBridge;
use rayon::prelude::ParallelIterator;
use serde::de;
use serde::de::Deserializer;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value;
#[derive(Clone, Deserialize, Debug)]
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct Resolution(usize, usize);
impl fmt::Display for Resolution {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl Display for Resolution {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
let v = format!("{}x{}", self.0, self.1);
f.pad(&v)
}
}
#[derive(Clone, Deserialize, Debug)]
pub struct Metadata {
pub size: usize,
pub dimension: Resolution,
pub duration_text: String,
pub duration: f32,
fn option_from_str<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
where
T: FromStr,
T::Err: Display,
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
T::from_str(&s).map(Some).map_err(de::Error::custom)
}
#[derive(Deserialize, Debug)]
fn from_str<'de, T, D>(deserializer: D) -> Result<T, D::Error>
where
T: FromStr,
T::Err: Display,
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
T::from_str(&s).map_err(de::Error::custom)
}
/// Subset of the container-level `format` object of a metadata entry
/// (presumably the `-show_format` section of ffprobe's JSON output, given the
/// flags used in `json_metadata_for_path` — TODO confirm).
///
/// NOTE(review): the numeric fields are deserialized from JSON *strings* via
/// `from_str` / `option_from_str`, but the derived `Serialize` writes them as
/// JSON numbers, so serialized output cannot be read back by this struct.
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct Format {
/// Optional; absent keys fall back to `None`, present values are parsed from a string.
#[serde(default, deserialize_with = "option_from_str")]
bit_rate: Option<usize>,
/// Parsed from a string; presumably seconds — TODO confirm.
#[serde(deserialize_with = "from_str")]
duration: f32,
filename: String,
format_name: String,
/// Parsed from a string; presumably bytes — TODO confirm.
#[serde(deserialize_with = "from_str")]
size: usize,
}
// TODO(wathiede): make stream an enum with the tag type stored in codec_type?
// NOTE(review): the enum below already is internally tagged on `codec_type`,
// so this TODO looks stale — confirm and remove.
/// One entry of a metadata entry's `streams` array, selected by the value of
/// its `codec_type` key. Only video streams keep any fields; the other
/// variants are empty and exist so those stream kinds still deserialize.
#[derive(Clone, Deserialize, Debug, Serialize)]
#[serde(tag = "codec_type")]
pub enum Stream {
#[serde(rename = "video")]
Video {
/// Optional; absent keys fall back to `None`, present values are parsed from a string.
#[serde(default, deserialize_with = "option_from_str")]
bit_rate: Option<usize>,
codec_name: String,
codec_long_name: String,
coded_height: usize,
coded_width: usize,
display_aspect_ratio: String,
/// Parsed from a string; a missing key falls back to `f32::default()` (0.0).
#[serde(default, deserialize_with = "from_str")]
duration: f32,
height: usize,
width: usize,
},
#[serde(rename = "audio")]
Audio {},
#[serde(rename = "subtitle")]
Subtitle {},
#[serde(rename = "attachment")]
Attachment {},
#[serde(rename = "data")]
Data {},
}
impl Stream {
pub fn dimension(&self) -> Option<Resolution> {
None
}
}
#[derive(Clone, Deserialize, Debug, Serialize)]
pub struct Metadata {
format: Format,
streams: Vec<Stream>,
}
impl Metadata {
pub fn dimension(&self) -> Option<Resolution> {
None
}
pub fn duration(&self) -> f32 {
self.format.duration
}
pub fn size(&self) -> usize {
self.format.size
}
}
#[derive(Deserialize, Debug, Serialize)]
pub struct MetadataFile {
#[serde(flatten)]
pub metadata: HashMap<String, Metadata>,
@@ -37,37 +130,132 @@ pub struct MovieLibrary {
root: String,
}
/// Runs `ffprobe` on `path` and returns its JSON report (format, streams and
/// error sections) as a string.
///
/// # Errors
///
/// Fails if the process cannot be spawned, if it exits unsuccessfully (the
/// error then carries the command line, exit status, stdout and stderr), or
/// if stdout is not valid UTF-8.
fn json_metadata_for_path<P: AsRef<OsStr>>(path: P) -> Result<String, Error> {
    // TODO(wathiede): maybe add "-select_streams v"
    let probe_args = [
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_format",
        "-show_error",
        "-show_streams",
    ];
    let mut cmd = Command::new("ffprobe");
    cmd.args(&probe_args).arg(path);
    info!(target: "json", "cmd {:?}", cmd);

    let output = cmd.output()?;
    if !output.status.success() {
        bail!(
            "{:?} exit status {}:\nSTDOUT: {}\nSTDERR: {}",
            cmd,
            output.status,
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr)
        )
    }
    let json = String::from_utf8(output.stdout)?;
    Ok(json)
}
// File extensions that `iter_video_files` accepts as video files. Entries are
// lowercase because the candidate extension is lowercased before the lookup.
lazy_static! {
static ref MOVIE_EXTS: Vec<&'static str> = vec!["avi", "m4v", "mkv", "mov", "mp4"];
}
impl MovieLibrary {
/// Creates a library handle for the directory `root`.
///
/// Nothing is read from disk here; `root` is only stored.
pub fn new<S: Into<String>>(root: S) -> MovieLibrary {
    let root = root.into();
    MovieLibrary { root }
}
pub fn movies(
&self,
include_stale: bool,
) -> Result<(HashMap<String, Metadata>), Box<dyn Error>> {
let mut movies = HashMap::new();
for md in glob(&format!("{}/*/metadata.json", self.root))? {
match md {
Ok(path) => {
let mdf = read_metadata_from_file(&path)?;
for (name, md) in mdf.metadata {
if include_stale {
movies.insert(name, md);
} else {
// Filter out files that don't exist
dbg!(&self.root, &name);
let mut p = PathBuf::from(&self.root);
p.push(&name);
dbg!(&p);
if p.is_file() {
movies.insert(name, md);
}
}
/// Loads `<root>/metadata.json` into the typed metadata structs and logs how
/// many entries were read.
///
/// Despite the name, nothing is written yet: the parsed data is dropped after
/// the count is logged.
///
/// # Errors
///
/// Fails if the file cannot be opened or does not deserialize.
pub fn compact_metadata(&self) -> Result<(), Error> {
    let metadata_path = Path::new(&self.root).join("metadata.json");
    let mdf = read_metadata_from_file(metadata_path)?;
    info!("Read metadata, {} videos found", mdf.metadata.len());
    Ok(())
}
/// Probes every video file that has no entry yet in `<root>/metadata.json`
/// and rewrites that file with the old and the new entries merged.
///
/// Keys are file paths relative to the library root; values are the raw JSON
/// documents returned by `json_metadata_for_path`.
///
/// # Errors
///
/// Fails if `metadata.json` cannot be opened or parsed (so the file must
/// already exist), or if the merged file cannot be created or written.
/// A failing probe of an individual file is only logged and that file skipped.
///
/// # Panics
///
/// Panics if a globbed path is not under the root or is not valid UTF-8, if
/// the working directory cannot be changed to the root, or if the probe
/// output is not valid JSON.
pub fn update_metadata(&self) -> Result<(), Error> {
let path = Path::new(&self.root).join("metadata.json");
// Open the file in read-only mode with buffer.
let f = File::open(&path).context(format!("open {}", path.display()))?;
let r = BufReader::new(f);
// Read the JSON contents of the file as a map from relative path to raw JSON `Value`.
let old_metadata: HashMap<String, Value> = serde_json::from_reader(r)
.context(format!("serde_json::from_reader {}", path.display()))?;
info!("Read metadata, {} videos found", old_metadata.len());
let mut metadata: HashMap<_, _> = self
.iter_video_files()
// Glob errors are dropped silently here; only readable paths continue.
.filter(|r| r.is_ok())
// Keep only files whose root-relative path has no entry yet.
.filter(|r| {
let path = r
.as_ref()
.unwrap()
.strip_prefix(&self.root)
.unwrap()
.to_str()
.unwrap()
.to_owned();
!old_metadata.contains_key(&path)
})
// Hand the remaining files to rayon so they are probed in parallel.
.par_bridge()
.filter_map(move |path| {
// The probe is given a root-relative path, so the (process-wide) working
// directory is switched to the root before each call.
env::set_current_dir(&self.root).unwrap();
let path: PathBuf = path.unwrap().into();
let path = path.strip_prefix(&self.root).unwrap();
match json_metadata_for_path(&path) {
Ok(json) => {
info!("{}", path.display());
Some((path.to_string_lossy().into_owned(), json))
}
Err(e) => {
// Best effort: a file that cannot be probed is logged and left out.
error!("{}", e);
None
}
}
// NOTE(review): the next two lines look like removed-side diff lines from the
// previous `movies` body rather than part of this closure — confirm.
Err(e) => {
return Err(e.into());
})
.map(|(path, json)| (path, serde_json::from_str::<Value>(&json).unwrap()))
.collect();
info!("Adding {} new videos", metadata.len());
// Merge the previously known entries back in before writing.
metadata.extend(old_metadata);
// The existing file is truncated and rewritten in place.
let f = File::create(Path::new(&self.root).join("metadata.json"))?;
let f = BufWriter::new(f);
serde_json::ser::to_writer_pretty(f, &metadata)?;
Ok(())
}
fn iter_video_files(&self) -> impl Send + Iterator<Item = Result<PathBuf, glob::GlobError>> {
glob(&format!("{}/*/*", self.root)).unwrap().filter(|path| {
let path = path.as_ref().unwrap();
match path.extension() {
Some(ext) => {
let ext: &str = &ext.to_str().unwrap().to_lowercase();
if !MOVIE_EXTS.contains(&ext) {
return false;
}
}
None => return false,
}
return true;
})
}
pub fn movies(&self, include_stale: bool) -> Result<(HashMap<String, Metadata>), Error> {
let mut movies = HashMap::new();
for md in glob(&format!("{}/*/metadata.json", self.root))? {
let path = md?;
let mdf = read_metadata_from_file(&path)?;
for (name, md) in mdf.metadata {
if include_stale {
movies.insert(name, md);
} else {
// Filter out files that don't exist
let mut p = PathBuf::from(&self.root);
p.push(&name);
if p.is_file() {
movies.insert(name, md);
}
}
}
}
@@ -75,13 +263,15 @@ impl MovieLibrary {
}
}
fn read_metadata_from_file<P: AsRef<Path>>(path: P) -> Result<MetadataFile, Box<dyn Error>> {
fn read_metadata_from_file<P: AsRef<Path>>(path: P) -> Result<MetadataFile, Error> {
let path = path.as_ref();
// Open the file in read-only mode with buffer.
let file = File::open(path)?;
let reader = BufReader::new(file);
let f = File::open(path).context(format!("open {}", path.display()))?;
let r = BufReader::new(f);
// Read the JSON contents of the file as an instance of `MetadataFile`.
let md = serde_json::from_reader(reader)?;
let md = serde_json::from_reader(r)
.context(format!("serde_json::from_reader {}", path.display()))?;
// Return the `MetadataFile`.
Ok(md)
@@ -95,6 +285,14 @@ mod tests {
format!("{}/testdata", env::var("CARGO_MANIFEST_DIR").unwrap())
}
/// The checked-in full metadata file must deserialize into the typed structs
/// and contain the expected number of entries.
#[test]
fn test_read_full_metadata() {
    let metadata_path = Path::new(&testdata_dir()).join("Movies/metadata.json");
    let mdf = read_metadata_from_file(metadata_path).expect("failed to read metadata");
    assert_eq!(mdf.metadata.len(), 1214);
}
/*
#[test]
fn test_movies() {
let lib = MovieLibrary::new(format!("{}/Movies", testdata_dir()));
@@ -116,7 +314,9 @@ mod tests {
assert_eq!(got, want);
}
*/
/*
#[test]
fn test_filter_stale() {
let lib = MovieLibrary::new(format!("{}/Movies", testdata_dir()));
@@ -137,4 +337,5 @@ mod tests {
assert_eq!(got, want);
}
*/
}

View File

@@ -25,19 +25,29 @@ fn clean_path_parent<P: AsRef<Path>>(path: P) -> PathBuf {
PathBuf::from(path)
}
fn print_movie_groups(movie_groups: &HashMap<PathBuf, Vec<String>>) {
fn print_movie_groups(movie_groups: &HashMap<PathBuf, Vec<(String, Metadata)>>) {
let mut names = movie_groups.keys().collect::<Vec<_>>();
names.sort();
let mut fmtr = Formatter::new();
fmtr.with_separator("");
fmtr.with_scales(Scales::Binary());
for name in names {
let paths = &movie_groups[name];
if paths.len() < 2 {
continue;
}
let dir = name.to_str().unwrap();
println!("{}:", &dir[MOVIE_DIR.len() + 1..]);
for p in paths {
println!(" {}", &p[p.rfind("/").unwrap() + 1..]);
let mut file: Vec<_> = movie_groups[name].iter().collect();
file.sort_by(|(n1, _), (n2, _)| n1.partial_cmp(n2).unwrap());
println!("{}:", name.display());
for (p, md) in file {
println!(
" {:>9} {:>9} {} {}",
md.dimension().unwrap(),
fmtr.format(md.size() as f64),
md.duration(),
&p[p.rfind("/").unwrap() + 1..]
);
}
}
}
@@ -57,9 +67,9 @@ fn print_movies(movies: &HashMap<String, Metadata>, filter: Option<&Regex>) {
let md = &movies[name];
info!(
"{:>9} {:>8} {} {}",
md.dimension,
fmtr.format(md.size as f64),
md.duration_text,
md.dimension().unwrap(),
fmtr.format(md.size() as f64),
md.duration(),
&name[MOVIE_DIR.len() + 1..]
);
println!("mv '{}' '{}'", name, TO_BE_REMOVED_DIR);
@@ -72,6 +82,13 @@ enum Command {
Samples,
#[structopt(name = "groups", about = "Print movies grouped by root name")]
Groups,
#[structopt(
name = "compact-metadata",
about = "Read full metadata file and write compact file."
)]
CompactMetadata,
#[structopt(name = "update-metadata", about = "Write metadata files")]
UpdateMetadata,
}
#[derive(StructOpt)]
@@ -86,15 +103,22 @@ struct SuperDeduper {
parse(from_occurrences)
)]
verbose: usize,
#[structopt(long = "module", help = "Additional log target to enable")]
module: Option<String>,
#[structopt(subcommand)] // Note that we mark a field as a subcommand
cmd: Command,
}
fn main() -> Result<(), Box<dyn Error>> {
let app = SuperDeduper::from_args();
let mut modules = vec![module_path!().to_string()];
if let Some(module) = app.module {
modules.push(module);
}
stderrlog::new()
.verbosity(app.verbose)
.timestamp(stderrlog::Timestamp::Millisecond)
.modules(modules)
.init()
.unwrap();
@@ -110,15 +134,23 @@ fn main() -> Result<(), Box<dyn Error>> {
let lib = MovieLibrary::new(MOVIE_DIR);
let movies = lib.movies(false)?;
let mut movie_groups: HashMap<PathBuf, Vec<String>> = HashMap::new();
for name in movies.keys() {
let mut movie_groups: HashMap<PathBuf, Vec<(String, Metadata)>> = HashMap::new();
for (name, md) in movies.into_iter() {
let clean_name = clean_path_parent(&name);
let paths = movie_groups.entry(clean_name).or_insert(Vec::new());
paths.push(name.to_string())
paths.push((name.to_string(), md));
}
print_movie_groups(&movie_groups);
}
Command::CompactMetadata => {
let lib = MovieLibrary::new(MOVIE_DIR);
lib.compact_metadata()?;
}
Command::UpdateMetadata => {
let lib = MovieLibrary::new(MOVIE_DIR);
lib.update_metadata()?;
}
}
Ok(())
}