Compare commits

...

4 commits

3 changed files with 53 additions and 45 deletions

View file

@ -1,6 +1,6 @@
[package] [package]
name = "lndups" name = "lndups"
version = "0.1.4" version = "0.1.5"
edition = "2021" edition = "2021"
description = "Hardlink duplicate files" description = "Hardlink duplicate files"
authors = ["George H <georgeh124v2@gmail.com>"] authors = ["George H <georgeh124v2@gmail.com>"]
@ -10,8 +10,8 @@ license = "GPL-3.0-or-later"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
clap = { version = "4.5", features = ["derive"] }
shlex = "1.3" shlex = "1.3"
smallvec = "1.13" smallvec = "1.14"
structopt = "0.3"
unicode-width = "<0.1.14" # subdependency causing compilation failure unicode-width = "<0.1.14" # subdependency causing compilation failure

Binary file not shown.

View file

@ -1,14 +1,13 @@
extern crate shlex; extern crate shlex;
extern crate smallvec; extern crate smallvec;
extern crate structopt;
use std::borrow::Borrow; use std::borrow::Borrow;
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::HashMap; use std::collections::HashMap;
use std::io::{Read, Write, BufReader, BufRead}; use std::io::{Read, Write, BufReader, BufRead};
use std::os::linux::fs::MetadataExt as MetadataExtLinux; use std::os::linux::fs::MetadataExt as MetadataExtLinux;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use crate::structopt::StructOpt; use clap::Parser;
use crate::smallvec::*; use smallvec::*;
@ -16,54 +15,57 @@ macro_rules! s_arg_target_file_name { () => { "target-file" } }
macro_rules! s_default_target_separator { () => { ";" } } macro_rules! s_default_target_separator { () => { ";" } }
#[derive(StructOpt)] #[derive(Parser)]
#[structopt( #[command(
about="Hardlink duplicate files recursively\nSymlinks are treated as normal files", about=concat!(
usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]") "Hardlink duplicate files recursively\n",
"Symlinks are treated as normal files",
),
// usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]")
)] )]
struct CLIArguments { struct CLIArguments {
#[structopt(short, long, parse(from_occurrences), help="Increase verbosity")] #[arg(short, long, action=clap::ArgAction::Count, help="Increase verbosity")]
verbose: i8, verbose: i8,
#[structopt(short, long, parse(from_occurrences), help="Decrease verbosity")] #[arg(short, long, action=clap::ArgAction::Count, help="Decrease verbosity")]
quiet: i8, quiet: i8,
#[structopt(long, help=concat!( #[arg(long, help=concat!(
"Disable brace notation for output\n", "Disable brace notation for output\n",
" Ex: /home/user/{dir,backup}/file", " Ex: /home/user/{dir,backup}/file",
))] ))]
no_brace_output: bool, no_brace_output: bool,
#[structopt(long, help=concat!( #[arg(long, help=concat!(
"Perform no operations on the filesystem", "Perform no operations on the filesystem",
))] ))]
dry_run: bool, dry_run: bool,
#[structopt(short="i", help=concat!( #[arg(short='i', help=concat!(
"Prompt once before operating\n", "Prompt once before operating\n",
"Doesn't occurs if no targets are provided", "Doesn't occurs if no targets are provided",
))] ))]
prompt: bool, prompt: bool,
#[structopt(short, long, value_name="VALUE", help=concat!( #[arg(short, long, value_name="VALUE", help=concat!(
"Minimum file size to be considered for hardlinking\n", "Minimum file size to be considered for hardlinking\n",
"Never goes below 1 (the default)", "Never goes below 1 (the default)",
))] ))]
min_size: Option<u64>, min_size: Option<u64>,
#[structopt(short, long, value_name="SEPARATOR", help=concat!( #[arg(short, long, value_name="SEPARATOR", help=concat!(
"Separator between sets of targets (default: ", s_default_target_separator!(), ")", "Separator between sets of targets (default: ", s_default_target_separator!(), ")",
))] ))]
separator: Option<String>, separator: Option<String>,
#[structopt(long=s_arg_target_file_name!(), value_name="FILE", help=concat!( #[arg(long=s_arg_target_file_name!(), value_name="FILE", help=concat!(
"File to source targets from (can be '-' for stdin)\n", "File to source targets from (can be '-' for stdin)\n",
"Same rules as CLI argument targets apply\n", "Same rules as CLI argument targets apply\n",
"Mutually exclusive with CLI argument targets", "Mutually exclusive with CLI argument targets",
))] ))]
file_containing_targets: Option<String>, file_containing_targets: Option<String>,
#[structopt(value_name="TARGET", help=concat!( #[arg(value_name="TARGET", help=concat!(
"Target files and directories (recursive)\n", "Target files and directories (recursive)\n",
"Each SEPARATOR denotes a new set of targets\n", "Each SEPARATOR denotes a new set of targets\n",
" Each set of targets are separate from all other sets\n", " Each set of targets are separate from all other sets\n",
@ -87,11 +89,11 @@ struct Config {
fn main() -> Result<(), i32> { fn main() -> Result<(), i32> {
let mut args = CLIArguments::from_args(); let mut args = CLIArguments::parse();
let verbosity = args.verbose - args.quiet; let verbosity = args.verbose - args.quiet;
let config = Config { let config = Config {
min_size: args.min_size.map(|v| if v > 1 { v } else { 1 }).unwrap_or(1), min_size: args.min_size.map(|v| std::cmp::max(v, 1)).unwrap_or(1),
no_brace_output: args.no_brace_output, no_brace_output: args.no_brace_output,
dry_run: args.dry_run, dry_run: args.dry_run,
verbosity verbosity
@ -127,13 +129,13 @@ fn main() -> Result<(), i32> {
} }
if args.prompt { if args.prompt {
if !prompt_confirm(&run_targets) { if !prompt_confirm(&run_targets).map_err(|_| { eprintln!("IO Error during confirmation prompt"); 1 })? {
return Ok(()); return Ok(());
} }
} }
for paths in run_paths { for paths in run_paths {
run(paths, &config); run(paths, &config).map_err(|_| 1)?;
} }
Ok(()) Ok(())
@ -187,7 +189,8 @@ fn obtain_run_targets<'a>(
/// result has no symlinks; may be empty; contents each nonempty /// result has no symlinks; may be empty; contents each nonempty
fn obtain_run_paths<T, Y, U>(run_targets: T, verbosity: i8) -> Result<Vec<Vec<PathWithMetadata>>, i32> fn obtain_run_paths<T, Y, U>(run_targets: T, verbosity: i8)
-> Result<Vec<Vec<PathWithMetadata>>, i32>
where where
T: Iterator<Item=Y> + ExactSizeIterator, T: Iterator<Item=Y> + ExactSizeIterator,
Y: Iterator<Item=U> + ExactSizeIterator, Y: Iterator<Item=U> + ExactSizeIterator,
@ -222,7 +225,7 @@ where
/// perform a full run /// perform a full run
fn run(pwmds: Vec<PathWithMetadata>, cfg: &Config) { fn run(pwmds: Vec<PathWithMetadata>, cfg: &Config) -> std::io::Result<()> {
let mut registry: HashMap<u64, Vec<PathWithMetadata>> = HashMap::new(); let mut registry: HashMap<u64, Vec<PathWithMetadata>> = HashMap::new();
for pwmd in pwmds { for pwmd in pwmds {
register(pwmd, &mut registry, cfg); register(pwmd, &mut registry, cfg);
@ -233,25 +236,31 @@ fn run(pwmds: Vec<PathWithMetadata>, cfg: &Config) {
if let Some(stdout_buffer) = &mut stdout_buffer { if let Some(stdout_buffer) = &mut stdout_buffer {
if cfg.verbosity >= 0 { if cfg.verbosity >= 0 {
writeln!(stdout_buffer, "Considering {} total files for duplicates", registry.iter().map(|(_,files)| files.len()).sum::<usize>()).unwrap(); writeln!(stdout_buffer,
"Considering {} total files for duplicates",
registry.iter().map(|(_,files)| files.len()).sum::<usize>()
)?;
} }
} }
for (fsize, pwmds) in registry { for (fsize, pwmds) in registry {
run_one_size(fsize, &pwmds, cfg, stdout_buffer.as_mut()); run_one_size(fsize, &pwmds, cfg, stdout_buffer.as_mut())?;
} }
Ok(())
} }
fn run_one_size<W: Write>(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config, mut stdout_buffer: Option<&mut W>) { fn run_one_size<W: Write>(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config, mut stdout_buffer: Option<&mut W>) -> std::io::Result<()> {
if let Some(stdout_buffer) = stdout_buffer.as_mut() { if let Some(stdout_buffer) = stdout_buffer.as_mut() {
if cfg.verbosity >= 1 { if cfg.verbosity >= 1 {
writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", pwmds.len(), fsize).unwrap(); writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", pwmds.len(), fsize)?;
} }
} }
// if cfg.verbosity >= 0 { // if cfg.verbosity >= 0 {
// pwmds.sort_by_key(|pwmd| pwmd.path.file_name().unwrap_or_default().to_string_lossy().to_string()); // pwmds.sort_by_key(|pwmd| pwmd.path.file_name().unwrap_or_default().to_string_lossy().to_string());
// } // }
let mut by_inode: Vec<SmallVec<[&PathWithMetadata; 1]>> = Vec::with_capacity((pwmds.len() as f64 * 0.8) as usize); // each nonempty let mut by_inode: Vec<SmallVec<[&PathWithMetadata; 1]>>
= Vec::with_capacity((pwmds.len() as f64 * 0.8) as usize); // each nonempty
let mut inodes: Vec<u64> = Vec::with_capacity(by_inode.len()); let mut inodes: Vec<u64> = Vec::with_capacity(by_inode.len());
for pwmd in pwmds { for pwmd in pwmds {
let inode: u64 = pwmd.md().st_ino(); let inode: u64 = pwmd.md().st_ino();
@ -282,6 +291,8 @@ fn run_one_size<W: Write>(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config,
} }
i += 1; i += 1;
} }
Ok(())
} }
@ -334,7 +345,7 @@ impl PathWithMetadata {
Ok(PathWithMetadata{ path, md }) Ok(PathWithMetadata{ path, md })
} }
#[inline(always)] #[inline(always)]
pub fn md(&self) -> std::cell::Ref<std::fs::Metadata> { pub fn md<'a>(&'a self) -> std::cell::Ref<'a, std::fs::Metadata> {
self.md.borrow() self.md.borrow()
} }
pub fn reset_md(&self) -> Result<(), String> { pub fn reset_md(&self) -> Result<(), String> {
@ -360,23 +371,21 @@ impl AsRef<Path> for PathWithMetadata {
/// return whether or not user gave confirmation /// return whether or not user gave confirmation
fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef<str>>(run_targets: &[T]) -> bool { fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef<str>>(run_targets: &[T]) -> std::io::Result<bool> {
println!("Are you sure you want to link all duplicates in each of these sets of targets?"); {
let mut stdout_buffer = std::io::BufWriter::new(std::io::stdout().lock());
writeln!(&mut stdout_buffer, "Are you sure you want to link all duplicates in each of these sets of targets?")?;
for spaths in run_targets { for spaths in run_targets {
println!(" {}", shlex::try_join(spaths.borrow().iter().map(|s| s.as_ref())).unwrap()); writeln!(&mut stdout_buffer, " {}", shlex::try_join(spaths.borrow().iter().map(|s| s.as_ref())).unwrap())?;
}
write!(&mut stdout_buffer, "> ")?;
stdout_buffer.flush().unwrap_or_else(|_| ());
} }
print!("> ");
std::io::stdout().flush().unwrap_or_else(|_| ());
let mut response = String::new(); let mut response = String::new();
std::io::stdin().read_line(&mut response).unwrap_or_else( std::io::stdin().read_line(&mut response)?;
|_| {
eprintln!("Problem reading input");
std::process::exit(1);
}
);
response.to_lowercase().starts_with("y") Ok(response.to_lowercase().starts_with("y"))
} }
fn read_lines(reader: impl BufRead, dest: &mut Vec<String>) -> Result<(), String> { fn read_lines(reader: impl BufRead, dest: &mut Vec<String>) -> Result<(), String> {
@ -464,7 +473,6 @@ where T: smallvec::Array<Item=&'b PathWithMetadata>,
writeln!(stdout_buffer, "hardlinked {}", format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg)).unwrap(); writeln!(stdout_buffer, "hardlinked {}", format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg)).unwrap();
} }
} }
drop(keep);
keeps.push(replace); keeps.push(replace);
} }
true true