Compare commits

..

4 commits

3 changed files with 53 additions and 45 deletions

View file

@ -1,6 +1,6 @@
[package]
name = "lndups"
version = "0.1.4"
version = "0.1.5"
edition = "2021"
description = "Hardlink duplicate files"
authors = ["George H <georgeh124v2@gmail.com>"]
@ -10,8 +10,8 @@ license = "GPL-3.0-or-later"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = { version = "4.5", features = ["derive"] }
shlex = "1.3"
smallvec = "1.13"
structopt = "0.3"
smallvec = "1.14"
unicode-width = "<0.1.14" # pinned below 0.1.14: this subdependency otherwise causes a compilation failure

Binary file not shown.

View file

@ -1,14 +1,13 @@
extern crate shlex;
extern crate smallvec;
extern crate structopt;
use std::borrow::Borrow;
use std::cell::RefCell;
use std::collections::HashMap;
use std::io::{Read, Write, BufReader, BufRead};
use std::os::linux::fs::MetadataExt as MetadataExtLinux;
use std::path::{Path, PathBuf};
use crate::structopt::StructOpt;
use crate::smallvec::*;
use clap::Parser;
use smallvec::*;
@ -16,54 +15,57 @@ macro_rules! s_arg_target_file_name { () => { "target-file" } }
macro_rules! s_default_target_separator { () => { ";" } }
#[derive(StructOpt)]
#[structopt(
about="Hardlink duplicate files recursively\nSymlinks are treated as normal files",
usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]")
#[derive(Parser)]
#[command(
about=concat!(
"Hardlink duplicate files recursively\n",
"Symlinks are treated as normal files",
),
// usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]")
)]
struct CLIArguments {
#[structopt(short, long, parse(from_occurrences), help="Increase verbosity")]
#[arg(short, long, action=clap::ArgAction::Count, help="Increase verbosity")]
verbose: i8,
#[structopt(short, long, parse(from_occurrences), help="Decrease verbosity")]
#[arg(short, long, action=clap::ArgAction::Count, help="Decrease verbosity")]
quiet: i8,
#[structopt(long, help=concat!(
#[arg(long, help=concat!(
"Disable brace notation for output\n",
" Ex: /home/user/{dir,backup}/file",
))]
no_brace_output: bool,
#[structopt(long, help=concat!(
#[arg(long, help=concat!(
"Perform no operations on the filesystem",
))]
dry_run: bool,
#[structopt(short="i", help=concat!(
#[arg(short='i', help=concat!(
"Prompt once before operating\n",
"Doesn't occurs if no targets are provided",
))]
prompt: bool,
#[structopt(short, long, value_name="VALUE", help=concat!(
#[arg(short, long, value_name="VALUE", help=concat!(
"Minimum file size to be considered for hardlinking\n",
"Never goes below 1 (the default)",
))]
min_size: Option<u64>,
#[structopt(short, long, value_name="SEPARATOR", help=concat!(
#[arg(short, long, value_name="SEPARATOR", help=concat!(
"Separator between sets of targets (default: ", s_default_target_separator!(), ")",
))]
separator: Option<String>,
#[structopt(long=s_arg_target_file_name!(), value_name="FILE", help=concat!(
#[arg(long=s_arg_target_file_name!(), value_name="FILE", help=concat!(
"File to source targets from (can be '-' for stdin)\n",
"Same rules as CLI argument targets apply\n",
"Mutually exclusive with CLI argument targets",
))]
file_containing_targets: Option<String>,
#[structopt(value_name="TARGET", help=concat!(
#[arg(value_name="TARGET", help=concat!(
"Target files and directories (recursive)\n",
"Each SEPARATOR denotes a new set of targets\n",
" Each set of targets are separate from all other sets\n",
@ -87,11 +89,11 @@ struct Config {
fn main() -> Result<(), i32> {
let mut args = CLIArguments::from_args();
let mut args = CLIArguments::parse();
let verbosity = args.verbose - args.quiet;
let config = Config {
min_size: args.min_size.map(|v| if v > 1 { v } else { 1 }).unwrap_or(1),
min_size: args.min_size.map(|v| std::cmp::max(v, 1)).unwrap_or(1),
no_brace_output: args.no_brace_output,
dry_run: args.dry_run,
verbosity
@ -127,13 +129,13 @@ fn main() -> Result<(), i32> {
}
if args.prompt {
if !prompt_confirm(&run_targets) {
if !prompt_confirm(&run_targets).map_err(|_| { eprintln!("IO Error during confirmation prompt"); 1 })? {
return Ok(());
}
}
for paths in run_paths {
run(paths, &config);
run(paths, &config).map_err(|_| 1)?;
}
Ok(())
@ -187,7 +189,8 @@ fn obtain_run_targets<'a>(
/// The result contains no symlinks; the outer Vec may be empty, but each inner Vec is nonempty
fn obtain_run_paths<T, Y, U>(run_targets: T, verbosity: i8) -> Result<Vec<Vec<PathWithMetadata>>, i32>
fn obtain_run_paths<T, Y, U>(run_targets: T, verbosity: i8)
-> Result<Vec<Vec<PathWithMetadata>>, i32>
where
T: Iterator<Item=Y> + ExactSizeIterator,
Y: Iterator<Item=U> + ExactSizeIterator,
@ -222,7 +225,7 @@ where
/// Perform a full run over one set of paths: register every path, then process each file-size group
fn run(pwmds: Vec<PathWithMetadata>, cfg: &Config) {
fn run(pwmds: Vec<PathWithMetadata>, cfg: &Config) -> std::io::Result<()> {
let mut registry: HashMap<u64, Vec<PathWithMetadata>> = HashMap::new();
for pwmd in pwmds {
register(pwmd, &mut registry, cfg);
@ -233,25 +236,31 @@ fn run(pwmds: Vec<PathWithMetadata>, cfg: &Config) {
if let Some(stdout_buffer) = &mut stdout_buffer {
if cfg.verbosity >= 0 {
writeln!(stdout_buffer, "Considering {} total files for duplicates", registry.iter().map(|(_,files)| files.len()).sum::<usize>()).unwrap();
writeln!(stdout_buffer,
"Considering {} total files for duplicates",
registry.iter().map(|(_,files)| files.len()).sum::<usize>()
)?;
}
}
for (fsize, pwmds) in registry {
run_one_size(fsize, &pwmds, cfg, stdout_buffer.as_mut());
}
run_one_size(fsize, &pwmds, cfg, stdout_buffer.as_mut())?;
}
fn run_one_size<W: Write>(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config, mut stdout_buffer: Option<&mut W>) {
Ok(())
}
fn run_one_size<W: Write>(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config, mut stdout_buffer: Option<&mut W>) -> std::io::Result<()> {
if let Some(stdout_buffer) = stdout_buffer.as_mut() {
if cfg.verbosity >= 1 {
writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", pwmds.len(), fsize).unwrap();
writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", pwmds.len(), fsize)?;
}
}
// if cfg.verbosity >= 0 {
// pwmds.sort_by_key(|pwmd| pwmd.path.file_name().unwrap_or_default().to_string_lossy().to_string());
// }
let mut by_inode: Vec<SmallVec<[&PathWithMetadata; 1]>> = Vec::with_capacity((pwmds.len() as f64 * 0.8) as usize); // each nonempty
let mut by_inode: Vec<SmallVec<[&PathWithMetadata; 1]>>
= Vec::with_capacity((pwmds.len() as f64 * 0.8) as usize); // each nonempty
let mut inodes: Vec<u64> = Vec::with_capacity(by_inode.len());
for pwmd in pwmds {
let inode: u64 = pwmd.md().st_ino();
@ -282,6 +291,8 @@ fn run_one_size<W: Write>(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config,
}
i += 1;
}
Ok(())
}
@ -334,7 +345,7 @@ impl PathWithMetadata {
Ok(PathWithMetadata{ path, md })
}
#[inline(always)]
pub fn md(&self) -> std::cell::Ref<std::fs::Metadata> {
pub fn md<'a>(&'a self) -> std::cell::Ref<'a, std::fs::Metadata> {
self.md.borrow()
}
pub fn reset_md(&self) -> Result<(), String> {
@ -360,23 +371,21 @@ impl AsRef<Path> for PathWithMetadata {
/// Prompt the user and report whether they confirmed (a response starting with "y", case-insensitive)
fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef<str>>(run_targets: &[T]) -> bool {
println!("Are you sure you want to link all duplicates in each of these sets of targets?");
fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef<str>>(run_targets: &[T]) -> std::io::Result<bool> {
{
let mut stdout_buffer = std::io::BufWriter::new(std::io::stdout().lock());
writeln!(&mut stdout_buffer, "Are you sure you want to link all duplicates in each of these sets of targets?")?;
for spaths in run_targets {
println!(" {}", shlex::try_join(spaths.borrow().iter().map(|s| s.as_ref())).unwrap());
writeln!(&mut stdout_buffer, " {}", shlex::try_join(spaths.borrow().iter().map(|s| s.as_ref())).unwrap())?;
}
write!(&mut stdout_buffer, "> ")?;
stdout_buffer.flush().unwrap_or_else(|_| ());
}
print!("> ");
std::io::stdout().flush().unwrap_or_else(|_| ());
let mut response = String::new();
std::io::stdin().read_line(&mut response).unwrap_or_else(
|_| {
eprintln!("Problem reading input");
std::process::exit(1);
}
);
std::io::stdin().read_line(&mut response)?;
response.to_lowercase().starts_with("y")
Ok(response.to_lowercase().starts_with("y"))
}
fn read_lines(reader: impl BufRead, dest: &mut Vec<String>) -> Result<(), String> {
@ -464,7 +473,6 @@ where T: smallvec::Array<Item=&'b PathWithMetadata>,
writeln!(stdout_buffer, "hardlinked {}", format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg)).unwrap();
}
}
drop(keep);
keeps.push(replace);
}
true