diff --git a/Cargo.toml b/Cargo.toml index f81f4a3..a593e43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,17 +1,10 @@ [package] name = "lndups" -version = "0.1.4" +version = "0.1.0" edition = "2021" -description = "Hardlink duplicate files" -authors = ["George H "] -repository = "https://github.com/ascendforever/lndups" -license = "GPL-3.0-or-later" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -shlex = "1.3" -smallvec = "1.13" +shlex = "1.2" structopt = "0.3" - -unicode-width = "<0.1.14" # subdependency causing compilation failure diff --git a/README.md b/README.md index 743eb49..d29e849 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,3 @@ Hardlink duplicate files Linux is the sole supported OS simply because I don't need it for another OS. It can be modified to be multiplatform though. - - -## Installation -`cargo install lndups` or `curl -O 'https://github.com/ascendforever/lndups/raw/refs/heads/main/prebuilt-x86-64-linux/lndups'` diff --git a/prebuilt-x86-64-linux/lndups b/prebuilt-x86-64-linux/lndups index 6134397..06531d9 100755 Binary files a/prebuilt-x86-64-linux/lndups and b/prebuilt-x86-64-linux/lndups differ diff --git a/src/main.rs b/src/main.rs index 97ca443..8bcfcc0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,82 +1,29 @@ extern crate shlex; -extern crate smallvec; extern crate structopt; use std::borrow::Borrow; -use std::cell::RefCell; -use std::collections::HashMap; use std::io::{Read, Write, BufReader, BufRead}; +use std::path::{Path,PathBuf}; +use std::collections::HashMap; use std::os::linux::fs::MetadataExt as MetadataExtLinux; -use std::path::{Path, PathBuf}; use crate::structopt::StructOpt; -use crate::smallvec::*; - -macro_rules! s_arg_target_file_name { () => { "target-file" } } macro_rules! s_default_target_separator { () => { ";" } } - -#[derive(StructOpt)] -#[structopt( - about="Hardlink duplicate files recursively\nSymlinks are treated as normal files", - usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]") -)] -struct CLIArguments { - #[structopt(short, long, parse(from_occurrences), help="Increase verbosity")] - verbose: i8, - - #[structopt(short, long, parse(from_occurrences), help="Decrease verbosity")] - quiet: i8, - - #[structopt(long, help=concat!( - "Disable brace notation for output\n", - " Ex: /home/user/{dir,backup}/file", - ))] - no_brace_output: bool, - - #[structopt(long, help=concat!( - "Perform no operations on the filesystem", - ))] - dry_run: bool, - - #[structopt(short="i", help=concat!( - "Prompt once before operating\n", - "Doesn't occurs if no targets are provided", - ))] - prompt: bool, - - #[structopt(short, long, value_name="VALUE", help=concat!( - "Minimum file size to be considered for hardlinking\n", - "Never goes below 1 (the default)", - ))] - min_size: Option, - - #[structopt(short, long, value_name="SEPARATOR", help=concat!( - "Separator between sets of targets (default: ", s_default_target_separator!(), ")", - ))] - separator: Option, - - #[structopt(long=s_arg_target_file_name!(), value_name="FILE", help=concat!( - "File to source targets from (can be '-' for stdin)\n", - "Same rules as CLI argument targets apply\n", - "Mutually exclusive with CLI argument targets", - ))] - file_containing_targets: Option, - - #[structopt(value_name="TARGET", help=concat!( - "Target files and directories (recursive)\n", - "Each SEPARATOR denotes a new set of targets\n", - " Each set of targets are separate from all other sets\n", - " All targets must be on the same device\n", - "All symlinks are ignored\n", - "'-' is not treated as special\n", - "Mutually exclusive with --", s_arg_target_file_name!(), - ))] - targets: Vec, +fn main() -> Result<(), i32> { + match process_args() { + ProcessArgsResult::Ok(run_paths, cfg) => { + for paths in run_paths { + run(paths, &cfg); + } + Ok(()) + }, + ProcessArgsResult::Exit => Ok(()), + ProcessArgsResult::ExitError => Err(1), + } } - struct Config { dry_run: bool, min_size: u64, @@ -85,285 +32,55 @@ struct Config { } +#[derive(StructOpt)] +#[structopt( + about="Hardlink duplicate files recursively\nSymlinks are treated as normal files", + usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]") +)] +struct CLIArguments { + #[structopt(short, long, parse(from_occurrences), + help="Increase verbosity")] + verbose: i8, -fn main() -> Result<(), i32> { - let mut args = CLIArguments::from_args(); - let verbosity = args.verbose - args.quiet; + #[structopt(short, long, parse(from_occurrences), + help="Decrease verbosity")] + quiet: i8, - let config = Config { - min_size: args.min_size.map(|v| if v > 1 { v } else { 1 }).unwrap_or(1), - no_brace_output: args.no_brace_output, - dry_run: args.dry_run, - verbosity - }; + #[structopt(long, + help="Disable brace notation for output\n Ex: /home/user/{dir,backup}/file")] + no_brace_output: bool, - let run_targets: Vec> = obtain_run_targets( - args.file_containing_targets.as_ref(), - &mut args.targets, - args.separator.as_ref().unwrap_or(&s_default_target_separator!().to_string()), - verbosity, - )?; - if run_targets.is_empty() { - if verbosity >= 1 { - println!("No targets provided"); - } - return Ok(()); - } + #[structopt(long, + help="Perform no operations on the filesystem")] + dry_run: bool, - let run_paths: Vec> = obtain_run_paths( - run_targets.iter().map(|v| v.iter()), - verbosity, - )?; + #[structopt(short="i", + help="Prompt once before operating\nDoesn't occurs if no targets are provided")] + prompt: bool, - for paths in &run_paths { - if let Err(s) = check_all_same_device(paths) { - eprintln!("{}", s); - return Err(1); - } - } + #[structopt(short, long, value_name="VALUE", + help="Minimum file size to be considered for hardlinking\nNever goes below 1 (the default)")] + min_size: Option, - if run_paths.len() == 0 { - return Ok(()); - } + #[structopt(short, long, value_name="SEPARATOR", + help=concat!("Separator between sets of targets (default: ", s_default_target_separator!(), ")"))] + separator: Option, - if args.prompt { - if !prompt_confirm(&run_targets) { - return Ok(()); - } - } + #[structopt(long, value_name="FILE", + help="File to source arguments from (can be '-' for stdin)")] + argument_file: Option, - for paths in run_paths { - run(paths, &config); - } - - Ok(()) + #[structopt(value_name="TARGET", + help="Target files and directories (recursive)\nEach SEPARATOR denotes a new set of targets\n Each set of targets are separate from all other sets\n All targets must be on the same device\nAll symlinks are ignored\n'-' is not treated as special")] + targets: Vec, } -/// result may be empty; contents each nonempty -fn obtain_run_targets<'a>( - arg_file: Option<&String>, - arg_targets: &'a mut Vec, - separator: &String, verbosity: i8 -) -> Result>, i32> { - if let Some(arg_file) = &arg_file { - if !arg_targets.is_empty() { - if verbosity >= 0 { - eprintln!("No targets should be provided as cli arguments if arguments are being read from file"); - } - return Err(1); - } - if let Err(s) = { - if *arg_file == "-" { - read_lines(std::io::stdin().lock(), arg_targets) - } else { - read_file_lines(Path::new(&arg_file), arg_targets) - } - } { - if verbosity >= 0 { - eprintln!("Error reading file containing targets: {}", s); - } - return Err(1); - } - } else { - for target in arg_targets.iter() { - if target.contains('\0') { - if verbosity >= 0 { - eprintln!("Paths can never contain null byte: {}", target); - } - return Err(1); - } - } - } - - let mut run_targets = split_vec(arg_targets, &separator); - for i in (0..run_targets.len()).rev() { - if run_targets[i].len() == 0 { - run_targets.swap_remove(i); - } - } - Ok(run_targets) -} - - -/// result has no symlinks; may be empty; contents each nonempty -fn obtain_run_paths(run_targets: T, verbosity: i8) -> Result>, i32> -where - T: Iterator + ExactSizeIterator, - Y: Iterator + ExactSizeIterator, - U: AsRef, -{ - let mut run_paths: Vec> = Vec::with_capacity(run_targets.len()); - for spaths in run_targets { - let mut paths = Vec::with_capacity(spaths.len()); - for spath in spaths { - let path = Path::new(spath.as_ref()).canonicalize().map_err(|_| { - if verbosity >= 1 { - eprintln!("Failed to retrieve absolute path for {}", shlex::try_quote(spath.as_ref()).unwrap()); - } - 1 - })?; - let pwmd = PathWithMetadata::new(path).map_err(|s| { - if verbosity >= 1 { - eprintln!("{}", s); - } - 1 - })?; - if !pwmd.md().file_type().is_symlink() { - paths.push(pwmd); - } - } - if paths.len() > 0 { - run_paths.push(paths); - } - } - Ok(run_paths) -} - - -/// perform a full run -fn run(pwmds: Vec, cfg: &Config) { - let mut registry: HashMap> = HashMap::new(); - for pwmd in pwmds { - register(pwmd, &mut registry, cfg); - } - registry.retain(|_,files| files.len() >= 2); - - let mut stdout_buffer = (cfg.verbosity >= 0).then(|| std::io::BufWriter::new(std::io::stdout().lock())); - - if let Some(stdout_buffer) = &mut stdout_buffer { - if cfg.verbosity >= 0 { - writeln!(stdout_buffer, "Considering {} total files for duplicates", registry.iter().map(|(_,files)| files.len()).sum::()).unwrap(); - } - } - - for (fsize, pwmds) in registry { - run_one_size(fsize, &pwmds, cfg, stdout_buffer.as_mut()); - } -} - -fn run_one_size(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config, mut stdout_buffer: Option<&mut W>) { - if let Some(stdout_buffer) = stdout_buffer.as_mut() { - if cfg.verbosity >= 1 { - writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", pwmds.len(), fsize).unwrap(); - } - } - // if cfg.verbosity >= 0 { - // pwmds.sort_by_key(|pwmd| pwmd.path.file_name().unwrap_or_default().to_string_lossy().to_string()); - // } - let mut by_inode: Vec> = Vec::with_capacity((pwmds.len() as f64 * 0.8) as usize); // each nonempty - let mut inodes: Vec = Vec::with_capacity(by_inode.len()); - for pwmd in pwmds { - let inode: u64 = pwmd.md().st_ino(); - match inodes.binary_search(&inode) { - Ok(i) => { - by_inode[i].push(pwmd); - }, - Err(i) => { - inodes.insert(i, inode); - by_inode.insert(i, smallvec![pwmd]); - } - } - } - drop(inodes); - by_inode.sort_by(|a,b| b.len().cmp(&a.len())); // descending size order - - // compare each with eachother - let mut i = 0; - while i < by_inode.len() { - let mut j = i+1; - while j < by_inode.len() { - let (keeps, replaces) = get2mut(&mut by_inode, i, j); - if hardlink_all(keeps, replaces, cfg, stdout_buffer.as_mut()) { - by_inode.swap_remove(j); - } else { - j += 1; - } - } - i += 1; - } -} - - -/// recursively register path or its contents if directory into registry -/// eprints errors -fn register( - pwmd: PathWithMetadata, - registry: &mut HashMap>, - cfg: &Config, -) { - if pwmd.md().file_type().is_symlink() { - return; - } - - if pwmd.path.is_file() { - let size = pwmd.md().st_size(); - if size >= cfg.min_size { - registry.entry(size).or_default().push(pwmd); - } - return; - } - - if pwmd.path.is_dir() { match std::fs::read_dir(&pwmd.path) { - Ok(entries) => for entry in entries { match entry { - Ok(entry) => match PathWithMetadata::new(entry.path()) { - Ok(child_pwmd) => register(child_pwmd, registry, cfg), - Err(s) => if cfg.verbosity >= 1 { - eprintln!("{}", s); - }, - }, - Err(error) => if cfg.verbosity >= 1 { - eprintln!("Failed to inspect {}: {}", shlex::try_quote(&pwmd.path.to_string_lossy()).unwrap(), error); - }, - } }, - Err(error) => if cfg.verbosity >= 1 { - eprintln!("Failed to read dir {}: {}", shlex::try_quote(&pwmd.path.to_string_lossy()).unwrap(), error); - }, - } } -} - - - -struct PathWithMetadata { - pub path: PathBuf, - md: RefCell, -} -impl PathWithMetadata { - pub fn new(path: PathBuf) -> Result{ - let md = RefCell::new(Self::get_md(&path)?); - Ok(PathWithMetadata{ path, md }) - } - #[inline(always)] - pub fn md(&self) -> std::cell::Ref { - self.md.borrow() - } - pub fn reset_md(&self) -> Result<(), String> { - *self.md.borrow_mut() = Self::get_md(&self.path)?; - Ok(()) - } - fn get_md(path: &PathBuf) -> Result { - std::fs::symlink_metadata(path).map_err(|_| format!("Failed to retrive metadata for {}", shlex::try_quote(&path.to_string_lossy()).unwrap())) - } - -} -impl AsRef for PathWithMetadata { - fn as_ref(&self) -> &PathBuf { - return &self.path; - } -} -impl AsRef for PathWithMetadata { - fn as_ref(&self) -> &Path { - return &self.path.as_ref(); - } -} - - - /// return whether or not user gave confirmation fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef>(run_targets: &[T]) -> bool { println!("Are you sure you want to link all duplicates in each of these sets of targets?"); for spaths in run_targets { - println!(" {}", shlex::try_join(spaths.borrow().iter().map(|s| s.as_ref())).unwrap()); + println!(" {}", shlex::join(spaths.borrow().iter().map(|s| s.as_ref()))); } print!("> "); std::io::stdout().flush().unwrap_or_else(|_| ()); @@ -379,104 +96,210 @@ fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef>(run_targets: &[T]) -> bool response.to_lowercase().starts_with("y") } -fn read_lines(reader: impl BufRead, dest: &mut Vec) -> Result<(), String> { - for line in reader.lines() { - dest.push(line.map_err(|e| format!("Error reading line: {}", e))?); - } - Ok(()) -} fn read_file_lines(path: &Path, dest: &mut Vec) -> Result<(), String> { if !path.is_file() { - return Err(format!("File does not exist or is not a normal file ({})", shlex::try_quote(&path.to_string_lossy()).unwrap())); + return Err(format!("File does not exist or is not a normal file ({})", shlex::quote(&path.to_string_lossy()))); } - let reader = BufReader::new(std::fs::File::open(path).map_err( - |e| format!("Could not open {}: {}", shlex::try_quote(&path.to_string_lossy()).unwrap(), e) - )?); - read_lines(reader, dest) + if let Ok(f) = std::fs::File::open(path) { + let reader = BufReader::new(f); + for line in reader.lines() { + match line { + Ok(line) => dest.push(line), + Err(err) => return Err(format!("Error reading line: {}", err)) + } + } + Ok(()) + } else { + Err(format!("Could not open {}", shlex::quote(&path.to_string_lossy()))) + } +} + +enum ProcessArgsResult { + Ok(Vec>, Config), + Exit, + ExitError, +} + +/// may exit +fn process_args() -> ProcessArgsResult { + let mut args = CLIArguments::from_args(); + let verbosity = args.verbose - args.quiet; + + let config = Config { + min_size: args.min_size.map(|v| if v > 1 { v } else { 1 }).unwrap_or(1), + no_brace_output: args.no_brace_output, + dry_run: args.dry_run, + verbosity + }; + + if let Some(arg_file) = args.argument_file { + if !args.targets.is_empty() { + eprintln!("No targets should be provided as cli arguments if arguments are being read from file"); + return ProcessArgsResult::ExitError; + } + let path = Path::new(&arg_file); + if let Err(s) = read_file_lines(path, &mut args.targets) { + eprintln!("Error reading argument file: {}", s); + return ProcessArgsResult::ExitError; + } + } + + let run_targets: Vec> = split_vec(&args.targets, &args.separator.unwrap_or(s_default_target_separator!().to_string())); + + if run_targets.is_empty() { + if verbosity > 0 { + println!("No targets provided"); + } + return ProcessArgsResult::Exit; + } + + if args.prompt { + if !prompt_confirm(&run_targets) { + std::process::exit(0); + } + } + + let mut bad = false; + let run_paths: Vec> = run_targets.iter().enumerate().map( + |(_,spaths)| spaths.iter().map( + |spath| Path::new(spath).canonicalize().unwrap_or_else( + |_| { + eprintln!("Failed to retrieve absolute path for {}", shlex::quote(spath)); + bad = true; + Default::default() + } + ) + ).collect() + ).collect(); + if bad { + return ProcessArgsResult::ExitError; + } + + + for paths in &run_paths { + if let Err(s) = check_all_same_device(paths) { + eprintln!("{}", s); + return ProcessArgsResult::ExitError; + } + } + + ProcessArgsResult::Ok(run_paths, config) } -fn check_all_same_device(pwmds: &[PathWithMetadata]) -> Result<(), String> { - if pwmds.len() <= 1 { +/// exit on error +fn get_st_dev(file: &PathBuf) -> Result { + if let Ok(metadata) = std::fs::metadata(file) { + Ok(metadata.st_dev()) + } else { + Err(format!("Failed to retrive device id for {}", shlex::quote(&file.to_string_lossy()))) + } +} + +fn check_all_same_device(paths: &[PathBuf]) -> Result<(), String> { + if paths.len() <= 1 { return Ok(()) } - let mut by_dev: HashMap> = Default::default(); - for pwmd in pwmds.iter() { - by_dev.entry(pwmd.md().st_dev()).or_default().push(pwmd); - } - if by_dev.len() <= 1 { - return Ok(()); - } - let mut lines = Vec::with_capacity(1+by_dev.len()); - lines.push(String::from("Device ids must all be the same; got paths on different devices:")); - lines.extend(by_dev.into_iter().map(|(dev,pwmds)| { - if pwmds.len() == 1 { - format!(" Device {}: {} path: {}", dev, pwmds.len(), &shlex::try_quote(&pwmds[0].path.to_string_lossy()).unwrap()) - } else { - format!(" Device {}: {} paths", dev, pwmds.len()) + let first_device_id = get_st_dev(&paths[0])?; + let mut wrong: Vec<&PathBuf> = Vec::new(); + for path in &paths[1..] { + if get_st_dev(path)? != first_device_id { + wrong.push(path); } - })); - Err(lines.join("\n")) -} - - -/// get two mutable references in an array -/// expects correct inputs -fn get2mut<'a, T>(v: &'a mut [T], i: usize, j: usize) -> (&'a mut T, &'a mut T) { - let (left, right) = v.split_at_mut(j); - (&mut left[i], &mut right[0]) -} - - -fn hardlink(keep: &PathWithMetadata, replace: &PathWithMetadata) -> Result<(), String> { - std::fs::remove_file(&replace.path).map_err(|_| "Failed to remove for hardlinking")?; - std::fs::hard_link(&keep.path, &replace.path).map_err(|_| { - match std::fs::copy(&keep.path, &replace.path) { - Ok(_) => "Failed to hardlink (copied instead)", - Err(_) => "Failed to hardlink or copy" // awful scenario but i believe it is impossible since i don't see how you could remove a file yet not create one in its place - } - })?; - replace.reset_md()?; - Ok(()) -} - -/// returns whether linking was done -/// eprints errors -fn hardlink_all<'a, 'b, T, W: Write>(keeps: &'a mut SmallVec, replaces: &'a mut SmallVec, cfg: &Config, mut stdout_buffer: Option<&mut W>) -> bool -where T: smallvec::Array, -{ - if !cmp(&replaces.first().unwrap().path, &keeps.first().unwrap().path).unwrap_or(false) { - return false; } - for replace in replaces.into_iter() { - let keep = keeps.first().unwrap(); - if !cfg.dry_run { - if let Err(msg) = hardlink(keep, replace) { - if cfg.verbosity >= 0 { - eprintln!("{}: {}", msg, format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg)); + if wrong.is_empty() { + Ok(()) + } else { + let mut s = String::with_capacity(wrong.len()*128); // 75 max estimated len of path, 53 for prefix msg + nl + for path in wrong { + s.push_str("Device ids must all be the same; got different for: {}"); + s.push_str(&shlex::quote(&path.to_string_lossy())); + s.push_str("\n"); + } + s.pop(); // remove last newline + Err(s) + } +} + + +/// perform a full run +fn run(paths: Vec, cfg: &Config) { + let mut registry: HashMap> = HashMap::new(); + + for path in paths { + register(path.to_path_buf(), &mut registry, cfg); + } + registry.retain(|_,files| files.len() >= 2); + + let mut stdout_buffer = if cfg.verbosity >= 0 { + let stdout = std::io::stdout(); + let stdout_buffer = std::io::BufWriter::new(stdout.lock()); + Some(stdout_buffer) + } else { + None + }; + + if let Some(stdout_buffer) = &mut stdout_buffer { + if cfg.verbosity >= 0 { + writeln!(stdout_buffer, "Considering {} total files for duplicates", registry.iter().map(|(_,files)| files.len()).sum::()).unwrap(); + } + } + + for (fsize, mut files) in registry { + if files.len() > 8 { + files.sort_by_key(|path| path.file_name().unwrap_or_default().to_string_lossy().to_string()); + } + if let Some(stdout_buffer) = &mut stdout_buffer { + if cfg.verbosity > 1 { + writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", files.len(), fsize).unwrap(); + } + } + for i in (0..files.len()).rev() { + let f1 = &files[i]; + for j in (0..i).rev() { + let f2 = &files[j]; + if !are_hardlinked(f1, f2) && cmp(f1, f2).unwrap_or(false) { + if !cfg.dry_run { + if let Err(msg) = hardlink(f1, f2) { + eprintln!("{}: {}", msg, format_pair(f1, f2, cfg)); + continue + } + } + if let Some(stdout_buffer) = &mut stdout_buffer { + if cfg.verbosity >= 0 { + writeln!(stdout_buffer, "hardlinked {}", format_pair(f1, f2, cfg)).unwrap(); + } + } } - continue // path no longer valid } } - if let Some(stdout_buffer) = stdout_buffer.as_mut() { - if cfg.verbosity >= 0 { - writeln!(stdout_buffer, "hardlinked {}", format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg)).unwrap(); - } - } - drop(keep); - keeps.push(replace); } - true } -fn format_pair(f1s: &str, f2s: &str, cfg: &Config) -> String { +fn hardlink(f1: &PathBuf, f2: &PathBuf) -> Result<(), &'static str> { + if let Err(_) = std::fs::remove_file(f2) { + Err("Failed to remove second file for hardlinking") + } else if let Err(_) = std::fs::hard_link(f1, f2) { // same as ln in terms of args: left args's inode becomes right arg's inode + match std::fs::copy(f1, f2) { + Ok(_) => Err("Failed to hardlink (copied instead)"), + Err(_) => Err("Failed to hardlink or copy") + } + } else { + Ok(()) + } +} + + +fn format_pair(f1: &PathBuf, f2: &PathBuf, cfg: &Config) -> String { + let f1s = f1.to_string_lossy(); + let f2s = f2.to_string_lossy(); if cfg.no_brace_output { return format!( "{} {}", - shlex::try_quote(&f1s).unwrap(), - shlex::try_quote(&f2s).unwrap() + shlex::quote(&f1s), + shlex::quote(&f2s) ) } @@ -487,43 +310,78 @@ fn format_pair(f1s: &str, f2s: &str, cfg: &Config) -> String { if prefixlong && suffixlong { format!( "{}{{{},{}}}{}", - shlex::try_quote(prefix).unwrap(), - shlex::try_quote(&f1s[ prefix.len()..std::cmp::max(prefix.len(), f1s.len()-suffix.len()) ]).unwrap(), - shlex::try_quote(&f2s[ prefix.len()..std::cmp::max(prefix.len(), f2s.len()-suffix.len()) ]).unwrap(), - shlex::try_quote(suffix).unwrap() + shlex::quote(prefix), + shlex::quote(&f1s[ prefix.len()..std::cmp::max(prefix.len(), f1s.len()-suffix.len()) ]), + shlex::quote(&f2s[ prefix.len()..std::cmp::max(prefix.len(), f2s.len()-suffix.len()) ]), + shlex::quote(suffix) ) } else if prefixlong { format!( "{}{{{},{}}}", - shlex::try_quote(prefix).unwrap(), - shlex::try_quote(&f1s[prefix.len()..]).unwrap(), - shlex::try_quote(&f2s[prefix.len()..]).unwrap() + shlex::quote(prefix), + shlex::quote(&f1s[prefix.len()..]), + shlex::quote(&f2s[prefix.len()..]) ) } else if suffixlong { format!( "{{{},{}}}{}", - shlex::try_quote(&f1s[..f1s.len()-suffix.len()]).unwrap(), - shlex::try_quote(&f2s[..f2s.len()-suffix.len()]).unwrap(), - shlex::try_quote(suffix).unwrap(), + shlex::quote(&f1s[..f1s.len()-suffix.len()]), + shlex::quote(&f2s[..f2s.len()-suffix.len()]), + shlex::quote(suffix), ) } else { format!( "{} <-> {}", - shlex::try_quote(&f1s).unwrap(), - shlex::try_quote(&f2s).unwrap() + shlex::quote(&f1s), + shlex::quote(&f2s) ) } } +/// recursively register path or its contents if directory into registry +fn register(path: PathBuf, registry: &mut HashMap>, cfg: &Config) { + if let Ok(metadata) = std::fs::symlink_metadata(&path) { + if metadata.file_type().is_symlink() { + return + } + + if path.is_file() { + let size = metadata.st_size(); + if size >= cfg.min_size { + registry.entry(size).or_insert_with(|| Vec::new()).push(path); + } + } else if path.is_dir() { + if let Ok(entries) = std::fs::read_dir(path) { + for entry in entries { + if let Ok(entry) = entry { + register(entry.path(), registry, cfg); + } + } + } + } + } +} + + +fn are_hardlinked(f1: &PathBuf, f2: &PathBuf) -> bool { + if let (Ok(md1), Ok(md2)) = (std::fs::metadata(f1), std::fs::metadata(f2)) { + md1.st_ino() == md2.st_ino() + } else { + false + } +} + + /// check equality of contents of two paths to files -/// does not check sizes -fn cmp(f1: impl AsRef, f2: impl AsRef) -> std::io::Result { - cmp_read(std::fs::File::open(f1)?, std::fs::File::open(f2)?) +fn cmp(f1: &PathBuf, f2: &PathBuf) -> std::io::Result { + if let (Ok(mut f1), Ok(mut f2)) = (std::fs::File::open(f1), std::fs::File::open(f2)) { + cmp_files(&mut f1, &mut f2) + } else { Ok(false) } } /// check equality of contents of two open files -fn cmp_read(mut f1: impl Read, mut f2: impl Read) -> std::io::Result { +fn cmp_files(f1: &mut std::fs::File, f2: &mut std::fs::File) -> std::io::Result { let buff1: &mut [u8] = &mut [0; 1024]; let buff2: &mut [u8] = &mut [0; 1024]; loop { @@ -561,7 +419,6 @@ fn common_suffix<'a>(s1: &'a str, s2: &'a str) -> &'a str { } -/// double delimiters will result in empty vecs fn split_vec<'a, T: std::cmp::PartialEq>(input: &'a [T], delimiter: &T) -> Vec> { let mut result: Vec> = Vec::new(); @@ -582,14 +439,14 @@ fn split_vec<'a, T: std::cmp::PartialEq>(input: &'a [T], delimiter: &T) -> Vec = vec![";", ";", ";"].into_iter().map(|s| s.to_string()).collect(); + let v: Vec<_> = vec![";", "hi", "bye", ";", "1", ";", ";", "2", "2", ";"].into_iter().map(|s| s.to_string()).collect(); let res = split_vec(&v[..], &";".to_string()); - assert_eq!(res.len(), 2) + println!("{:?}", v); + println!("{:?}", res); } }