diff --git a/Cargo.toml b/Cargo.toml index a593e43..f81f4a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,17 @@ [package] name = "lndups" -version = "0.1.0" +version = "0.1.4" edition = "2021" +description = "Hardlink duplicate files" +authors = ["George H "] +repository = "https://github.com/ascendforever/lndups" +license = "GPL-3.0-or-later" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -shlex = "1.2" +shlex = "1.3" +smallvec = "1.13" structopt = "0.3" + +unicode-width = "<0.1.14" # subdependency causing compilation failure diff --git a/README.md b/README.md index d29e849..743eb49 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,7 @@ Hardlink duplicate files Linux is the sole supported OS simply because I don't need it for another OS. It can be modified to be multiplatform though. + + +## Installation +`cargo install lndups` or `curl -O 'https://github.com/ascendforever/lndups/raw/refs/heads/main/prebuilt-x86-64-linux/lndups'` diff --git a/prebuilt-x86-64-linux/lndups b/prebuilt-x86-64-linux/lndups index 06531d9..6134397 100755 Binary files a/prebuilt-x86-64-linux/lndups and b/prebuilt-x86-64-linux/lndups differ diff --git a/src/main.rs b/src/main.rs index 8bcfcc0..97ca443 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,29 +1,82 @@ extern crate shlex; +extern crate smallvec; extern crate structopt; use std::borrow::Borrow; -use std::io::{Read, Write, BufReader, BufRead}; -use std::path::{Path,PathBuf}; +use std::cell::RefCell; use std::collections::HashMap; +use std::io::{Read, Write, BufReader, BufRead}; use std::os::linux::fs::MetadataExt as MetadataExtLinux; +use std::path::{Path, PathBuf}; use crate::structopt::StructOpt; +use crate::smallvec::*; + +macro_rules! s_arg_target_file_name { () => { "target-file" } } macro_rules! s_default_target_separator { () => { ";" } } -fn main() -> Result<(), i32> { - match process_args() { - ProcessArgsResult::Ok(run_paths, cfg) => { - for paths in run_paths { - run(paths, &cfg); - } - Ok(()) - }, - ProcessArgsResult::Exit => Ok(()), - ProcessArgsResult::ExitError => Err(1), - } + +#[derive(StructOpt)] +#[structopt( + about="Hardlink duplicate files recursively\nSymlinks are treated as normal files", + usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]") +)] +struct CLIArguments { + #[structopt(short, long, parse(from_occurrences), help="Increase verbosity")] + verbose: i8, + + #[structopt(short, long, parse(from_occurrences), help="Decrease verbosity")] + quiet: i8, + + #[structopt(long, help=concat!( + "Disable brace notation for output\n", + " Ex: /home/user/{dir,backup}/file", + ))] + no_brace_output: bool, + + #[structopt(long, help=concat!( + "Perform no operations on the filesystem", + ))] + dry_run: bool, + + #[structopt(short="i", help=concat!( + "Prompt once before operating\n", + "Doesn't occurs if no targets are provided", + ))] + prompt: bool, + + #[structopt(short, long, value_name="VALUE", help=concat!( + "Minimum file size to be considered for hardlinking\n", + "Never goes below 1 (the default)", + ))] + min_size: Option, + + #[structopt(short, long, value_name="SEPARATOR", help=concat!( + "Separator between sets of targets (default: ", s_default_target_separator!(), ")", + ))] + separator: Option, + + #[structopt(long=s_arg_target_file_name!(), value_name="FILE", help=concat!( + "File to source targets from (can be '-' for stdin)\n", + "Same rules as CLI argument targets apply\n", + "Mutually exclusive with CLI argument targets", + ))] + file_containing_targets: Option, + + #[structopt(value_name="TARGET", help=concat!( + "Target files and directories (recursive)\n", + "Each SEPARATOR denotes a new set of targets\n", + " Each set of targets are separate from all other sets\n", + " All targets must be on the same device\n", + "All symlinks are ignored\n", + "'-' is not treated as special\n", + "Mutually exclusive with --", s_arg_target_file_name!(), + ))] + targets: Vec, } + struct Config { dry_run: bool, min_size: u64, @@ -32,55 +85,285 @@ struct Config { } -#[derive(StructOpt)] -#[structopt( - about="Hardlink duplicate files recursively\nSymlinks are treated as normal files", - usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]") -)] -struct CLIArguments { - #[structopt(short, long, parse(from_occurrences), - help="Increase verbosity")] - verbose: i8, - #[structopt(short, long, parse(from_occurrences), - help="Decrease verbosity")] - quiet: i8, +fn main() -> Result<(), i32> { + let mut args = CLIArguments::from_args(); + let verbosity = args.verbose - args.quiet; - #[structopt(long, - help="Disable brace notation for output\n Ex: /home/user/{dir,backup}/file")] - no_brace_output: bool, + let config = Config { + min_size: args.min_size.map(|v| if v > 1 { v } else { 1 }).unwrap_or(1), + no_brace_output: args.no_brace_output, + dry_run: args.dry_run, + verbosity + }; - #[structopt(long, - help="Perform no operations on the filesystem")] - dry_run: bool, + let run_targets: Vec> = obtain_run_targets( + args.file_containing_targets.as_ref(), + &mut args.targets, + args.separator.as_ref().unwrap_or(&s_default_target_separator!().to_string()), + verbosity, + )?; + if run_targets.is_empty() { + if verbosity >= 1 { + println!("No targets provided"); + } + return Ok(()); + } - #[structopt(short="i", - help="Prompt once before operating\nDoesn't occurs if no targets are provided")] - prompt: bool, + let run_paths: Vec> = obtain_run_paths( + run_targets.iter().map(|v| v.iter()), + verbosity, + )?; - #[structopt(short, long, value_name="VALUE", - help="Minimum file size to be considered for hardlinking\nNever goes below 1 (the default)")] - min_size: Option, + for paths in &run_paths { + if let Err(s) = check_all_same_device(paths) { + eprintln!("{}", s); + return Err(1); + } + } - #[structopt(short, long, value_name="SEPARATOR", - help=concat!("Separator between sets of targets (default: ", s_default_target_separator!(), ")"))] - separator: Option, + if run_paths.len() == 0 { + return Ok(()); + } - #[structopt(long, value_name="FILE", - help="File to source arguments from (can be '-' for stdin)")] - argument_file: Option, + if args.prompt { + if !prompt_confirm(&run_targets) { + return Ok(()); + } + } - #[structopt(value_name="TARGET", - help="Target files and directories (recursive)\nEach SEPARATOR denotes a new set of targets\n Each set of targets are separate from all other sets\n All targets must be on the same device\nAll symlinks are ignored\n'-' is not treated as special")] - targets: Vec, + for paths in run_paths { + run(paths, &config); + } + + Ok(()) } +/// result may be empty; contents each nonempty +fn obtain_run_targets<'a>( + arg_file: Option<&String>, + arg_targets: &'a mut Vec, + separator: &String, verbosity: i8 +) -> Result>, i32> { + if let Some(arg_file) = &arg_file { + if !arg_targets.is_empty() { + if verbosity >= 0 { + eprintln!("No targets should be provided as cli arguments if arguments are being read from file"); + } + return Err(1); + } + if let Err(s) = { + if *arg_file == "-" { + read_lines(std::io::stdin().lock(), arg_targets) + } else { + read_file_lines(Path::new(&arg_file), arg_targets) + } + } { + if verbosity >= 0 { + eprintln!("Error reading file containing targets: {}", s); + } + return Err(1); + } + } else { + for target in arg_targets.iter() { + if target.contains('\0') { + if verbosity >= 0 { + eprintln!("Paths can never contain null byte: {}", target); + } + return Err(1); + } + } + } + + let mut run_targets = split_vec(arg_targets, &separator); + for i in (0..run_targets.len()).rev() { + if run_targets[i].len() == 0 { + run_targets.swap_remove(i); + } + } + Ok(run_targets) +} + + +/// result has no symlinks; may be empty; contents each nonempty +fn obtain_run_paths(run_targets: T, verbosity: i8) -> Result>, i32> +where + T: Iterator + ExactSizeIterator, + Y: Iterator + ExactSizeIterator, + U: AsRef, +{ + let mut run_paths: Vec> = Vec::with_capacity(run_targets.len()); + for spaths in run_targets { + let mut paths = Vec::with_capacity(spaths.len()); + for spath in spaths { + let path = Path::new(spath.as_ref()).canonicalize().map_err(|_| { + if verbosity >= 1 { + eprintln!("Failed to retrieve absolute path for {}", shlex::try_quote(spath.as_ref()).unwrap()); + } + 1 + })?; + let pwmd = PathWithMetadata::new(path).map_err(|s| { + if verbosity >= 1 { + eprintln!("{}", s); + } + 1 + })?; + if !pwmd.md().file_type().is_symlink() { + paths.push(pwmd); + } + } + if paths.len() > 0 { + run_paths.push(paths); + } + } + Ok(run_paths) +} + + +/// perform a full run +fn run(pwmds: Vec, cfg: &Config) { + let mut registry: HashMap> = HashMap::new(); + for pwmd in pwmds { + register(pwmd, &mut registry, cfg); + } + registry.retain(|_,files| files.len() >= 2); + + let mut stdout_buffer = (cfg.verbosity >= 0).then(|| std::io::BufWriter::new(std::io::stdout().lock())); + + if let Some(stdout_buffer) = &mut stdout_buffer { + if cfg.verbosity >= 0 { + writeln!(stdout_buffer, "Considering {} total files for duplicates", registry.iter().map(|(_,files)| files.len()).sum::()).unwrap(); + } + } + + for (fsize, pwmds) in registry { + run_one_size(fsize, &pwmds, cfg, stdout_buffer.as_mut()); + } +} + +fn run_one_size(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config, mut stdout_buffer: Option<&mut W>) { + if let Some(stdout_buffer) = stdout_buffer.as_mut() { + if cfg.verbosity >= 1 { + writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", pwmds.len(), fsize).unwrap(); + } + } + // if cfg.verbosity >= 0 { + // pwmds.sort_by_key(|pwmd| pwmd.path.file_name().unwrap_or_default().to_string_lossy().to_string()); + // } + let mut by_inode: Vec> = Vec::with_capacity((pwmds.len() as f64 * 0.8) as usize); // each nonempty + let mut inodes: Vec = Vec::with_capacity(by_inode.len()); + for pwmd in pwmds { + let inode: u64 = pwmd.md().st_ino(); + match inodes.binary_search(&inode) { + Ok(i) => { + by_inode[i].push(pwmd); + }, + Err(i) => { + inodes.insert(i, inode); + by_inode.insert(i, smallvec![pwmd]); + } + } + } + drop(inodes); + by_inode.sort_by(|a,b| b.len().cmp(&a.len())); // descending size order + + // compare each with eachother + let mut i = 0; + while i < by_inode.len() { + let mut j = i+1; + while j < by_inode.len() { + let (keeps, replaces) = get2mut(&mut by_inode, i, j); + if hardlink_all(keeps, replaces, cfg, stdout_buffer.as_mut()) { + by_inode.swap_remove(j); + } else { + j += 1; + } + } + i += 1; + } +} + + +/// recursively register path or its contents if directory into registry +/// eprints errors +fn register( + pwmd: PathWithMetadata, + registry: &mut HashMap>, + cfg: &Config, +) { + if pwmd.md().file_type().is_symlink() { + return; + } + + if pwmd.path.is_file() { + let size = pwmd.md().st_size(); + if size >= cfg.min_size { + registry.entry(size).or_default().push(pwmd); + } + return; + } + + if pwmd.path.is_dir() { match std::fs::read_dir(&pwmd.path) { + Ok(entries) => for entry in entries { match entry { + Ok(entry) => match PathWithMetadata::new(entry.path()) { + Ok(child_pwmd) => register(child_pwmd, registry, cfg), + Err(s) => if cfg.verbosity >= 1 { + eprintln!("{}", s); + }, + }, + Err(error) => if cfg.verbosity >= 1 { + eprintln!("Failed to inspect {}: {}", shlex::try_quote(&pwmd.path.to_string_lossy()).unwrap(), error); + }, + } }, + Err(error) => if cfg.verbosity >= 1 { + eprintln!("Failed to read dir {}: {}", shlex::try_quote(&pwmd.path.to_string_lossy()).unwrap(), error); + }, + } } +} + + + +struct PathWithMetadata { + pub path: PathBuf, + md: RefCell, +} +impl PathWithMetadata { + pub fn new(path: PathBuf) -> Result{ + let md = RefCell::new(Self::get_md(&path)?); + Ok(PathWithMetadata{ path, md }) + } + #[inline(always)] + pub fn md(&self) -> std::cell::Ref { + self.md.borrow() + } + pub fn reset_md(&self) -> Result<(), String> { + *self.md.borrow_mut() = Self::get_md(&self.path)?; + Ok(()) + } + fn get_md(path: &PathBuf) -> Result { + std::fs::symlink_metadata(path).map_err(|_| format!("Failed to retrive metadata for {}", shlex::try_quote(&path.to_string_lossy()).unwrap())) + } + +} +impl AsRef for PathWithMetadata { + fn as_ref(&self) -> &PathBuf { + return &self.path; + } +} +impl AsRef for PathWithMetadata { + fn as_ref(&self) -> &Path { + return &self.path.as_ref(); + } +} + + + /// return whether or not user gave confirmation fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef>(run_targets: &[T]) -> bool { println!("Are you sure you want to link all duplicates in each of these sets of targets?"); for spaths in run_targets { - println!(" {}", shlex::join(spaths.borrow().iter().map(|s| s.as_ref()))); + println!(" {}", shlex::try_join(spaths.borrow().iter().map(|s| s.as_ref())).unwrap()); } print!("> "); std::io::stdout().flush().unwrap_or_else(|_| ()); @@ -96,210 +379,104 @@ fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef>(run_targets: &[T]) -> bool response.to_lowercase().starts_with("y") } +fn read_lines(reader: impl BufRead, dest: &mut Vec) -> Result<(), String> { + for line in reader.lines() { + dest.push(line.map_err(|e| format!("Error reading line: {}", e))?); + } + Ok(()) +} fn read_file_lines(path: &Path, dest: &mut Vec) -> Result<(), String> { if !path.is_file() { - return Err(format!("File does not exist or is not a normal file ({})", shlex::quote(&path.to_string_lossy()))); + return Err(format!("File does not exist or is not a normal file ({})", shlex::try_quote(&path.to_string_lossy()).unwrap())); } - if let Ok(f) = std::fs::File::open(path) { - let reader = BufReader::new(f); - for line in reader.lines() { - match line { - Ok(line) => dest.push(line), - Err(err) => return Err(format!("Error reading line: {}", err)) - } - } - Ok(()) - } else { - Err(format!("Could not open {}", shlex::quote(&path.to_string_lossy()))) - } -} - -enum ProcessArgsResult { - Ok(Vec>, Config), - Exit, - ExitError, -} - -/// may exit -fn process_args() -> ProcessArgsResult { - let mut args = CLIArguments::from_args(); - let verbosity = args.verbose - args.quiet; - - let config = Config { - min_size: args.min_size.map(|v| if v > 1 { v } else { 1 }).unwrap_or(1), - no_brace_output: args.no_brace_output, - dry_run: args.dry_run, - verbosity - }; - - if let Some(arg_file) = args.argument_file { - if !args.targets.is_empty() { - eprintln!("No targets should be provided as cli arguments if arguments are being read from file"); - return ProcessArgsResult::ExitError; - } - let path = Path::new(&arg_file); - if let Err(s) = read_file_lines(path, &mut args.targets) { - eprintln!("Error reading argument file: {}", s); - return ProcessArgsResult::ExitError; - } - } - - let run_targets: Vec> = split_vec(&args.targets, &args.separator.unwrap_or(s_default_target_separator!().to_string())); - - if run_targets.is_empty() { - if verbosity > 0 { - println!("No targets provided"); - } - return ProcessArgsResult::Exit; - } - - if args.prompt { - if !prompt_confirm(&run_targets) { - std::process::exit(0); - } - } - - let mut bad = false; - let run_paths: Vec> = run_targets.iter().enumerate().map( - |(_,spaths)| spaths.iter().map( - |spath| Path::new(spath).canonicalize().unwrap_or_else( - |_| { - eprintln!("Failed to retrieve absolute path for {}", shlex::quote(spath)); - bad = true; - Default::default() - } - ) - ).collect() - ).collect(); - if bad { - return ProcessArgsResult::ExitError; - } - - - for paths in &run_paths { - if let Err(s) = check_all_same_device(paths) { - eprintln!("{}", s); - return ProcessArgsResult::ExitError; - } - } - - ProcessArgsResult::Ok(run_paths, config) + let reader = BufReader::new(std::fs::File::open(path).map_err( + |e| format!("Could not open {}: {}", shlex::try_quote(&path.to_string_lossy()).unwrap(), e) + )?); + read_lines(reader, dest) } -/// exit on error -fn get_st_dev(file: &PathBuf) -> Result { - if let Ok(metadata) = std::fs::metadata(file) { - Ok(metadata.st_dev()) - } else { - Err(format!("Failed to retrive device id for {}", shlex::quote(&file.to_string_lossy()))) - } -} - -fn check_all_same_device(paths: &[PathBuf]) -> Result<(), String> { - if paths.len() <= 1 { +fn check_all_same_device(pwmds: &[PathWithMetadata]) -> Result<(), String> { + if pwmds.len() <= 1 { return Ok(()) } - let first_device_id = get_st_dev(&paths[0])?; - let mut wrong: Vec<&PathBuf> = Vec::new(); - for path in &paths[1..] { - if get_st_dev(path)? != first_device_id { - wrong.push(path); - } + let mut by_dev: HashMap> = Default::default(); + for pwmd in pwmds.iter() { + by_dev.entry(pwmd.md().st_dev()).or_default().push(pwmd); } - if wrong.is_empty() { - Ok(()) - } else { - let mut s = String::with_capacity(wrong.len()*128); // 75 max estimated len of path, 53 for prefix msg + nl - for path in wrong { - s.push_str("Device ids must all be the same; got different for: {}"); - s.push_str(&shlex::quote(&path.to_string_lossy())); - s.push_str("\n"); - } - s.pop(); // remove last newline - Err(s) + if by_dev.len() <= 1 { + return Ok(()); } + let mut lines = Vec::with_capacity(1+by_dev.len()); + lines.push(String::from("Device ids must all be the same; got paths on different devices:")); + lines.extend(by_dev.into_iter().map(|(dev,pwmds)| { + if pwmds.len() == 1 { + format!(" Device {}: {} path: {}", dev, pwmds.len(), &shlex::try_quote(&pwmds[0].path.to_string_lossy()).unwrap()) + } else { + format!(" Device {}: {} paths", dev, pwmds.len()) + } + })); + Err(lines.join("\n")) } -/// perform a full run -fn run(paths: Vec, cfg: &Config) { - let mut registry: HashMap> = HashMap::new(); +/// get two mutable references in an array +/// expects correct inputs +fn get2mut<'a, T>(v: &'a mut [T], i: usize, j: usize) -> (&'a mut T, &'a mut T) { + let (left, right) = v.split_at_mut(j); + (&mut left[i], &mut right[0]) +} - for path in paths { - register(path.to_path_buf(), &mut registry, cfg); + +fn hardlink(keep: &PathWithMetadata, replace: &PathWithMetadata) -> Result<(), String> { + std::fs::remove_file(&replace.path).map_err(|_| "Failed to remove for hardlinking")?; + std::fs::hard_link(&keep.path, &replace.path).map_err(|_| { + match std::fs::copy(&keep.path, &replace.path) { + Ok(_) => "Failed to hardlink (copied instead)", + Err(_) => "Failed to hardlink or copy" // awful scenario but i believe it is impossible since i don't see how you could remove a file yet not create one in its place + } + })?; + replace.reset_md()?; + Ok(()) +} + +/// returns whether linking was done +/// eprints errors +fn hardlink_all<'a, 'b, T, W: Write>(keeps: &'a mut SmallVec, replaces: &'a mut SmallVec, cfg: &Config, mut stdout_buffer: Option<&mut W>) -> bool +where T: smallvec::Array, +{ + if !cmp(&replaces.first().unwrap().path, &keeps.first().unwrap().path).unwrap_or(false) { + return false; } - registry.retain(|_,files| files.len() >= 2); - - let mut stdout_buffer = if cfg.verbosity >= 0 { - let stdout = std::io::stdout(); - let stdout_buffer = std::io::BufWriter::new(stdout.lock()); - Some(stdout_buffer) - } else { - None - }; - - if let Some(stdout_buffer) = &mut stdout_buffer { - if cfg.verbosity >= 0 { - writeln!(stdout_buffer, "Considering {} total files for duplicates", registry.iter().map(|(_,files)| files.len()).sum::()).unwrap(); - } - } - - for (fsize, mut files) in registry { - if files.len() > 8 { - files.sort_by_key(|path| path.file_name().unwrap_or_default().to_string_lossy().to_string()); - } - if let Some(stdout_buffer) = &mut stdout_buffer { - if cfg.verbosity > 1 { - writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", files.len(), fsize).unwrap(); - } - } - for i in (0..files.len()).rev() { - let f1 = &files[i]; - for j in (0..i).rev() { - let f2 = &files[j]; - if !are_hardlinked(f1, f2) && cmp(f1, f2).unwrap_or(false) { - if !cfg.dry_run { - if let Err(msg) = hardlink(f1, f2) { - eprintln!("{}: {}", msg, format_pair(f1, f2, cfg)); - continue - } - } - if let Some(stdout_buffer) = &mut stdout_buffer { - if cfg.verbosity >= 0 { - writeln!(stdout_buffer, "hardlinked {}", format_pair(f1, f2, cfg)).unwrap(); - } - } + for replace in replaces.into_iter() { + let keep = keeps.first().unwrap(); + if !cfg.dry_run { + if let Err(msg) = hardlink(keep, replace) { + if cfg.verbosity >= 0 { + eprintln!("{}: {}", msg, format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg)); } + continue // path no longer valid } } - } -} - - -fn hardlink(f1: &PathBuf, f2: &PathBuf) -> Result<(), &'static str> { - if let Err(_) = std::fs::remove_file(f2) { - Err("Failed to remove second file for hardlinking") - } else if let Err(_) = std::fs::hard_link(f1, f2) { // same as ln in terms of args: left args's inode becomes right arg's inode - match std::fs::copy(f1, f2) { - Ok(_) => Err("Failed to hardlink (copied instead)"), - Err(_) => Err("Failed to hardlink or copy") + if let Some(stdout_buffer) = stdout_buffer.as_mut() { + if cfg.verbosity >= 0 { + writeln!(stdout_buffer, "hardlinked {}", format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg)).unwrap(); + } } - } else { - Ok(()) + drop(keep); + keeps.push(replace); } + true } -fn format_pair(f1: &PathBuf, f2: &PathBuf, cfg: &Config) -> String { - let f1s = f1.to_string_lossy(); - let f2s = f2.to_string_lossy(); +fn format_pair(f1s: &str, f2s: &str, cfg: &Config) -> String { if cfg.no_brace_output { return format!( "{} {}", - shlex::quote(&f1s), - shlex::quote(&f2s) + shlex::try_quote(&f1s).unwrap(), + shlex::try_quote(&f2s).unwrap() ) } @@ -310,78 +487,43 @@ fn format_pair(f1: &PathBuf, f2: &PathBuf, cfg: &Config) -> String { if prefixlong && suffixlong { format!( "{}{{{},{}}}{}", - shlex::quote(prefix), - shlex::quote(&f1s[ prefix.len()..std::cmp::max(prefix.len(), f1s.len()-suffix.len()) ]), - shlex::quote(&f2s[ prefix.len()..std::cmp::max(prefix.len(), f2s.len()-suffix.len()) ]), - shlex::quote(suffix) + shlex::try_quote(prefix).unwrap(), + shlex::try_quote(&f1s[ prefix.len()..std::cmp::max(prefix.len(), f1s.len()-suffix.len()) ]).unwrap(), + shlex::try_quote(&f2s[ prefix.len()..std::cmp::max(prefix.len(), f2s.len()-suffix.len()) ]).unwrap(), + shlex::try_quote(suffix).unwrap() ) } else if prefixlong { format!( "{}{{{},{}}}", - shlex::quote(prefix), - shlex::quote(&f1s[prefix.len()..]), - shlex::quote(&f2s[prefix.len()..]) + shlex::try_quote(prefix).unwrap(), + shlex::try_quote(&f1s[prefix.len()..]).unwrap(), + shlex::try_quote(&f2s[prefix.len()..]).unwrap() ) } else if suffixlong { format!( "{{{},{}}}{}", - shlex::quote(&f1s[..f1s.len()-suffix.len()]), - shlex::quote(&f2s[..f2s.len()-suffix.len()]), - shlex::quote(suffix), + shlex::try_quote(&f1s[..f1s.len()-suffix.len()]).unwrap(), + shlex::try_quote(&f2s[..f2s.len()-suffix.len()]).unwrap(), + shlex::try_quote(suffix).unwrap(), ) } else { format!( "{} <-> {}", - shlex::quote(&f1s), - shlex::quote(&f2s) + shlex::try_quote(&f1s).unwrap(), + shlex::try_quote(&f2s).unwrap() ) } } -/// recursively register path or its contents if directory into registry -fn register(path: PathBuf, registry: &mut HashMap>, cfg: &Config) { - if let Ok(metadata) = std::fs::symlink_metadata(&path) { - if metadata.file_type().is_symlink() { - return - } - - if path.is_file() { - let size = metadata.st_size(); - if size >= cfg.min_size { - registry.entry(size).or_insert_with(|| Vec::new()).push(path); - } - } else if path.is_dir() { - if let Ok(entries) = std::fs::read_dir(path) { - for entry in entries { - if let Ok(entry) = entry { - register(entry.path(), registry, cfg); - } - } - } - } - } -} - - -fn are_hardlinked(f1: &PathBuf, f2: &PathBuf) -> bool { - if let (Ok(md1), Ok(md2)) = (std::fs::metadata(f1), std::fs::metadata(f2)) { - md1.st_ino() == md2.st_ino() - } else { - false - } -} - - /// check equality of contents of two paths to files -fn cmp(f1: &PathBuf, f2: &PathBuf) -> std::io::Result { - if let (Ok(mut f1), Ok(mut f2)) = (std::fs::File::open(f1), std::fs::File::open(f2)) { - cmp_files(&mut f1, &mut f2) - } else { Ok(false) } +/// does not check sizes +fn cmp(f1: impl AsRef, f2: impl AsRef) -> std::io::Result { + cmp_read(std::fs::File::open(f1)?, std::fs::File::open(f2)?) } /// check equality of contents of two open files -fn cmp_files(f1: &mut std::fs::File, f2: &mut std::fs::File) -> std::io::Result { +fn cmp_read(mut f1: impl Read, mut f2: impl Read) -> std::io::Result { let buff1: &mut [u8] = &mut [0; 1024]; let buff2: &mut [u8] = &mut [0; 1024]; loop { @@ -419,6 +561,7 @@ fn common_suffix<'a>(s1: &'a str, s2: &'a str) -> &'a str { } +/// double delimiters will result in empty vecs fn split_vec<'a, T: std::cmp::PartialEq>(input: &'a [T], delimiter: &T) -> Vec> { let mut result: Vec> = Vec::new(); @@ -439,14 +582,14 @@ fn split_vec<'a, T: std::cmp::PartialEq>(input: &'a [T], delimiter: &T) -> Vec = vec![";", "hi", "bye", ";", "1", ";", ";", "2", "2", ";"].into_iter().map(|s| s.to_string()).collect(); + let v: Vec<_> = vec![";", ";", ";"].into_iter().map(|s| s.to_string()).collect(); let res = split_vec(&v[..], &";".to_string()); - println!("{:?}", v); - println!("{:?}", res); + assert_eq!(res.len(), 2) } }