Compare commits
10 commits
c5f5d20ed6
...
a64fc7c188
Author | SHA1 | Date | |
---|---|---|---|
a64fc7c188 | |||
7cca163224 | |||
48564ebec9 | |||
dd52c80da4 | |||
56f9121efc | |||
e2730b4d7a | |||
b51fab3ca1 | |||
e395e7f9c4 | |||
86aa59bad8 | |||
8cb8a2e3a4 |
4 changed files with 434 additions and 280 deletions
11
Cargo.toml
11
Cargo.toml
|
@ -1,10 +1,17 @@
|
||||||
[package]
|
[package]
|
||||||
name = "lndups"
|
name = "lndups"
|
||||||
version = "0.1.0"
|
version = "0.1.4"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
description = "Hardlink duplicate files"
|
||||||
|
authors = ["George H <georgeh124v2@gmail.com>"]
|
||||||
|
repository = "https://github.com/ascendforever/lndups"
|
||||||
|
license = "GPL-3.0-or-later"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
shlex = "1.2"
|
shlex = "1.3"
|
||||||
|
smallvec = "1.13"
|
||||||
structopt = "0.3"
|
structopt = "0.3"
|
||||||
|
|
||||||
|
unicode-width = "<0.1.14" # subdependency causing compilation failure
|
||||||
|
|
|
@ -2,3 +2,7 @@
|
||||||
Hardlink duplicate files
|
Hardlink duplicate files
|
||||||
|
|
||||||
Linux is the sole supported OS simply because I don't need it for another OS. It can be modified to be multiplatform though.
|
Linux is the sole supported OS simply because I don't need it for another OS. It can be modified to be multiplatform though.
|
||||||
|
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
`cargo install lndups` or `curl -O 'https://github.com/ascendforever/lndups/raw/refs/heads/main/prebuilt-x86-64-linux/lndups'`
|
||||||
|
|
Binary file not shown.
707
src/main.rs
707
src/main.rs
|
@ -1,29 +1,82 @@
|
||||||
extern crate shlex;
|
extern crate shlex;
|
||||||
|
extern crate smallvec;
|
||||||
extern crate structopt;
|
extern crate structopt;
|
||||||
use std::borrow::Borrow;
|
use std::borrow::Borrow;
|
||||||
use std::io::{Read, Write, BufReader, BufRead};
|
use std::cell::RefCell;
|
||||||
use std::path::{Path,PathBuf};
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::io::{Read, Write, BufReader, BufRead};
|
||||||
use std::os::linux::fs::MetadataExt as MetadataExtLinux;
|
use std::os::linux::fs::MetadataExt as MetadataExtLinux;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
use crate::structopt::StructOpt;
|
use crate::structopt::StructOpt;
|
||||||
|
use crate::smallvec::*;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
macro_rules! s_arg_target_file_name { () => { "target-file" } }
|
||||||
macro_rules! s_default_target_separator { () => { ";" } }
|
macro_rules! s_default_target_separator { () => { ";" } }
|
||||||
|
|
||||||
fn main() -> Result<(), i32> {
|
|
||||||
match process_args() {
|
#[derive(StructOpt)]
|
||||||
ProcessArgsResult::Ok(run_paths, cfg) => {
|
#[structopt(
|
||||||
for paths in run_paths {
|
about="Hardlink duplicate files recursively\nSymlinks are treated as normal files",
|
||||||
run(paths, &cfg);
|
usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]")
|
||||||
}
|
)]
|
||||||
Ok(())
|
struct CLIArguments {
|
||||||
},
|
#[structopt(short, long, parse(from_occurrences), help="Increase verbosity")]
|
||||||
ProcessArgsResult::Exit => Ok(()),
|
verbose: i8,
|
||||||
ProcessArgsResult::ExitError => Err(1),
|
|
||||||
}
|
#[structopt(short, long, parse(from_occurrences), help="Decrease verbosity")]
|
||||||
|
quiet: i8,
|
||||||
|
|
||||||
|
#[structopt(long, help=concat!(
|
||||||
|
"Disable brace notation for output\n",
|
||||||
|
" Ex: /home/user/{dir,backup}/file",
|
||||||
|
))]
|
||||||
|
no_brace_output: bool,
|
||||||
|
|
||||||
|
#[structopt(long, help=concat!(
|
||||||
|
"Perform no operations on the filesystem",
|
||||||
|
))]
|
||||||
|
dry_run: bool,
|
||||||
|
|
||||||
|
#[structopt(short="i", help=concat!(
|
||||||
|
"Prompt once before operating\n",
|
||||||
|
"Doesn't occurs if no targets are provided",
|
||||||
|
))]
|
||||||
|
prompt: bool,
|
||||||
|
|
||||||
|
#[structopt(short, long, value_name="VALUE", help=concat!(
|
||||||
|
"Minimum file size to be considered for hardlinking\n",
|
||||||
|
"Never goes below 1 (the default)",
|
||||||
|
))]
|
||||||
|
min_size: Option<u64>,
|
||||||
|
|
||||||
|
#[structopt(short, long, value_name="SEPARATOR", help=concat!(
|
||||||
|
"Separator between sets of targets (default: ", s_default_target_separator!(), ")",
|
||||||
|
))]
|
||||||
|
separator: Option<String>,
|
||||||
|
|
||||||
|
#[structopt(long=s_arg_target_file_name!(), value_name="FILE", help=concat!(
|
||||||
|
"File to source targets from (can be '-' for stdin)\n",
|
||||||
|
"Same rules as CLI argument targets apply\n",
|
||||||
|
"Mutually exclusive with CLI argument targets",
|
||||||
|
))]
|
||||||
|
file_containing_targets: Option<String>,
|
||||||
|
|
||||||
|
#[structopt(value_name="TARGET", help=concat!(
|
||||||
|
"Target files and directories (recursive)\n",
|
||||||
|
"Each SEPARATOR denotes a new set of targets\n",
|
||||||
|
" Each set of targets are separate from all other sets\n",
|
||||||
|
" All targets must be on the same device\n",
|
||||||
|
"All symlinks are ignored\n",
|
||||||
|
"'-' is not treated as special\n",
|
||||||
|
"Mutually exclusive with --", s_arg_target_file_name!(),
|
||||||
|
))]
|
||||||
|
targets: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct Config {
|
struct Config {
|
||||||
dry_run: bool,
|
dry_run: bool,
|
||||||
min_size: u64,
|
min_size: u64,
|
||||||
|
@ -32,55 +85,285 @@ struct Config {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[derive(StructOpt)]
|
|
||||||
#[structopt(
|
|
||||||
about="Hardlink duplicate files recursively\nSymlinks are treated as normal files",
|
|
||||||
usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]")
|
|
||||||
)]
|
|
||||||
struct CLIArguments {
|
|
||||||
#[structopt(short, long, parse(from_occurrences),
|
|
||||||
help="Increase verbosity")]
|
|
||||||
verbose: i8,
|
|
||||||
|
|
||||||
#[structopt(short, long, parse(from_occurrences),
|
fn main() -> Result<(), i32> {
|
||||||
help="Decrease verbosity")]
|
let mut args = CLIArguments::from_args();
|
||||||
quiet: i8,
|
let verbosity = args.verbose - args.quiet;
|
||||||
|
|
||||||
#[structopt(long,
|
let config = Config {
|
||||||
help="Disable brace notation for output\n Ex: /home/user/{dir,backup}/file")]
|
min_size: args.min_size.map(|v| if v > 1 { v } else { 1 }).unwrap_or(1),
|
||||||
no_brace_output: bool,
|
no_brace_output: args.no_brace_output,
|
||||||
|
dry_run: args.dry_run,
|
||||||
|
verbosity
|
||||||
|
};
|
||||||
|
|
||||||
#[structopt(long,
|
let run_targets: Vec<Vec<&String>> = obtain_run_targets(
|
||||||
help="Perform no operations on the filesystem")]
|
args.file_containing_targets.as_ref(),
|
||||||
dry_run: bool,
|
&mut args.targets,
|
||||||
|
args.separator.as_ref().unwrap_or(&s_default_target_separator!().to_string()),
|
||||||
#[structopt(short="i",
|
verbosity,
|
||||||
help="Prompt once before operating\nDoesn't occurs if no targets are provided")]
|
)?;
|
||||||
prompt: bool,
|
if run_targets.is_empty() {
|
||||||
|
if verbosity >= 1 {
|
||||||
#[structopt(short, long, value_name="VALUE",
|
println!("No targets provided");
|
||||||
help="Minimum file size to be considered for hardlinking\nNever goes below 1 (the default)")]
|
|
||||||
min_size: Option<u64>,
|
|
||||||
|
|
||||||
#[structopt(short, long, value_name="SEPARATOR",
|
|
||||||
help=concat!("Separator between sets of targets (default: ", s_default_target_separator!(), ")"))]
|
|
||||||
separator: Option<String>,
|
|
||||||
|
|
||||||
#[structopt(long, value_name="FILE",
|
|
||||||
help="File to source arguments from (can be '-' for stdin)")]
|
|
||||||
argument_file: Option<String>,
|
|
||||||
|
|
||||||
#[structopt(value_name="TARGET",
|
|
||||||
help="Target files and directories (recursive)\nEach SEPARATOR denotes a new set of targets\n Each set of targets are separate from all other sets\n All targets must be on the same device\nAll symlinks are ignored\n'-' is not treated as special")]
|
|
||||||
targets: Vec<String>,
|
|
||||||
}
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let run_paths: Vec<Vec<PathWithMetadata>> = obtain_run_paths(
|
||||||
|
run_targets.iter().map(|v| v.iter()),
|
||||||
|
verbosity,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
for paths in &run_paths {
|
||||||
|
if let Err(s) = check_all_same_device(paths) {
|
||||||
|
eprintln!("{}", s);
|
||||||
|
return Err(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if run_paths.len() == 0 {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
if args.prompt {
|
||||||
|
if !prompt_confirm(&run_targets) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for paths in run_paths {
|
||||||
|
run(paths, &config);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// result may be empty; contents each nonempty
|
||||||
|
fn obtain_run_targets<'a>(
|
||||||
|
arg_file: Option<&String>,
|
||||||
|
arg_targets: &'a mut Vec<String>,
|
||||||
|
separator: &String, verbosity: i8
|
||||||
|
) -> Result<Vec<Vec<&'a String>>, i32> {
|
||||||
|
if let Some(arg_file) = &arg_file {
|
||||||
|
if !arg_targets.is_empty() {
|
||||||
|
if verbosity >= 0 {
|
||||||
|
eprintln!("No targets should be provided as cli arguments if arguments are being read from file");
|
||||||
|
}
|
||||||
|
return Err(1);
|
||||||
|
}
|
||||||
|
if let Err(s) = {
|
||||||
|
if *arg_file == "-" {
|
||||||
|
read_lines(std::io::stdin().lock(), arg_targets)
|
||||||
|
} else {
|
||||||
|
read_file_lines(Path::new(&arg_file), arg_targets)
|
||||||
|
}
|
||||||
|
} {
|
||||||
|
if verbosity >= 0 {
|
||||||
|
eprintln!("Error reading file containing targets: {}", s);
|
||||||
|
}
|
||||||
|
return Err(1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for target in arg_targets.iter() {
|
||||||
|
if target.contains('\0') {
|
||||||
|
if verbosity >= 0 {
|
||||||
|
eprintln!("Paths can never contain null byte: {}", target);
|
||||||
|
}
|
||||||
|
return Err(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut run_targets = split_vec(arg_targets, &separator);
|
||||||
|
for i in (0..run_targets.len()).rev() {
|
||||||
|
if run_targets[i].len() == 0 {
|
||||||
|
run_targets.swap_remove(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(run_targets)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// result has no symlinks; may be empty; contents each nonempty
|
||||||
|
fn obtain_run_paths<T, Y, U>(run_targets: T, verbosity: i8) -> Result<Vec<Vec<PathWithMetadata>>, i32>
|
||||||
|
where
|
||||||
|
T: Iterator<Item=Y> + ExactSizeIterator,
|
||||||
|
Y: Iterator<Item=U> + ExactSizeIterator,
|
||||||
|
U: AsRef<str>,
|
||||||
|
{
|
||||||
|
let mut run_paths: Vec<Vec<PathWithMetadata>> = Vec::with_capacity(run_targets.len());
|
||||||
|
for spaths in run_targets {
|
||||||
|
let mut paths = Vec::with_capacity(spaths.len());
|
||||||
|
for spath in spaths {
|
||||||
|
let path = Path::new(spath.as_ref()).canonicalize().map_err(|_| {
|
||||||
|
if verbosity >= 1 {
|
||||||
|
eprintln!("Failed to retrieve absolute path for {}", shlex::try_quote(spath.as_ref()).unwrap());
|
||||||
|
}
|
||||||
|
1
|
||||||
|
})?;
|
||||||
|
let pwmd = PathWithMetadata::new(path).map_err(|s| {
|
||||||
|
if verbosity >= 1 {
|
||||||
|
eprintln!("{}", s);
|
||||||
|
}
|
||||||
|
1
|
||||||
|
})?;
|
||||||
|
if !pwmd.md().file_type().is_symlink() {
|
||||||
|
paths.push(pwmd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if paths.len() > 0 {
|
||||||
|
run_paths.push(paths);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(run_paths)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// perform a full run
|
||||||
|
fn run(pwmds: Vec<PathWithMetadata>, cfg: &Config) {
|
||||||
|
let mut registry: HashMap<u64, Vec<PathWithMetadata>> = HashMap::new();
|
||||||
|
for pwmd in pwmds {
|
||||||
|
register(pwmd, &mut registry, cfg);
|
||||||
|
}
|
||||||
|
registry.retain(|_,files| files.len() >= 2);
|
||||||
|
|
||||||
|
let mut stdout_buffer = (cfg.verbosity >= 0).then(|| std::io::BufWriter::new(std::io::stdout().lock()));
|
||||||
|
|
||||||
|
if let Some(stdout_buffer) = &mut stdout_buffer {
|
||||||
|
if cfg.verbosity >= 0 {
|
||||||
|
writeln!(stdout_buffer, "Considering {} total files for duplicates", registry.iter().map(|(_,files)| files.len()).sum::<usize>()).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (fsize, pwmds) in registry {
|
||||||
|
run_one_size(fsize, &pwmds, cfg, stdout_buffer.as_mut());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_one_size<W: Write>(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config, mut stdout_buffer: Option<&mut W>) {
|
||||||
|
if let Some(stdout_buffer) = stdout_buffer.as_mut() {
|
||||||
|
if cfg.verbosity >= 1 {
|
||||||
|
writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", pwmds.len(), fsize).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if cfg.verbosity >= 0 {
|
||||||
|
// pwmds.sort_by_key(|pwmd| pwmd.path.file_name().unwrap_or_default().to_string_lossy().to_string());
|
||||||
|
// }
|
||||||
|
let mut by_inode: Vec<SmallVec<[&PathWithMetadata; 1]>> = Vec::with_capacity((pwmds.len() as f64 * 0.8) as usize); // each nonempty
|
||||||
|
let mut inodes: Vec<u64> = Vec::with_capacity(by_inode.len());
|
||||||
|
for pwmd in pwmds {
|
||||||
|
let inode: u64 = pwmd.md().st_ino();
|
||||||
|
match inodes.binary_search(&inode) {
|
||||||
|
Ok(i) => {
|
||||||
|
by_inode[i].push(pwmd);
|
||||||
|
},
|
||||||
|
Err(i) => {
|
||||||
|
inodes.insert(i, inode);
|
||||||
|
by_inode.insert(i, smallvec![pwmd]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
drop(inodes);
|
||||||
|
by_inode.sort_by(|a,b| b.len().cmp(&a.len())); // descending size order
|
||||||
|
|
||||||
|
// compare each with eachother
|
||||||
|
let mut i = 0;
|
||||||
|
while i < by_inode.len() {
|
||||||
|
let mut j = i+1;
|
||||||
|
while j < by_inode.len() {
|
||||||
|
let (keeps, replaces) = get2mut(&mut by_inode, i, j);
|
||||||
|
if hardlink_all(keeps, replaces, cfg, stdout_buffer.as_mut()) {
|
||||||
|
by_inode.swap_remove(j);
|
||||||
|
} else {
|
||||||
|
j += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// recursively register path or its contents if directory into registry
|
||||||
|
/// eprints errors
|
||||||
|
fn register(
|
||||||
|
pwmd: PathWithMetadata,
|
||||||
|
registry: &mut HashMap<u64, Vec<PathWithMetadata>>,
|
||||||
|
cfg: &Config,
|
||||||
|
) {
|
||||||
|
if pwmd.md().file_type().is_symlink() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if pwmd.path.is_file() {
|
||||||
|
let size = pwmd.md().st_size();
|
||||||
|
if size >= cfg.min_size {
|
||||||
|
registry.entry(size).or_default().push(pwmd);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if pwmd.path.is_dir() { match std::fs::read_dir(&pwmd.path) {
|
||||||
|
Ok(entries) => for entry in entries { match entry {
|
||||||
|
Ok(entry) => match PathWithMetadata::new(entry.path()) {
|
||||||
|
Ok(child_pwmd) => register(child_pwmd, registry, cfg),
|
||||||
|
Err(s) => if cfg.verbosity >= 1 {
|
||||||
|
eprintln!("{}", s);
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Err(error) => if cfg.verbosity >= 1 {
|
||||||
|
eprintln!("Failed to inspect {}: {}", shlex::try_quote(&pwmd.path.to_string_lossy()).unwrap(), error);
|
||||||
|
},
|
||||||
|
} },
|
||||||
|
Err(error) => if cfg.verbosity >= 1 {
|
||||||
|
eprintln!("Failed to read dir {}: {}", shlex::try_quote(&pwmd.path.to_string_lossy()).unwrap(), error);
|
||||||
|
},
|
||||||
|
} }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
struct PathWithMetadata {
|
||||||
|
pub path: PathBuf,
|
||||||
|
md: RefCell<std::fs::Metadata>,
|
||||||
|
}
|
||||||
|
impl PathWithMetadata {
|
||||||
|
pub fn new(path: PathBuf) -> Result<Self, String>{
|
||||||
|
let md = RefCell::new(Self::get_md(&path)?);
|
||||||
|
Ok(PathWithMetadata{ path, md })
|
||||||
|
}
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn md(&self) -> std::cell::Ref<std::fs::Metadata> {
|
||||||
|
self.md.borrow()
|
||||||
|
}
|
||||||
|
pub fn reset_md(&self) -> Result<(), String> {
|
||||||
|
*self.md.borrow_mut() = Self::get_md(&self.path)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
fn get_md(path: &PathBuf) -> Result<std::fs::Metadata, String> {
|
||||||
|
std::fs::symlink_metadata(path).map_err(|_| format!("Failed to retrive metadata for {}", shlex::try_quote(&path.to_string_lossy()).unwrap()))
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
impl AsRef<PathBuf> for PathWithMetadata {
|
||||||
|
fn as_ref(&self) -> &PathBuf {
|
||||||
|
return &self.path;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl AsRef<Path> for PathWithMetadata {
|
||||||
|
fn as_ref(&self) -> &Path {
|
||||||
|
return &self.path.as_ref();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// return whether or not user gave confirmation
|
/// return whether or not user gave confirmation
|
||||||
fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef<str>>(run_targets: &[T]) -> bool {
|
fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef<str>>(run_targets: &[T]) -> bool {
|
||||||
println!("Are you sure you want to link all duplicates in each of these sets of targets?");
|
println!("Are you sure you want to link all duplicates in each of these sets of targets?");
|
||||||
for spaths in run_targets {
|
for spaths in run_targets {
|
||||||
println!(" {}", shlex::join(spaths.borrow().iter().map(|s| s.as_ref())));
|
println!(" {}", shlex::try_join(spaths.borrow().iter().map(|s| s.as_ref())).unwrap());
|
||||||
}
|
}
|
||||||
print!("> ");
|
print!("> ");
|
||||||
std::io::stdout().flush().unwrap_or_else(|_| ());
|
std::io::stdout().flush().unwrap_or_else(|_| ());
|
||||||
|
@ -96,210 +379,104 @@ fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef<str>>(run_targets: &[T]) -> bool
|
||||||
response.to_lowercase().starts_with("y")
|
response.to_lowercase().starts_with("y")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn read_lines(reader: impl BufRead, dest: &mut Vec<String>) -> Result<(), String> {
|
||||||
|
for line in reader.lines() {
|
||||||
|
dest.push(line.map_err(|e| format!("Error reading line: {}", e))?);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn read_file_lines(path: &Path, dest: &mut Vec<String>) -> Result<(), String> {
|
fn read_file_lines(path: &Path, dest: &mut Vec<String>) -> Result<(), String> {
|
||||||
if !path.is_file() {
|
if !path.is_file() {
|
||||||
return Err(format!("File does not exist or is not a normal file ({})", shlex::quote(&path.to_string_lossy())));
|
return Err(format!("File does not exist or is not a normal file ({})", shlex::try_quote(&path.to_string_lossy()).unwrap()));
|
||||||
}
|
}
|
||||||
if let Ok(f) = std::fs::File::open(path) {
|
let reader = BufReader::new(std::fs::File::open(path).map_err(
|
||||||
let reader = BufReader::new(f);
|
|e| format!("Could not open {}: {}", shlex::try_quote(&path.to_string_lossy()).unwrap(), e)
|
||||||
for line in reader.lines() {
|
)?);
|
||||||
match line {
|
read_lines(reader, dest)
|
||||||
Ok(line) => dest.push(line),
|
|
||||||
Err(err) => return Err(format!("Error reading line: {}", err))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
} else {
|
|
||||||
Err(format!("Could not open {}", shlex::quote(&path.to_string_lossy())))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
enum ProcessArgsResult {
|
|
||||||
Ok(Vec<Vec<PathBuf>>, Config),
|
|
||||||
Exit,
|
|
||||||
ExitError,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// may exit
|
|
||||||
fn process_args() -> ProcessArgsResult {
|
|
||||||
let mut args = CLIArguments::from_args();
|
|
||||||
let verbosity = args.verbose - args.quiet;
|
|
||||||
|
|
||||||
let config = Config {
|
|
||||||
min_size: args.min_size.map(|v| if v > 1 { v } else { 1 }).unwrap_or(1),
|
|
||||||
no_brace_output: args.no_brace_output,
|
|
||||||
dry_run: args.dry_run,
|
|
||||||
verbosity
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(arg_file) = args.argument_file {
|
|
||||||
if !args.targets.is_empty() {
|
|
||||||
eprintln!("No targets should be provided as cli arguments if arguments are being read from file");
|
|
||||||
return ProcessArgsResult::ExitError;
|
|
||||||
}
|
|
||||||
let path = Path::new(&arg_file);
|
|
||||||
if let Err(s) = read_file_lines(path, &mut args.targets) {
|
|
||||||
eprintln!("Error reading argument file: {}", s);
|
|
||||||
return ProcessArgsResult::ExitError;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let run_targets: Vec<Vec<&String>> = split_vec(&args.targets, &args.separator.unwrap_or(s_default_target_separator!().to_string()));
|
|
||||||
|
|
||||||
if run_targets.is_empty() {
|
|
||||||
if verbosity > 0 {
|
|
||||||
println!("No targets provided");
|
|
||||||
}
|
|
||||||
return ProcessArgsResult::Exit;
|
|
||||||
}
|
|
||||||
|
|
||||||
if args.prompt {
|
|
||||||
if !prompt_confirm(&run_targets) {
|
|
||||||
std::process::exit(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut bad = false;
|
|
||||||
let run_paths: Vec<Vec<PathBuf>> = run_targets.iter().enumerate().map(
|
|
||||||
|(_,spaths)| spaths.iter().map(
|
|
||||||
|spath| Path::new(spath).canonicalize().unwrap_or_else(
|
|
||||||
|_| {
|
|
||||||
eprintln!("Failed to retrieve absolute path for {}", shlex::quote(spath));
|
|
||||||
bad = true;
|
|
||||||
Default::default()
|
|
||||||
}
|
|
||||||
)
|
|
||||||
).collect()
|
|
||||||
).collect();
|
|
||||||
if bad {
|
|
||||||
return ProcessArgsResult::ExitError;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for paths in &run_paths {
|
fn check_all_same_device(pwmds: &[PathWithMetadata]) -> Result<(), String> {
|
||||||
if let Err(s) = check_all_same_device(paths) {
|
if pwmds.len() <= 1 {
|
||||||
eprintln!("{}", s);
|
|
||||||
return ProcessArgsResult::ExitError;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ProcessArgsResult::Ok(run_paths, config)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// exit on error
|
|
||||||
fn get_st_dev(file: &PathBuf) -> Result<u64, String> {
|
|
||||||
if let Ok(metadata) = std::fs::metadata(file) {
|
|
||||||
Ok(metadata.st_dev())
|
|
||||||
} else {
|
|
||||||
Err(format!("Failed to retrive device id for {}", shlex::quote(&file.to_string_lossy())))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_all_same_device(paths: &[PathBuf]) -> Result<(), String> {
|
|
||||||
if paths.len() <= 1 {
|
|
||||||
return Ok(())
|
return Ok(())
|
||||||
}
|
}
|
||||||
let first_device_id = get_st_dev(&paths[0])?;
|
let mut by_dev: HashMap<u64, Vec<&PathWithMetadata>> = Default::default();
|
||||||
let mut wrong: Vec<&PathBuf> = Vec::new();
|
for pwmd in pwmds.iter() {
|
||||||
for path in &paths[1..] {
|
by_dev.entry(pwmd.md().st_dev()).or_default().push(pwmd);
|
||||||
if get_st_dev(path)? != first_device_id {
|
|
||||||
wrong.push(path);
|
|
||||||
}
|
}
|
||||||
|
if by_dev.len() <= 1 {
|
||||||
|
return Ok(());
|
||||||
}
|
}
|
||||||
if wrong.is_empty() {
|
let mut lines = Vec::with_capacity(1+by_dev.len());
|
||||||
|
lines.push(String::from("Device ids must all be the same; got paths on different devices:"));
|
||||||
|
lines.extend(by_dev.into_iter().map(|(dev,pwmds)| {
|
||||||
|
if pwmds.len() == 1 {
|
||||||
|
format!(" Device {}: {} path: {}", dev, pwmds.len(), &shlex::try_quote(&pwmds[0].path.to_string_lossy()).unwrap())
|
||||||
|
} else {
|
||||||
|
format!(" Device {}: {} paths", dev, pwmds.len())
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
Err(lines.join("\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// get two mutable references in an array
|
||||||
|
/// expects correct inputs
|
||||||
|
fn get2mut<'a, T>(v: &'a mut [T], i: usize, j: usize) -> (&'a mut T, &'a mut T) {
|
||||||
|
let (left, right) = v.split_at_mut(j);
|
||||||
|
(&mut left[i], &mut right[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fn hardlink(keep: &PathWithMetadata, replace: &PathWithMetadata) -> Result<(), String> {
|
||||||
|
std::fs::remove_file(&replace.path).map_err(|_| "Failed to remove for hardlinking")?;
|
||||||
|
std::fs::hard_link(&keep.path, &replace.path).map_err(|_| {
|
||||||
|
match std::fs::copy(&keep.path, &replace.path) {
|
||||||
|
Ok(_) => "Failed to hardlink (copied instead)",
|
||||||
|
Err(_) => "Failed to hardlink or copy" // awful scenario but i believe it is impossible since i don't see how you could remove a file yet not create one in its place
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
replace.reset_md()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
|
||||||
let mut s = String::with_capacity(wrong.len()*128); // 75 max estimated len of path, 53 for prefix msg + nl
|
|
||||||
for path in wrong {
|
|
||||||
s.push_str("Device ids must all be the same; got different for: {}");
|
|
||||||
s.push_str(&shlex::quote(&path.to_string_lossy()));
|
|
||||||
s.push_str("\n");
|
|
||||||
}
|
|
||||||
s.pop(); // remove last newline
|
|
||||||
Err(s)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// returns whether linking was done
|
||||||
/// perform a full run
|
/// eprints errors
|
||||||
fn run(paths: Vec<PathBuf>, cfg: &Config) {
|
fn hardlink_all<'a, 'b, T, W: Write>(keeps: &'a mut SmallVec<T>, replaces: &'a mut SmallVec<T>, cfg: &Config, mut stdout_buffer: Option<&mut W>) -> bool
|
||||||
let mut registry: HashMap<u64, Vec<PathBuf>> = HashMap::new();
|
where T: smallvec::Array<Item=&'b PathWithMetadata>,
|
||||||
|
{
|
||||||
for path in paths {
|
if !cmp(&replaces.first().unwrap().path, &keeps.first().unwrap().path).unwrap_or(false) {
|
||||||
register(path.to_path_buf(), &mut registry, cfg);
|
return false;
|
||||||
}
|
}
|
||||||
registry.retain(|_,files| files.len() >= 2);
|
for replace in replaces.into_iter() {
|
||||||
|
let keep = keeps.first().unwrap();
|
||||||
let mut stdout_buffer = if cfg.verbosity >= 0 {
|
|
||||||
let stdout = std::io::stdout();
|
|
||||||
let stdout_buffer = std::io::BufWriter::new(stdout.lock());
|
|
||||||
Some(stdout_buffer)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(stdout_buffer) = &mut stdout_buffer {
|
|
||||||
if cfg.verbosity >= 0 {
|
|
||||||
writeln!(stdout_buffer, "Considering {} total files for duplicates", registry.iter().map(|(_,files)| files.len()).sum::<usize>()).unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (fsize, mut files) in registry {
|
|
||||||
if files.len() > 8 {
|
|
||||||
files.sort_by_key(|path| path.file_name().unwrap_or_default().to_string_lossy().to_string());
|
|
||||||
}
|
|
||||||
if let Some(stdout_buffer) = &mut stdout_buffer {
|
|
||||||
if cfg.verbosity > 1 {
|
|
||||||
writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", files.len(), fsize).unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for i in (0..files.len()).rev() {
|
|
||||||
let f1 = &files[i];
|
|
||||||
for j in (0..i).rev() {
|
|
||||||
let f2 = &files[j];
|
|
||||||
if !are_hardlinked(f1, f2) && cmp(f1, f2).unwrap_or(false) {
|
|
||||||
if !cfg.dry_run {
|
if !cfg.dry_run {
|
||||||
if let Err(msg) = hardlink(f1, f2) {
|
if let Err(msg) = hardlink(keep, replace) {
|
||||||
eprintln!("{}: {}", msg, format_pair(f1, f2, cfg));
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if let Some(stdout_buffer) = &mut stdout_buffer {
|
|
||||||
if cfg.verbosity >= 0 {
|
if cfg.verbosity >= 0 {
|
||||||
writeln!(stdout_buffer, "hardlinked {}", format_pair(f1, f2, cfg)).unwrap();
|
eprintln!("{}: {}", msg, format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg));
|
||||||
}
|
}
|
||||||
|
continue // path no longer valid
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if let Some(stdout_buffer) = stdout_buffer.as_mut() {
|
||||||
|
if cfg.verbosity >= 0 {
|
||||||
|
writeln!(stdout_buffer, "hardlinked {}", format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg)).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
drop(keep);
|
||||||
|
keeps.push(replace);
|
||||||
}
|
}
|
||||||
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fn hardlink(f1: &PathBuf, f2: &PathBuf) -> Result<(), &'static str> {
|
fn format_pair(f1s: &str, f2s: &str, cfg: &Config) -> String {
|
||||||
if let Err(_) = std::fs::remove_file(f2) {
|
|
||||||
Err("Failed to remove second file for hardlinking")
|
|
||||||
} else if let Err(_) = std::fs::hard_link(f1, f2) { // same as ln in terms of args: left args's inode becomes right arg's inode
|
|
||||||
match std::fs::copy(f1, f2) {
|
|
||||||
Ok(_) => Err("Failed to hardlink (copied instead)"),
|
|
||||||
Err(_) => Err("Failed to hardlink or copy")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
fn format_pair(f1: &PathBuf, f2: &PathBuf, cfg: &Config) -> String {
|
|
||||||
let f1s = f1.to_string_lossy();
|
|
||||||
let f2s = f2.to_string_lossy();
|
|
||||||
if cfg.no_brace_output {
|
if cfg.no_brace_output {
|
||||||
return format!(
|
return format!(
|
||||||
"{} {}",
|
"{} {}",
|
||||||
shlex::quote(&f1s),
|
shlex::try_quote(&f1s).unwrap(),
|
||||||
shlex::quote(&f2s)
|
shlex::try_quote(&f2s).unwrap()
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -310,78 +487,43 @@ fn format_pair(f1: &PathBuf, f2: &PathBuf, cfg: &Config) -> String {
|
||||||
if prefixlong && suffixlong {
|
if prefixlong && suffixlong {
|
||||||
format!(
|
format!(
|
||||||
"{}{{{},{}}}{}",
|
"{}{{{},{}}}{}",
|
||||||
shlex::quote(prefix),
|
shlex::try_quote(prefix).unwrap(),
|
||||||
shlex::quote(&f1s[ prefix.len()..std::cmp::max(prefix.len(), f1s.len()-suffix.len()) ]),
|
shlex::try_quote(&f1s[ prefix.len()..std::cmp::max(prefix.len(), f1s.len()-suffix.len()) ]).unwrap(),
|
||||||
shlex::quote(&f2s[ prefix.len()..std::cmp::max(prefix.len(), f2s.len()-suffix.len()) ]),
|
shlex::try_quote(&f2s[ prefix.len()..std::cmp::max(prefix.len(), f2s.len()-suffix.len()) ]).unwrap(),
|
||||||
shlex::quote(suffix)
|
shlex::try_quote(suffix).unwrap()
|
||||||
)
|
)
|
||||||
} else if prefixlong {
|
} else if prefixlong {
|
||||||
format!(
|
format!(
|
||||||
"{}{{{},{}}}",
|
"{}{{{},{}}}",
|
||||||
shlex::quote(prefix),
|
shlex::try_quote(prefix).unwrap(),
|
||||||
shlex::quote(&f1s[prefix.len()..]),
|
shlex::try_quote(&f1s[prefix.len()..]).unwrap(),
|
||||||
shlex::quote(&f2s[prefix.len()..])
|
shlex::try_quote(&f2s[prefix.len()..]).unwrap()
|
||||||
)
|
)
|
||||||
} else if suffixlong {
|
} else if suffixlong {
|
||||||
format!(
|
format!(
|
||||||
"{{{},{}}}{}",
|
"{{{},{}}}{}",
|
||||||
shlex::quote(&f1s[..f1s.len()-suffix.len()]),
|
shlex::try_quote(&f1s[..f1s.len()-suffix.len()]).unwrap(),
|
||||||
shlex::quote(&f2s[..f2s.len()-suffix.len()]),
|
shlex::try_quote(&f2s[..f2s.len()-suffix.len()]).unwrap(),
|
||||||
shlex::quote(suffix),
|
shlex::try_quote(suffix).unwrap(),
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
format!(
|
format!(
|
||||||
"{} <-> {}",
|
"{} <-> {}",
|
||||||
shlex::quote(&f1s),
|
shlex::try_quote(&f1s).unwrap(),
|
||||||
shlex::quote(&f2s)
|
shlex::try_quote(&f2s).unwrap()
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// recursively register path or its contents if directory into registry
|
|
||||||
fn register(path: PathBuf, registry: &mut HashMap<u64, Vec<PathBuf>>, cfg: &Config) {
|
|
||||||
if let Ok(metadata) = std::fs::symlink_metadata(&path) {
|
|
||||||
if metadata.file_type().is_symlink() {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if path.is_file() {
|
|
||||||
let size = metadata.st_size();
|
|
||||||
if size >= cfg.min_size {
|
|
||||||
registry.entry(size).or_insert_with(|| Vec::new()).push(path);
|
|
||||||
}
|
|
||||||
} else if path.is_dir() {
|
|
||||||
if let Ok(entries) = std::fs::read_dir(path) {
|
|
||||||
for entry in entries {
|
|
||||||
if let Ok(entry) = entry {
|
|
||||||
register(entry.path(), registry, cfg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
fn are_hardlinked(f1: &PathBuf, f2: &PathBuf) -> bool {
|
|
||||||
if let (Ok(md1), Ok(md2)) = (std::fs::metadata(f1), std::fs::metadata(f2)) {
|
|
||||||
md1.st_ino() == md2.st_ino()
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// check equality of contents of two paths to files
|
/// check equality of contents of two paths to files
|
||||||
fn cmp(f1: &PathBuf, f2: &PathBuf) -> std::io::Result<bool> {
|
/// does not check sizes
|
||||||
if let (Ok(mut f1), Ok(mut f2)) = (std::fs::File::open(f1), std::fs::File::open(f2)) {
|
fn cmp(f1: impl AsRef<Path>, f2: impl AsRef<Path>) -> std::io::Result<bool> {
|
||||||
cmp_files(&mut f1, &mut f2)
|
cmp_read(std::fs::File::open(f1)?, std::fs::File::open(f2)?)
|
||||||
} else { Ok(false) }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// check equality of contents of two open files
|
/// check equality of contents of two open files
|
||||||
fn cmp_files(f1: &mut std::fs::File, f2: &mut std::fs::File) -> std::io::Result<bool> {
|
fn cmp_read(mut f1: impl Read, mut f2: impl Read) -> std::io::Result<bool> {
|
||||||
let buff1: &mut [u8] = &mut [0; 1024];
|
let buff1: &mut [u8] = &mut [0; 1024];
|
||||||
let buff2: &mut [u8] = &mut [0; 1024];
|
let buff2: &mut [u8] = &mut [0; 1024];
|
||||||
loop {
|
loop {
|
||||||
|
@ -419,6 +561,7 @@ fn common_suffix<'a>(s1: &'a str, s2: &'a str) -> &'a str {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// double delimiters will result in empty vecs
|
||||||
fn split_vec<'a, T: std::cmp::PartialEq>(input: &'a [T], delimiter: &T) -> Vec<Vec<&'a T>> {
|
fn split_vec<'a, T: std::cmp::PartialEq>(input: &'a [T], delimiter: &T) -> Vec<Vec<&'a T>> {
|
||||||
let mut result: Vec<Vec<&T>> = Vec::new();
|
let mut result: Vec<Vec<&T>> = Vec::new();
|
||||||
|
|
||||||
|
@ -439,14 +582,14 @@ fn split_vec<'a, T: std::cmp::PartialEq>(input: &'a [T], delimiter: &T) -> Vec<V
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
#[test]
|
#[test]
|
||||||
fn _split_vec() {
|
fn _split_vec() {
|
||||||
let v: Vec<_> = vec![";", "hi", "bye", ";", "1", ";", ";", "2", "2", ";"].into_iter().map(|s| s.to_string()).collect();
|
let v: Vec<_> = vec![";", ";", ";"].into_iter().map(|s| s.to_string()).collect();
|
||||||
let res = split_vec(&v[..], &";".to_string());
|
let res = split_vec(&v[..], &";".to_string());
|
||||||
println!("{:?}", v);
|
assert_eq!(res.len(), 2)
|
||||||
println!("{:?}", res);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue