Compare commits
4 commits
a64fc7c188
...
61469fa6f0
Author | SHA1 | Date | |
---|---|---|---|
61469fa6f0 | |||
69b5e5e984 | |||
a49bc76b0e | |||
d995c9aa01 |
3 changed files with 53 additions and 45 deletions
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "lndups"
|
name = "lndups"
|
||||||
version = "0.1.4"
|
version = "0.1.5"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "Hardlink duplicate files"
|
description = "Hardlink duplicate files"
|
||||||
authors = ["George H <georgeh124v2@gmail.com>"]
|
authors = ["George H <georgeh124v2@gmail.com>"]
|
||||||
|
@ -10,8 +10,8 @@ license = "GPL-3.0-or-later"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
clap = { version = "4.5", features = ["derive"] }
|
||||||
shlex = "1.3"
|
shlex = "1.3"
|
||||||
smallvec = "1.13"
|
smallvec = "1.14"
|
||||||
structopt = "0.3"
|
|
||||||
|
|
||||||
unicode-width = "<0.1.14" # subdependency causing compilation failure
|
unicode-width = "<0.1.14" # subdependency causing compilation failure
|
||||||
|
|
Binary file not shown.
90
src/main.rs
90
src/main.rs
|
@ -1,14 +1,13 @@
|
||||||
extern crate shlex;
|
extern crate shlex;
|
||||||
extern crate smallvec;
|
extern crate smallvec;
|
||||||
extern crate structopt;
|
|
||||||
use std::borrow::Borrow;
|
use std::borrow::Borrow;
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::{Read, Write, BufReader, BufRead};
|
use std::io::{Read, Write, BufReader, BufRead};
|
||||||
use std::os::linux::fs::MetadataExt as MetadataExtLinux;
|
use std::os::linux::fs::MetadataExt as MetadataExtLinux;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use crate::structopt::StructOpt;
|
use clap::Parser;
|
||||||
use crate::smallvec::*;
|
use smallvec::*;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,54 +15,57 @@ macro_rules! s_arg_target_file_name { () => { "target-file" } }
|
||||||
macro_rules! s_default_target_separator { () => { ";" } }
|
macro_rules! s_default_target_separator { () => { ";" } }
|
||||||
|
|
||||||
|
|
||||||
#[derive(StructOpt)]
|
#[derive(Parser)]
|
||||||
#[structopt(
|
#[command(
|
||||||
about="Hardlink duplicate files recursively\nSymlinks are treated as normal files",
|
about=concat!(
|
||||||
usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]")
|
"Hardlink duplicate files recursively\n",
|
||||||
|
"Symlinks are treated as normal files",
|
||||||
|
),
|
||||||
|
// usage=concat!(env!("CARGO_PKG_NAME"), " [OPTION]... TARGET... ['", s_default_target_separator!(), "' TARGET...]")
|
||||||
)]
|
)]
|
||||||
struct CLIArguments {
|
struct CLIArguments {
|
||||||
#[structopt(short, long, parse(from_occurrences), help="Increase verbosity")]
|
#[arg(short, long, action=clap::ArgAction::Count, help="Increase verbosity")]
|
||||||
verbose: i8,
|
verbose: i8,
|
||||||
|
|
||||||
#[structopt(short, long, parse(from_occurrences), help="Decrease verbosity")]
|
#[arg(short, long, action=clap::ArgAction::Count, help="Decrease verbosity")]
|
||||||
quiet: i8,
|
quiet: i8,
|
||||||
|
|
||||||
#[structopt(long, help=concat!(
|
#[arg(long, help=concat!(
|
||||||
"Disable brace notation for output\n",
|
"Disable brace notation for output\n",
|
||||||
" Ex: /home/user/{dir,backup}/file",
|
" Ex: /home/user/{dir,backup}/file",
|
||||||
))]
|
))]
|
||||||
no_brace_output: bool,
|
no_brace_output: bool,
|
||||||
|
|
||||||
#[structopt(long, help=concat!(
|
#[arg(long, help=concat!(
|
||||||
"Perform no operations on the filesystem",
|
"Perform no operations on the filesystem",
|
||||||
))]
|
))]
|
||||||
dry_run: bool,
|
dry_run: bool,
|
||||||
|
|
||||||
#[structopt(short="i", help=concat!(
|
#[arg(short='i', help=concat!(
|
||||||
"Prompt once before operating\n",
|
"Prompt once before operating\n",
|
||||||
"Doesn't occurs if no targets are provided",
|
"Doesn't occurs if no targets are provided",
|
||||||
))]
|
))]
|
||||||
prompt: bool,
|
prompt: bool,
|
||||||
|
|
||||||
#[structopt(short, long, value_name="VALUE", help=concat!(
|
#[arg(short, long, value_name="VALUE", help=concat!(
|
||||||
"Minimum file size to be considered for hardlinking\n",
|
"Minimum file size to be considered for hardlinking\n",
|
||||||
"Never goes below 1 (the default)",
|
"Never goes below 1 (the default)",
|
||||||
))]
|
))]
|
||||||
min_size: Option<u64>,
|
min_size: Option<u64>,
|
||||||
|
|
||||||
#[structopt(short, long, value_name="SEPARATOR", help=concat!(
|
#[arg(short, long, value_name="SEPARATOR", help=concat!(
|
||||||
"Separator between sets of targets (default: ", s_default_target_separator!(), ")",
|
"Separator between sets of targets (default: ", s_default_target_separator!(), ")",
|
||||||
))]
|
))]
|
||||||
separator: Option<String>,
|
separator: Option<String>,
|
||||||
|
|
||||||
#[structopt(long=s_arg_target_file_name!(), value_name="FILE", help=concat!(
|
#[arg(long=s_arg_target_file_name!(), value_name="FILE", help=concat!(
|
||||||
"File to source targets from (can be '-' for stdin)\n",
|
"File to source targets from (can be '-' for stdin)\n",
|
||||||
"Same rules as CLI argument targets apply\n",
|
"Same rules as CLI argument targets apply\n",
|
||||||
"Mutually exclusive with CLI argument targets",
|
"Mutually exclusive with CLI argument targets",
|
||||||
))]
|
))]
|
||||||
file_containing_targets: Option<String>,
|
file_containing_targets: Option<String>,
|
||||||
|
|
||||||
#[structopt(value_name="TARGET", help=concat!(
|
#[arg(value_name="TARGET", help=concat!(
|
||||||
"Target files and directories (recursive)\n",
|
"Target files and directories (recursive)\n",
|
||||||
"Each SEPARATOR denotes a new set of targets\n",
|
"Each SEPARATOR denotes a new set of targets\n",
|
||||||
" Each set of targets are separate from all other sets\n",
|
" Each set of targets are separate from all other sets\n",
|
||||||
|
@ -87,11 +89,11 @@ struct Config {
|
||||||
|
|
||||||
|
|
||||||
fn main() -> Result<(), i32> {
|
fn main() -> Result<(), i32> {
|
||||||
let mut args = CLIArguments::from_args();
|
let mut args = CLIArguments::parse();
|
||||||
let verbosity = args.verbose - args.quiet;
|
let verbosity = args.verbose - args.quiet;
|
||||||
|
|
||||||
let config = Config {
|
let config = Config {
|
||||||
min_size: args.min_size.map(|v| if v > 1 { v } else { 1 }).unwrap_or(1),
|
min_size: args.min_size.map(|v| std::cmp::max(v, 1)).unwrap_or(1),
|
||||||
no_brace_output: args.no_brace_output,
|
no_brace_output: args.no_brace_output,
|
||||||
dry_run: args.dry_run,
|
dry_run: args.dry_run,
|
||||||
verbosity
|
verbosity
|
||||||
|
@ -127,13 +129,13 @@ fn main() -> Result<(), i32> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if args.prompt {
|
if args.prompt {
|
||||||
if !prompt_confirm(&run_targets) {
|
if !prompt_confirm(&run_targets).map_err(|_| { eprintln!("IO Error during confirmation prompt"); 1 })? {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for paths in run_paths {
|
for paths in run_paths {
|
||||||
run(paths, &config);
|
run(paths, &config).map_err(|_| 1)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -187,7 +189,8 @@ fn obtain_run_targets<'a>(
|
||||||
|
|
||||||
|
|
||||||
/// result has no symlinks; may be empty; contents each nonempty
|
/// result has no symlinks; may be empty; contents each nonempty
|
||||||
fn obtain_run_paths<T, Y, U>(run_targets: T, verbosity: i8) -> Result<Vec<Vec<PathWithMetadata>>, i32>
|
fn obtain_run_paths<T, Y, U>(run_targets: T, verbosity: i8)
|
||||||
|
-> Result<Vec<Vec<PathWithMetadata>>, i32>
|
||||||
where
|
where
|
||||||
T: Iterator<Item=Y> + ExactSizeIterator,
|
T: Iterator<Item=Y> + ExactSizeIterator,
|
||||||
Y: Iterator<Item=U> + ExactSizeIterator,
|
Y: Iterator<Item=U> + ExactSizeIterator,
|
||||||
|
@ -222,7 +225,7 @@ where
|
||||||
|
|
||||||
|
|
||||||
/// perform a full run
|
/// perform a full run
|
||||||
fn run(pwmds: Vec<PathWithMetadata>, cfg: &Config) {
|
fn run(pwmds: Vec<PathWithMetadata>, cfg: &Config) -> std::io::Result<()> {
|
||||||
let mut registry: HashMap<u64, Vec<PathWithMetadata>> = HashMap::new();
|
let mut registry: HashMap<u64, Vec<PathWithMetadata>> = HashMap::new();
|
||||||
for pwmd in pwmds {
|
for pwmd in pwmds {
|
||||||
register(pwmd, &mut registry, cfg);
|
register(pwmd, &mut registry, cfg);
|
||||||
|
@ -233,25 +236,31 @@ fn run(pwmds: Vec<PathWithMetadata>, cfg: &Config) {
|
||||||
|
|
||||||
if let Some(stdout_buffer) = &mut stdout_buffer {
|
if let Some(stdout_buffer) = &mut stdout_buffer {
|
||||||
if cfg.verbosity >= 0 {
|
if cfg.verbosity >= 0 {
|
||||||
writeln!(stdout_buffer, "Considering {} total files for duplicates", registry.iter().map(|(_,files)| files.len()).sum::<usize>()).unwrap();
|
writeln!(stdout_buffer,
|
||||||
|
"Considering {} total files for duplicates",
|
||||||
|
registry.iter().map(|(_,files)| files.len()).sum::<usize>()
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (fsize, pwmds) in registry {
|
for (fsize, pwmds) in registry {
|
||||||
run_one_size(fsize, &pwmds, cfg, stdout_buffer.as_mut());
|
run_one_size(fsize, &pwmds, cfg, stdout_buffer.as_mut())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run_one_size<W: Write>(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config, mut stdout_buffer: Option<&mut W>) {
|
fn run_one_size<W: Write>(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config, mut stdout_buffer: Option<&mut W>) -> std::io::Result<()> {
|
||||||
if let Some(stdout_buffer) = stdout_buffer.as_mut() {
|
if let Some(stdout_buffer) = stdout_buffer.as_mut() {
|
||||||
if cfg.verbosity >= 1 {
|
if cfg.verbosity >= 1 {
|
||||||
writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", pwmds.len(), fsize).unwrap();
|
writeln!(stdout_buffer, "Considering {} files of size {} for duplicates", pwmds.len(), fsize)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if cfg.verbosity >= 0 {
|
// if cfg.verbosity >= 0 {
|
||||||
// pwmds.sort_by_key(|pwmd| pwmd.path.file_name().unwrap_or_default().to_string_lossy().to_string());
|
// pwmds.sort_by_key(|pwmd| pwmd.path.file_name().unwrap_or_default().to_string_lossy().to_string());
|
||||||
// }
|
// }
|
||||||
let mut by_inode: Vec<SmallVec<[&PathWithMetadata; 1]>> = Vec::with_capacity((pwmds.len() as f64 * 0.8) as usize); // each nonempty
|
let mut by_inode: Vec<SmallVec<[&PathWithMetadata; 1]>>
|
||||||
|
= Vec::with_capacity((pwmds.len() as f64 * 0.8) as usize); // each nonempty
|
||||||
let mut inodes: Vec<u64> = Vec::with_capacity(by_inode.len());
|
let mut inodes: Vec<u64> = Vec::with_capacity(by_inode.len());
|
||||||
for pwmd in pwmds {
|
for pwmd in pwmds {
|
||||||
let inode: u64 = pwmd.md().st_ino();
|
let inode: u64 = pwmd.md().st_ino();
|
||||||
|
@ -282,6 +291,8 @@ fn run_one_size<W: Write>(fsize: u64, pwmds: &[PathWithMetadata], cfg: &Config,
|
||||||
}
|
}
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -334,7 +345,7 @@ impl PathWithMetadata {
|
||||||
Ok(PathWithMetadata{ path, md })
|
Ok(PathWithMetadata{ path, md })
|
||||||
}
|
}
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn md(&self) -> std::cell::Ref<std::fs::Metadata> {
|
pub fn md<'a>(&'a self) -> std::cell::Ref<'a, std::fs::Metadata> {
|
||||||
self.md.borrow()
|
self.md.borrow()
|
||||||
}
|
}
|
||||||
pub fn reset_md(&self) -> Result<(), String> {
|
pub fn reset_md(&self) -> Result<(), String> {
|
||||||
|
@ -360,23 +371,21 @@ impl AsRef<Path> for PathWithMetadata {
|
||||||
|
|
||||||
|
|
||||||
/// return whether or not user gave confirmation
|
/// return whether or not user gave confirmation
|
||||||
fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef<str>>(run_targets: &[T]) -> bool {
|
fn prompt_confirm<'a, T: Borrow<[Y]>, Y: AsRef<str>>(run_targets: &[T]) -> std::io::Result<bool> {
|
||||||
println!("Are you sure you want to link all duplicates in each of these sets of targets?");
|
{
|
||||||
|
let mut stdout_buffer = std::io::BufWriter::new(std::io::stdout().lock());
|
||||||
|
writeln!(&mut stdout_buffer, "Are you sure you want to link all duplicates in each of these sets of targets?")?;
|
||||||
for spaths in run_targets {
|
for spaths in run_targets {
|
||||||
println!(" {}", shlex::try_join(spaths.borrow().iter().map(|s| s.as_ref())).unwrap());
|
writeln!(&mut stdout_buffer, " {}", shlex::try_join(spaths.borrow().iter().map(|s| s.as_ref())).unwrap())?;
|
||||||
|
}
|
||||||
|
write!(&mut stdout_buffer, "> ")?;
|
||||||
|
stdout_buffer.flush().unwrap_or_else(|_| ());
|
||||||
}
|
}
|
||||||
print!("> ");
|
|
||||||
std::io::stdout().flush().unwrap_or_else(|_| ());
|
|
||||||
|
|
||||||
let mut response = String::new();
|
let mut response = String::new();
|
||||||
std::io::stdin().read_line(&mut response).unwrap_or_else(
|
std::io::stdin().read_line(&mut response)?;
|
||||||
|_| {
|
|
||||||
eprintln!("Problem reading input");
|
|
||||||
std::process::exit(1);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
response.to_lowercase().starts_with("y")
|
Ok(response.to_lowercase().starts_with("y"))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_lines(reader: impl BufRead, dest: &mut Vec<String>) -> Result<(), String> {
|
fn read_lines(reader: impl BufRead, dest: &mut Vec<String>) -> Result<(), String> {
|
||||||
|
@ -464,7 +473,6 @@ where T: smallvec::Array<Item=&'b PathWithMetadata>,
|
||||||
writeln!(stdout_buffer, "hardlinked {}", format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg)).unwrap();
|
writeln!(stdout_buffer, "hardlinked {}", format_pair(&keep.path.to_string_lossy(), &replace.path.to_string_lossy(), cfg)).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
drop(keep);
|
|
||||||
keeps.push(replace);
|
keeps.push(replace);
|
||||||
}
|
}
|
||||||
true
|
true
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue