Jump to content

File size distribution: Difference between revisions

Update to Rust code to make it readable, and a better way of completing task
No edit summary
(Update to Rust code to make it readable, and a better way of completing task)
Line 716:
<lang rust>
use std::error::Error;
use std::marker::PhantomData;
use std::path::{Path, PathBuf};
use std::{env, fmt, io, time};
use walkdir::{DirEntry, WalkDir};
 
Line 724 ⟶ 725:
let args: Vec<String> = env::args().collect();
 
let root = parse_path(&args).expect("not a valid path");
// Assign root from cmd input
let root: &Path = parse_path(&args).expect("not a valid path");
// Recursively build directory
let dir = WalkDir::new(&root);
 
let (files, dirs): (Vec<PathBuf>, Vec<PathBuf>) = {
// Retrieve all entries from WalkDir
let pool = pool(dir).expect("unable to retrieve entries from WalkDir");
// check and pull all paths that are files, seperating from all paths that are directories
partition_from(pool).expect("unable to partition files from directories")
};
 
let (fs_count, dr_count) = (files.len(), dirs.len());
let header = format!("++ File size distribution for : {} ++\n", &root.display());
let (file_counter, total_size) = file_count(files);
 
{
let (fcount, dcount): (usize, usize) = (files.len(), dirs.len());
println!("++ File size distribution for : {} ++\n", &root.display());
let (size_by_count, total_size): ([u64; 6], u64) = file_count(files);
println!("Files @ 0B : {:4}", file_counter[0]);
let out_size = format!(
println!("Files > 1B - 1,023B : {:4}", file_counter[1]);
"\nFiles @ 0B : {}\nFiles > 1B - 1,023B : {}\nFiles > 1KB - 1,023KB : {}\nFiles > 1MB - 1,023MB : {}\nFiles > 1GB - 1,023GB : {}\nFiles > 1TB+ : {}\n",
println!("Files > 1KB - 1,023KB : {:4}", file_counter[2]);
size_by_count[0], size_by_count[1], size_by_count[2], size_by_count[3], size_by_count[4], size_by_count[5],
println!("Files > 1MB - 1,023MB : {:4}", file_counter[3]);
);
println!("Files > 1GB - 1,023GB : {:4}", file_counter[4]);
println!("Files > 1TB+ : {:4}\n", file_counter[5]);
 
println!("Files encountered: {}", fs_count);
let out_stat = format!(
println!("Directories traversed: {}", dr_count);
"\nTotal number of files counted: {}\nTotal number of directories traversed: {}\nTotal size of all files: {} bytes\n",
fcount, dcount, total_sizeprintln!(
"Total size of all files: {}\n",
);
Filesize::<Kilobytes>::from(total_size)
);
}
 
let end = time::Instant::now();
let run_len = formatprintln!("\nRunRun time: {:?}\n", end.duration_since(start));
 
println!("{}{}{}{}", header, out_size, out_stat, run_len);
Ok(())
}
 
/// Picks the directory to analyse from the program's argument vector.
///
/// `args` is the full argument list as collected from `env::args()`, so
/// element 0 is the executable's own path.
///
/// * When an argument was entered, `args[1]` is returned.
/// * If there's no argument entered, the executable will search its own
///   path (`args[0]`).
///
/// # Errors
///
/// Returns an `io::ErrorKind::InvalidInput` error when `args` is empty,
/// instead of panicking on an out-of-bounds index.
fn parse_path(args: &[String]) -> Result<&Path, io::Error> {
    // Prefer the first user-supplied argument, fall back to the executable path.
    match args.get(1).or_else(|| args.first()) {
        Some(arg) => Ok(Path::new(arg)),
        None => Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "no path argument given and no executable path available",
        )),
    }
}
 
/// Gathers every entry produced by walking `dir`.
///
/// Each item yielded by the walk is checked for errors and any invalid
/// `DirEntry` is dropped, so only successfully read entries are returned.
/// As written, this function always returns `Ok`.
fn pool(dir: WalkDir) -> Result<Vec<DirEntry>, Box<dyn Error>> {
    let entries: Vec<DirEntry> = dir.into_iter().filter_map(Result::ok).collect();
    Ok(entries)
}
 
fn partition_from(pool: Vec<DirEntry>) -> Result<(Vec<PathBuf>, Vec<PathBuf>), Box<dyn Error>> {
// Read the `Path` from each `DirEntry`, checking if the `Path` is a file or a directory.
Ok(pool
.into_iter()
Line 779 ⟶ 780:
 
fn file_count(files: Vec<PathBuf>) -> ([u64; 6], u64) {
let mut fc_by_sizecounter: [u64; 6] = [0; 6];
for file in &files {
match Filesize::<Bytes>::from(file).bytes {
// metadata().len() returns u64 / bytes
0 => counter[0] += 1, // Empty file
match file
1..=1_023 => counter[1] += 1, // 1 byte to 0.99KB
.metadata()
1_024..=1_048_575 => counter[2] += 1, // 1 kilo to 0.99MB
.expect("error with metadata while matching")
1_048_576..=1_073_741_823 => counter[3] += 1, // 1 mega to 0.99GB
.len()
1_073_741_824..=1_099_511_627_775 => counter[4] += 1, // 1 giga to 0.99TB
{
1_099_511_627_776..=std::u64::MAX => counter[5] += 1, // 1 terabyte or larger
// Empty
0 => fc_by_size[0] += 1,}
}
// 1 byte to 999 bytes
 
1u64..=1023u64 => fc_by_size[1] += 1,
let total_file_size = files
// 1kb to 0.99 kb
.iter()
1024u64..=1_048_575_u64 => fc_by_size[2] += 1,
.fold(0, |acc, file| acc + Filesize::<Bytes>::from(file).bytes);
// 1 mb to 0.99 mb
(counter, total_file_size)
1_048_576_u64..=1_073_741_823_u64 => fc_by_size[3] += 1,
}
// 1 gb to 0.99 gb
 
1_073_741_824_u64..=109_951_162_775_u64 => fc_by_size[4] += 1,
trait SizeUnit: Copy {
// 1 tb or larger
fn singular_name() -> String;
109_951_162_776_u64..=std::u64::MAX => fc_by_size[5] += 1,
fn num_byte_in_unit() -> u64;
}
 
/// Unit marker: sizes expressed in plain bytes.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Bytes;

impl SizeUnit for Bytes {
    /// Abbreviation used when a byte count is displayed.
    fn singular_name() -> String {
        String::from("B")
    }

    /// One byte per unit.
    fn num_byte_in_unit() -> u64 {
        1_u64
    }
}
 
/// Unit marker: sizes expressed in kilobytes of 1,024 bytes each.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Kilobytes;

impl SizeUnit for Kilobytes {
    /// Abbreviation used when a kilobyte count is displayed.
    fn singular_name() -> String {
        String::from("KB")
    }

    /// 1,024 bytes per unit.
    fn num_byte_in_unit() -> u64 {
        1 << 10
    }
}
 
/// A file size tagged with the unit `T` it should be presented in.
///
/// The value itself is always stored in bytes; `T` only selects the unit
/// used by the conversions and the `Display` implementation.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Filesize<T: SizeUnit> {
// Size in bytes, regardless of the unit `T`.
bytes: u64,
// Zero-sized marker that ties the value to its unit type without storing a `T`.
unit: PhantomData<T>,
}
 
/// Builds a `Filesize` from a count of `T` units.
///
/// `n` is interpreted as a number of units, so it is scaled by
/// `T::num_byte_in_unit()` to obtain the stored byte count. The
/// multiplication is a plain `*`, so an overflowing product follows the
/// build's integer-overflow behaviour.
impl<T: SizeUnit> From<u64> for Filesize<T> {
    fn from(n: u64) -> Self {
        let bytes = n * T::num_byte_in_unit();
        Self {
            bytes,
            unit: PhantomData,
        }
    }
}
 
/// Converts a `Filesize` back into a whole number of `T` units.
///
/// The stored byte count is divided by `T::num_byte_in_unit()` and any
/// fractional unit is discarded. Integer division is used so the result is
/// exact for every `u64` byte count; going through `f64` would silently
/// lose precision once the byte count exceeds 2^53.
///
/// # Panics
///
/// Panics if `T::num_byte_in_unit()` returns zero.
impl<T> From<Filesize<T>> for u64
where
    T: SizeUnit,
{
    fn from(fsz: Filesize<T>) -> u64 {
        fsz.bytes / T::num_byte_in_unit()
    }
}
 
/// Formats the size as `<value> <unit>` with an `s` appended for plurals,
/// e.g. `2 KBs`.
///
/// The value is printed as a floating-point number of `T` units. The plural
/// suffix is decided from that value truncated to an integer: the suffix is
/// omitted only when the truncated value is exactly 1.
impl<T: SizeUnit> fmt::Display for Filesize<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Size expressed in the associated unit, as a float.
        let in_units = (self.bytes as f64) / (T::num_byte_in_unit() as f64);

        // Singular only when the whole-unit part equals one.
        let suffix = if in_units as u64 == 1 { "" } else { "s" };

        write!(f, "{} {}{}", in_units, T::singular_name(), suffix)
    }
}
 
// Can be expanded for From<File>, or any type that has an alias for Metadata
let total_file_size: u64 = files.iter().fold(0, |acc, f| {
impl<T> From<&PathBuf> for Filesize<T>
acc + f
where
.metadata()
T: SizeUnit,
.expect("error with metadata while folding")
{
.len()
fn from(f: &PathBuf) -> Self {
});
Filesize {
(fc_by_size, total_file_size)
bytes: f
.metadata()
.expect("error with metadata from pathbuf into filesize")
.len(),
unit: PhantomData,
}
}
}
</lang>
{{out}}
<pre>
++ File size distribution for : C:\Users\..\Documents ++
 
Files @ 0B : 956
Files > 1B - 1,023B : 3724
Files > 1KB - 1,023KB : 4511
Files > 1MB - 1,023MB : 930
Files > 1GB - 1,023GB : 0
Files > 1TB+ : 0

Files encountered: 10121
Directories traversed: 2057
Total size of all files: 5264133277 KBs

Run time: 1.5671626s
</pre>
 
Anonymous user
Cookies help us deliver our services. By using our services, you agree to our use of cookies.