File size distribution: Difference between revisions

m (Update Lang example: Fix spelling of Lang)
 
(4 intermediate revisions by 3 users not shown)
Line 1,009:
1000000 4
10000000 4</syntaxhighlight>
 
=={{header|Java}}==
<syntaxhighlight lang="java">
 
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
 
/**
 * Prints a distribution of file sizes for the current working directory.
 *
 * <p>Every non-directory entry directly inside {@code "."} is assigned to a bucket
 * keyed by the number of decimal digits in its size in bytes, so bucket {@code k}
 * is reported as "10^(k-1) to 10^k". A zero-byte file has a one-digit size and is
 * therefore counted in the first bucket.
 */
public final class FileSizeDistribution {

    /**
     * Lists the current directory, tallies the file sizes and prints the table.
     *
     * @param aArgs command-line arguments; not used
     * @throws IOException if the directory cannot be listed or a file size cannot be read
     */
    public static void main(String[] aArgs) throws IOException {
        final List<Path> files;
        // Files.list keeps the directory open until the stream is closed.
        try ( var entries = Files.list(Path.of(".")) ) {
            // Keep the paths exactly as listed so that Files.size resolves them
            // against the listed directory rather than against bare file names.
            files = entries.filter( file -> ! Files.isDirectory(file) ).toList();
        }

        Map<Integer, Integer> fileSizes = new HashMap<>();
        for ( Path file : files ) {
            fileSizes.merge(String.valueOf(Files.size(file)).length(), 1, Integer::sum);
        }
        // Each file contributes exactly one to some bucket, so this is the total.
        final int fileCount = files.size();

        System.out.println("File size distribution for directory \".\":" + System.lineSeparator());
        System.out.println("File size in bytes | Number of files | Percentage");
        System.out.println("-------------------------------------------------");
        // HashMap iteration order is unspecified, so order the buckets explicitly.
        for ( int digits : fileSizes.keySet().stream().sorted().toList() ) {
            final int count = fileSizes.get(digits);
            System.out.println(String.format(" 10^%d to 10^%d%15d%15.1f%%",
                digits - 1, digits, count, ( 100.0 * count ) / fileCount));
        }
    }

}
</syntaxhighlight>
{{ out }}
<pre>
File size distribution for directory ".":
 
File size in bytes | Number of files | Percentage
-------------------------------------------------
10^0 to 10^1 1 0.2%
10^1 to 10^2 1 0.2%
10^2 to 10^3 5 1.1%
10^3 to 10^4 3 0.6%
10^4 to 10^5 161 34.0%
10^5 to 10^6 196 41.4%
10^6 to 10^7 98 20.7%
10^7 to 10^8 9 1.9%
</pre>
 
=={{header|jq}}==
'''Works with jq, the C implementation of jq'''
 
'''Works with gojq, the Go implementation of jq'''
 
'''Works with jaq, the Rust implementation of jq'''
 
This entry illustrates how jq plays nicely with other command-line
tools; in this case jc (https://kellyjonbrazil.github.io/jc) is used to JSONify the output of `ls -Rl`.
 
(jq could also be used to parse the raw output of `ls`, but it would no doubt
be tricky to achieve portability.)
 
The invocation of jc and jq would be along the following lines:
<pre>
jc --ls -lR | jq -c -f file-size-distribution.jq
</pre>
 
In the present case, the output from the call to `histogram` is a stream of [category, count] pairs
beginning with [0, _] showing the number of files of size 0; thereafter, the boundaries
of the categories are defined logarithmically, i.e. a file of non-zero size $n is assigned to
the category `1 + ($n | log10 | trunc)`.
 
The output shown below for an actual directory tree suggests a
unimodal distribution of file sizes.
 
<syntaxhighlight lang="jq">
# Bag of words: tally the items produced by `stream`.
# The result is an object whose keys are the stringified items and whose
# values are the number of times each item occurred.
def bow(stream):
  reduce stream as $item ({}; .[$item | tostring] |= (. + 1));
 
# `stream` should produce non-negative numbers or numeric strings.
# Emits a stream of [bucket, count] pairs in ascending order of `bucket`.
# The only sorting performed is that of the bucket values themselves.
def histogram(stream):
  bow(stream)
  | [ to_entries[] | [(.key | tonumber), .value] ]
  | sort_by(first)
  | .[];
 
# Size 0 goes to bucket 0; any other size goes to bucket 1 + trunc(log10(size)).
histogram(.[].size | if . == 0 then 0 else (log10 | trunc) + 1 end)
</syntaxhighlight>
{{output}}
<pre>
[0,9]
[1,67]
[2,616]
[3,6239]
[4,3679]
[5,213]
[6,56]
[7,40]
[8,20]
[9,4]
[10,1]
</pre>
 
=={{header|Julia}}==
Line 1,136 ⟶ 1,247:
# Replace "<pathToIO.lm>" with the location where the io.lm Lang module was installed to without "<" and ">"
ln.loadModule(<pathToIO.lm>)
 
 
fp.fileSizeDistribution = (&sizes, $[totalSize], $file) -> {
Line 1,164 ⟶ 1,276:
if($len == 0) {
fn.arraySet(&sizes, [0,] parser.op(+|&sizes[0]))= 1
}else {
$index = fn.int(fn.log10($len))
fn.arraySet(&sizes, [$index,] parser.op(+|&sizes[$index]))= 1
}
}
Line 1,967 ⟶ 2,079:
{{libheader|Wren-math}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "io" for Directory, File, Stat
import "os" for Process
import "./math" for Math
import "./fmt" for Fmt
 
var sizes = List.filled(12, 0)
2,442

edits