File size distribution: Difference between revisions

Content added Content deleted
(Simplify and accelerate UNIX shell solution massively)
(UNIX shell: fix wrong sort for files more than 1GB and "prettyfy" the output)
Line 1,605: Line 1,605:
{{works with|Bourne Shell}}
{{works with|Bourne Shell}}
Uses POSIX-conformant code unless the environment variable GNU is set to a non-empty value.
Uses POSIX-conformant code unless the environment variable GNU is set to a non-empty value.
<lang sh>#!/bin/sh
<lang sh>
#!/bin/sh
set -eu

#!/bin/sh
set -eu
set -eu


if [ ${GNU:-} ]
if [ ${GNU:-} ]
then
then
find -- "${1:-.}" -type f -exec du -b -- {} +
find -- "${1:-.}" -type f -exec du -b -- {} +
else
else
# Use a subshell to remove the last "total" line per each ARG_MAX
# Use a subshell to remove the last "total" line per each ARG_MAX
find -- "${1:-.}" -type f -exec sh -c 'wc -c -- "$@" | sed \$d' argv0 {} +
find -- "${1:-.}" -type f -exec sh -c 'wc -c -- "$@" | sed \$d' argv0 {} +
fi | awk '
fi | awk '
{
{
++hist[$1 ? length($1) - 1 : -1]
++hist[$1 ? length($1) - 1 : -1]
total += $1
total += $1
}
}
END {
END {
print total, NR
for (i in hist)
for (i in hist)
print (i == -1 ? 0 : "1e" i) "\t" hist[i]
print i, hist[i]
print "Total: " total " bytes in " NR " files"
}' | sort</lang>
}' | \
{
read total
tabs -8
# Formatter stage: sort the "exponent count" rows numerically and render them
# as a tab-separated table of decimal size ranges, followed by a grand total.
# $total carries the "<bytes> <files>" line produced by the previous stage.
sort -n | awk -v total="$total" -v OFS='\t' '
  BEGIN {
    # Decimal unit names indexed by exponent / 3; u[0] is plain bytes.
    split("KB MB GB TB PB", u); u[0] = "B"
    print "From", "To", "Count\n"
  }
  # Exponent -1 is the bucket the previous stage uses for empty files.
  $1 == -1 {print "0B", "0B", $2; next}
  # Bucket e covers sizes from 10^e up to 10^(e + 1).
  # "^" is the POSIX exponentiation operator; "**" is a non-portable extension.
  {
    print 10 ^ ($1 % 3) u[int($1 / 3)],
      10 ^ (($1 + 1) % 3) u[int(($1 + 1) / 3)],
      $2
  }
  END {
    # Re-split the "<bytes> <files>" pair into $1 and $2.
    $0 = total
    l = length($1) - 1
    # Scale by the selected unit (a power of 1000), not by the leading digit,
    # so that 612404756079 prints as 612.4 GB rather than 6.1 GB.
    printf "\nTotal: %.1f %s in %d files\n",
      $1 / (10 ^ (3 * int(l / 3))), u[int(l / 3)], $2
  }'
}</lang>
{{out}}
{{out}}
<pre>$ time ~/fsd.sh
<pre>$ time ~/fsd.sh
0 4
From To Count

1e0 66
1e1 66
0B 0B 13
1e2 1418
1B 10B 74
1e3 1026
10B 100B 269
1e4 1564
100B 1KB 5894
1e5 60083
1KB 10KB 12727
1e6 16282
10KB 100KB 12755
1e7 3881
100KB 1MB 110922
1e8 1444
1MB 10MB 50019
1e9 16
10MB 100MB 17706
100MB 1GB 5056
Total: 612404756079 bytes in 85850 files
1GB 10GB 1139
~/fsd.sh 0.60s user 0.98s system 134% cpu 1.182 total
10GB 100GB 141
100GB 1TB 1

Total: 8.9 TB in 216716 files
~/fsd.sh 1.28s user 2.55s system 134% cpu 2.842 total
$ time GNU=1 ~/fsd.sh
$ time GNU=1 ~/fsd.sh
0 4
From To Count

1e0 66
1e1 66
0B 0B 13
1e2 1418
1B 10B 74
1e3 1026
10B 100B 269
1e4 1564
100B 1KB 5894
1e5 60083
1KB 10KB 12727
1e6 16282
10KB 100KB 12755
1e7 3881
100KB 1MB 110922
1e8 1444
1MB 10MB 50019
1e9 16
10MB 100MB 17706
100MB 1GB 5056
Total: 612404756079 bytes in 85850 files
1GB 10GB 1139
GNU=1 ~/fsd.sh 0.35s user 0.48s system 135% cpu 0.613 total</pre>
10GB 100GB 141
100GB 1TB 1

Total: 8.9 TB in 216716 files
GNU=1 ~/fsd.sh 0.81s user 1.33s system 135% cpu 1.586 total</pre>


=={{header|zkl}}==
=={{header|zkl}}==