File size distribution: Difference between revisions

UNIX shell: further simplification, only need one awk invocation
(UNIX shell: fix wrong sort for files more than 1GB and "prettyfy" the output)
(UNIX shell: further simplification, only need one awk invocation)
Line 1,605:
{{works with|Bourne Shell}}
Uses POSIX-conformant code unless the environment variable GNU is set to a non-empty value.
<lang sh>#!/bin/sh
#!/bin/sh
set -eu
 
#!/bin/sh
set -eu
 
tabs -8
if [ ${GNU:-} ]
then
find -- "${1:-.}" -type f -exec du -b -- {} +
else
# Use a subshell to remove the last "total" line per each ARG_MAX
find -- "${1:-.}" -type f -exec sh -c 'wc -c -- "$@" | sed \$d' argv0 {} +
fi | awk -vOFS='\t' '
BEGIN {split("KB MB GB TB PB", u); u[0] = "B"}
{
{
++hist[$1 ? length($1) - 1 : -1]
total += $1
}
}
END {
print total, NR
max = -2
for (i in hist)
print for (i, in hist[i])
max = (i > max ? i : max)
}' | \
 
{
print "From", "To", "Count\n"
read total
for (i = -1; i <= max; ++i)
tabs -8
{
sort -n | awk -vtotal="$total" -vOFS='\t' '
for if (i in hist)
BEGIN {
{
split("KB MB GB TB PB", u); u[0] = "B"
if (i == -1)
print "From", "To", "Count\n"
print "0B", "0B", hist[i]
}
else
$1 == -1 {print "0B", "0B", $2; next}
print 10 ** (i % 3) u[int(i / 3)],
{
print 10 ** ($1 (i + 1) % 3) u[int($1 (i + 1) / 3)],
10 ** (($1 + 1) % 3) u[int(($1 + 1) / 3) hist[i],
}
$2
}
}
l = length($1total) - 1
END {
printf "\nTotal: %.1f %s in %d files\n",
$0 = total
$1 total / (10 ** l), u[int(l / 3)], $2}'NR
l = length($1) - 1
}'</lang>
printf "\nTotal: %.1f %s in %d files\n",
$1 / (10 ** l), u[int(l / 3)], $2}'
}</lang>
{{out}}
<pre>$ time ~/fsd.sh
Anonymous user