File size distribution: Difference between revisions
Content added Content deleted
(Simplify and accelerate UNIX shell solution massively) |
(UNIX shell: fix wrong sort for files more than 1GB and "prettyfy" the output) |
||
Line 1,605: | Line 1,605: | ||
{{works with|Bourne Shell}} |
{{works with|Bourne Shell}} |
||
Uses POSIX-conformant code unless the environment variable GNU is set to a non-empty value. |
Uses POSIX-conformant code unless the environment variable GNU is set to a non-empty value. |
||
<lang sh> |
<lang sh> |
||
#!/bin/sh |
|||
set -eu |
|||
#!/bin/sh |
|||
set -eu |
set -eu |
||
if [ ${GNU:-} ] |
if [ ${GNU:-} ] |
||
then |
then |
||
find -- "${1:-.}" -type f -exec du -b -- {} + |
|||
else |
else |
||
# Run wc through a child shell (sh -c) so that sed can drop the trailing "total" line wc prints for each ARG_MAX-limited batch of files |
|||
find -- "${1:-.}" -type f -exec sh -c 'wc -c -- "$@" | sed \$d' argv0 {} + |
|||
fi | awk ' |
fi | awk ' |
||
{ |
|||
{ |
|||
++hist[$1 ? length($1) - 1 : -1] |
|||
total += $1 |
|||
} |
|||
} |
|||
END { |
|||
print total, NR |
|||
for (i in hist) |
|||
print (i == -1 ? 0 : "1e" i) "\t" hist[i] |
|||
print i, hist[i] |
|||
print "Total: " total " bytes in " NR " files" |
|||
}' | \ |
|||
{ |
|||
read total |
|||
tabs -8 |
|||
sort -n | awk -vtotal="$total" -vOFS='\t' ' |
|||
BEGIN { |
|||
split("KB MB GB TB PB", u); u[0] = "B" |
|||
print "From", "To", "Count\n" |
|||
} |
|||
$1 == -1 {print "0B", "0B", $2; next} |
|||
{ |
|||
print 10 ** ($1 % 3) u[int($1 / 3)], |
|||
10 ** (($1 + 1) % 3) u[int(($1 + 1) / 3)], |
|||
$2 |
|||
} |
|||
END { |
|||
$0 = total |
|||
l = length($1) - 1 |
|||
printf "\nTotal: %.1f %s in %d files\n", |
|||
$1 / (10 ** l), u[int(l / 3)], $2}' |
|||
}</lang> |
|||
{{out}} |
{{out}} |
||
<pre>$ time ~/fsd.sh |
<pre>$ time ~/fsd.sh |
||
From To Count |
|||
1e0 66 |
|||
0B 0B 13 |
|||
1B 10B 74 |
|||
10B 100B 269 |
|||
100B 1KB 5894 |
|||
1KB 10KB 12727 |
|||
10KB 100KB 12755 |
|||
100KB 1MB 110922 |
|||
1MB 10MB 50019 |
|||
10MB 100MB 17706 |
|||
100MB 1GB 5056 |
|||
⚫ | |||
1GB 10GB 1139 |
|||
⚫ | |||
10GB 100GB 141 |
|||
100GB 1TB 1 |
|||
⚫ | |||
⚫ | |||
$ time GNU=1 ~/fsd.sh |
$ time GNU=1 ~/fsd.sh |
||
From To Count |
|||
1e0 66 |
|||
0B 0B 13 |
|||
1B 10B 74 |
|||
10B 100B 269 |
|||
100B 1KB 5894 |
|||
1KB 10KB 12727 |
|||
10KB 100KB 12755 |
|||
100KB 1MB 110922 |
|||
1MB 10MB 50019 |
|||
10MB 100MB 17706 |
|||
100MB 1GB 5056 |
|||
⚫ | |||
1GB 10GB 1139 |
|||
⚫ | |||
10GB 100GB 141 |
|||
100GB 1TB 1 |
|||
⚫ | |||
⚫ | |||
=={{header|zkl}}== |
=={{header|zkl}}== |