File size distribution: Difference between revisions
Content added Content deleted
m (→{{header|Haskell}}: fix non terminating condition) |
|||
Line 391: | Line 391: | ||
</pre> |
</pre> |
||
=={{header|Haskell}}== |
=={{header|Haskell}}== |
||
Uses a grouped frequency distribution. Program arguments are optional. Arguments include starting directory and initial frequency distribution group size. Distribution groups of 0 are removed. After the first frequency distribution is computed it further breaks it down for any group that exceeds 25% of the total file count. |
Uses a grouped frequency distribution. Program arguments are optional: the starting directory and the initial frequency distribution group size. Groups containing no files are removed. After the first frequency distribution is computed, any group that exceeds 25% of the total file count is further broken down, when possible. |
||
<lang haskell>{-# LANGUAGE TupleSections, LambdaCase #-} |
<lang haskell>{-# LANGUAGE TupleSections, LambdaCase #-} |
||
Line 418: | Line 418: | ||
range = maximum xs - minimum xs |
range = maximum xs - minimum xs |
||
groupSize = succ $ ceiling $ realToFrac range / realToFrac totalGroups |
groupSize = succ $ ceiling $ realToFrac range / realToFrac totalGroups |
||
groups = |
groups = takeWhile (<=groupSize + maximum xs) $ iterate (+groupSize) 0 |
||
groupMinMax = (,0) <$> zip groups (pred <$> tail groups) |
groupMinMax = (,0) <$> zip groups (pred <$> tail groups) |
||
Line 426: | Line 426: | ||
if d >= min && d <= max |
if d >= min && d <= max |
||
then ((min, max), succ count) |
then ((min, max), succ count) |
||
else g |
else g |
||
) gs |
|||
fileSizes :: [Item] -> [Integer] |
fileSizes :: [Item] -> [Integer] |
||
Line 460: | Line 462: | ||
expandGroups :: Int -> [Integer] -> Integer -> [FrequencyGroup] -> [FrequencyGroup] |
expandGroups :: Int -> [Integer] -> Integer -> [FrequencyGroup] -> [FrequencyGroup] |
||
expandGroups gsize fileSizes groupThreshold |
expandGroups gsize fileSizes groupThreshold = loop 15 |
||
| all ((<= groupThreshold) . snd) groups = groups |
|||
| otherwise = expandGroups gsize fileSizes groupThreshold $ expand groups |
|||
where |
where |
||
loop 0 gs = gs -- break out in case we can't go below threshold |
|||
loop n gs |
|||
| all ((<= groupThreshold) . snd) gs = gs |
|||
| otherwise = loop (pred n) $ expand gs |
|||
expand = ((\g@((min, max), count) -> |
expand = ((\g@((min, max), count) -> |
||
if count > groupThreshold then |
if count > groupThreshold then |
||
Line 505: | Line 510: | ||
percentage :: Double |
percentage :: Double |
||
percentage = (realToFrac count / realToFrac filesCount) * 100 |
percentage = (realToFrac count / realToFrac filesCount) * 100 |
||
bars = replicate (round |
bars = replicate (round percentage) '█' |
||
parseArgs :: [String] -> Either String (FilePath, Int) |
parseArgs :: [String] -> Either String (FilePath, Int) |
||
Line 558: | Line 563: | ||
. initialGroups gsize</lang> |
. initialGroups gsize</lang> |
||
{{out}} |
{{out}} |
||
<pre style="height: 50rem;">$ filedist |
<pre style="height: 50rem;">$ filedist ~/Music |
||
Using 4 worker threads |
Using 4 worker threads |
||
Total files: |
Total files: 688 |
||
Total folders: |
Total folders: 663 |
||
Total size: |
Total size: 986MB |
||
Distribution: |
Distribution: |
||
0B <-> 80B = 7 1.017%: █ |
|||
0B <-> 75B = 39305 9.103%: ████████████████████████████████████ |
|||
81B <-> 161B = 74 10.756%: ███████████ |
|||
76B <-> 151B = 36175 8.378%: ██████████████████████████████████ |
|||
162B <-> 242B = 112 16.279%: ████████████████ |
|||
152B <-> 227B = 27747 6.426%: ██████████████████████████ |
|||
243B <-> 323B = 99 14.390%: ██████████████ |
|||
228B <-> 303B = 19148 4.434%: ██████████████████ |
|||
322B <-> 643B = 23 3.343%: ███ |
|||
301B <-> 601B = 50919 11.792%: ███████████████████████████████████████████████ |
|||
644B <-> 965B = 2 0.291%: |
|||
602B <-> 902B = 41885 9.700%: ███████████████████████████████████████ |
|||
966B <-> 1KB = 1 0.145%: |
|||
903B <-> 1KB = 43986 10.187%: █████████████████████████████████████████ |
|||
3KB <-> 6KB = 12 1.744%: ██ |
|||
1KB <-> 2KB = 61277 14.191%: █████████████████████████████████████████████████████████ |
|||
6KB <-> 10KB = 22 3.198%: ███ |
|||
2KB <-> 4KB = 29473 6.826%: ███████████████████████████ |
|||
10KB <-> 13KB = 12 1.744%: ██ |
|||
4KB <-> 5KB = 17620 4.081%: ████████████████ |
|||
14KB <-> 27KB = 15 2.180%: ██ |
|||
5KB <-> 9KB = 28951 6.705%: ███████████████████████████ |
|||
27KB <-> 41KB = 6 0.872%: █ |
|||
41KB <-> 54KB = 22 3.198%: ███ |
|||
54KB <-> 108KB = 99 14.390%: ██████████████ |
|||
108KB <-> 163KB = 23 3.343%: ███ |
|||
163KB <-> 217KB = 8 1.163%: █ |
|||
236KB <-> 473KB = 3 0.436%: |
|||
709KB <-> 946KB = 44 6.395%: ██████ |
|||
3MB <-> 5MB = 4 0.581%: █ |
|||
5MB <-> 7MB = 21 3.052%: ███ |
|||
7MB <-> 13MB = 72 10.465%: ██████████ |
|||
13MB <-> 20MB = 6 0.872%: █ |
|||
20MB <-> 27MB = 1 0.145%: |
|||
2MB <-> 4MB = 110 0.025%: |
|||
4MB <-> 5MB = 51 0.012%: |
|||
5MB <-> 9MB = 52 0.012%: |
|||
9MB <-> 14MB = 19 0.004%: |
|||
14MB <-> 19MB = 8 0.002%: |
|||
20MB <-> 40MB = 17 0.004%: |
|||
40MB <-> 61MB = 5 0.001%: |
|||
61MB <-> 81MB = 3 0.001%: |
|||
98MB <-> 196MB = 8 0.002%: |
|||
294MB <-> 392MB = 1 0.000%: |
|||
$ filedist ~/Music 10 |
|||
# Smaller set |
|||
$ filedist |
|||
Using 4 worker threads |
Using 4 worker threads |
||
Total files: |
Total files: 688 |
||
Total folders: |
Total folders: 663 |
||
Total size: |
Total size: 986MB |
||
Distribution: |
Distribution: |
||
0B <-> |
0B <-> 88B = 7 1.017%: █ |
||
89B <-> 177B = 75 10.901%: ███████████ |
|||
59B <-> 117B = 77 5.604%: ██████████████████████ |
|||
178B <-> 266B = 156 22.674%: ███████████████████████ |
|||
118B <-> 176B = 72 5.240%: █████████████████████ |
|||
267B <-> 355B = 57 8.285%: ████████ |
|||
177B <-> 235B = 176 12.809%: ███████████████████████████████████████████████████ |
|||
356B <-> 444B = 20 2.907%: ███ |
|||
232B <-> 463B = 338 24.600%: ██████████████████████████████████████████████████████████████████████████████████████████████████ |
|||
801B <-> 889B = 2 0.291%: |
|||
464B <-> 695B = 88 6.405%: ██████████████████████████ |
|||
959B <-> 2KB = 1 0.145%: |
|||
4KB <-> 5KB = 1 0.145%: |
|||
926B <-> 2KB = 169 12.300%: █████████████████████████████████████████████████ |
|||
5KB <-> 6KB = 1 0.145%: |
|||
6KB <-> 7KB = 11 1.599%: ██ |
|||
7KB <-> 7KB = 10 1.453%: █ |
|||
4KB <-> 8KB = 121 8.806%: ███████████████████████████████████ |
|||
7KB <-> 8KB = 4 0.581%: █ |
|||
8KB <-> 9KB = 7 1.017%: █ |
|||
9KB <-> 19KB = 21 3.052%: ███ |
|||
19KB <-> 28KB = 6 0.872%: █ |
|||
28KB <-> 38KB = 4 0.581%: █ |
|||
38KB <-> 47KB = 12 1.744%: ██ |
|||
47KB <-> 57KB = 16 2.326%: ██ |
|||
57KB <-> 66KB = 23 3.343%: ███ |
|||
66KB <-> 75KB = 26 3.779%: ████ |
|||
75KB <-> 85KB = 15 2.180%: ██ |
|||
85KB <-> 94KB = 17 2.471%: ██ |
|||
95KB <-> 189KB = 42 6.105%: ██████ |
|||
189KB <-> 284KB = 4 0.581%: █ |
|||
284KB <-> 378KB = 2 0.291%: |
|||
851KB <-> 946KB = 44 6.395%: ██████ |
|||
3MB <-> 5MB = 5 0.727%: █ |
|||
5MB <-> 8MB = 41 5.959%: ██████ |
|||
8MB <-> 11MB = 35 5.087%: █████ |
|||
# Increase distribution group to 10 using optional arguments |
|||
11MB <-> 13MB = 16 2.326%: ██ |
|||
$ filedist . 10 |
|||
13MB <-> 16MB = 3 0.436%: |
|||
Using 4 worker threads |
|||
16MB <-> 19MB = 3 0.436%: |
|||
Total files: 1374 |
|||
24MB <-> 27MB = 1 0.145%: |
|||
Total folders: 455 |
|||
Total size: 620MB |
|||
Distribution: |
|||
0B <-> 87B = 48 3.493%: ██████████████ |
|||
88B <-> 175B = 137 9.971%: ████████████████████████████████████████ |
|||
176B <-> 263B = 184 13.392%: ██████████████████████████████████████████████████████ |
|||
264B <-> 351B = 78 5.677%: ███████████████████████ |
|||
352B <-> 439B = 208 15.138%: █████████████████████████████████████████████████████████████ |
|||
440B <-> 527B = 91 6.623%: ██████████████████████████ |
|||
528B <-> 615B = 20 1.456%: ██████ |
|||
616B <-> 703B = 24 1.747%: ███████ |
|||
704B <-> 791B = 28 2.038%: ████████ |
|||
792B <-> 879B = 13 0.946%: ████ |
|||
871B <-> 2KB = 168 12.227%: █████████████████████████████████████████████████ |
|||
2KB <-> 3KB = 58 4.221%: █████████████████ |
|||
3KB <-> 3KB = 31 2.256%: █████████ |
|||
3KB <-> 4KB = 11 0.801%: ███ |
|||
4KB <-> 5KB = 20 1.456%: ██████ |
|||
5KB <-> 6KB = 51 3.712%: ███████████████ |
|||
6KB <-> 7KB = 31 2.256%: █████████ |
|||
7KB <-> 8KB = 14 1.019%: ████ |
|||
8KB <-> 9KB = 4 0.291%: █ |
|||
9KB <-> 17KB = 23 1.674%: ███████ |
|||
17KB <-> 26KB = 5 0.364%: █ |
|||
26KB <-> 34KB = 13 0.946%: ████ |
|||
34KB <-> 43KB = 5 0.364%: █ |
|||
43KB <-> 51KB = 6 0.437%: ██ |
|||
51KB <-> 60KB = 1 0.073%: |
|||
60KB <-> 68KB = 4 0.291%: █ |
|||
68KB <-> 77KB = 2 0.146%: █ |
|||
77KB <-> 85KB = 2 0.146%: █ |
|||
94KB <-> 188KB = 17 1.237%: █████ |
|||
188KB <-> 283KB = 4 0.291%: █ |
|||
848KB <-> 942KB = 6 0.437%: ██ |
|||
1MB <-> 2MB = 50 3.639%: ███████████████ |
|||
2MB <-> 3MB = 1 0.073%: |
|||
3MB <-> 4MB = 7 0.509%: ██ |
|||
4MB <-> 5MB = 5 0.364%: █ |
|||
6MB <-> 7MB = 1 0.073%: |
|||
9MB <-> 10MB = 1 0.073%: |
|||
39MB <-> 78MB = 1 0.073%: |
|||
353MB <-> 392MB = 1 0.073%: |
|||
</pre> |
</pre> |
||