Anonymous user
File size distribution: Difference between revisions
m
→{{header|Haskell}}
Line 392:
=={{header|Haskell}}==
Uses a grouped frequency distribution. Program arguments are optional: a starting directory and an initial frequency distribution group size. Groups with a count of 0 are omitted. After the first frequency distribution is computed, any group that exceeds 25% of the total file count is broken down further, when possible.
<lang haskell>{-# LANGUAGE LambdaCase #-}
import Control.Concurrent (forkIO, setNumCapabilities)
import Control.Concurrent.Chan (Chan, newChan, readChan,
writeChan, writeList2Chan)
Line 424 ⟶ 426:
-- | Collect the 'Integer' carried in the second field of every 'File' item,
-- preserving the order of the input list. Items built with any other
-- constructor are skipped (a failed pattern match in a list-comprehension
-- generator simply drops that element).
fileSizes :: [Item] -> [Integer]
fileSizes items = [size | File _ size <- items]
-- | Collect the 'FilePath' carried by every 'Folder' item, preserving the
-- order of the input list. Items built with any other constructor are
-- skipped (a failed pattern match in a list-comprehension generator simply
-- drops that element).
folders :: [Item] -> [FilePath]
folders items = [path | Folder path <- items]
totalBytes :: [Item] -> Integer
Line 462 ⟶ 460:
placeGroups n fgMap = case findGroup n groupMinMax of
Just k -> Map.alter incrementCount k fgMap
Nothing ->
expandGroups :: Int -- ^ Desired number of frequency groups.
-> [Integer] -- ^ List of collected file sizes
-> Integer -- ^ Computed frequency group limit.
-> FrequencyGroups -- ^ Expanded 'FrequencyGroups'
expandGroups gsize fileSizes groupThreshold
| groupThreshold > 0 = loop 15 $ frequencyGroups gsize
| otherwise = frequencyGroups gsize
where
loop 0 gs = gs -- break out in case we can't go below threshold
loop n gs | all (<= groupThreshold) $ Map.elems gs = gs
| otherwise = loop (pred n) (expand gs)▼
▲ | otherwise = loop (pred n) (expand gs)
expand :: FrequencyGroups -> FrequencyGroups
Line 491 ⟶ 489:
:: FGKey -- ^ Group Key
-> Integer -- ^ Count
-> Maybe (FGKey, FrequencyGroups) -- ^
groupsFromGroup (min, max) count
| length range > 1 = Just ((min, max), frequencyGroups gsize range)
| otherwise = Nothing
where
range = filter (\n -> n >= min && n <= max)
displaySize :: Integer -> String
Line 533 ⟶ 531:
percentage :: Double
percentage = (realToFrac count / realToFrac filesCount) * 100
bars | size == 0 = "▍"
| otherwise = replicate size '█'
parseArgs :: [String] -> Either String (FilePath, Int)
Line 563:
-- | Program entry point.
--
-- Parses the command-line arguments with 'parseArgs'. On failure the error
-- message is written to 'stderr'. On success the items under the given path
-- are collected, the file/folder counts and total size are printed, and the
-- frequency groups produced by 'expandGroups' are displayed one per line.
main :: IO ()
main = parseArgs <$> getArgs >>= \case
  Left errorMessage -> hPutStrLn stderr errorMessage
  Right (path, groupSize) -> do
    items <- parallelItemCollector path
    let (fileCount, folderCount) = counts items
    printf "Total files: %d\nTotal folders: %d\n" fileCount folderCount
    printf "Total size: %s\n" $ displaySize $ totalBytes items
    putStrLn $ replicate 46 '-'
    -- NOTE(review): the argument list of this call was truncated in the
    -- source; it is reconstructed from the two helpers in the where clause
    -- and the parameter order of 'expandGroups' -- confirm against the
    -- full revision.
    let results = expandGroups groupSize (sizes items) (groupThreshold fileCount)
    mapM_ (displayFrequency fileCount) $ Map.assocs results
  where
    -- File sizes in ascending order.
    sizes = sort . fileSizes
    -- A quarter of the given count, rounded to the nearest integer.
    groupThreshold = round . (*0.25) . realToFrac</lang>
{{out}}
<pre style="height: 50rem;">$ filedist ~/Music
Using 4 worker threads
Total files: 688
Line 597 ⟶ 592:
243B <-> 323B = 99 14.390%: ██████████████
323B <-> 645B = 23 3.343%: ███
646B <-> 968B = 2 0.291%:
969B <-> 1.26KB = 1 0.145%:
3.19KB <-> 6.38KB = 12 1.744%: ██
6.38KB <-> 9.58KB = 22 3.198%: ███
Line 608 ⟶ 603:
108.41KB <-> 162.61KB = 23 3.343%: ███
162.61KB <-> 216.81KB = 8 1.163%: █
236.46KB <-> 472.93KB = 3 0.436%:
709.39KB <-> 945.85KB = 44 6.395%: ██████
3.30MB <-> 4.96MB = 4 0.581%: █
Line 614 ⟶ 609:
6.67MB <-> 13.33MB = 72 10.465%: ██████████
13.33MB <-> 20.00MB = 6 0.872%: █
20.00MB <-> 26.66MB = 1 0.145%:
$ filedist ~/Music 10
Line 631 ⟶ 626:
267B <-> 355B = 57 8.285%: ████████
356B <-> 444B = 20 2.907%: ███
801B <-> 889B = 2 0.291%:
959B <-> 1.87KB = 1 0.145%:
3.75KB <-> 4.68KB = 1 0.145%:
4.68KB <-> 5.62KB = 1 0.145%:
5.62KB <-> 6.55KB = 11 1.599%: ██
6.56KB <-> 7.49KB = 10 1.453%: █
Line 650 ⟶ 645:
94.59KB <-> 189.17KB = 42 6.105%: ██████
189.17KB <-> 283.76KB = 4 0.581%: █
283.76KB <-> 378.35KB = 2 0.291%:
851.28KB <-> 945.87KB = 44 6.395%: ██████
2.67MB <-> 5.33MB = 5 0.727%: █
Line 656 ⟶ 651:
8.00MB <-> 10.67MB = 35 5.087%: █████
10.67MB <-> 13.33MB = 16 2.326%: ██
13.33MB <-> 16.00MB = 3 0.436%:
16.00MB <-> 18.67MB = 3 0.436%:
24.00MB <-> 26.66MB = 1 0.145%:
</pre>
|