File size distribution: Difference between revisions

m
Line 392:
=={{header|Haskell}}==
Uses a grouped frequency distribution. Program arguments are optional: a starting directory and an initial frequency distribution group size. Groups with a count of 0 are removed. After the first frequency distribution is computed, any group that exceeds 25% of the total file count is broken down further, when possible.
<lang haskell>{-# LANGUAGE LambdaCase #-}
<lang haskell>import Control.Concurrent (forkIO, setNumCapabilities)
 
<lang haskell>import Control.Concurrent (forkIO, setNumCapabilities)
import Control.Concurrent.Chan (Chan, newChan, readChan,
writeChan, writeList2Chan)
Line 424 ⟶ 426:
 
-- | Collect the 'Integer' field of every 'File' item, in input order.
-- Items built with any other constructor are skipped.
-- NOTE(review): the field is presumably the file size in bytes — confirm
-- against the 'Item' declaration.
fileSizes :: [Item] -> [Integer]
fileSizes = foldr f []
  where
    -- Prepend the value carried by a 'File'; leave the accumulator
    -- untouched for everything else.
    f (File _ n) acc = n : acc
    f _          acc = acc
 
-- | Collect the path of every 'Folder' item, in input order.
-- Items built with any other constructor are skipped.
folders :: [Item] -> [FilePath]
folders = foldr f []
  where
    -- Prepend the path carried by a 'Folder'; leave the accumulator
    -- untouched for everything else.
    f (Folder p) acc = p : acc
    f _          acc = acc
 
totalBytes :: [Item] -> Integer
Line 462 ⟶ 460:
placeGroups n fgMap = case findGroup n groupMinMax of
Just k -> Map.alter incrementCount k fgMap
Nothing -> errorfgMap -- "Should never happen".
 
expandGroups :: Int -- ^ Desired number of frequency groups.
-> [Integer] -- ^ List of collected file sizes. Must be sorted.
-> Integer -- ^ Computed frequency group limit.
-> FrequencyGroups -- ^ Expanded 'FrequencyGroups'
expandGroups gsize fileSizes groupThreshold
| groupThreshold > 0 = loop 15 $ frequencyGroups gsize fileSizessortedFileSizes
| otherwise = frequencyGroups gsize fileSizessortedFileSizes
where
sizessortedFileSizes = sort . fileSizes
loop 0 gs = gs -- break out in case we can't go below threshold
loop n gs | all (<= groupThreshold) $ Map.elems gs = gs
| otherwise = loop (pred n) (expand gs)
| all (<= groupThreshold) $ Map.elems gs = gs
| otherwise = loop (pred n) (expand gs)
 
expand :: FrequencyGroups -> FrequencyGroups
Line 491 ⟶ 489:
:: FGKey -- ^ Group Key
-> Integer -- ^ Count
-> Maybe (FGKey, FrequencyGroups) -- ^ Returns expanded 'FrequencyGroups' with base key it replaces.
groupsFromGroup (min, max) count
| length range > 1 = Just ((min, max), frequencyGroups gsize range)
| otherwise = Nothing
where
range = filter (\n -> n >= min && n <= max) fileSizessortedFileSizes
 
displaySize :: Integer -> String
Line 533 ⟶ 531:
percentage :: Double
percentage = (realToFrac count / realToFrac filesCount) * 100
barssize = replicate (round percentage) '█'
bars | size == 0 = "▍"
| otherwise = replicate size '█'
 
parseArgs :: [String] -> Either String (FilePath, Int)
Line 563:
 
-- | Program entry point.
--
-- Parses the command-line arguments with 'parseArgs'. On failure the error
-- message is written to 'stderr'. On success the items under the given path
-- are collected, the totals are printed, and one line is displayed per
-- frequency group.
--
-- Requires the @LambdaCase@ extension (enabled at the top of the listing).
main :: IO ()
main = parseArgs <$> getArgs >>= \case
  Left errorMessage -> hPutStrLn stderr errorMessage
  Right (path, groupSize) -> do
    items <- parallelItemCollector path
    let (fileCount, folderCount) = counts items
    printf "Total files: %d\nTotal folders: %d\n" fileCount folderCount
    printf "Total size: %s\n" $ displaySize $ totalBytes items
    printf "\nDistribution:\n\n%9s <-> %9s %7s\n" "From" "To" "Count"
    putStrLn $ replicate 46 '-'
    -- NOTE(review): the sizes are passed unsorted here; 'expandGroups'
    -- appears to sort them itself (its where-clause binds a sorted list) —
    -- confirm before relying on it.
    let results = expandGroups groupSize (fileSizes items) (groupThreshold fileCount)
    mapM_ (displayFrequency fileCount) $ Map.assocs results
  where
    -- A group is expanded further once it holds more than 25% of all files
    -- (the file count scaled by 0.25 and rounded).
    groupThreshold = round . (*0.25) . realToFrac</lang>
{{out}}
<pre style="height: 50rem;">$ filedist ~/Music 1 ↵
Using 4 worker threads
Total files: 688
Line 597 ⟶ 592:
243B <-> 323B = 99 14.390%: ██████████████
323B <-> 645B = 23 3.343%: ███
646B <-> 968B = 2 0.291%:
969B <-> 1.26KB = 1 0.145%:
3.19KB <-> 6.38KB = 12 1.744%: ██
6.38KB <-> 9.58KB = 22 3.198%: ███
Line 608 ⟶ 603:
108.41KB <-> 162.61KB = 23 3.343%: ███
162.61KB <-> 216.81KB = 8 1.163%: █
236.46KB <-> 472.93KB = 3 0.436%:
709.39KB <-> 945.85KB = 44 6.395%: ██████
3.30MB <-> 4.96MB = 4 0.581%: █
Line 614 ⟶ 609:
6.67MB <-> 13.33MB = 72 10.465%: ██████████
13.33MB <-> 20.00MB = 6 0.872%: █
20.00MB <-> 26.66MB = 1 0.145%:
 
$ filedist ~/Music 10
Line 631 ⟶ 626:
267B <-> 355B = 57 8.285%: ████████
356B <-> 444B = 20 2.907%: ███
801B <-> 889B = 2 0.291%:
959B <-> 1.87KB = 1 0.145%:
3.75KB <-> 4.68KB = 1 0.145%:
4.68KB <-> 5.62KB = 1 0.145%:
5.62KB <-> 6.55KB = 11 1.599%: ██
6.56KB <-> 7.49KB = 10 1.453%: █
Line 650 ⟶ 645:
94.59KB <-> 189.17KB = 42 6.105%: ██████
189.17KB <-> 283.76KB = 4 0.581%: █
283.76KB <-> 378.35KB = 2 0.291%:
851.28KB <-> 945.87KB = 44 6.395%: ██████
2.67MB <-> 5.33MB = 5 0.727%: █
Line 656 ⟶ 651:
8.00MB <-> 10.67MB = 35 5.087%: █████
10.67MB <-> 13.33MB = 16 2.326%: ██
13.33MB <-> 16.00MB = 3 0.436%:
16.00MB <-> 18.67MB = 3 0.436%:
24.00MB <-> 26.66MB = 1 0.145%:
</pre>
 
Anonymous user