File size distribution: Difference between revisions

added Haskell
m (Minor edit to C++ code - exclude symbolic links)
(added Haskell)
Line 390:
Total size of files : 74,205,408 bytes
</pre>
=={{header|Haskell}}==
Uses grouped frequency distribution. Program arguments include directory and frequency distribution group size.
<lang haskell>{-# LANGUAGE TupleSections #-}
 
import Control.Monad (filterM, join)
import Data.Char (isDigit)
import Data.List (sort, genericLength, genericTake)
import System.Directory (getDirectoryContents, doesFileExist
, canonicalizePath, doesDirectoryExist)
import System.Environment (getArgs)
import System.FilePath.Posix (pathSeparator, (</>))
import System.IO (hFileSize, withFile, IOMode(ReadMode), FilePath)
import Text.Printf (printf)
 
data Item = File FilePath Integer
| Folder FilePath
deriving (Show)
 
frequencyGroups :: (Integral a) => Int -> [a] -> [((a,a), a)]
frequencyGroups totalGroups xs = placeGroups xs groupMinMax
where
range = maximum xs - minimum xs
groupSize = succ $ ceiling $ realToFrac range / realToFrac totalGroups
groups = genericTake (succ totalGroups) (iterate (+groupSize) 0)
groupMinMax = (,0) <$> zip groups (pred <$> tail groups)
 
placeGroups [] gs = gs
placeGroups (d:ds) gs = placeGroups ds $
fmap (\g@((min,max), count) -> if d >= min && d <= max
then ((min, max), succ count)
else g) gs
 
fileSizes :: [Item] -> [Integer]
fileSizes = foldr f []
where
f (File _ n) acc = n : acc
f _ acc = acc
 
totalBytes :: [Item] -> Integer
totalBytes = sum . fileSizes
 
displaySize :: Integer -> String
displaySize n
| n <= 2^10 = show n <> "B"
| n >= 2^10 && n <= 2^20 = show (n `div` 2^10) <> "KB"
| n >= 2^20 && n <= 2^30 = show (n `div` 2^20) <> "MB"
| n >= 2^30 && n <= 2^40 = show (n `div` 2^30) <> "GB"
| n >= 2^40 && n <= 2^50 = show (n `div` 2^40) <> "TB"
| otherwise = "Too large!"
 
collectItems :: FilePath -> IO [Item]
collectItems p = do
contents <- fmap (p </>) <$> getDirectoryContents p
files <- filterM doesFileExist contents
folders <- drop 2 <$> filterM doesDirectoryExist contents
a <- mapM (\f -> File f <$> withFile f ReadMode hFileSize) files
b <- mapM collectItems folders
pure $ a <> join b <> fmap Folder folders
 
displayFrequency :: ((Integer, Integer), Integer) -> IO ()
displayFrequency ((min, max), count) =
printf "%5s <-> %5s = %d\n" (displaySize min) (displaySize max) count
 
parseArgs :: [String] -> Either String (FilePath, Int)
parseArgs (x:y:xs)
| all isDigit y = Right (x, read y)
| otherwise = Left "Invalid frequency group size"
parseArgs (x:xs) = Right (x, 4)
parseArgs _ = Right (".", 4)
 
main :: IO ()
main = do
args <- getArgs
case parseArgs args of
Left errorMessage -> putStrLn errorMessage
Right (path, groupSize) -> do
items <- collectItems path
let (fileCount, folderCount) = counts items
printf "Total files: %d\n" fileCount
printf "Total folders: %d\n" folderCount
printf "Total size: %s\n" $ displaySize $ totalBytes items
putStrLn "\nDistribution:"
mapM_ displayFrequency $ filteredGroups groupSize items
where
counts :: [Item] -> (Int, Int)
counts =
foldr (\x (a, b) -> case x of File _ _ -> (succ a, b)
Folder _ -> (a, succ b)
) (0, 0)
 
filteredGroups n = filter ((>0) . snd ) . frequencyGroups n . sort . fileSizes</lang>
{{out}}
<pre>$ filedist . 10
Total files: 431814
Total folders: 65548
Total size: 6GB
 
Distribution:
0B <-> 39MB = 431798
39MB <-> 78MB = 6
78MB <-> 117MB = 3
117MB <-> 156MB = 5
156MB <-> 195MB = 1
352MB <-> 391MB = 1</pre>
=={{header|Julia}}==
{{works with|Julia|0.6}}
Anonymous user