File size distribution: Difference between revisions
Content added Content deleted
m (Minor edit to C++ code - exclude symbolic links) |
(added Haskell) |
||
Line 390: | Line 390: | ||
Total size of files : 74,205,408 bytes |
Total size of files : 74,205,408 bytes |
||
</pre> |
</pre> |
||
=={{header|Haskell}}== |
|||
Uses grouped frequency distribution. Program arguments include directory and frequency distribution group size. |
|||
<lang haskell>{-# LANGUAGE TupleSections #-} |
|||
import Control.Monad (filterM, join) |
|||
import Data.Char (isDigit) |
|||
import Data.List (sort, genericLength, genericTake) |
|||
import System.Directory (getDirectoryContents, doesFileExist |
|||
, canonicalizePath, doesDirectoryExist) |
|||
import System.Environment (getArgs) |
|||
import System.FilePath.Posix (pathSeparator, (</>)) |
|||
import System.IO (hFileSize, withFile, IOMode(ReadMode), FilePath) |
|||
import Text.Printf (printf) |
|||
data Item = File FilePath Integer |
|||
| Folder FilePath |
|||
deriving (Show) |
|||
frequencyGroups :: (Integral a) => Int -> [a] -> [((a,a), a)] |
|||
frequencyGroups totalGroups xs = placeGroups xs groupMinMax |
|||
where |
|||
range = maximum xs - minimum xs |
|||
groupSize = succ $ ceiling $ realToFrac range / realToFrac totalGroups |
|||
groups = genericTake (succ totalGroups) (iterate (+groupSize) 0) |
|||
groupMinMax = (,0) <$> zip groups (pred <$> tail groups) |
|||
placeGroups [] gs = gs |
|||
placeGroups (d:ds) gs = placeGroups ds $ |
|||
fmap (\g@((min,max), count) -> if d >= min && d <= max |
|||
then ((min, max), succ count) |
|||
else g) gs |
|||
fileSizes :: [Item] -> [Integer] |
|||
fileSizes = foldr f [] |
|||
where |
|||
f (File _ n) acc = n : acc |
|||
f _ acc = acc |
|||
totalBytes :: [Item] -> Integer |
|||
totalBytes = sum . fileSizes |
|||
displaySize :: Integer -> String |
|||
displaySize n |
|||
| n <= 2^10 = show n <> "B" |
|||
| n >= 2^10 && n <= 2^20 = show (n `div` 2^10) <> "KB" |
|||
| n >= 2^20 && n <= 2^30 = show (n `div` 2^20) <> "MB" |
|||
| n >= 2^30 && n <= 2^40 = show (n `div` 2^30) <> "GB" |
|||
| n >= 2^40 && n <= 2^50 = show (n `div` 2^40) <> "TB" |
|||
| otherwise = "Too large!" |
|||
collectItems :: FilePath -> IO [Item] |
|||
collectItems p = do |
|||
contents <- fmap (p </>) <$> getDirectoryContents p |
|||
files <- filterM doesFileExist contents |
|||
folders <- drop 2 <$> filterM doesDirectoryExist contents |
|||
a <- mapM (\f -> File f <$> withFile f ReadMode hFileSize) files |
|||
b <- mapM collectItems folders |
|||
pure $ a <> join b <> fmap Folder folders |
|||
displayFrequency :: ((Integer, Integer), Integer) -> IO () |
|||
displayFrequency ((min, max), count) = |
|||
printf "%5s <-> %5s = %d\n" (displaySize min) (displaySize max) count |
|||
parseArgs :: [String] -> Either String (FilePath, Int) |
|||
parseArgs (x:y:xs) |
|||
| all isDigit y = Right (x, read y) |
|||
| otherwise = Left "Invalid frequency group size" |
|||
parseArgs (x:xs) = Right (x, 4) |
|||
parseArgs _ = Right (".", 4) |
|||
main :: IO () |
|||
main = do |
|||
args <- getArgs |
|||
case parseArgs args of |
|||
Left errorMessage -> putStrLn errorMessage |
|||
Right (path, groupSize) -> do |
|||
items <- collectItems path |
|||
let (fileCount, folderCount) = counts items |
|||
printf "Total files: %d\n" fileCount |
|||
printf "Total folders: %d\n" folderCount |
|||
printf "Total size: %s\n" $ displaySize $ totalBytes items |
|||
putStrLn "\nDistribution:" |
|||
mapM_ displayFrequency $ filteredGroups groupSize items |
|||
where |
|||
counts :: [Item] -> (Int, Int) |
|||
counts = |
|||
foldr (\x (a, b) -> case x of File _ _ -> (succ a, b) |
|||
Folder _ -> (a, succ b) |
|||
) (0, 0) |
|||
filteredGroups n = filter ((>0) . snd ) . frequencyGroups n . sort . fileSizes</lang> |
|||
{{out}} |
|||
<pre>$ filedist . 10 |
|||
Total files: 431814 |
|||
Total folders: 65548 |
|||
Total size: 6GB |
|||
Distribution: |
|||
0B <-> 39MB = 431798 |
|||
39MB <-> 78MB = 6 |
|||
78MB <-> 117MB = 3 |
|||
117MB <-> 156MB = 5 |
|||
156MB <-> 195MB = 1 |
|||
352MB <-> 391MB = 1</pre> |
|||
=={{header|Julia}}== |
=={{header|Julia}}== |
||
{{works with|Julia|0.6}} |
{{works with|Julia|0.6}} |