File size distribution: Difference between revisions

Content added Content deleted
Line 398: Line 398:
import Data.List (sort, genericLength, genericTake)
import Data.List (sort, genericLength, genericTake)
import System.Directory (getDirectoryContents, doesFileExist
import System.Directory (getDirectoryContents, doesFileExist
, canonicalizePath, doesDirectoryExist)
, doesDirectoryExist)
import System.Environment (getArgs)
import System.Environment (getArgs)
import System.FilePath.Posix (pathSeparator, (</>))
import System.FilePath.Posix (pathSeparator, (</>))
Line 408: Line 408:
deriving (Show)
deriving (Show)


-- NOTE(review): this span looks like a two-revision diff view, so most lines
-- appear twice (one copy per revision) -- confirm before treating it as a
-- single compilable definition.
-- | One bucket, laid out as ((lower bound, upper bound), count).
type FrequencyGroup = ((Integer, Integer), Integer)
-- | Build @totalGroups@ equal-width buckets, each with an initial count of 0,
-- and pass them together with @xs@ to @placeGroups@.
-- @placeGroups@ is defined outside this excerpt; presumably it tallies every
-- value of @xs@ into its bucket -- TODO confirm.
-- 'maximum' and 'minimum' are partial: an empty @xs@ raises an error once
-- @range@ is evaluated.
frequencyGroups :: (Integral a) => Int -> [a] -> [((a, a), a)]

frequencyGroups :: Int -> [Integer] -> [FrequencyGroup]
frequencyGroups totalGroups xs = placeGroups xs groupMinMax
frequencyGroups totalGroups xs = placeGroups xs groupMinMax
where
where
-- Spread between the largest and smallest input value.
range = maximum xs - minimum xs
range = maximum xs - minimum xs
-- Bucket width: range divided by the bucket count, rounded up, plus one.
groupSize = succ $ ceiling $ realToFrac range / realToFrac totalGroups
groupSize = succ $ ceiling $ realToFrac range / realToFrac totalGroups
-- Bucket boundaries 0, groupSize, 2*groupSize, ...; totalGroups + 1 of them.
-- Note that the boundaries start at 0, not at the minimum of xs.
groups = genericTake (succ totalGroups) (iterate (+groupSize) 0)
groups = genericTake (succ totalGroups) $ iterate (+groupSize) 0
-- Pair each boundary with (next boundary - 1) and attach a zero count.
groupMinMax = (,0) <$> zip groups (pred <$> tail groups)
groupMinMax = (,0) <$> zip groups (pred <$> tail groups)


Line 451: Line 453:
pure $ a <> join b <> fmap Folder folders
pure $ a <> join b <> fmap Folder folders


-- NOTE(review): the lines below appear in pairs, apparently one copy per
-- revision of a diff view -- confirm before treating this as one definition.
-- | Print a single bucket as @min <-> max = count@ followed by a newline.
-- Both bounds are rendered through @displaySize@, which is defined outside
-- this excerpt; the count is printed as a plain decimal.
displayFrequency :: ((Integer, Integer), Integer) -> IO ()
displayFrequency :: FrequencyGroup -> IO ()
-- The pattern variables @min@ and @max@ shadow the Prelude functions of the
-- same name inside this definition.
displayFrequency ((min, max), count) =
displayFrequency ((min, max), count) =
-- Each rendered bound goes through a %5s field (presumably Text.Printf's
-- printf, whose import is not visible here -- confirm).
printf "%5s <-> %5s = %d\n" (displaySize min) (displaySize max) count
printf "%5s <-> %5s = %d\n" (displaySize min) (displaySize max) count