Jump to content

File size distribution: Difference between revisions

m
→‎{{header|Haskell}}: use Data.Map. Some other optimizations.
m (→‎{{header|Haskell}}: use Data.Map. Some other optimizations.)
Line 392:
=={{header|Haskell}}==
Uses a grouped frequency distribution. Program arguments are optional. Arguments include starting directory and initial frequency distribution group size. Distribution groups of 0 are removed. After the first frequency distribution is computed it further breaks it down for any group that exceeds 25% of the total file count, when possible.
<lang haskell>import Control.Concurrent (forkIO, setNumCapabilities)
<lang haskell>{-# LANGUAGE TupleSections #-}
import Control.Concurrent.Chan (Chan, newChan, readChan,
 
, doesDirectoryExist writeChan, pathIsSymbolicLinkwriteList2Chan)
import Control.Concurrent (forkIO, setNumCapabilities)
import Control.Concurrent.ChanException (Chan, newChan, readChan, writeChan (IOException, writeList2Chancatch)
import Control.ExceptionMonad (catchfilterM, IOException)forever, join,
import Control.Monad (filterM, join, replicateM, replicateM_, forever, (>=>))
import Data.Char Control.Parallel.Strategies (parTraversable, rseq, (isDigit)using,
import Data.List (sort withStrategy)
import GHC Data.ConcChar (getNumProcessorsisDigit)
import System Data.DirectoryList (getDirectoryContentsfind, doesFileExistsort)
import qualified Data.Map.Strict as Map
, doesDirectoryExist, pathIsSymbolicLink)
import System GHC.EnvironmentConc (getArgsgetNumProcessors)
import System.FilePath.PosixDirectory ((</>))doesDirectoryExist, doesFileExist,
getDirectoryContents,
import System.IO (hFileSize, withFile, IOMode(ReadMode), FilePath
, hPutStrLn, stderr pathIsSymbolicLink)
import Text.Printf System.Environment (printf, hPrintf (getArgs)
import System.FilePath.Posix ((</>))
import System.IO (hFileSize, withFileFilePath, IOMode (ReadMode), FilePath
hFileSize, hPutStrLn, stderr,
withFile)
import Text.Printf (hPrintf, printf)
 
data Item = File FilePath Integer | Folder FilePath deriving (Show)
 
type FrequencyGroup = ((Integer, Integer), Integer)
type FGKey = (Integer, Integer)
type FrequencyGroup = ((Integer, Integer)FGKey, Integer)
type FrequencyGroups = Map.Map FGKey Integer
 
newFrequencyGroups :: FrequencyGroups
newFrequencyGroups = Map.empty
 
fileSizes :: [Item] -> [Integer]
Line 431 ⟶ 442:
Folder _ -> (a, succ b)) (0, 0)
 
-- |Creates a 'FrequencyGroups' from the provided size and data set.
frequencyGroups :: Int -> [Integer] -> [FrequencyGroup]
frequencyGroups _:: []Int = [] -- ^ Desired number of frequency groups.
-> [Integer] -- ^ List of collected file sizes. Must be sorted.
frequencyGroups totalGroups xs
-> FrequencyGroups -- ^ Returns a 'FrequencyGroups' for the file sizes.
| length xs == 1 = [((head xs, head xs), 1)]
frequencyGroups _ [] = newFrequencyGroups
| otherwise = placeGroups xs groupMinMax
frequencyGroups totalGroups xs
| length xs == 1 = [( Map.singleton (head xs, head xs), 1)]
| otherwise = foldr placeGroups newFrequencyGroups xs `using` parTraversable rseq
where
range = maximum xs - minimum xs
groupSize = succ $ ceiling $ realToFrac range / realToFrac totalGroups
groups = takeWhile (<=groupSize + maximum xs) $ iterate (+groupSize) 0
groupMinMax = (,0) <$> zip groups (pred <$> tail groups)
findGroup n = find (\(low, high) -> n >= low && n <= high)
 
incrementCount (Just n) = Just (succ n) -- Update count for range.
placeGroups [] = id
incrementCount Nothing = Just 1 -- Insert new range with initial count.
placeGroups (d:ds) = placeGroups ds .
fmap (\g@((min,max), count) ->
if d >= min && d <= max
then ((min, max), succ count)
else g
)
 
placeGroups n fgMap = case findGroup n groupMinMax of
expandGroups :: Int -> [Integer] -> Integer -> [FrequencyGroup] -> [FrequencyGroup]
Just k -> Map.alter incrementCount k fgMap
Nothing -> error "Should never happen"
 
expandGroups :: Int -- ^ Desired number of frequency groups.
-> [Integer] -- ^ List of collected file sizes. Must be sorted.
-> Integer -- ^ Computed frequency group limit. Values above this will be further expanded.
-> FrequencyGroups -- ^ Initial 'FrequencyGroups'
-> FrequencyGroups -- ^ Expanded 'FrequencyGroups'
expandGroups gsize fileSizes groupThreshold
| groupThreshold > 0 = loop 15
Line 457 ⟶ 475:
loop 0 gs = gs -- break out in case we can't go below threshold
loop n gs
| all ((<= groupThreshold) $ Map. snd)elems gs = gs
| otherwise = loop (pred n) $ (expand gs)
 
expand :: FrequencyGroups -> FrequencyGroups
expand = foldr f . withStrategy (parTraversable rseq) <*>
Map.mapWithKey groupsFromGroup . overThreshold
 
)where
overThreshold = Map.filter (> groupThreshold)
 
f :: Maybe (FGKey, FrequencyGroups) -- ^ expanded frequency group
expand = ((\g@((min, max), count) ->
-> FrequencyGroups -- ^ accumulator
if count > groupThreshold then
-> FrequencyGroups -- ^ merged accumulator
groupsFromGroup g
f (Just (k, fg)) acc = Map.union (Map.delete k acc) fg
else
[g]f Nothing acc = acc
) =<<)
 
groupsFromGroup g
groupsFromGroup ((min, max), count)
:: FGKey -- ^ Group Key
| length range > 1 = frequencyGroups gsize range
-> Integer -- ^ Count
| otherwise = [((min, max), count)]
-> Maybe (FGKey, FrequencyGroups) -- ^ Tuple with key and 'FrequencyGroups' to replace the key
where
collectBetween min max = filtergroupsFromGroup (\n -> n >= min && n <=, max) count
| length range > 1 = collectBetweenJust ((min, max), fileSizesfrequencyGroups gsize range)
| otherwise = Nothing
else g where
collectBetween min max = filter (\n -> n >= min && n <= max)
range = collectBetween min max fileSizes
 
displaySize :: Integer -> String
Line 554 ⟶ 582:
putStrLn $ replicate 37 '-'
let results = expandedGroups groupSize (sizes items) (groupThreshold fileCount) items
.-- let results = initialGroups gsize</lang>groupSize items
mapM_ (displayFrequency fileCount) $ Map.assocs results
where
sizes = sort . fileSizes
initialGroups n = filter ((>0) . snd) . frequencyGroups n . sizes
groupThreshold = round . (*0.25) . realToFrac
expandedGroups gsize sizes n = filter ((>0) . snd)
. expandGroups gsize sizes n . initialGroups gsize</lang>
. initialGroups gsize</lang>
{{out}}
<pre style="height: 50rem;">$ filedist ~/Music
Anonymous user
Cookies help us deliver our services. By using our services, you agree to our use of cookies.