Anonymous user
File size distribution: Difference between revisions
m
→{{header|Haskell}}: use Data.Map. Some other optimizations.
m (→{{header|Haskell}}: use Data.Map. Some other optimizations.) |
|||
Line 392:
=={{header|Haskell}}==
Uses a grouped frequency distribution. Program arguments are optional; they are the starting directory and the initial frequency distribution group size. Distribution groups with a count of 0 are removed. After the first frequency distribution is computed, any group that exceeds 25% of the total file count is broken down further, when possible.
<lang haskell>import Control.Concurrent (forkIO, setNumCapabilities)▼
import Control.Concurrent.Chan (Chan, newChan, readChan,
▲import Control.Concurrent (forkIO, setNumCapabilities)
import Control.
import Control.
import
import
import
import qualified Data.Map.Strict as Map
▲ , doesDirectoryExist, pathIsSymbolicLink)
import
import System.
getDirectoryContents,
import System.IO (hFileSize, withFile, IOMode(ReadMode), FilePath▼
import
import System.FilePath.Posix ((</>))
hFileSize, hPutStrLn, stderr,
withFile)
import Text.Printf (hPrintf, printf)
-- | An entry in the file system: either a 'File', carrying its path and an
-- 'Integer' (presumably the file size, given 'fileSizes' -- confirm against
-- the code that constructs it), or a 'Folder', carrying only its path.
data Item = File FilePath Integer | Folder FilePath deriving (Show)
type FrequencyGroup = ((Integer, Integer), Integer)▼
-- | Key of a frequency group: a @(low, high)@ pair of bounds. A value @n@
-- belongs to the group when @n >= low && n <= high@.
type FGKey = (Integer, Integer)
-- | Frequency distribution: maps each group's 'FGKey' to the count of values
-- that fell into that group.
type FrequencyGroups = Map.Map FGKey Integer
-- | The empty frequency distribution: a 'FrequencyGroups' with no groups.
-- Used as the starting accumulator when building a distribution.
newFrequencyGroups :: FrequencyGroups
newFrequencyGroups = mempty
fileSizes :: [Item] -> [Integer]
Line 431 ⟶ 442:
Folder _ -> (a, succ b)) (0, 0)
-- |Creates a 'FrequencyGroups' from the provided size and data set.
frequencyGroups
-> [Integer] -- ^ List of collected file sizes. Must be sorted.
frequencyGroups totalGroups xs ▼
-> FrequencyGroups -- ^ Returns a 'FrequencyGroups' for the file sizes.
| length xs == 1 = [((head xs, head xs), 1)]▼
frequencyGroups _ [] = newFrequencyGroups
| otherwise = foldr placeGroups newFrequencyGroups xs `using` parTraversable rseq
where
range = maximum xs - minimum xs
groupSize = succ $ ceiling $ realToFrac range / realToFrac totalGroups
groups = takeWhile (<=groupSize + maximum xs) $ iterate (+groupSize) 0
groupMinMax =
findGroup n = find (\(low, high) -> n >= low && n <= high)
incrementCount (Just n) = Just (succ n) -- Update count for range.
incrementCount Nothing = Just 1 -- Insert new range with initial count.
else g▼
)▼
placeGroups n fgMap = case findGroup n groupMinMax of
Just k -> Map.alter incrementCount k fgMap
Nothing -> error "Should never happen"
expandGroups :: Int -- ^ Desired number of frequency groups.
-> [Integer] -- ^ List of collected file sizes. Must be sorted.
-> Integer -- ^ Computed frequency group limit. Values above this will be further expanded.
-> FrequencyGroups -- ^ Initial 'FrequencyGroups'
-> FrequencyGroups -- ^ Expanded 'FrequencyGroups'
expandGroups gsize fileSizes groupThreshold
| groupThreshold > 0 = loop 15
Line 457 ⟶ 475:
loop 0 gs = gs -- break out in case we can't go below threshold
loop n gs
| all
| otherwise = loop (pred n)
expand :: FrequencyGroups -> FrequencyGroups
expand = foldr f . withStrategy (parTraversable rseq) <*>
Map.mapWithKey groupsFromGroup . overThreshold
overThreshold = Map.filter (> groupThreshold)
f :: Maybe (FGKey, FrequencyGroups) -- ^ expanded frequency group
-> FrequencyGroups -- ^ accumulator
-> FrequencyGroups -- ^ merged accumulator
groupsFromGroup g▼
f (Just (k, fg)) acc = Map.union (Map.delete k acc) fg
:: FGKey -- ^ Group Key
-> Integer -- ^ Count
-> Maybe (FGKey, FrequencyGroups) -- ^ Tuple with key and 'FrequencyGroups' to replace the key
| length range > 1 =
| otherwise = Nothing
collectBetween min max = filter (\n -> n >= min && n <= max)
range = collectBetween min max fileSizes
displaySize :: Integer -> String
Line 554 ⟶ 582:
putStrLn $ replicate 37 '-'
let results = expandedGroups groupSize (sizes items) (groupThreshold fileCount) items
mapM_ (displayFrequency fileCount) $ Map.assocs results
where
sizes = sort . fileSizes
initialGroups n =
groupThreshold = round . (*0.25) . realToFrac
expandedGroups gsize sizes n =
▲ . initialGroups gsize</lang>
{{out}}
<pre style="height: 50rem;">$ filedist ~/Music
|