<lang Haskell>module MostFrequentK
where
import Data.List ( nubBynub , sortBy , groupBy )
import qualified Data.Set as S
|otherwise = count xs k
orderStatisticsorderedStatistics :: String -> [(Char , Int)] ▼
--frequency of letters
orderedStatistics s = sortBy myCriterion $ nub $ zip s ( map (\c -> count s c ) s )
letterFrequency :: String -> [( Char , Int )] ▼
letterFrequency s = nubBy (\e f -> fst e == fst f ) letterfrequencies
where
letterfrequenciesmyCriterion = zip s:: (Char map, (\cInt) -> count(Char s c, Int) s-> )Ordering
myCriterion :: ( Charc1 , Intn1) -> ( Char c2, Int n2) -> Ordering▼
|n1 > n2 = LT
|n1 < n2 = GT
|n1 == myCriterion d en2 = compare ( found ( fst d )c1 s ) ( found ( fst e )c2 s ) ▼
toPairsfound :: StringChar -> [String ] -> Int▼
found ce strs = ( length $ takeWhile ( /= ce ) str ) + 1 s▼
letterStatisticsmostFreqKHashing :: String -> [(CharInt ,-> Int)]String
mostFreqKHashing s n = foldl ((++)) [] $ map toString $ take n $ orderedStatistics s
letterStatistics myWord = sortBy (\c d -> compare ( snd c ) ( snd d ) ) $ letterFrequency myWord
--frequency of letters , if identical, ordered by first occurrence in string
--function mostFrequentKHashing starts at the last elements, therefore the sublists have to be reversed
▲orderStatistics :: String -> [(Char , Int)]
orderStatistics s = concat $ map ( reverse . ( sortBy myCriterion ) ) orderedStatistics
where
toString :: (Char , Int ) -> String ▼
orderedStatistics = groupBy (\g h -> snd g == snd h ) $ letterStatistics s
foundtoString ::( Charc ->, Stringi ) = c : ->show Inti
▲ found c str = ( length $ takeWhile ( /= c ) str ) + 1
▲ myCriterion :: (Char , Int) -> (Char , Int ) -> Ordering
▲ myCriterion d e = compare ( found ( fst d ) s ) ( found ( fst e ) s )
mostFrequentKHashing :: String -> Int -> String
mostFrequentKHashing s n = toString lastElement ++ toString secondFromLast
statistics = orderStatistics s
lastElement = last statistics
secondFromLast = last $ init statistics
▲ toString :: (Char , Int ) -> String
toString ( c , i ) = c : show i
mostFreqKSimilarity :: String -> String -> Int
mostFreqKSimilarity s t = fromEnumsnd ($ lasthead $ headS.toList list$ )S.fromList -( 48doublets s ) `S.intersection`
S.fromList ( doublets t )
where
▲ toPairs :: String -> [String]
toPair :: String -> (Char , Int)
toPairs st = [take 2 $ drop start st | start <- [0,2 ..length st - 2]] ▼
listtoPair =s S.toList= ( S.fromList ( toPairshead s ), `S.intersection` S.fromListfromEnum ( toPairshead t$ tail s ) - 48 )
▲letterFrequency doublets :: String -> [( Char , Int )]
▲ toPairsdoublets ststr = map toPair [take 2 $ drop start ststr | start <- [0 , 2 ..length ststr - 2]]
mostFreqKSDF :: String -> String -> Int -> Int -> Int ▼
mostFreqKSDF s1 s2 k dist = dist - mostFreqKSimilarity ( mostFrequentKHashing s1 k ) ( mostFrequentKHashing s2 k )
▲mostFreqKSDF :: String -> String -> Int -> Int -> Int
mostFreqKSDF s t n = mostFreqKSimilarity ( mostFreqKHashing s n ) (mostFreqKHashing t n )
</lang>
{{out}}
|