Textonyms: Difference between revisions
Content added Content deleted
m (→{{header|Haskell}}: hlint, hindent – minor tidying) |
(→{{header|Haskell}}: Added a variant expressed in terms of Data.Map and traverse. Updated output.) |
||
Line 878: | Line 878: | ||
["", "Top 5 in length:"] ++ |
["", "Top 5 in length:"] ++ |
||
fmap |
fmap |
||
showTextonym |
''Italic text'' showTextonym |
||
(take 5 $ sortBy (flip compare `on` (length . fst . head)) textonymList)</lang> |
(take 5 $ sortBy (flip compare `on` (length . fst . head)) textonymList)</lang> |
||
{{out}} |
{{out}} |
||
Line 898: | Line 898: | ||
49376746242 => hydrophobia hydrophobic |
49376746242 => hydrophobia hydrophobic |
||
2668368466 => contention convention </pre> |
2668368466 => contention convention </pre> |
||
Or, in terms of ''Data.Map'' and ''traverse'': |
|||
<lang haskell>{-# LANGUAGE TupleSections #-} |
|||
import Data.List (groupBy, sortBy, sortOn) |
|||
import qualified Data.Map as M |
|||
import Data.Maybe (mapMaybe) |
|||
import Data.Ord (comparing) |
|||
import Data.Function (on) |
|||
digitEncoded :: [String] -> [(String, String)] |
|||
digitEncoded = mapMaybe (\cs -> traverse toKey cs >>= (Just . (, cs))) |
|||
codeGrouped :: [(String, String)] -> [[(String, String)]] |
|||
codeGrouped = groupBy (on (==) fst) . sortOn fst |
|||
textonymSubset :: [[(String, String)]] -> [[(String, String)]] |
|||
textonymSubset = filter ((1 <) . length) |
|||
toKey :: Char -> Maybe Char |
|||
toKey = flip M.lookup charDict |
|||
charDict :: M.Map Char Char |
|||
charDict = |
|||
M.fromList $ |
|||
concat $ |
|||
zipWith |
|||
(fmap . flip (,)) |
|||
(head . show <$> [2 ..]) |
|||
(words "abc def ghi jkl mno pqrs tuv wxyz") |
|||
showTextonym :: Int -> [(String, String)] -> String |
|||
showTextonym w ts = |
|||
concat [justifyRight w ' ' (fst (head ts)), " -> ", unwords $ fmap snd ts] |
|||
justifyRight :: Int -> Char -> String -> String |
|||
justifyRight n c = (drop . length) <*> (replicate n c ++) |
|||
main :: IO () |
|||
main = do |
|||
let fp = "unixdict.txt" |
|||
s <- readFile fp |
|||
let encodings = digitEncoded $ lines s |
|||
codeGroups = codeGrouped encodings |
|||
textonyms = textonymSubset codeGroups |
|||
ambiguous = take 5 $ sortBy (flip (comparing length)) textonyms |
|||
longer = take 5 $ sortBy (flip (comparing (length . fst . head))) textonyms |
|||
colWidth xs = maximum $ map (length . fst . head) xs |
|||
[wa, wl] = colWidth <$> [ambiguous, longer] |
|||
mapM_ putStrLn $ |
|||
[ "There are " ++ |
|||
show (length encodings) ++ |
|||
" words in " ++ |
|||
fp ++ " which can be represented by the digit key mapping." |
|||
, "They require " ++ |
|||
show (length codeGroups) ++ " digit combinations to represent them." |
|||
, show (length textonyms) ++ " digit combinations represent textonyms." |
|||
, "" |
|||
, "5 most ambiguous:" |
|||
] ++ |
|||
fmap (showTextonym wa) ambiguous ++ |
|||
["", "5 longest:"] ++ fmap (showTextonym wl) longer</lang> |
|||
{{Out}} |
|||
<pre>There are 24978 words in unixdict.txt which can be represented by the digit key mapping. |
|||
They require 22903 digit combinations to represent them. |
|||
1473 digit combinations represent textonyms. |
|||
5 most ambiguous: |
|||
269 -> amy any bmw bow box boy cow cox coy |
|||
729 -> paw pax pay paz raw ray saw sax say |
|||
2273 -> acre bard bare base cape card care case |
|||
726 -> pam pan ram ran sam san sao scm |
|||
426 -> gam gao ham han ian ibm ibn |
|||
5 longest: |
|||
25287876746242 -> claustrophobia claustrophobic |
|||
7244967473642 -> schizophrenia schizophrenic |
|||
666628676342 -> onomatopoeia onomatopoeic |
|||
49376746242 -> hydrophobia hydrophobic |
|||
2668368466 -> contention convention</pre> |
|||
=={{header|Io}}== |
=={{header|Io}}== |