Letter frequency: Difference between revisions

Content deleted Content added
No edit summary
Hout (talk | contribs)
→‎{{header|Python}}: Added a second functional version, structured as a fold
Line 2,539: Line 2,539:


=={{header|Python}}==
=={{header|Python}}==
===Functional===
===Using collections.Counter===
====Using collections.Counter====
{{works with|Python|2.7+ and 3.1+}}
{{works with|Python|2.7+ and 3.1+}}
<lang python>import collections, sys
<lang python>import collections, sys
Line 2,549: Line 2,550:
print(filecharcount(f))</lang>
print(filecharcount(f))</lang>


===Not using collections.Counter===
====As a fold====
Character counting can be conveniently expressed in terms of fold/reduce. See the example below, which also generates column-wrapped output:
{{Works with|Python|3}}
<lang python>'''Character counting as a fold'''

from functools import reduce
from itertools import repeat
from os.path import expanduser


# charCounts :: String -> Dict
def charCounts(s):
    '''A dictionary of (character, frequency) mappings,
       built as a fold over the characters of s.

       s may be a string, or any other iterable of
       hashable items. A new dictionary is returned
       by every call.
    '''
    def tally(dct, c):
        # dict.get supplies 0 for a character not yet seen,
        # so each step needs only a single lookup.
        dct[c] = 1 + dct.get(c, 0)
        return dct
    # reduce consumes s directly – no intermediate list is needed.
    return reduce(tally, s, {})


# TEST ----------------------------------------------------
# main :: IO ()
def main():
    '''Prints the character counts of a sample file,
       highest count first, wrapped into five columns.
    '''
    text = readFile('~/Code/charCount/readme.txt')

    # Sorting on the swapped (count, character) pair, reversed,
    # places the highest counts first.
    ranked = sorted(
        charCounts(text).items(),
        key=swap,
        reverse=True
    )

    def showChar(pair):
        return repr(pair[0])

    def showCount(pair):
        return str(pair[1])

    def identity(pair):
        return pair

    render = tabulated('Descending order of frequency:\n')(
        showChar
    )(showCount)(5)(identity)

    print(render(ranked))


# GENERIC -------------------------------------------------

# chunksOf :: Int -> [a] -> [[a]]
def chunksOf(n):
    '''A function dividing xs into consecutive
       chunks of length n.
       Where the length of xs is not evenly divisible by n,
       the final chunk will be shorter than n.
       Each chunk is a slice of xs, and so has the type that
       slicing xs yields (a list for a list, a string for
       a string).
       If n is not greater than zero, the result is
       an empty list.
    '''
    def go(xs):
        if not 0 < n:
            return []
        # One slice per chunk start. Accumulating with
        # a + [chunk] in a fold would copy the whole
        # accumulator at every step.
        return [xs[i:n + i] for i in range(0, len(xs), n)]
    return go


# compose (<<<) :: (b -> c) -> (a -> b) -> a -> c
def compose(g):
    '''Right to left composition: compose(g)(f) is the
       function which applies f first, and then g
       to the result.
    '''
    def after(f):
        def composed(x):
            return g(f(x))
        return composed
    return after


# readFile :: FilePath -> IO String
def readFile(fp):
    '''The full text of the file at the path fp,
       decoded as UTF-8. Any ~ in fp is first expanded
       (by os.path.expanduser).
    '''
    fullPath = expanduser(fp)
    with open(fullPath, 'r', encoding='utf-8') as handle:
        text = handle.read()
    return text


# paddedMatrix :: a -> [[a]] -> [[a]]
def paddedMatrix(v):
    '''A function from a list of rows to those same rows,
       each brought up (where needed) to the length of the
       longest row by appending copies of the value v.
       An empty list of rows yields an empty list.
    '''
    def padded(rows):
        if not rows:
            return []
        widest = max(map(len, rows))
        return paddedRows(widest)(v)(rows)
    return padded


# paddedRows :: Int -> a -> [[a]] -> [[a]]
def paddedRows(n):
    '''A curried function: paddedRows(n)(v)(xs) is the list
       of rows xs, in which every row shorter than n has been
       extended to length n with copies of the value v.
       Rows of length n or more are passed through unchanged,
       and never truncated.
    '''
    def withFiller(v):
        def pad(row):
            shortfall = n - len(row)
            if shortfall > 0:
                return row + [v] * shortfall
            return row
        return lambda xs: [pad(row) for row in xs] if xs else []
    return withFiller


# showColumns :: Int -> [String] -> String
def showColumns(n):
    '''A function rendering a list of strings as text
       wrapped into (at most) n columns.

       Items run down each column in turn. Every column
       is padded to four characters more than its own widest
       item, and trailing white space is trimmed from
       each line of the result.
       An empty list yields an empty string.
    '''
    def go(xs):
        # Number of lines needed: len(xs) / n, rounded up.
        q, r = divmod(len(xs), n)

        # Each chunk is one display column. The final chunk may be
        # shorter, and is filled out with empty strings so that
        # zip (below) does not discard the lower lines.
        columns = paddedMatrix('')(
            chunksOf(q + int(bool(r)))(xs)
        )

        # Widths are measured down each display column. Measuring
        # across each transposed line instead would leave lines
        # whose items differ in length out of alignment.
        widths = [4 + max(map(len, col)) for col in columns]

        def line(cells):
            return ''.join(
                cell.ljust(w, ' ') for cell, w in zip(cells, widths)
            ).rstrip()

        return '\n'.join(map(line, zip(*columns)))
    return go


# swap :: (a, b) -> (b, a)
def swap(tpl):
    '''The first two components of tpl,
       as a pair in reversed order.
    '''
    second, first = tpl[1], tpl[0]
    return (second, first)


# tabulated :: String -> (a -> String) ->
# (b -> String) ->
# Int ->
# (a -> b) -> [a] -> String
def tabulated(s):
    '''Heading -> x display function -> fx display function ->
       number of columns -> f -> value list -> tabular string.

       Each value x is shown as 'x -> f(x)', with both sides
       right-justified to the width of the widest entry on
       that side. The entries are then wrapped into columns
       by showColumns, beneath the heading s.
       An empty value list yields the heading and a newline.
    '''
    def go(xShow, fxShow, intCols, f, xs):
        # A single pass over xs: each value is rendered once,
        # and f is applied to it once.
        shown = [(xShow(x), fxShow(f(x))) for x in xs]

        # default=0 keeps max from raising on an empty value list.
        w = max((len(k) for k, _ in shown), default=0)
        fw = max((len(v) for _, v in shown), default=0)

        return s + '\n' + showColumns(intCols)([
            k.rjust(w, ' ') + ' -> ' + v.rjust(fw, ' ')
            for k, v in shown
        ])
    return lambda xShow: lambda fxShow: lambda nCols: (
        lambda f: lambda xs: go(
            xShow, fxShow, nCols, f, xs
        )
    )


# MAIN ---
if __name__ == '__main__':
main()</lang>
{{Out}}
<pre>Descending order of frequency:

' ' -> 568 ')' -> 62 'v' -> 25 'w' -> 7 '5' -> 3
'\t' -> 382 '(' -> 62 '1' -> 24 'k' -> 7 '4' -> 3
'e' -> 274 'd' -> 60 'G' -> 22 '9' -> 6 '+' -> 3
'n' -> 233 'g' -> 59 ']' -> 17 'S' -> 5 '¬' -> 2
'\n' -> 228 'u' -> 58 '[' -> 17 'R' -> 5 '=' -> 2
't' -> 204 '|' -> 54 'λ' -> 16 'M' -> 5 '.' -> 2
's' -> 198 'x' -> 53 '2' -> 15 'F' -> 5 'L' -> 1
'-' -> 178 'm' -> 52 'N' -> 11 '<' -> 5 'C' -> 1
'i' -> 145 'c' -> 52 '}' -> 10 '6' -> 5 'A' -> 1
'o' -> 126 'h' -> 47 '{' -> 10 'z' -> 4 '3' -> 1
'f' -> 100 ':' -> 47 'T' -> 10 "'" -> 4 '&' -> 1
'r' -> 96 ',' -> 38 'I' -> 10 '^' -> 3 '$' -> 1
'a' -> 86 'b' -> 32 '0' -> 10 'E' -> 3
'l' -> 70 'y' -> 31 '"' -> 10 '8' -> 3
'p' -> 68 '>' -> 28 'J' -> 9 '7' -> 3</pre>

===Procedural===
====Without using collections.Counter====
<lang python>import string
<lang python>import string
if hasattr(string, 'ascii_lowercase'):
if hasattr(string, 'ascii_lowercase'):
Line 2,579: Line 2,748:
Using a numerically indexed array (list) for this is artificial and clutters the code somewhat.
Using a numerically indexed array (list) for this is artificial and clutters the code somewhat.


===Using defaultdict===
====Using defaultdict====
{{works with|Python|2.5+ and 3.x}}
{{works with|Python|2.5+ and 3.x}}
<lang python>...
<lang python>...