Jump to content

N-grams: Difference between revisions

2,311 bytes added ,  2 months ago
m (→‎{{header|Haskell}}: Disaggregated one function, expanded range of test.)
Line 767:
(' LIV', 1)]
</pre>
 
 
And a strict variant, compositionally assembled from some basics:
 
<syntaxhighlight lang="python">from itertools import (islice)
from functools import (reduce)
from operator import (add)
 
 
def nGramCounts(n, s):
    '''A dictionary of all nGrams of dimension n in s,
       with the frequency of their occurrence.
    '''
    # Fold over the n-grams, starting from an empty dictionary:
    # a first sighting is stored as 1, and each later sighting
    # adds 1 to the count already stored for that n-gram.
    return reduce(
        lambda a, gram: insertWith(add, gram, 1, a),
        nGrams(n, s),
        {}
    )
 
 
def nGrams(n, s):
    '''All case-insensitive sequences of length n in the string s.

       Case is folded by upper-casing s, so every n-gram is
       produced in upper case. The result is a lazy generator
       of strings, in order of occurrence.
    '''
    return (''.join(t) for t in windows(n, list(s.upper())))
 
 
# ----------------------- GENERICS -----------------------
 
def insertWith(f, k, x, dct):
    '''A new dictionary updated with a
       (key, f(value, x)) tuple.
       Where there is no existing value for the key,
       the supplied x is used as the default.

       The dictionary passed in as dct is not modified.
    '''
    # Built with dict-display unpacking rather than
    # dict(dct, **{k: ...}): keyword expansion accepts only
    # string keys, and would raise TypeError for any other
    # hashable key (an int, a tuple, ...).
    return {**dct, k: f(dct[k], x) if k in dct else x}
 
 
def tails(xs):
    '''All final segments of xs, longest first.

       The first segment is xs[0:] and the last is the empty
       segment xs[len(xs):]. The result is a lazy generator,
       and xs must support both len() and slicing.
    '''
    return (xs[i:] for i in range(0, 1 + len(xs)))
 
 
def windows(n, xs):
    '''Sliding windows of dimension n.

       The result is a lazy iterator of n-tuples, which is
       empty when xs holds fewer than n items.
    '''
    # The first n tails of xs are xs shifted left by 0 .. n-1
    # places. Zipping them pairs each item with its n-1
    # successors; zip stops at the shortest tail, so no
    # partial window is ever produced.
    return zip(*islice(tails(xs), n))
 
 
 
# ------------------------- TEST -------------------------
if __name__ == "__main__":
import pprint
 
EXAMPLE = "Live and let live"
 
for dimension in range(1, 5):
result = sorted(nGramCounts(dimension, EXAMPLE).items())
print(
f"{len(result)} {dimension}-grams of {EXAMPLE!r}:\n",
pprint.pformat(result),
end="\n\n",
)</syntaxhighlight>
{{Out}}
<pre>9 1-grams of 'Live and let live':
[(' ', 3),
('A', 1),
('D', 1),
('E', 3),
('I', 2),
('L', 3),
('N', 1),
('T', 1),
('V', 2)]
 
12 2-grams of 'Live and let live':
[(' A', 1),
(' L', 2),
('AN', 1),
('D ', 1),
('E ', 1),
('ET', 1),
('IV', 2),
('LE', 1),
('LI', 2),
('ND', 1),
('T ', 1),
('VE', 2)]
 
13 3-grams of 'Live and let live':
[(' AN', 1),
(' LE', 1),
(' LI', 1),
('AND', 1),
('D L', 1),
('E A', 1),
('ET ', 1),
('IVE', 2),
('LET', 1),
('LIV', 2),
('ND ', 1),
('T L', 1),
('VE ', 1)]
 
13 4-grams of 'Live and let live':
[(' AND', 1),
(' LET', 1),
(' LIV', 1),
('AND ', 1),
('D LE', 1),
('E AN', 1),
('ET L', 1),
('IVE ', 1),
('LET ', 1),
('LIVE', 2),
('ND L', 1),
('T LI', 1),
('VE A', 1)]</pre>
 
=={{header|Raku}}==
9,655

edits

Cookies help us deliver our services. By using our services, you agree to our use of cookies.