N-grams: Difference between revisions

Content added Content deleted
(added Arturo)
(Add Python)
Line 202:
LIVE: 2
</pre>
</pre>
 
=={{header|Python}}==
 
This example generates n-grams lazily, much like the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] from the Python itertools docs.
 
<syntaxhighlight lang="python">
from collections import Counter
from collections import deque
from itertools import islice
 
 
def n_grams(text, n):
    """Generate contiguous sequences of _n_ characters from _text_.

    The text is upper-cased first, so the n-grams are case-insensitive.
    N-grams are produced lazily, one per window position, from left to
    right. If _text_ has fewer than _n_ characters, nothing is yielded.

    Raises ValueError (on first iteration) if _n_ is less than 1.
    """
    if n < 1:
        # A zero-width window would otherwise yield a run of empty strings.
        raise ValueError("n must be a positive integer")

    it = iter(text.upper())

    # A bounded deque acts as the sliding window: once it is full, every
    # append pushes the oldest character out of the left-hand end.
    ngram = deque(islice(it, n), maxlen=n)

    # Only emit the first window if the text was long enough to fill it.
    if len(ngram) == n:
        yield "".join(ngram)

    # If the first window was short, `it` is already exhausted and this
    # loop does nothing.
    for ch in it:
        ngram.append(ch)
        yield "".join(ngram)
 
 
if __name__ == "__main__":
    # Demo: count the 2-, 3- and 4-grams of a sample phrase and print them,
    # most frequent first.
    import pprint

    example = "Live and let live"

    for n in range(2, 5):
        # most_common() with no argument returns every (n-gram, count) pair,
        # ordered from the highest count to the lowest.
        result = Counter(n_grams(example, n)).most_common()
        print(
            f"{len(result)} {n}-grams of {example!r}:\n",
            pprint.pformat(result, compact=True),
            end="\n\n",
        )
</syntaxhighlight>
 
{{out}}
<pre>
12 2-grams of 'Live and let live':
[('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1),
('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)]
 
13 3-grams of 'Live and let live':
[('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1),
('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1),
(' LI', 1)]
 
13 4-grams of 'Live and let live':
[('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1),
('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1),
(' LIV', 1)]
</pre>