N-grams: Difference between revisions
Content added Content deleted
Drkameleon (talk | contribs) (added Arturo) |
(Add Python) |
||
Line 202:
LIVE: 2
</pre>
</pre>
=={{header|Python}}==
This example generates n-grams lazily, much like the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] from the Python itertools docs.
<syntaxhighlight lang="python">
from collections import Counter
from collections import deque
from itertools import islice
def n_grams(text, n):
    """Generate contiguous sequences of _n_ characters from _text_.

    The text is upper-cased before windowing, so the n-grams are
    case-insensitive with respect to the input.  N-grams are produced
    lazily, one per position of a sliding window of width _n_.

    Args:
        text: The string to split into n-grams.
        n: Window width; must be at least 1.

    Yields:
        Each upper-cased n-gram as a string, in order of appearance.
        Nothing is yielded when _text_ has fewer than _n_ characters.

    Raises:
        ValueError: If _n_ is smaller than 1.  Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    # Without this guard, n == 0 would emit len(text) + 1 empty strings and
    # a negative n would fail inside islice with an unrelated message.
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    it = iter(text.upper())
    # A bounded deque drops its oldest item on append, which is exactly the
    # sliding-window step.
    window = deque(islice(it, n), maxlen=n)
    if len(window) < n:
        # Input shorter than one full window: there are no n-grams.
        return
    yield "".join(window)
    for ch in it:
        window.append(ch)
        yield "".join(window)
if __name__ == "__main__":
    import pprint

    # Demo: tally the 2-, 3- and 4-grams of a sample phrase and print them
    # most frequent first.
    example = "Live and let live"
    for n in range(2, 5):
        tallies = Counter()
        tallies.update(n_grams(example, n))
        ranked = tallies.most_common()

        heading = f"{len(ranked)} {n}-grams of {example!r}:\n"
        listing = pprint.pformat(ranked, compact=True)
        # A blank line separates the report for each n.
        print(heading, listing, end="\n\n")
</syntaxhighlight>
{{out}}
<pre>
12 2-grams of 'Live and let live':
[('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1),
('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)]
13 3-grams of 'Live and let live':
[('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1),
('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1),
(' LI', 1)]
13 4-grams of 'Live and let live':
[('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1),
('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1),
(' LIV', 1)]
</pre>
|