N-grams: Difference between revisions
Content added Content deleted
Drkameleon (talk | contribs) (added Arturo) |
(Add Python) |
||
Line 202: | Line 202: | ||
LIVE: 2 |
LIVE: 2 |
||
</pre> |
</pre> |
||
</pre> |
|||
=={{header|Python}}== |
|||
This example generates n-grams lazily, much like the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] from the Python itertools docs. |
|||
<syntaxhighlight lang="python"> |
|||
from collections import Counter |
|||
from collections import deque |
|||
from itertools import islice |
|||
def n_grams(text, n):
    """Lazily generate the contiguous n-grams of *text*, folded to upper case.

    A window of *n* characters slides over ``text.upper()`` one character at
    a time, and each window position is yielded as a string.  Nothing is
    yielded when the upper-cased text has fewer than *n* characters.

    Args:
        text: The string to split into n-grams.
        n: Window length in characters; must be at least 1.

    Yields:
        Each run of *n* consecutive characters, in order of appearance.

    Raises:
        ValueError: If *n* is less than 1.  Because this is a generator
            function, the error surfaces when iteration starts.
    """
    if n < 1:
        # A zero-width window would otherwise yield len(text) + 1 empty
        # strings (a maxlen=0 deque silently drops every append), and a
        # negative width would fail inside islice with an unrelated message.
        raise ValueError(f"n must be a positive integer, got {n!r}")

    it = iter(text.upper())

    # Prime the window with the first n characters; maxlen=n makes every
    # later append evict the oldest character automatically.
    ngram = deque(islice(it, n), maxlen=n)

    # Only emit the primed window if the text was long enough to fill it.
    if len(ngram) == n:
        yield "".join(ngram)

    for ch in it:
        ngram.append(ch)
        yield "".join(ngram)
|||
if __name__ == "__main__":
    # Demo: count the 2-, 3- and 4-grams of a sample phrase and print them
    # ordered from most to least frequent.
    import pprint

    example = "Live and let live"

    for n in (2, 3, 4):
        result = Counter(n_grams(example, n)).most_common()

        # Heading reports how many distinct n-grams were found.
        heading = f"{len(result)} {n}-grams of {example!r}:\n"
        # compact=True packs as many (ngram, count) pairs per line as fit.
        listing = pprint.pformat(result, compact=True)

        print(heading, listing, end="\n\n")
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
12 2-grams of 'Live and let live': |
|||
[('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1), |
|||
('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)] |
|||
13 3-grams of 'Live and let live': |
|||
[('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1), |
|||
('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1), |
|||
(' LI', 1)] |
|||
13 4-grams of 'Live and let live': |
|||
[('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1), |
|||
('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1), |
|||
(' LIV', 1)] |
|||
</pre> |
</pre> |
||