N-grams: Difference between revisions
→{{header|Python}}: Add simpler (and faster) implementation
(→{{header|Python}}: Add simpler (and faster) implementation) |
|||
Line 718:
=={{header|Python}}==
<syntaxhighlight lang="python">
This example generates n-grams lazily, much like the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] from the Python itertools docs.
from collections import Counter
from typing import Iterable
def n_grams(text: str, n: int) -> Iterable[str]:
    """Generate contiguous sequences of _n_ characters from _text_.

    The text is upper-cased first, so the resulting n-grams are
    case-insensitive. The result is a lazy generator expression; it
    produces nothing when _n_ is greater than the length of _text_.

    Raises:
        ValueError: If _n_ is less than 1. Raised when the function is
            called, not when the result is first iterated.
    """
    if n < 1:
        raise ValueError("n must be an integer > 0")

    text = text.upper()
    # There are len(text) - n + 1 start positions; range() is simply empty
    # when that count is zero or negative.
    return (text[i : (i + n)] for i in range(len(text) - n + 1))
def main() -> None:
    """Print the 2-, 3- and 4-gram frequency tables for a sample sentence."""
    # Imported locally because this listing's import block does not
    # bring pprint into scope.
    import pprint

    example_text = "Live and let live"

    for n in range(2, 5):
        # most_common() with no argument returns every (n-gram, count) pair,
        # ordered from most to least frequent.
        counts = Counter(n_grams(example_text, n)).most_common()
        print(
            f"{len(counts)} {n}-grams of {example_text!r}:\n",
            pprint.pformat(counts, compact=True),
            end="\n\n",
        )
# Run the demonstration only when executed as a script, not on import.
if __name__ == "__main__":
    main()
</syntaxhighlight>
{{out}}
<pre>
12 2-grams of 'Live and let live':
[('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1),
('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)]
13 3-grams of 'Live and let live':
[('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1),
('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1),
(' LI', 1)]
13 4-grams of 'Live and let live':
[('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1),
('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1),
(' LIV', 1)]
</pre>
===Sliding window===
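This example generates n-grams lazily, much like the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] from the Python itertools docs.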
<syntaxhighlight lang="python">
import pprint
from collections import Counter
from collections import deque
from itertools import islice
from typing import Iterable
def n_grams(text: str, n: int) -> Iterable[str]:
"""Generate contiguous sequences of _n_ characters from _text_."""
it = iter(text.upper())
if len(
yield "".join(
for
yield "".join(
def main() -> None:
▲if __name__ == "__main__":
example_text = "Live and let live"
▲ import pprint
▲ example = "Live and let live"
for n in range(2, 5):
print(
f"{len(
pprint.pformat(
end="\n\n",
)
if __name__ == "__main__":
main()
</syntaxhighlight>
|