N-grams: Difference between revisions

N-grams (view source)

Revision as of 14:24, 8 April 2024

17,961 bytes added, 1 month ago

→‎{{header|Python}}: Add simpler (and faster) implementation

Jgrprior

140

edits

Revision as of 12:12, 22 April 2023 (view source) Petelomax (talk | contribs) m (→‎{{header|Phix}}: (find ok too remark)) ← Older edit		Revision as of 14:24, 8 April 2024 (view source) Jgrprior (talk | contribs) (→‎{{header|Python}}: Add simpler (and faster) implementation) Newer edit →
(23 intermediate revisions by 10 users not shown)
Line 23: ;* [[Sorensen–Dice_coefficient\|Related task: Sorensen–Dice coefficient]] =={{header\|ALGOL 68}}== <syntaxhighlight lang="algol68"> BEGIN # split text into n-grams - n character substrings # MODE NGRAM = STRUCT( STRING gram, INT count ); OP UCASE = ( CHAR c )CHAR: # return c converted to upper case # IF c >= "a" AND c <= "z" THEN REPR( ( ABS c - ABS "a" ) + ABS "A" ) ELSE c FI; OP UCASE = ( STRING s )STRING: # return s converted to upper case # BEGIN STRING uc := s; FOR i FROM LWB uc TO UPB uc DO uc[ i ] := UCASE uc[ i ] OD; uc END # UCASE # ; OP LENGTH = ( STRING s )INT: ( UPB s + 1 ) - LWB s; # returns the n-grams of text - using a simple array to contain the # # n-grams - for longer strings, an associative array might be better # PRIO GRAMS = 1; OP GRAMS = ( INT n, STRING text )[]NGRAM: BEGIN [ 1 : ( LENGTH text + 1 ) - n ]NGRAM result; # initially assume # INT count := 0; # all the n-grams will be unique # FOR pos FROM LWB text TO ( UPB text + 1 )- n DO STRING ng = UCASE text[ pos : pos + ( n - 1 ) ]; BOOL found := FALSE; INT g pos := 0; FOR g FROM 1 TO count WHILE g pos := g; NOT ( found := ng = ( gram OF result )[ g ] ) DO SKIP OD; IF NOT found THEN result[ count +:= 1 ] := ( ng, 1 ) ELSE ( count OF result )[ g pos ] +:= 1 FI OD; result[ 1 : count ] END # NGRAMS # ; # prints the ngrams in ngrams # PROC print ngrams = ( STRING title, text, []NGRAM ngrams )VOID: BEGIN print( ( title, "-grams of """, text, """:", newline ) ); FOR g FROM LWB ngrams TO UPB ngrams DO print( ( " """, ( gram OF ngrams )[ g ] ) ); print( ( """: ", whole( ( count OF ngrams )[ g ], 0 ), newline ) ) OD END # print ngrams # ; STRING test = "Live and let live"; print ngrams( "bi", test, 2 GRAMS test ); print ngrams( "tri", test, 3 GRAMS test ); print ngrams( "quad", test, 4 GRAMS test ) END </syntaxhighlight> {{out}} <pre> bi-grams of "Live and let live": "LI": 2 "IV": 2 "VE": 2 "E ": 1 " A": 1 "AN": 1 "ND": 1 "D ": 1 " L": 2 "LE": 1 "ET": 1 "T ": 1 tri-grams of "Live and let 
live": "LIV": 2 "IVE": 2 "VE ": 1 "E A": 1 " AN": 1 "AND": 1 "ND ": 1 "D L": 1 " LE": 1 "LET": 1 "ET ": 1 "T L": 1 " LI": 1 quad-grams of "Live and let live": "LIVE": 2 "IVE ": 1 "VE A": 1 "E AN": 1 " AND": 1 "AND ": 1 "ND L": 1 "D LE": 1 " LET": 1 "LET ": 1 "ET L": 1 "T LI": 1 " LIV": 1 </pre> =={{header\|Arturo}}== Line 82 ⟶ 184: "T LI" 1 " LIV" 1</pre> =={{header\|C}}== <syntaxhighlight lang="c">#include <stdio.h> #include <stdbool.h> #include <ctype.h> #include <string.h> #define MAX_N 4 #define MAX_NGRAMS 20 typedef struct { char str[MAX_N+1]; int freq; } ngram; void strUpper(char *s) { while (*s) { *s = toupper(*s); s++; } } void ngrams(int n, char *text) { int i, j, count = 0; size_t len = strlen(text); bool found; char temp[MAX_N+1] = {'\0'}; ngram ng, ngrams[MAX_NGRAMS]; char s[len+1]; strcpy(s, text); strUpper(s); for (i = 0; i <= len-n; ++i) { strncpy(temp, s + i, n); found = false; for (j = 0; j < count; ++j) { if (!strcmp(ngrams[j].str, temp)) { ngrams[j].freq++; found = true; break; } } if (!found) { strncpy(ng.str, temp, n); ng.freq = 1; ngrams[count++] = ng; } } for (i = 0; i < count; ++i) { printf("(\"%s\": %d) ", ngrams[i].str, ngrams[i].freq); if (!((i+1)%5)) printf("\n"); } printf("\n\n"); } int main() { int n; char *text = "Live and let live"; for (n = 2; n <= MAX_N; ++n) { printf("All %d-grams of '%s' and their frequencies:\n", n, text); ngrams(n, text); } return 0; }</syntaxhighlight> {{out}} <pre> All 2-grams of 'Live and let live' and their frequencies: ("LI": 2) ("IV": 2) ("VE": 2) ("E ": 1) (" A": 1) ("AN": 1) ("ND": 1) ("D ": 1) (" L": 2) ("LE": 1) ("ET": 1) ("T ": 1) All 3-grams of 'Live and let live' and their frequencies: ("LIV": 2) ("IVE": 2) ("VE ": 1) ("E A": 1) (" AN": 1) ("AND": 1) ("ND ": 1) ("D L": 1) (" LE": 1) ("LET": 1) ("ET ": 1) ("T L": 1) (" LI": 1) All 4-grams of 'Live and let live' and their frequencies: ("LIVE": 2) ("IVE ": 1) ("VE A": 1) ("E AN": 1) (" AND": 1) ("AND ": 1) ("ND L": 1) ("D LE": 1) (" LET": 1) ("LET ": 1) 
("ET L": 1) ("T LI": 1) (" LIV": 1) </pre> =={{header\|Common Lisp}}== Line 108 ⟶ 291: </syntaxhighlight> =={{header\|F_Sharp\|F#}}== <syntaxhighlight lang="fsharp"> // N-grams. Nigel Galloway: April 2nd., 2024 let gram (n:string) g=let n=n.ToUpper() in n\|>Seq.windowed g\|>Seq.countBy id for n,g in (gram "Live and let live" 2) do printfn "%A %d" n g </syntaxhighlight> {{out}} <pre> [\|'L'; 'I'\|] 2 [\|'I'; 'V'\|] 2 [\|'V'; 'E'\|] 2 [\|'E'; ' '\|] 1 [\|' '; 'A'\|] 1 [\|'A'; 'N'\|] 1 [\|'N'; 'D'\|] 1 [\|'D'; ' '\|] 1 [\|' '; 'L'\|] 2 [\|'L'; 'E'\|] 1 [\|'E'; 'T'\|] 1 [\|'T'; ' '\|] 1 </pre> =={{header\|Factor}}== {{works with\|Factor\|0.99 2022-04-03}} Line 132 ⟶ 336: } </pre> =={{header\|Haskell}}== <syntaxhighlight lang=haskell>import Control.Applicative (ZipList (ZipList, getZipList)) import Data.Char (toUpper) import Data.List (tails) import qualified Data.Map.Strict as M ------------------- MAP OF N-GRAM COUNTS ----------------- nGramCounts :: Int -> String -> M.Map String Int nGramCounts n = foldr (flip (M.insertWith (+)) 1) M.empty . windows n ------------------------- GENERIC ------------------------ windows :: Int -> [a] -> [[a]] windows n = transpose . take n . tails transpose :: [[a]] -> [[a]] transpose [] = [] transpose xs = getZipList (traverse ZipList xs) --------------------------- TEST ------------------------- main :: IO () main = let sample = toUpper <$> "Live and let live" in mapM_ ( \n -> putStrLn (show n <> "-GRAMS:") >> mapM_ print ((M.assocs . nGramCounts n) sample) >> putStrLn "" ) [0 .. 
4]</syntaxhighlight> {{Out}} <pre>0-GRAMS: 1-GRAMS: (" ",3) ("A",1) ("D",1) ("E",3) ("I",2) ("L",3) ("N",1) ("T",1) ("V",2) 2-GRAMS: (" A",1) (" L",2) ("AN",1) ("D ",1) ("E ",1) ("ET",1) ("IV",2) ("LE",1) ("LI",2) ("ND",1) ("T ",1) ("VE",2) 3-GRAMS: (" AN",1) (" LE",1) (" LI",1) ("AND",1) ("D L",1) ("E A",1) ("ET ",1) ("IVE",2) ("LET",1) ("LIV",2) ("ND ",1) ("T L",1) ("VE ",1) 4-GRAMS: (" AND",1) (" LET",1) (" LIV",1) ("AND ",1) ("D LE",1) ("E AN",1) ("ET L",1) ("IVE ",1) ("LET ",1) ("LIVE",2) ("ND L",1) ("T LI",1) ("VE A",1)</pre> =={{header\|jq}}== Line 203 ⟶ 500: </pre> </pre> =={{header\|Julia}}== <syntaxhighlight lang="julia">function ngrams(str::AbstractString, n; uppercaseinput = true) s = uppercaseinput ? uppercase(str) : str unique([(ng, count(ng, s)) for ng in [SubString(s, i:i+n-1) for i=1:length(s)-n+1]]) end function eightcolumns(arr) for (i, elem) in pairs(arr) print(lpad(elem, 10), i % 8 == 0 ? "\n" : "") end println("\n") end const s = "Live and let live" ngrams(s, 1) \|> eightcolumns ngrams(s, 2) \|> eightcolumns ngrams(s, 2, uppercaseinput = false) \|> eightcolumns </syntaxhighlight>{{out}} <pre> ("L", 3) ("I", 2) ("V", 2) ("E", 3) (" ", 3) ("A", 1) ("N", 1) ("D", 1) ("T", 1) ("LI", 2) ("IV", 2) ("VE", 2) ("E ", 1) (" A", 1) ("AN", 1) ("ND", 1) ("D ", 1) (" L", 2) ("LE", 1) ("ET", 1) ("T ", 1) ("Li", 1) ("iv", 2) ("ve", 2) ("e ", 1) (" a", 1) ("an", 1) ("nd", 1) ("d ", 1) (" l", 2) ("le", 1) ("et", 1) ("t ", 1) ("li", 1) </pre> =={{header\|Nim}}== <syntaxhighlight lang="Nim">import std/[strutils, tables] type NGrams = CountTable[string] func ngrams(text: string; n: Positive): NGrams = for i in 0..(text.len - n): result.inc(text[i..<(i + n)].toLowerAscii) const Text = "Live and let live" for n in 2..4: echo n, "-grams:" var ng = Text.ngrams(n) ng.sort() # To display n-grams with higher score first. 
for key, count in ng: echo "“$1”: $2".format(key, count) echo() </syntaxhighlight> {{out}} <pre>2-grams: “ve”: 2 “li”: 2 “iv”: 2 “ l”: 2 “d ”: 1 “et”: 1 “t ”: 1 “an”: 1 “nd”: 1 “e ”: 1 “le”: 1 “ a”: 1 3-grams: “ive”: 2 “liv”: 2 “ le”: 1 “nd ”: 1 “and”: 1 “et ”: 1 “ve ”: 1 “t l”: 1 “ an”: 1 “d l”: 1 “e a”: 1 “let”: 1 “ li”: 1 4-grams: “live”: 2 “ liv”: 1 “ and”: 1 “e an”: 1 “ let”: 1 “and ”: 1 “d le”: 1 “t li”: 1 “nd l”: 1 “et l”: 1 “ive ”: 1 “let ”: 1 “ve a”: 1 </pre> =={{header\|Perl}}== <syntaxhighlight lang="perl" line>use v5.36; sub n_gram ($n, $line) { my %N; map { $N{substr lc($line),$_,$n}++ } 0..length($line)-$n; %N } my %bi_grams = n_gram 2, 'Live and let live'; say qq\|'$_' - $bi_grams{$_}\| for sort keys %bi_grams; say ''; my %tri_grams = n_gram 3, 'Live and let live'; say qq\|'$_' - $tri_grams{$_}\| for sort keys %tri_grams;</syntaxhighlight> {{out}} <pre>' a' - 1 ' l' - 2 'an' - 1 'd ' - 1 'e ' - 1 'et' - 1 'iv' - 2 'le' - 1 'li' - 2 'nd' - 1 't ' - 1 've' - 2 ' an' - 1 ' le' - 1 ' li' - 1 'and' - 1 'd l' - 1 'e a' - 1 'et ' - 1 'ive' - 2 'let' - 1 'liv' - 2 'nd ' - 1 't l' - 1 've ' - 1</pre> =={{header\|Phix}}== A dictionary is used to find the index of already-seen n-grams, even though a simpler find() would be good enough for this task.<br> I have replicated most orderings found on this page, the task description order corresponds to orig/freq,<br> and jq is alpha/freq ~~but~~with high last, but there is no equivalent for the Factor or Raku orderings here ;-). 
<!--<syntaxhighlight lang="phix">(phixonline)--> <span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> Line 281 ⟶ 718: =={{header\|Python}}== <syntaxhighlight lang="python"> ~~This example generates n-grams lazily, much like the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] from the Python itertools docs.~~ import pprint from collections import Counter from typing import Iterable def n_grams(text: str, n: int) -> Iterable[str]: """Generate contiguous sequences of _n_ characters from _text_.""" if n < 1: raise ValueError("n must be an integer > 0") text = text.upper() return (text[i : (i + n)] for i in range(len(text) - n + 1)) def main() -> None: example_text = "Live and let live" for n in range(2, 5): counts = Counter(n_grams(example_text, n)).most_common() print( f"{len(counts)} {n}-grams of {example_text!r}:\n", pprint.pformat(counts, compact=True), end="\n\n", ) if __name__ == "__main__": main() </syntaxhighlight> {{out}} <pre> 12 2-grams of 'Live and let live': [('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1), ('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)] 13 3-grams of 'Live and let live': [('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1), ('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1), (' LI', 1)] 13 4-grams of 'Live and let live': [('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1), ('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1), (' LIV', 1)] </pre> ===Sliding window=== This example takes inspiration from the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] found in Python's itertools docs. 
<syntaxhighlight lang="python"> import pprint from collections import Counter from collections import deque from itertools import islice from typing import Iterable def n_grams(text: str, n: int) -> Iterable[str]: """Generate contiguous sequences of _n_ characters from _text_.""" it = iter(text.upper()) ~~ngram~~n_gram = deque(islice(it, n), maxlen=n) if len(~~ngram~~n_gram) == n: yield "".join(~~ngram~~n_gram) for chx in it: ~~ngram~~n_gram.append(chx) yield "".join(~~ngram~~n_gram) def main() -> None: ~~if __name__ == "__main__":~~ example_text = "Live and let live" ~~import pprint~~ ~~example = "Live and let live"~~ for n in range(2, 5): ~~result~~counts = Counter(n_grams(~~example~~example_text, n)).most_common() print( f"{len(~~result~~counts)} {n}-grams of {~~example~~example_text!r}:\n", pprint.pformat(~~result~~counts, compact=True), end="\n\n", ) if __name__ == "__main__": main() </syntaxhighlight> Line 330 ⟶ 822: (' LIV', 1)] </pre> And a strict variant, compositionally assembled from some basics: <syntaxhighlight lang="python">from itertools import (islice) from functools import (reduce) from operator import (add) def nGramCounts(n, s): '''A dictionary of all nGrams of dimension n in s, with the frequency of their occurrence. ''' return reduce( lambda a, gram: insertWith(add, gram, 1, a), nGrams(n, s), {} ) def nGrams(n, s): '''All case-insensitive sequences of length n in the string s.''' return (''.join(t) for t in windows(n, list(s.upper()))) # ----------------------- GENERICS ----------------------- def insertWith(f, k, x, dct): '''A new dictionary updated with a (key, f(value, x)) tuple. Where there is no existing value for the key, the supplied x is used as the default. 
''' return dict(dct, **{k: f(dct[k], x) if k in dct else x}) def tails(xs): '''All final segments of xs, longest first.''' return (xs[i:] for i in range(0, 1 + len(xs))) def windows(n, xs): '''Sliding windows of dimension n.''' return zip(*islice(tails(xs), n)) # ------------------------- TEST ------------------------- if __name__ == "__main__": import pprint EXAMPLE = "Live and let live" for dimension in range(1, 5): result = sorted(nGramCounts(dimension, EXAMPLE).items()) print( f"{len(result)} {dimension}-grams of {EXAMPLE!r}:\n", pprint.pformat(result), end="\n\n", )</syntaxhighlight> {{Out}} <pre>9 1-grams of 'Live and let live': [(' ', 3), ('A', 1), ('D', 1), ('E', 3), ('I', 2), ('L', 3), ('N', 1), ('T', 1), ('V', 2)] 12 2-grams of 'Live and let live': [(' A', 1), (' L', 2), ('AN', 1), ('D ', 1), ('E ', 1), ('ET', 1), ('IV', 2), ('LE', 1), ('LI', 2), ('ND', 1), ('T ', 1), ('VE', 2)] 13 3-grams of 'Live and let live': [(' AN', 1), (' LE', 1), (' LI', 1), ('AND', 1), ('D L', 1), ('E A', 1), ('ET ', 1), ('IVE', 2), ('LET', 1), ('LIV', 2), ('ND ', 1), ('T L', 1), ('VE ', 1)] 13 4-grams of 'Live and let live': [(' AND', 1), (' LET', 1), (' LIV', 1), ('AND ', 1), ('D LE', 1), ('E AN', 1), ('ET L', 1), ('IVE ', 1), ('LET ', 1), ('LIVE', 2), ('ND L', 1), ('T LI', 1), ('VE A', 1)]</pre> =={{header\|Raku}}== Line 339 ⟶ 946: <pre>("IV"=>2,"T "=>1,"VE"=>2,"E "=>1,"LE"=>1,"AN"=>1,"LI"=>2,"ND"=>1,"ET"=>1," L"=>2," A"=>1,"D "=>1).Bag ("ET "=>1,"AND"=>1,"LIV"=>2," LI"=>1,"ND "=>1," LE"=>1,"IVE"=>2,"E A"=>1,"VE "=>1,"T L"=>1,"D L"=>1,"LET"=>1," AN"=>1).Bag</pre> =={{header\|RPL}}== {{works with\|Halcyon Calc\|4.2.8}} {\| class="wikitable" ! RPL code ! 
Comment \|- \| ≪ → text n ≪ { } DUP n text SIZE '''FOR''' j text j n - 1 + j SUB '''IF''' DUP2 POS '''THEN''' LAST 4 ROLL SWAP DUP2 GET 1 + PUT SWAP DROP SWAP '''ELSE''' + SWAP 1 + SWAP '''END''' '''NEXT SHOWG''' ≫ ≫ ‘'''-GRAMS'''’ STO ≪ { } 1 3 PICK SIZE '''FOR''' j OVER j GET "=" + 4 PICK j GET →STR + + '''NEXT''' ROT ROT DROP2 ≫ ‘'''SHOWG'''’ STO \| '''-GRAMS''' ''( text n -- { "ngram=count".. } ) '' Initialize 2 empty lists; for j = n to length(text): ngram = text[j-n+1..j] if ngram already in ngram list increase counter in other list get rid of ngram else add to ngram list and set counter at 1 on the other list Show results '''SHOWG''' ''( { "ngram".. } { counts.. } -- { "ngram=count".. } ) '' \|} {{in}} <pre> "LIVE AND LET LIVE" 2 -GRAMS "LIVE AND LET LIVE" 3 -GRAMS "LIVE AND LET LIVE" 4 -GRAMS </pre> {{out}} <pre> 3: { "LI=2" "IV=2" "VE=2" "E =1" " A=1" "AN=1" "ND=1" "D =1" " L=2" "LE=1" "ET=1" "T =1" } 2: { "LIV=2" "IVE=2" "VE =1" "E A=1" " AN=1" "AND=1" "ND =1" "D L=1" " LE=1" "LET=1" "ET =1" "T L=1" " LI=1" } 1: { "LIVE=2" "IVE =1" "VE A=1" "E AN=1" " AND=1" "AND =1" "ND L=1" "D LE=1" " LET=1" "LET =1" "ET L=1" "T LI=1" " LIV=1" } </pre> =={{header\|Wren}}== ===Version 1 (Sorted order)=== {{libheader\|Wren-str}} {{libheader\|Wren-maputil}} {{libheader\|Wren-fmt}} <syntaxhighlight lang="~~ecmascript~~wren">import "./str" for Str import "./maputil" for ~~MapUtil~~MultiSet import "./fmt" for Fmt Line 353 ⟶ 1,011: for (i in 0..text.count-n) { var ngram = text[i...i+n] ~~MapUtil~~MultiSet.~~increase~~add(ngrams, ngram) } return ngrams Line 389 ⟶ 1,047: ("D LE" : 1) ("E AN" : 1) ("ET L" : 1) ("IVE " : 1) ("LET " : 1) ("ND L" : 1) ("T LI" : 1) ("VE A" : 1) </pre> ===Version 2 (Original order)=== {{libheader\|Wren-ordered}} The iteration order of 'Map' objects in Wren is undefined though they can subsequently be sorted into a particular order as the first version shows. 
However, to maintain the original order of insertion we need to use one of the classes in the above module which automatically keep track of such order when items are added or removed. <syntaxhighlight lang="wren">import "./str" for Str import "./ordered" for OrderedBag import "./fmt" for Fmt var findNgrams = Fn.new { \|n, text\| text = Str.upper(text) var ngrams = OrderedBag.new() for (i in 0..text.count-n) { var ngram = text[i...i+n] ngrams.add(ngram) } return ngrams } var text = "Live and let live" for (n in [2, 3, 4]) { var ngrams = findNgrams.call(n, text) System.print("All %(n)-grams of '%(text)' and their frequencies:") var ng = ngrams.toList.map { \|me\| "(\"%(me.key)\" : %(me.value))"} Fmt.tprint("$s ", ng, 5) System.print() }</syntaxhighlight> {{out}} <pre> All 2-grams of 'Live and let live' and their frequencies: ("LI" : 2) ("IV" : 2) ("VE" : 2) ("E " : 1) (" A" : 1) ("AN" : 1) ("ND" : 1) ("D " : 1) (" L" : 2) ("LE" : 1) ("ET" : 1) ("T " : 1) All 3-grams of 'Live and let live' and their frequencies: ("LIV" : 2) ("IVE" : 2) ("VE " : 1) ("E A" : 1) (" AN" : 1) ("AND" : 1) ("ND " : 1) ("D L" : 1) (" LE" : 1) ("LET" : 1) ("ET " : 1) ("T L" : 1) (" LI" : 1) All 4-grams of 'Live and let live' and their frequencies: ("LIVE" : 2) ("IVE " : 1) ("VE A" : 1) ("E AN" : 1) (" AND" : 1) ("AND " : 1) ("ND L" : 1) ("D LE" : 1) (" LET" : 1) ("LET " : 1) ("ET L" : 1) ("T LI" : 1) (" LIV" : 1) </pre> =={{header\|XPL0}}== <syntaxhighlight lang="XPL0">int Dict(100), Count(100), Size; proc LookUp(Wd); \Add word to dictionary, or increment its count int Wd, I; [for I:= 0 to Size-1 do if Dict(I) = Wd then [Count(I):= Count(I)+1; return; ]; Dict(Size):= Wd; Count(Size):= 1; Size:= Size+1; ]; proc ShowNGram(N, Str); \Show N-grams for string char N, Str; int I, J, Wd, Ch; [IntOut(0, N); Text(0, "-grams:^m^j"); Size:= 0; I:= 0; loop [Wd:= 0; for J:= 0 to N-1 do [Ch:= Str(I+J); if Ch = $A0 then quit; \terminating space if Ch>=^a and Ch<=^z then Ch:= Ch & ~$20; Wd:= Wd<<8 + Ch; ]; 
I:= I+1; LookUp(Wd); ]; for I:= 0 to Size-1 do [Wd:= Dict(I); for J:= N-1 downto 0 do ChOut(0, Wd>>(J*8)); ChOut(0, ^ ); IntOut(0, Count(I)); if rem(I/5) = 4 then CrLf(0) else ChOut(0, 9\tab\); ]; CrLf(0); ]; int N; for N:= 2 to 4 do ShowNGram(N, "Live and let live ")</syntaxhighlight> {{out}} <pre> 2-grams: LI 2 IV 2 VE 2 E 1 A 1 AN 1 ND 1 D 1 L 2 LE 1 ET 1 T 1 3-grams: LIV 2 IVE 2 VE 1 E A 1 AN 1 AND 1 ND 1 D L 1 LE 1 LET 1 ET 1 T L 1 LI 1 4-grams: LIVE 2 IVE 1 VE A 1 E AN 1 AND 1 AND 1 ND L 1 D LE 1 LET 1 LET 1 ET L 1 T LI 1 LIV 1 </pre>