N-grams: Difference between revisions

17,961 bytes added ,  1 month ago
→‎{{header|Python}}: Add simpler (and faster) implementation
m (→‎{{header|Phix}}: (find ok too remark))
(→‎{{header|Python}}: Add simpler (and faster) implementation)
(23 intermediate revisions by 10 users not shown)
Line 23:
;* [[Sorensen–Dice_coefficient|Related task: Sorensen–Dice coefficient]]
 
 
=={{header|ALGOL 68}}==
<syntaxhighlight lang="algol68">
BEGIN # split text into n-grams - n character substrings #
 
MODE NGRAM = STRUCT( STRING gram, INT count );
 
# only the letters a to z are converted, every other character is returned unchanged #
OP UCASE = ( CHAR c )CHAR: # return c converted to upper case #
IF c >= "a" AND c <= "z" THEN REPR( ( ABS c - ABS "a" ) + ABS "A" ) ELSE c FI;
# applies the CHAR version of UCASE to each character of a copy of s, s itself is not modified #
OP UCASE = ( STRING s )STRING: # return s converted to upper case #
BEGIN
STRING uc := s;
FOR i FROM LWB uc TO UPB uc DO uc[ i ] := UCASE uc[ i ] OD;
uc
END # UCASE # ;
# returns the number of characters in s, computed from its bounds so any lower bound is handled #
OP LENGTH = ( STRING s )INT: ( UPB s + 1 ) - LWB s;
 
# returns the n-grams of text - using a simple array to contain the #
# n-grams - for longer strings, an associative array might be better #
# each n-gram is upper-cased before it is counted and the result holds #
# the distinct n-grams in order of first appearance with their counts #
PRIO GRAMS = 1;
OP GRAMS = ( INT n, STRING text )[]NGRAM:
BEGIN
[ 1 : ( LENGTH text + 1 ) - n ]NGRAM result; # initially assume #
INT count := 0; # all the n-grams will be unique #
FOR pos FROM LWB text TO ( UPB text + 1 )- n DO
STRING ng = UCASE text[ pos : pos + ( n - 1 ) ];
BOOL found := FALSE;
INT g pos := 0;
# linear search of the n-grams stored so far: the WHILE part records the #
# current index in g pos and ends the loop as soon as ng has been found #
FOR g FROM 1 TO count
WHILE g pos := g;
NOT ( found := ng = ( gram OF result )[ g ] )
DO SKIP OD;
IF NOT found THEN
result[ count +:= 1 ] := ( ng, 1 )
ELSE
( count OF result )[ g pos ] +:= 1
FI
OD;
# only the first count elements of result were filled in #
result[ 1 : count ]
END # GRAMS # ;
 
# prints the ngrams in ngrams #
# a heading built from title and text is printed first, then one line #
# per n-gram showing the quoted n-gram followed by its count #
PROC print ngrams = ( STRING title, text, []NGRAM ngrams )VOID:
BEGIN
print( ( title, "-grams of """, text, """:", newline ) );
FOR g FROM LWB ngrams TO UPB ngrams DO
print( ( " """, ( gram OF ngrams )[ g ] ) );
print( ( """: ", whole( ( count OF ngrams )[ g ], 0 ), newline ) )
OD
END # print ngrams # ;
 
# show the 2-, 3- and 4-grams of the example text using the GRAMS operator #
STRING test = "Live and let live";
print ngrams( "bi", test, 2 GRAMS test );
print ngrams( "tri", test, 3 GRAMS test );
print ngrams( "quad", test, 4 GRAMS test )
 
END
</syntaxhighlight>
{{out}}
<pre>
bi-grams of "Live and let live":
"LI": 2
"IV": 2
"VE": 2
"E ": 1
" A": 1
"AN": 1
"ND": 1
"D ": 1
" L": 2
"LE": 1
"ET": 1
"T ": 1
tri-grams of "Live and let live":
"LIV": 2
"IVE": 2
"VE ": 1
"E A": 1
" AN": 1
"AND": 1
"ND ": 1
"D L": 1
" LE": 1
"LET": 1
"ET ": 1
"T L": 1
" LI": 1
quad-grams of "Live and let live":
"LIVE": 2
"IVE ": 1
"VE A": 1
"E AN": 1
" AND": 1
"AND ": 1
"ND L": 1
"D LE": 1
" LET": 1
"LET ": 1
"ET L": 1
"T LI": 1
" LIV": 1
</pre>
 
=={{header|Arturo}}==
Line 82 ⟶ 184:
"T LI" 1
" LIV" 1</pre>
 
=={{header|C}}==
<syntaxhighlight lang="c">#include <stdio.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
 
#define MAX_N 4
#define MAX_NGRAMS 20
 
typedef struct {
char str[MAX_N+1];
int freq;
} ngram;
 
/*
 * Convert the NUL-terminated string s to upper case in place.
 *
 * Returns s, so the declared pointer return type now carries a defined
 * value (the original fell off the end without returning anything).
 */
void *strUpper(char *s) {
    char *p = s;
    while (*p) {
        /* toupper() is only defined for values representable as
           unsigned char (or EOF), so a plain char must be cast first. */
        *p = (char)toupper((unsigned char)*p);
        p++;
    }
    return s;
}
 
/*
 * Print every distinct n-gram (run of n consecutive characters) of text,
 * upper-cased, together with its frequency. N-grams are listed in order of
 * first appearance, five per line, followed by a blank line.
 *
 * n must be in 1..MAX_N and no longer than text; otherwise there are no
 * n-grams and only the trailing blank lines are printed.
 */
void ngrams(int n, char *text) {
    size_t len = strlen(text);
    /* Number of n-character windows; 0 for an out-of-range n, which also
       avoids the unsigned wrap-around of len - n when n > len. */
    size_t windows = (n >= 1 && n <= MAX_N && (size_t)n <= len)
                         ? len - (size_t)n + 1 : 0;
    size_t i, j, count = 0;
    bool found;
    char temp[MAX_N+1] = {'\0'};
    /* At most one distinct n-gram per window, so this can never overflow
       (a VLA must have at least one element). */
    ngram grams[windows ? windows : 1];
    char s[len+1];
    strcpy(s, text);
    strUpper(s);
    for (i = 0; i < windows; ++i) {
        memcpy(temp, s + i, (size_t)n);
        temp[n] = '\0';
        found = false;
        for (j = 0; j < count; ++j) {
            if (!strcmp(grams[j].str, temp)) {
                grams[j].freq++;
                found = true;
                break;
            }
        }
        if (!found) {
            /* temp is NUL-terminated, so the stored copy is too. */
            strcpy(grams[count].str, temp);
            grams[count].freq = 1;
            count++;
        }
    }
    for (i = 0; i < count; ++i) {
        printf("(\"%s\": %d) ", grams[i].str, grams[i].freq);
        if (!((i+1)%5)) printf("\n");
    }
    printf("\n\n");
}
 
/* Print the 2-grams up to the MAX_N-grams of the example sentence. */
int main() {
    char *text = "Live and let live";
    int n = 2;
    while (n <= MAX_N) {
        printf("All %d-grams of '%s' and their frequencies:\n", n, text);
        ngrams(n, text);
        ++n;
    }
    return 0;
}</syntaxhighlight>
 
{{out}}
<pre>
All 2-grams of 'Live and let live' and their frequencies:
("LI": 2) ("IV": 2) ("VE": 2) ("E ": 1) (" A": 1)
("AN": 1) ("ND": 1) ("D ": 1) (" L": 2) ("LE": 1)
("ET": 1) ("T ": 1)
 
All 3-grams of 'Live and let live' and their frequencies:
("LIV": 2) ("IVE": 2) ("VE ": 1) ("E A": 1) (" AN": 1)
("AND": 1) ("ND ": 1) ("D L": 1) (" LE": 1) ("LET": 1)
("ET ": 1) ("T L": 1) (" LI": 1)
 
All 4-grams of 'Live and let live' and their frequencies:
("LIVE": 2) ("IVE ": 1) ("VE A": 1) ("E AN": 1) (" AND": 1)
("AND ": 1) ("ND L": 1) ("D LE": 1) (" LET": 1) ("LET ": 1)
("ET L": 1) ("T LI": 1) (" LIV": 1)
</pre>
 
=={{header|Common Lisp}}==
Line 108 ⟶ 291:
</syntaxhighlight>
 
=={{header|F_Sharp|F#}}==
<syntaxhighlight lang="fsharp">
// N-grams. Nigel Galloway: April 2nd., 2024
// Upper-cases n, slides a window of g characters over it and pairs each
// distinct window (a char array) with the number of times it occurs.
let gram (n:string) g =
    n.ToUpper()
    |> Seq.windowed g
    |> Seq.countBy id
for n,g in (gram "Live and let live" 2) do printfn "%A %d" n g
</syntaxhighlight>
{{out}}
<pre>
[|'L'; 'I'|] 2
[|'I'; 'V'|] 2
[|'V'; 'E'|] 2
[|'E'; ' '|] 1
[|' '; 'A'|] 1
[|'A'; 'N'|] 1
[|'N'; 'D'|] 1
[|'D'; ' '|] 1
[|' '; 'L'|] 2
[|'L'; 'E'|] 1
[|'E'; 'T'|] 1
[|'T'; ' '|] 1
</pre>
=={{header|Factor}}==
{{works with|Factor|0.99 2022-04-03}}
Line 132 ⟶ 336:
}
</pre>
 
=={{header|Haskell}}==
 
<syntaxhighlight lang=haskell>import Control.Applicative (ZipList (ZipList, getZipList))
import Data.Char (toUpper)
import Data.List (tails)
import qualified Data.Map.Strict as M
 
------------------- MAP OF N-GRAM COUNTS -----------------
 
-- | Map from each window of n consecutive characters of the input
-- to the number of times that window occurs.
nGramCounts :: Int -> String -> M.Map String Int
nGramCounts n text =
  M.fromListWith (+) [(gram, 1) | gram <- windows n text]
 
 
------------------------- GENERIC ------------------------
 
-- | All contiguous sub-lists of length n, in order of position:
-- the first n tails are zipped together column by column.
windows :: Int -> [a] -> [[a]]
windows n xs = transpose (take n (tails xs))
 
-- | Rows to columns, truncated to the length of the shortest row
-- (zip semantics); the empty list maps to the empty list.
transpose :: [[a]] -> [[a]]
transpose [] = []
transpose rows = getZipList (sequenceA (map ZipList rows))
 
 
--------------------------- TEST -------------------------
-- | Print the n-gram counts of the upper-cased sample for n = 0 .. 4,
-- one (gram, count) pair per line, with a blank line after each group.
main :: IO ()
main = mapM_ report [0 .. 4]
  where
    sample = map toUpper "Live and let live"
    report n = do
      putStrLn (show n <> "-GRAMS:")
      mapM_ print (M.assocs (nGramCounts n sample))
      putStrLn ""</syntaxhighlight>
{{Out}}
<pre>0-GRAMS:
 
1-GRAMS:
(" ",3)
("A",1)
("D",1)
("E",3)
("I",2)
("L",3)
("N",1)
("T",1)
("V",2)
 
2-GRAMS:
(" A",1)
(" L",2)
("AN",1)
("D ",1)
("E ",1)
("ET",1)
("IV",2)
("LE",1)
("LI",2)
("ND",1)
("T ",1)
("VE",2)
 
3-GRAMS:
(" AN",1)
(" LE",1)
(" LI",1)
("AND",1)
("D L",1)
("E A",1)
("ET ",1)
("IVE",2)
("LET",1)
("LIV",2)
("ND ",1)
("T L",1)
("VE ",1)
 
4-GRAMS:
(" AND",1)
(" LET",1)
(" LIV",1)
("AND ",1)
("D LE",1)
("E AN",1)
("ET L",1)
("IVE ",1)
("LET ",1)
("LIVE",2)
("ND L",1)
("T LI",1)
("VE A",1)</pre>
 
=={{header|jq}}==
Line 203 ⟶ 500:
</pre>
</pre>
 
=={{header|Julia}}==
<syntaxhighlight lang="julia">function ngrams(str::AbstractString, n; uppercaseinput = true)
    # Returns a vector of (ngram, count) tuples for every distinct run of `n`
    # consecutive characters in `str`, in order of first appearance.
    # The text is upper-cased first unless `uppercaseinput` is false.
    s = uppercaseinput ? uppercase(str) : str
    # Work on characters rather than byte indices so that non-ASCII text
    # is sliced correctly.
    chars = collect(s)
    grams = [String(chars[i:i+n-1]) for i in 1:length(chars)-n+1]
    # Count over the list of windows, so overlapping repeats (e.g. "AA" in
    # "AAA") are each counted; a substring search would skip them.
    return [(ng, count(g -> g == ng, grams)) for ng in unique(grams)]
end
 
# Print the elements of `arr` left-padded to width 10, eight per row,
# followed by a blank line.
function eightcolumns(arr)
    for (idx, item) in pairs(arr)
        print(lpad(item, 10))
        idx % 8 == 0 && print("\n")
    end
    println("\n")
end
# Example text from the task description.
const s = "Live and let live"

# Unigrams and bigrams (upper-cased), then bigrams with the original case.
eightcolumns(ngrams(s, 1))
eightcolumns(ngrams(s, 2))
eightcolumns(ngrams(s, 2, uppercaseinput = false))
</syntaxhighlight>{{out}}
<pre>
("L", 3) ("I", 2) ("V", 2) ("E", 3) (" ", 3) ("A", 1) ("N", 1) ("D", 1)
("T", 1)
 
("LI", 2) ("IV", 2) ("VE", 2) ("E ", 1) (" A", 1) ("AN", 1) ("ND", 1) ("D ", 1)
(" L", 2) ("LE", 1) ("ET", 1) ("T ", 1)
 
("Li", 1) ("iv", 2) ("ve", 2) ("e ", 1) (" a", 1) ("an", 1) ("nd", 1) ("d ", 1)
(" l", 2) ("le", 1) ("et", 1) ("t ", 1) ("li", 1)
</pre>
 
=={{header|Nim}}==
<syntaxhighlight lang="Nim">import std/[strutils, tables]
 
type NGrams = CountTable[string]

func ngrams(text: string; n: Positive): NGrams =
  ## Count every run of `n` consecutive characters of `text`,
  ## ignoring case (the keys are lower-cased).
  let lowered = text.toLowerAscii
  for start in 0 .. lowered.len - n:
    result.inc(lowered[start ..< start + n])
 
const Text = "Live and let live"

# Show the 2-, 3- and 4-grams of the example text with their counts.
for size in 2..4:
  echo size, "-grams:"
  var counts = Text.ngrams(size)
  counts.sort()   # To display n-grams with higher score first.
  for gram, freq in counts:
    echo "“$1”: $2".format(gram, freq)
  echo()
</syntaxhighlight>
 
{{out}}
<pre>2-grams:
“ve”: 2
“li”: 2
“iv”: 2
“ l”: 2
“d ”: 1
“et”: 1
“t ”: 1
“an”: 1
“nd”: 1
“e ”: 1
“le”: 1
“ a”: 1
 
3-grams:
“ive”: 2
“liv”: 2
“ le”: 1
“nd ”: 1
“and”: 1
“et ”: 1
“ve ”: 1
“t l”: 1
“ an”: 1
“d l”: 1
“e a”: 1
“let”: 1
“ li”: 1
 
4-grams:
“live”: 2
“ liv”: 1
“ and”: 1
“e an”: 1
“ let”: 1
“and ”: 1
“d le”: 1
“t li”: 1
“nd l”: 1
“et l”: 1
“ive ”: 1
“let ”: 1
“ve a”: 1
</pre>
 
=={{header|Perl}}==
<syntaxhighlight lang="perl" line>use v5.36;
 
# Return a hash mapping each lower-cased run of $n consecutive characters
# of $line to the number of times it occurs.
sub n_gram ($n, $line) {
    my $lowered = lc $line;
    my %counts;
    $counts{ substr $lowered, $_, $n }++ for 0 .. length($line) - $n;
    return %counts;
}
 
# Bigrams of the example text, sorted by n-gram.
my %bi_grams = n_gram 2, 'Live and let live';
for my $gram (sort keys %bi_grams) {
    say qq|'$gram' - $bi_grams{$gram}|;
}

say '';

# Trigrams of the example text, sorted by n-gram.
my %tri_grams = n_gram 3, 'Live and let live';
for my $gram (sort keys %tri_grams) {
    say qq|'$gram' - $tri_grams{$gram}|;
}</syntaxhighlight>
{{out}}
<pre>' a' - 1
' l' - 2
'an' - 1
'd ' - 1
'e ' - 1
'et' - 1
'iv' - 2
'le' - 1
'li' - 2
'nd' - 1
't ' - 1
've' - 2
 
' an' - 1
' le' - 1
' li' - 1
'and' - 1
'd l' - 1
'e a' - 1
'et ' - 1
'ive' - 2
'let' - 1
'liv' - 2
'nd ' - 1
't l' - 1
've ' - 1</pre>
 
=={{header|Phix}}==
A dictionary is used to find the index of already-seen n-grams, even though a simpler find() would be good enough for this task.<br>
I have replicated most orderings found on this page, the task description order corresponds to orig/freq,<br>
and jq is alpha/freq, but with high last; there is no equivalent for the Factor or Raku orderings here ;-).
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
Line 281 ⟶ 718:
=={{header|Python}}==
 
This example generates n-grams lazily, much like the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] from the Python itertools docs.
<syntaxhighlight lang="python">
import pprint
from collections import Counter
from typing import Iterable
 
 
def n_grams(text: str, n: int) -> Iterable[str]:
    """Lazily produce every run of _n_ consecutive characters of _text_.

    The text is upper-cased first. Nothing is produced when _text_ is
    shorter than _n_.

    Raises:
        ValueError: immediately (not on first iteration) if _n_ < 1.
    """
    if n < 1:
        raise ValueError("n must be an integer > 0")

    upper = text.upper()
    last_start = len(upper) - n
    return (upper[start : start + n] for start in range(last_start + 1))
 
 
def main() -> None:
    """Print the 2-, 3- and 4-gram counts of the example text, most common first."""
    example_text = "Live and let live"

    for size in (2, 3, 4):
        counts = Counter(n_grams(example_text, size)).most_common()
        heading = f"{len(counts)} {size}-grams of {example_text!r}:\n"
        listing = pprint.pformat(counts, compact=True)
        print(heading, listing, end="\n\n")
 
 
if __name__ == "__main__":
main()
</syntaxhighlight>
 
{{out}}
<pre>
12 2-grams of 'Live and let live':
[('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1),
('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)]
 
13 3-grams of 'Live and let live':
[('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1),
('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1),
(' LI', 1)]
 
13 4-grams of 'Live and let live':
[('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1),
('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1),
(' LIV', 1)]
</pre>
 
 
===Sliding window===
 
This example takes inspiration from the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] found in Python's itertools docs.
 
<syntaxhighlight lang="python">
import pprint
from collections import Counter
from collections import deque
from itertools import islice
from typing import Iterable
 
 
def n_grams(text: str, n: int) -> Iterable[str]:
    """Yield every run of _n_ consecutive characters of the upper-cased _text_.

    A bounded deque is used as the sliding window: appending a character
    pushes the oldest one out.
    """
    chars = iter(text.upper())
    window = deque(islice(chars, n), maxlen=n)
    if len(window) != n:
        # The text is shorter than n, so there is no complete window.
        return
    yield "".join(window)
    for ch in chars:
        window.append(ch)
        yield "".join(window)
 
 
def main() -> None:
    """Print the 2-, 3- and 4-gram counts of the example text, most common first."""
    example_text = "Live and let live"

    for n in range(2, 5):
        counts = Counter(n_grams(example_text, n)).most_common()
        print(
            f"{len(counts)} {n}-grams of {example_text!r}:\n",
            pprint.pformat(counts, compact=True),
            end="\n\n",
        )
 
 
if __name__ == "__main__":
main()
</syntaxhighlight>
 
Line 330 ⟶ 822:
(' LIV', 1)]
</pre>
 
 
And a strict variant, compositionally assembled from some basics:
 
<syntaxhighlight lang="python">from itertools import (islice)
from functools import (reduce)
from operator import (add)
 
 
def nGramCounts(n, s):
    '''A dictionary mapping each n-gram of dimension n in s
       to the number of times it occurs.
    '''
    def bump(counts, gram):
        # A new dictionary in which gram's tally is one higher.
        return insertWith(add, gram, 1, counts)

    return reduce(bump, nGrams(n, s), {})
 
 
def nGrams(n, s):
    '''A generator of every upper-cased run of
       n consecutive characters in the string s.
    '''
    chars = list(s.upper())
    return (''.join(window) for window in windows(n, chars))
 
 
# ----------------------- GENERICS -----------------------
 
def insertWith(f, k, x, dct):
    '''A new dictionary updated with a
       (key, f(value, x)) tuple.
       Where there is no existing value for the key,
       the supplied x is used as the default.

       dct itself is left unchanged. Any hashable key is accepted:
       the copy is updated by item assignment, not by keyword
       expansion, which only allows string keys.
    '''
    updated = dict(dct)
    updated[k] = f(dct[k], x) if k in dct else x
    return updated
 
 
def tails(xs):
    '''A generator of every suffix of xs, from the
       whole of xs down to the empty suffix.
    '''
    return (xs[start:] for start in range(len(xs) + 1))
 
 
def windows(n, xs):
    '''Sliding windows of dimension n over xs, as tuples:
       the first n tails zipped together.
    '''
    leading = islice(tails(xs), n)
    return zip(*leading)
 
 
 
# ------------------------- TEST -------------------------
if __name__ == "__main__":
import pprint
 
EXAMPLE = "Live and let live"
 
for dimension in range(1, 5):
result = sorted(nGramCounts(dimension, EXAMPLE).items())
print(
f"{len(result)} {dimension}-grams of {EXAMPLE!r}:\n",
pprint.pformat(result),
end="\n\n",
)</syntaxhighlight>
{{Out}}
<pre>9 1-grams of 'Live and let live':
[(' ', 3),
('A', 1),
('D', 1),
('E', 3),
('I', 2),
('L', 3),
('N', 1),
('T', 1),
('V', 2)]
 
12 2-grams of 'Live and let live':
[(' A', 1),
(' L', 2),
('AN', 1),
('D ', 1),
('E ', 1),
('ET', 1),
('IV', 2),
('LE', 1),
('LI', 2),
('ND', 1),
('T ', 1),
('VE', 2)]
 
13 3-grams of 'Live and let live':
[(' AN', 1),
(' LE', 1),
(' LI', 1),
('AND', 1),
('D L', 1),
('E A', 1),
('ET ', 1),
('IVE', 2),
('LET', 1),
('LIV', 2),
('ND ', 1),
('T L', 1),
('VE ', 1)]
 
13 4-grams of 'Live and let live':
[(' AND', 1),
(' LET', 1),
(' LIV', 1),
('AND ', 1),
('D LE', 1),
('E AN', 1),
('ET L', 1),
('IVE ', 1),
('LET ', 1),
('LIVE', 2),
('ND L', 1),
('T LI', 1),
('VE A', 1)]</pre>
 
=={{header|Raku}}==
Line 339 ⟶ 946:
<pre>("IV"=>2,"T "=>1,"VE"=>2,"E "=>1,"LE"=>1,"AN"=>1,"LI"=>2,"ND"=>1,"ET"=>1," L"=>2," A"=>1,"D "=>1).Bag
("ET "=>1,"AND"=>1,"LIV"=>2," LI"=>1,"ND "=>1," LE"=>1,"IVE"=>2,"E A"=>1,"VE "=>1,"T L"=>1,"D L"=>1,"LET"=>1," AN"=>1).Bag</pre>
 
=={{header|RPL}}==
{{works with|Halcyon Calc|4.2.8}}
{| class="wikitable"
! RPL code
! Comment
|-
|
≪ → text n
≪ { } DUP n text SIZE '''FOR''' j
text j n - 1 + j SUB
'''IF''' DUP2 POS '''THEN'''
LAST 4 ROLL SWAP DUP2 GET 1 + PUT
SWAP DROP SWAP
'''ELSE''' + SWAP 1 + SWAP '''END'''
'''NEXT SHOWG'''
≫ ≫ ‘'''-GRAMS'''’ STO
≪ { } 1 3 PICK SIZE '''FOR''' j
OVER j GET "=" + 4 PICK j GET →STR + + '''NEXT'''
ROT ROT DROP2
≫ ‘'''SHOWG'''’ STO
|
'''-GRAMS''' ''( text n -- { "ngram=count".. } ) ''
Initialize 2 empty lists; for j = n to length(text):
ngram = text[j-n+1..j]
if ngram already in ngram list
increase counter in other list
get rid of ngram
else add to ngram list and set counter at 1 on the other list
Show results
'''SHOWG''' ''( { "ngram".. } { counts.. } -- { "ngram=count".. } ) ''
|}
{{in}}
<pre>
"LIVE AND LET LIVE" 2 -GRAMS
"LIVE AND LET LIVE" 3 -GRAMS
"LIVE AND LET LIVE" 4 -GRAMS
</pre>
{{out}}
<pre>
3: { "LI=2" "IV=2" "VE=2" "E =1" " A=1" "AN=1" "ND=1" "D =1" " L=2" "LE=1" "ET=1" "T =1" }
2: { "LIV=2" "IVE=2" "VE =1" "E A=1" " AN=1" "AND=1" "ND =1" "D L=1" " LE=1" "LET=1" "ET =1" "T L=1" " LI=1" }
1: { "LIVE=2" "IVE =1" "VE A=1" "E AN=1" " AND=1" "AND =1" "ND L=1" "D LE=1" " LET=1" "LET =1" "ET L=1" "T LI=1" " LIV=1" }
</pre>
 
=={{header|Wren}}==
===Version 1 (Sorted order)===
{{libheader|Wren-str}}
{{libheader|Wren-maputil}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "./str" for Str
import "./maputil" for MultiSet
import "./fmt" for Fmt
 
Line 353 ⟶ 1,011:
for (i in 0..text.count-n) {
var ngram = text[i...i+n]
MultiSet.add(ngrams, ngram)
}
return ngrams
Line 389 ⟶ 1,047:
("D LE" : 1) ("E AN" : 1) ("ET L" : 1) ("IVE " : 1) ("LET " : 1)
("ND L" : 1) ("T LI" : 1) ("VE A" : 1)
</pre>
===Version 2 (Original order)===
{{libheader|Wren-ordered}}
The iteration order of 'Map' objects in Wren is undefined though they can subsequently be sorted into a particular order as the first version shows. However, to maintain the original order of insertion we need to use one of the classes in the above module which automatically keep track of such order when items are added or removed.
<syntaxhighlight lang="wren">import "./str" for Str
import "./ordered" for OrderedBag
import "./fmt" for Fmt
 
// Returns an OrderedBag holding every run of n consecutive characters of
// the upper-cased text, so each n-gram is tallied in order of first
// appearance. The bag is empty when n is less than 1 or longer than text.
var findNgrams = Fn.new { |n, text|
    text = Str.upper(text)
    var ngrams = OrderedBag.new()
    // A Wren range whose end is below its start counts downwards, so
    // without this guard an over-long n would slice past the end of text.
    if (n < 1 || n > text.count) return ngrams
    for (i in 0..text.count-n) {
        var ngram = text[i...i+n]
        ngrams.add(ngram)
    }
    return ngrams
}
 
// Show the 2-, 3- and 4-grams of the example text with their frequencies.
var text = "Live and let live"
for (n in [2, 3, 4]) {
var ngrams = findNgrams.call(n, text)
System.print("All %(n)-grams of '%(text)' and their frequencies:")
// Render each bag entry as ("gram" : count) using its key and value.
var ng = ngrams.toList.map { |me| "(\"%(me.key)\" : %(me.value))"}
// NOTE(review): the final argument presumably sets five entries per row - confirm against the Wren-fmt docs
Fmt.tprint("$s ", ng, 5)
System.print()
}</syntaxhighlight>
 
{{out}}
<pre>
All 2-grams of 'Live and let live' and their frequencies:
("LI" : 2) ("IV" : 2) ("VE" : 2) ("E " : 1) (" A" : 1)
("AN" : 1) ("ND" : 1) ("D " : 1) (" L" : 2) ("LE" : 1)
("ET" : 1) ("T " : 1)
 
All 3-grams of 'Live and let live' and their frequencies:
("LIV" : 2) ("IVE" : 2) ("VE " : 1) ("E A" : 1) (" AN" : 1)
("AND" : 1) ("ND " : 1) ("D L" : 1) (" LE" : 1) ("LET" : 1)
("ET " : 1) ("T L" : 1) (" LI" : 1)
 
All 4-grams of 'Live and let live' and their frequencies:
("LIVE" : 2) ("IVE " : 1) ("VE A" : 1) ("E AN" : 1) (" AND" : 1)
("AND " : 1) ("ND L" : 1) ("D LE" : 1) (" LET" : 1) ("LET " : 1)
("ET L" : 1) ("T LI" : 1) (" LIV" : 1)
</pre>
 
=={{header|XPL0}}==
<syntaxhighlight lang="XPL0">int Dict(100), Count(100), Size;
 
\Record one occurrence of the packed n-gram Wd. The first Size entries of Dict
\ are searched: on a match the matching Count entry is incremented, otherwise
\ Wd is appended with a count of 1 and Size grows by one.
\NOTE(review): Size is never checked against the declared array size - confirm inputs stay within it
proc LookUp(Wd); \Add word to dictionary, or increment its count
int Wd, I;
[for I:= 0 to Size-1 do
if Dict(I) = Wd then
[Count(I):= Count(I)+1;
return;
];
\not found, so append a new entry
Dict(Size):= Wd;
Count(Size):= 1;
Size:= Size+1;
];
 
\Print the distinct N-grams of Str with their counts, five per line, in order
\ of first appearance. Each N-gram is upper-cased and packed into one integer,
\ 8 bits per character, so it can be stored and compared as a single value.
\NOTE(review): packing presumably limits N to the number of bytes in an integer - confirm
proc ShowNGram(N, Str); \Show N-grams for string
char N, Str;
int I, J, Wd, Ch;
[IntOut(0, N); Text(0, "-grams:^m^j");
\empty the dictionary and start at the first character
Size:= 0; I:= 0;
loop [Wd:= 0;
for J:= 0 to N-1 do
[Ch:= Str(I+J);
\reaching the terminator ends the scan, so a window containing it is never recorded
if Ch = $A0 then quit; \terminating space
\clearing bit 5 converts a lower case letter to upper case
if Ch>=^a and Ch<=^z then Ch:= Ch & ~$20;
Wd:= Wd<<8 + Ch;
];
I:= I+1;
LookUp(Wd);
];
for I:= 0 to Size-1 do
[Wd:= Dict(I);
\unpack the characters, the first character is in the highest byte
for J:= N-1 downto 0 do
ChOut(0, Wd>>(J*8));
ChOut(0, ^ );
IntOut(0, Count(I));
\new line after every fifth entry, otherwise a tab
if rem(I/5) = 4 then CrLf(0) else ChOut(0, 9\tab\);
];
CrLf(0);
];
 
\Show the 2-, 3- and 4-grams of the example text.
\NOTE(review): the extra trailing space presumably becomes the $A0 terminator that ShowNGram tests for - confirm
int N;
for N:= 2 to 4 do ShowNGram(N, "Live and let live ")</syntaxhighlight>
{{out}}
<pre>
2-grams:
LI 2 IV 2 VE 2 E 1 A 1
AN 1 ND 1 D 1 L 2 LE 1
ET 1 T 1
3-grams:
LIV 2 IVE 2 VE 1 E A 1 AN 1
AND 1 ND 1 D L 1 LE 1 LET 1
ET 1 T L 1 LI 1
4-grams:
LIVE 2 IVE 1 VE A 1 E AN 1 AND 1
AND 1 ND L 1 D LE 1 LET 1 LET 1
ET L 1 T LI 1 LIV 1
</pre>
140

edits