N-grams: Difference between revisions

26,673 bytes added ,  1 month ago
Add ABC
(Add ABC)
 
(16 intermediate revisions by 3 users not shown)
Line 23:
;* [[Sorensen–Dice_coefficient|Related task: Sorensen–Dice coefficient]]
 
 
=={{header|ABC}}==
<syntaxhighlight lang="abc">HOW TO RETURN n grams str:
\ Return a table that maps every substring of length n in str to its number of occurrences.
PUT {} IN grams
\ i visits every start position that still leaves room for n characters
FOR i IN {1..#str-n+1}:
\ str@i drops the first i-1 characters, |n keeps the next n of what is left
PUT str@i|n IN part
SELECT:
part in keys grams:
PUT grams[part]+1 IN grams[part]
ELSE:
\ first time this n-gram is seen
PUT 1 IN grams[part]
RETURN grams
HOW TO SHOW n GRAMS FOR str:
\ Print a heading followed by every n-gram of str with its count, five entries per row.
PUT n grams str IN grams
\ col counts the entries written so far; it decides where rows break
PUT 0 IN col
WRITE "`n`-grams for '`str`':"/
FOR gr IN keys grams:
\ << 12 pads the entry on the right to a 12-character column
WRITE "'`gr`' - `grams[gr]`" << 12
\ start a new row after every fifth entry
IF col mod 5 = 4: WRITE /
PUT col+1 IN col
WRITE /
\ Demonstration: list the 2-, 3- and 4-grams of the sample sentence, one blank line after each list.
FOR n IN {2;3;4}:
SHOW n GRAMS FOR "LIVE AND LET LIVE"
WRITE /</syntaxhighlight>
{{out}}
<pre>2-grams for 'LIVE AND LET LIVE':
' A' - 1 ' L' - 2 'AN' - 1 'D ' - 1 'E ' - 1
'ET' - 1 'IV' - 2 'LE' - 1 'LI' - 2 'ND' - 1
'T ' - 1 'VE' - 2
 
3-grams for 'LIVE AND LET LIVE':
' AN' - 1 ' LE' - 1 ' LI' - 1 'AND' - 1 'D L' - 1
'E A' - 1 'ET ' - 1 'IVE' - 2 'LET' - 1 'LIV' - 2
'ND ' - 1 'T L' - 1 'VE ' - 1
 
4-grams for 'LIVE AND LET LIVE':
' AND' - 1 ' LET' - 1 ' LIV' - 1 'AND ' - 1 'D LE' - 1
'E AN' - 1 'ET L' - 1 'IVE ' - 1 'LET ' - 1 'LIVE' - 2
'ND L' - 1 'T LI' - 1 'VE A' - 1</pre>
 
=={{header|ALGOL 68}}==
Line 125 ⟶ 166:
" LIV": 1
</pre>
 
=={{header|APL}}==
{{works with|Dyalog APL}}
<syntaxhighlight lang="apl">⍝ n ngrams s: one row per distinct n-gram of s, followed by its count.
⍝ n,/s catenates every window of n adjacent items; ⌸ groups equal windows and
⍝ (⊣,(≢⊢)) joins each unique window (left) with the tally of its positions (right).
ngrams ← (⊣,(≢⊢))⌸,/</syntaxhighlight>
{{out}}
<pre> 2 3 4 ngrams¨ ⊂'LIVE AND LET LIVE'
LI 2 LIV 2 LIVE 2
IV 2 IVE 2 IVE 1
VE 2 VE 1 VE A 1
E 1 E A 1 E AN 1
A 1 AN 1 AND 1
AN 1 AND 1 AND 1
ND 1 ND 1 ND L 1
D 1 D L 1 D LE 1
L 2 LE 1 LET 1
LE 1 LET 1 LET 1
ET 1 ET 1 ET L 1
T 1 T L 1 T LI 1
LI 1 LIV 1</pre>
 
=={{header|Arturo}}==
Line 184 ⟶ 244:
"T LI" 1
" LIV" 1</pre>
 
=={{header|BASIC}}==
<syntaxhighlight lang="basic">10 DEFINT A-Z
15 REM MAIN: PRINT THE 2-, 3- AND 4-GRAMS OF S$ VIA THE SUBROUTINE AT 100
20 S$ = "LIVE AND LET LIVE"
30 FOR N=2 TO 4: GOSUB 100: NEXT N
40 END
100 REM PRINT N-GRAMS OF S$
101 REM IN: N = GRAM LENGTH, S$ = TEXT
102 REM P$() HOLDS THE DISTINCT N-GRAMS, C() THEIR COUNTS, FD = HOW MANY ARE STORED
103 REM NOTE(REVIEW): THE SAMPLE OUTPUT SHOWS "2 grams" WITHOUT THE HYPHEN; PRESUMABLY
104 REM PRINT USING TREATS THE "-" AFTER "#" AS A TRAILING-SIGN FIELD - CONFIRM
105 PRINT USING "#-grams of '";N;: PRINT S$;"':"
110 DIM P$(LEN(S$)-N+1), C(LEN(S$)-N+1)
120 FD = 0
125 REM VISIT EVERY START POSITION THAT LEAVES ROOM FOR N CHARACTERS
130 FOR I=1 TO LEN(S$)-N+1
140 PA$ = MID$(S$,I,N)
145 REM NOTHING STORED YET: SKIP THE SEARCH AND STORE DIRECTLY
150 IF FD = 0 THEN 190
155 REM LINEAR SEARCH OF THE N-GRAMS STORED SO FAR
160 FOR J=1 TO FD
170 IF P$(J) = PA$ THEN C(J) = C(J)+1: GOTO 210
180 NEXT J
185 REM NOT SEEN BEFORE: APPEND WITH COUNT 1
190 FD = FD+1
200 P$(FD) = PA$ : C(FD) = 1
210 NEXT I
215 REM PRINT THE TABLE; THE TRAILING COMMA ADVANCES TO THE NEXT PRINT ZONE
220 FOR I=1 TO FD
230 PRINT "'";P$(I);"': ";C(I),
240 NEXT I
250 PRINT: PRINT
255 REM THE ARRAYS ARE DIMENSIONED ON EVERY CALL, SO RELEASE THEM BEFORE RETURNING
260 ERASE P$, C
270 RETURN</syntaxhighlight>
{{out}}
<pre>2 grams of 'LIVE AND LET LIVE':
'LI': 2 'IV': 2 'VE': 2 'E ': 1 ' A': 1
'AN': 1 'ND': 1 'D ': 1 ' L': 2 'LE': 1
'ET': 1 'T ': 1
 
3 grams of 'LIVE AND LET LIVE':
'LIV': 2 'IVE': 2 'VE ': 1 'E A': 1 ' AN': 1
'AND': 1 'ND ': 1 'D L': 1 ' LE': 1 'LET': 1
'ET ': 1 'T L': 1 ' LI': 1
 
4 grams of 'LIVE AND LET LIVE':
'LIVE': 2 'IVE ': 1 'VE A': 1 'E AN': 1 ' AND': 1
'AND ': 1 'ND L': 1 'D LE': 1 ' LET': 1 'LET ': 1
'ET L': 1 'T LI': 1 ' LIV': 1</pre>
 
=={{header|BCPL}}==
<syntaxhighlight lang="bcpl">get "libhdr"
 
// True when the n characters of str starting at index i equal the n characters starting at index j.
let equal(str, n, i, j) = valof
$( for k=0 to n-1
unless str%(i+k) = str%(j+k) resultis false
resultis true
$)
 
// Collect the distinct n-grams of str into res and return how many were found.
// res is used as pairs: res!(2*k) is the start index in str of the k-th distinct
// n-gram and res!(2*k+1) is its count. The caller must supply enough room.
let findngrams(n, str, res) = valof
$( let found = 0

// str%0 is used as the string length; i visits every start position with room for n characters
for i=1 to str%0-n+1
$( for j=0 to found-1
$( if equal(str, n, i, res!(2*j))
$( res!(2*j+1) := res!(2*j+1) + 1
goto nextitem
$)
$)
// not seen before: record where it starts and give it a count of 1
res!(2*found) := i
res!(2*found+1) := 1
found := found + 1
nextitem: loop
$)
resultis found
$)
 
// Print a heading and every distinct n-gram of str with its count, five per line.
let showngrams(n, str) be
$( let res = vec 64 // room for the (start index, count) pairs filled in by findngrams
let amt = findngrams(n, str, res)
writef("%N-grams of '%S':*N", n, str)
for i=0 to amt-1
$( wrch('*'')
// the n-gram is not copied anywhere; print it straight out of str
for j=res!(2*i) to res!(2*i)+n-1 do wrch(str%j)
writef("' - %N",res!(2*i+1))
// newline after every fifth entry, tab otherwise
wrch(i rem 5=4 -> '*N', '*T')
$)
wrch('*N')
$)
 
// Entry point: show the 2-, 3- and 4-grams of the sample sentence.
let start() be
for n=2 to 4 do showngrams(n, "LIVE AND LET LIVE")</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|BQN}}==
<syntaxhighlight lang="bqn"># n Ngrams s: a list of ⟨n-gram, count⟩ pairs.
# Read right to left: n↕s builds every length-n window and <˘ encloses each row;
# ⊐⊸⊔ groups equal windows together; (⊏∾≠)¨ pairs each group's first member with the group's size.
Ngrams ← (⊏∾≠)¨ ∘ (⊐⊸⊔) ∘ (<˘∘↕)
# Show: turn each pair into the text 'gram': count and merge the lines into one character table.
Show ← > ("'" ∾ ⊣ ∾ "': " ∾ (•Fmt⊢))´¨

# Demonstration for n = 2, 3 and 4 on the sample sentence.
2‿3‿4 Show∘Ngrams¨ <"LIVE AND LET LIVE"</syntaxhighlight>
{{out}}
<pre>┌─
· ┌─ ┌─ ┌─
╵"'LI': 2 ╵"'LIV': 2 ╵"'LIVE': 2
'IV': 2 'IVE': 2 'IVE ': 1
'VE': 2 'VE ': 1 'VE A': 1
'E ': 1 'E A': 1 'E AN': 1
' A': 1 ' AN': 1 ' AND': 1
'AN': 1 'AND': 1 'AND ': 1
'ND': 1 'ND ': 1 'ND L': 1
'D ': 1 'D L': 1 'D LE': 1
' L': 2 ' LE': 1 ' LET': 1
'LE': 1 'LET': 1 'LET ': 1
'ET': 1 'ET ': 1 'ET L': 1
'T ': 1" 'T L': 1 'T LI': 1
┘ ' LI': 1" ' LIV': 1"
┘ ┘
┘</pre>
 
=={{header|C}}==
Line 265 ⟶ 446:
("ET L": 1) ("T LI": 1) (" LIV": 1)
</pre>
 
=={{header|C++}}==
<syntaxhighlight lang="cpp">#include <iostream>
#include <map>
#include <string>
 
// Count every contiguous substring of length n in s.
//
// Returns a map from each distinct n-gram to the number of times it occurs;
// iteration order is the map's key order (lexicographic).
// The result is empty when n is not positive or when s is shorter than n.
std::map<std::string, int> find_ngrams(int n, const std::string& s)
{
    std::map<std::string, int> ngrams;

    // s.length() - n is an unsigned subtraction: without this guard an n larger
    // than the string wraps around to a huge bound, and substr() eventually
    // throws std::out_of_range after counting truncated fragments.
    if (n <= 0 || static_cast<size_t>(n) > s.length())
        return ngrams;

    const size_t width = static_cast<size_t>(n);
    const size_t max_loc = s.length() - width;  // last valid start index
    for (size_t i = 0; i <= max_loc; i++)
        ngrams[s.substr(i, width)]++;
    return ngrams;
}
 
// Write every n-gram and its count to standard output, five entries per line.
// Entries on the same line are separated by tabs; a final newline ends the table.
void print_ngrams(const std::map<std::string, int>& ngrams)
{
    int printed = 0;
    for (auto it = ngrams.begin(); it != ngrams.end(); ++it, ++printed) {
        std::cout << "'" << it->first << "' - " << it->second;
        const bool row_is_full = (printed % 5 == 4);
        if (row_is_full) {
            std::cout << std::endl;
        } else {
            std::cout << '\t';
        }
    }
    std::cout << std::endl;
}
 
// Print the 2-, 3- and 4-gram tables of the sample sentence.
int main(void)
{
    const std::string s = "LIVE AND LET LIVE";
    int n = 2;
    while (n <= 4) {
        std::cout << n << "-grams of '" << s << ":" << std::endl;
        const std::map<std::string, int> table = find_ngrams(n, s);
        print_ngrams(table);
        ++n;
    }
    return 0;
}</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE:
' A' - 1 ' L' - 2 'AN' - 1 'D ' - 1 'E ' - 1
'ET' - 1 'IV' - 2 'LE' - 1 'LI' - 2 'ND' - 1
'T ' - 1 'VE' - 2
3-grams of 'LIVE AND LET LIVE:
' AN' - 1 ' LE' - 1 ' LI' - 1 'AND' - 1 'D L' - 1
'E A' - 1 'ET ' - 1 'IVE' - 2 'LET' - 1 'LIV' - 2
'ND ' - 1 'T L' - 1 'VE ' - 1
4-grams of 'LIVE AND LET LIVE:
' AND' - 1 ' LET' - 1 ' LIV' - 1 'AND ' - 1 'D LE' - 1
'E AN' - 1 'ET L' - 1 'IVE ' - 1 'LET ' - 1 'LIVE' - 2
'ND L' - 1 'T LI' - 1 'VE A' - 1</pre>
 
=={{header|CLU}}==
<syntaxhighlight lang="clu">parts = iter (n: int, s: string) yields (string)
% Yield each substring of s that is n characters long, from left to right.
% Start positions run from 1 up to size(s)-n+1.
for i: int in int$from_to(1, string$size(s)-n+1) do
yield(string$substr(s, i, n))
end
end parts
 
% One table entry: an n-gram (str) and how many times it has been seen (count).
ngram = struct[str: string, count: int]
 
% Count how often each n-character substring occurs in s.
% New entries are appended at the high end of the array, and the array is
% returned converted to a sequence.
find_ngrams = proc (n: int, s: string) returns (sequence[ngram])
ng: array[ngram] := array[ngram]$[]
for part: string in parts(n, s) do
begin
% search the entries stored so far; leave through the 'found' exit on a match
for i: int in array[ngram]$indexes(ng) do
if ng[i].str = part then exit found(i) end
end
% reached only when nothing matched: append a fresh entry with count 1
array[ngram]$addh(ng, ngram${str: part, count: 1})
end
except when found(i: int):
% the entry is replaced by a new struct carrying the incremented count
ng[i] := ngram${str: ng[i].str, count: ng[i].count + 1}
end
end
return(sequence[ngram]$a2s(ng))
end find_ngrams
 
% Write the n-grams of str and their counts to stream s, five entries per line.
show_ngrams = proc (s: stream, n: int, str: string)
ngrams: sequence[ngram] := find_ngrams(n, str)
col: int := 0
for ng: ngram in sequence[ngram]$elements(ngrams) do
% each entry is written left-aligned in a 15-character field
stream$putleft(s, "'" || ng.str || "' - " ||
int$unparse(ng.count), 15)
% end the line after every fifth entry
if col // 5 = 4 then stream$putl(s, "") end
col := col + 1
end
stream$putl(s, "")
end show_ngrams
 
% Entry point: print the 2-, 3- and 4-gram tables of the sample sentence
% on the primary output, each under its own heading.
start_up = proc ()
po: stream := stream$primary_output()
s: string := "LIVE AND LET LIVE"
for n: int in int$from_to(2, 4) do
stream$putl(po, int$unparse(n) || "-grams of '" || s || "':")
show_ngrams(po, n, s)
stream$putl(po, "")
end
end start_up</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
 
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
 
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|Common Lisp}}==
Line 290 ⟶ 587:
("ND" . 1) ("D " . 1) ("LE" . 1) ("ET" . 1) ("T " . 1))
</syntaxhighlight>
 
=={{header|Cowgol}}==
<syntaxhighlight lang="cowgol">include "cowgol.coh";
include "strings.coh";
 
# One table entry. The n-gram text is not copied: ptr points at its first
# character inside the source string, size is its length, count its tally.
record Ngram is
ptr: [uint8];
size: intptr;
count: intptr;
end record;
 
# Print one entry in the form 'text' - count.
sub PrintNgram(ngram: [Ngram]) is
print_char('\'');
var ptr := ngram.ptr;
var n := ngram.size;
# emit exactly size characters starting at ptr
while n > 0 loop
print_char([ptr]);
ptr := @next ptr;
n := n - 1;
end loop;
print("' - ");
print_i32(ngram.count as uint32);
end sub;
 
# Compare n bytes at a and b. eq is 1 when all n bytes match and 0 at the first difference.
sub MemCmp(n: intptr, a: [uint8], b: [uint8]): (eq: uint8) is
eq := 1;
while n>0 loop
if [a] != [b] then
eq := 0;
return;
end if;
a := @next a;
b := @next b;
n := n - 1;
end loop;
end sub;
 
# Fill result with one Ngram record per distinct n-character substring of str
# and return the number of records written in amount.
# The caller provides the result storage; no bound on it is checked here.
sub FindNgrams(n: intptr, str: [uint8], result: [Ngram]): (amount: intptr) is
var nextres := result;
amount := 0;
# Append a record for the n-gram starting at pos, with a count of 1.
sub NewNgram(pos: [uint8]) is
nextres.ptr := pos;
nextres.size := n;
nextres.count := 1;
nextres := @next nextres;
amount := amount + 1;
end sub;
# Count the n-gram starting at pos: bump its record if one exists, otherwise add one.
sub IncNgram(pos: [uint8]) is
if amount == 0 then
NewNgram(pos);
return;
end if;
var curres := result;
var left := amount;
# linear search through the records written so far
while left > 0 loop
if MemCmp(n, pos, curres.ptr) != 0 then
curres.count := curres.count + 1;
return;
end if;
left := left - 1;
curres := @next curres;
end loop;
NewNgram(pos);
end sub;
# number of start positions that leave room for n characters
var charsleft := StrLen(str) - n + 1;
while charsleft > 0 loop
IncNgram(str);
str := @next str;
charsleft := charsleft - 1;
end loop;
end sub;
 
# Print a heading and the n-gram table of str, five entries per line,
# followed by a blank line.
sub ShowNgrams(n: intptr, str: [uint8]) is
# fixed-size storage handed to FindNgrams
var ngrams: Ngram[128];
print_i32(n as uint32);
print("-grams of '");
print(str);
print("':\n");
var amount := FindNgrams(n, str, &ngrams[0]) as @indexof ngrams;
var i: @indexof ngrams := 0;
while i < amount loop
PrintNgram(&ngrams[i]);
# newline after every fifth entry, tab otherwise
if i % 5 == 4
then print_nl();
else print_char('\t');
end if;
i := i + 1;
end loop;
print_nl();
print_nl();
end sub;
 
# Demonstration: 2-, 3- and 4-grams of the sample sentence.
var str := "LIVE AND LET LIVE";
ShowNgrams(2, str);
ShowNgrams(3, str);
ShowNgrams(4, str);</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
 
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
 
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|Draco}}==
<syntaxhighlight lang="draco">\util.g
 
/* Linked-list node for one n-gram. The text is not copied: nptr points at
   its first character in the source string, length is its size, amount its
   tally, and next links to the following node (nil at the end). */
type Ngram = struct {
*char nptr;
word length;
word amount;
*Ngram next;
};
 
/* True when the first n characters at a and b are equal. The loop stops at
   the first difference, so n reaches 0 only if everything matched. */
proc equal_n(word n; *char a, b) bool:
while n>0 and a* = b* do
a := a+1;
b := b+1;
n := n-1
od;
n = 0
corp
 
/* Write exactly n characters starting at ptr. */
proc write_nchars(word n; *char ptr) void:
word i;
for i from 1 upto n do
write(ptr*);
ptr := ptr + 1;
od
corp
 
/* Walk the list starting at ngram and print each entry as 'text' - amount,
   five entries per line (tab-separated within a line). */
proc write_ngrams(*Ngram ngram) void:
word i;
i := 0;
while ngram /= nil do
write("'");
write_nchars(ngram*.length, ngram*.nptr);
write("' - ", ngram*.amount);
/* line break after every fifth entry, tab otherwise */
if i % 5=4
then writeln()
else write('\t')
fi;
i := i+1;
ngram := ngram*.next
od
corp
 
/* Allocate a list node for the n characters at ptr, with amount 1 and no
   successor, and return it. Nothing in the visible code releases the node. */
proc new_ngram(word n; *char ptr) *Ngram:
*Ngram ngram;
ngram := new(Ngram);
ngram*.length := n;
ngram*.nptr := ptr;
ngram*.amount := 1;
ngram*.next := nil;
ngram
corp;
 
/* Count the n-gram at ptr in the list headed by ngram and return the list
   head. An empty list yields a new single-node list; otherwise the matching
   node's amount is incremented, or a new node is appended at the tail. */
proc inc_ngram(*Ngram ngram; word n; *char ptr) *Ngram:
*Ngram begin, lastn;
begin := ngram;
if begin = nil then
new_ngram(n, ptr)
else
/* advance until a match is found or the list runs out; lastn trails one node behind */
while
ngram /= nil and not equal_n(n, ptr, ngram*.nptr)
do
lastn := ngram;
ngram := ngram*.next
od;
if ngram /= nil then
ngram*.amount := ngram*.amount + 1
else
lastn*.next := new_ngram(n, ptr)
fi;
begin
fi
corp
 
/* Build and return the list of distinct n-grams of string with their tallies.
   NOTE(review): maxpos is computed as CharsLen(string) - n in a word; this
   presumably wraps if n exceeds the string length - confirm before reuse. */
proc find_ngrams(word n; *char string) *Ngram:
*Ngram ngrams;
word maxpos, i;
ngrams := nil;
maxpos := CharsLen(string) - n;
/* i is the zero-based start offset of each window */
for i from 0 upto maxpos do
ngrams := inc_ngram(ngrams, n, string + i)
od;
ngrams
corp
 
/* Entry point: print the 2-, 3- and 4-gram tables of the sample sentence. */
proc main() void:
*char string = "LIVE AND LET LIVE";
word n;
for n from 2 upto 4 do
writeln(n, "-grams of '", string, "':");
write_ngrams(find_ngrams(n, string));
writeln();
od;
corp</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|F_Sharp|F#}}==
Line 312 ⟶ 836:
[|'T'; ' '|] 1
</pre>
 
=={{header|Factor}}==
{{works with|Factor|0.99 2022-04-03}}
Line 347 ⟶ 872:
 
-- | Frequency table of every contiguous substring of length n in the string.
nGramCounts :: Int -> String -> M.Map String Int
nGramCounts n s =
  foldr
    (flip (M.insertWith (+)) 1) -- add 1 to the window's count, starting from 1
    M.empty
    (windows n s)

------------------------- GENERIC ------------------------

-- | All contiguous sublists of length n, left to right.
-- Empty when n is 0 or when the list is shorter than n.
windows :: Int -> [a] -> [[a]]
windows n = transpose . take n . tails

-- | Transpose that stops at the shortest row (zip semantics).
-- The empty case is needed because traversing no rows with ZipList
-- would otherwise produce an endless list of empty columns.
transpose :: [[a]] -> [[a]]
transpose [] = []
transpose xs = getZipList (traverse ZipList xs)
 
 
Line 367 ⟶ 896:
>> putStrLn ""
)
[0 .. 4]</syntaxhighlight>
{{Out}}
<pre>0-GRAMS:
 
1-GRAMS:
(" ",3)
("A",1)
("D",1)
("E",3)
("I",2)
("L",3)
("N",1)
("T",1)
("V",2)
 
2-GRAMS:
(" A",1)
(" L",2)
Line 513 ⟶ 1,055:
(" l", 2) ("le", 1) ("et", 1) ("t ", 1) ("li", 1)
</pre>
 
=={{header|Miranda}}==
<syntaxhighlight lang="miranda">main :: [sys_message]
|| Program output: the test case for n = 2, 3 and 4 on the sample sentence, concatenated.
main = concat (map (testcase s) [2,3,4])
where s = "LIVE AND LET LIVE"
 
|| Messages for one run: a heading, the formatted n-gram table of s, and a blank line.
testcase :: [char]->num->[sys_message]
testcase s n = [Stdout (show n ++ "-grams of '" ++ s ++ ":'\n"),
Stdout (showngrams n s),
Stdout "\n"]
 
|| Format the n-grams of s as text: each entry left-justified in 12 columns, six entries per line.
showngrams :: num->[char]->[char]
showngrams n s = lay (map concat (splitn 6 cols))
where ng = ngrams n s
cols = [ljustify 12 (showngram ng') | ng'<-ng]
 
|| Render one (n-gram, count) pair as "text": count.
showngram :: ([char],num)->[char]
showngram (s,i) = concat ["\"", s, "\": ", show i]
 
|| Cut a list into consecutive chunks of n elements; the last chunk may be shorter.
splitn :: num->[*]->[[*]]
splitn n [] = []
splitn n ls = take n ls:splitn n (drop n ls)
 
|| The distinct n-element windows of a list, each paired with its number of occurrences.
ngrams :: num->[*]->[([*],num)]
ngrams n = count . group n
 
|| Every window of n consecutive elements, left to right; stops once fewer than n elements remain.
group :: num->[*]->[[*]]
group n ls = [], if #ls < n
group n ls = take n ls : group n (tl ls)
 
|| Tally the elements of a list into (element, count) pairs by folding incelem from the left.
count :: [*]->[(*,num)]
count = foldl incelem []
 
|| Add one occurrence of el to the tally: a new pair is appended at the end when el is absent.
|| The second equation repeats el in its pattern, so it applies only when the head pair holds el.
incelem :: [(*,num)]->*->[(*,num)]
incelem [] el = [(el, 1)]
incelem ((el,n):cs) el = (el,n+1):cs
incelem (c:cs) el = c:incelem cs el</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE:'
"LI": 2 "IV": 2 "VE": 2 "E ": 1 " A": 1 "AN": 1
"ND": 1 "D ": 1 " L": 2 "LE": 1 "ET": 1 "T ": 1
 
3-grams of 'LIVE AND LET LIVE:'
"LIV": 2 "IVE": 2 "VE ": 1 "E A": 1 " AN": 1 "AND": 1
"ND ": 1 "D L": 1 " LE": 1 "LET": 1 "ET ": 1 "T L": 1
" LI": 1
 
4-grams of 'LIVE AND LET LIVE:'
"LIVE": 2 "IVE ": 1 "VE A": 1 "E AN": 1 " AND": 1 "AND ": 1
"ND L": 1 "D LE": 1 " LET": 1 "LET ": 1 "ET L": 1 "T LI": 1
" LIV": 1</pre>
 
=={{header|Nim}}==
Line 698 ⟶ 1,291:
alpha/freq: LIVE 2, AND 1, LET 1, LIV 1, AND 1, D LE 1, E AN 1, ET L 1, IVE 1, LET 1, ND L 1, T LI 1, VE A 1
</pre>
 
=={{header|PL/M}}==
<syntaxhighlight lang="plm">100H:
/* System-call helpers. BDOS jumps to address 5 with a function number F and
   an argument A; EXIT jumps to address 0. PR$CH uses function 2 with a
   character, PR$STR uses function 9 with the address of a string (the
   strings passed to it in this program all end in '$'). */
BDOS: PROCEDURE (F,A); DECLARE F BYTE, A ADDRESS; GO TO 5; END BDOS;
EXIT: PROCEDURE; GO TO 0; END EXIT;
PR$CH: PROCEDURE (C); DECLARE C BYTE; CALL BDOS(2, C); END PR$CH;
PR$STR: PROCEDURE (S); DECLARE S ADDRESS; CALL BDOS(9, S); END PR$STR;
 
/* Print N in decimal. Digits are produced least-significant first and stored
   backwards into S, in front of its terminating '$'; printing then starts at
   the most significant digit written. */
PR$NUM: PROCEDURE (N);
DECLARE N ADDRESS;
DECLARE S (6) BYTE INITIAL ('.....$');
DECLARE I BYTE;
I = 5;
DIGIT:
I = I - 1;
S(I) = N MOD 10 + '0';
/* divide N by 10 in place and loop while digits remain */
IF (N := N / 10) > 0 THEN GO TO DIGIT;
CALL PR$STR(.S(I));
END PR$NUM;
 
/* Print exactly N characters starting at address STR. */
PR$NSTR: PROCEDURE (N, STR);
DECLARE (STR, N) ADDRESS, CH BASED STR BYTE;
DO WHILE N>0;
CALL PR$CH(CH);
STR = STR+1;
N = N-1;
END;
END PR$NSTR;
 
/* Compare N bytes at STRA and STRB. Returns 0FFH when all N bytes are equal
   and 0 at the first difference. */
CMP$NSTR: PROCEDURE (N, STRA, STRB) BYTE;
DECLARE (STRA, STRB, N, I) ADDRESS;
DECLARE A BASED STRA BYTE;
DECLARE B BASED STRB BYTE;
DO I=0 TO N-1;
IF A(I) <> B(I) THEN RETURN 0;
END;
RETURN 0FFH;
END CMP$NSTR;
 
/* Length of the string at STR, counted up to (not including) its '$' terminator. */
STR$LEN: PROCEDURE (STR) ADDRESS;
DECLARE (N, STR) ADDRESS, S BASED STR BYTE;
N = 0;
DO WHILE S(N) <> '$';
N = N+1;
END;
RETURN N;
END STR$LEN;
 
FIND$NGRAMS: PROCEDURE (N, STR, RSLT) ADDRESS;
    /* Find the distinct N-character substrings of the '$'-terminated string
       at STR and count their occurrences.
       RSLT is the address of caller-supplied storage used as pairs of
       ADDRESS values: ITEM(2*K) is the address of the K-th distinct n-gram
       inside STR and ITEM(2*K+1) is its count.
       Returns the number of distinct n-grams stored (0 when the string is
       shorter than N). */
    DECLARE (N, I, J, STR, RSLT, FOUND) ADDRESS;
    DECLARE S BASED STR BYTE;
    DECLARE ITEM BASED RSLT ADDRESS;
    DECLARE MAXPOS ADDRESS;

    /* ADDRESS arithmetic is unsigned: without this guard the subtraction
       below would wrap around and the scan would run far past the string. */
    IF STR$LEN(STR) < N THEN RETURN 0;
    MAXPOS = STR$LEN(STR) - N;
    FOUND = 0;
    DO I = 0 TO MAXPOS;
        /* FOUND - 1 would wrap when nothing is stored yet, so skip the search */
        IF FOUND = 0 THEN GO TO NOT$FOUND;
        /* Search only the FOUND entries written so far. The upper bound used
           to be FOUND itself, which also inspected the next, not yet written
           slot; that slot can still hold a pointer and count from an earlier
           call, which made some n-grams vanish or be counted wrongly. */
        DO J = 0 TO FOUND - 1;
            IF CMP$NSTR(N, .S(I), ITEM(2*J)) THEN DO;
                ITEM(2*J+1) = ITEM(2*J+1) + 1;
                GO TO NEXT$ITEM;
            END;
        END;
    NOT$FOUND:
        /* first occurrence: remember where it starts and count it once */
        ITEM(2*FOUND) = .S(I);
        ITEM(2*FOUND+1) = 1;
        FOUND = FOUND + 1;
    NEXT$ITEM:
    END;
    RETURN FOUND;
END FIND$NGRAMS;
 
/* Print a heading and the n-gram table of the string at STR, five entries
   per line. N is printed as a single character (N + '0'), so the heading is
   only meaningful for N below 10. */
PRINT$NGRAMS: PROCEDURE (N, STR);
DECLARE (N, I, STR) ADDRESS;
/* storage for the (address, count) pairs filled in by FIND$NGRAMS */
DECLARE RESULT (64) ADDRESS;
DECLARE AMOUNT ADDRESS;
CALL PR$CH(N + '0');
CALL PR$STR(.'-GRAMS OF ''$');
CALL PR$STR(STR);
CALL PR$STR(.(''': ', 13, 10, '$'));

AMOUNT = FIND$NGRAMS(N, STR, .RESULT);
/* NOTE(review): AMOUNT - 1 presumably wraps if AMOUNT is 0 - confirm callers never hit that */
DO I = 0 TO AMOUNT - 1;
CALL PR$CH('''');
CALL PR$NSTR(N, RESULT(2*I));
CALL PR$STR(.''' - $');
CALL PR$NUM(RESULT(2*I+1));
/* CR/LF after every fifth entry, tab (9) otherwise */
IF I MOD 5 = 4
THEN CALL PR$STR(.(13,10,'$'));
ELSE CALL PR$CH(9);
END;
CALL PR$STR(.(13,10,'$'));
END PRINT$NGRAMS;
 
/* Main program: print the 2-, 3- and 4-gram tables of the sample sentence, then exit. */
DECLARE STRING DATA ('LIVE AND LET LIVE$');
DECLARE N BYTE;

DO N = 2 TO 4;
CALL PRINT$NGRAMS(N, .STRING);
END;
CALL EXIT;
EOF</syntaxhighlight>
{{out}}
<pre>2-GRAMS OF 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
3-GRAMS OF 'LIVE AND LET LIVE':
'LIV' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1 'AND' - 1
'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1 'ET ' - 1
'T L' - 1 ' LI' - 1 'IVE' - 1
4-GRAMS OF 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|Python}}==
 
This example generates n-grams lazily, much like the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] from the Python itertools docs.

<syntaxhighlight lang="python">
import pprint
from collections import Counter
from typing import Iterable
 
 
def n_grams(text: str, n: int) -> Iterable[str]:
    """Lazily produce each upper-cased substring of length _n_ found in _text_.

    Substrings are produced from left to right. Nothing is produced when
    _text_ is shorter than _n_.

    Raises:
        ValueError: if _n_ is smaller than 1 (raised at call time).
    """
    if n < 1:
        raise ValueError("n must be an integer > 0")

    upper = text.upper()
    start_count = len(upper) - n + 1  # number of valid start positions
    return (upper[start : start + n] for start in range(start_count))
 
 
def main() -> None:
    """Print the n-gram frequency tables (n = 2, 3, 4) of a sample sentence.

    Each table lists the n-grams from most to least common, preceded by a
    heading with the number of distinct n-grams.
    """
    example_text = "Live and let live"

    for n in (2, 3, 4):
        counts = Counter(n_grams(example_text, n)).most_common()
        heading = f"{len(counts)} {n}-grams of {example_text!r}:\n"
        table = pprint.pformat(counts, compact=True)
        print(heading, table, end="\n\n")
 
 
if __name__ == "__main__":
main()
</syntaxhighlight>
 
{{out}}
<pre>
12 2-grams of 'Live and let live':
[('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1),
('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)]
 
13 3-grams of 'Live and let live':
[('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1),
('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1),
(' LI', 1)]
 
13 4-grams of 'Live and let live':
[('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1),
('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1),
(' LIV', 1)]
</pre>
 
 
===Sliding window===
 
This example takes inspiration from the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] found in Python's itertools docs.
 
<syntaxhighlight lang="python">
import pprint
from collections import Counter
from collections import deque
from itertools import islice
from typing import Iterable
 
 
def n_grams(text: str, n: int) -> Iterable[str]:
    """Generate contiguous sequences of _n_ characters from _text_.

    The text is upper-cased and consumed through an iterator; a fixed-size
    deque holds the current window, so each new character pushes the oldest
    one out. Nothing is generated when _text_ is shorter than _n_.

    Raises:
        ValueError: if _n_ is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    if n < 1:
        # A zero-width window would otherwise yield a run of empty strings.
        raise ValueError("n must be an integer > 0")

    it = iter(text.upper())
    n_gram = deque(islice(it, n), maxlen=n)
    if len(n_gram) == n:
        yield "".join(n_gram)
    for x in it:
        n_gram.append(x)  # maxlen drops the oldest character automatically
        yield "".join(n_gram)
 
 
def main() -> None:
    """Print the n-gram frequency tables (n = 2, 3, 4) of a sample sentence.

    Each table lists the n-grams from most to least common, preceded by a
    heading with the number of distinct n-grams.
    """
    example_text = "Live and let live"

    for n in range(2, 5):
        counts = Counter(n_grams(example_text, n)).most_common()
        print(
            f"{len(counts)} {n}-grams of {example_text!r}:\n",
            pprint.pformat(counts, compact=True),
            end="\n\n",
        )
 
 
if __name__ == "__main__":
main()
</syntaxhighlight>
 
Line 750 ⟶ 1,516:
(' LIV', 1)]
</pre>
 
 
And a strict variant, compositionally assembled from some basics:
 
<syntaxhighlight lang="python">from itertools import (islice)
from functools import (reduce)
from operator import (add)
 
 
def nGramCounts(n, s):
    '''Frequency dictionary of the n-grams of s.

    Keys are the case-folded (upper-case) substrings of length n,
    values are the number of times each one occurs.
    '''
    counts = {}
    for gram in nGrams(n, s):
        # insertWith returns a fresh dictionary on every step
        counts = insertWith(add, gram, 1, counts)
    return counts
 
 
def nGrams(n, s):
    '''Every substring of length n in the upper-cased string s, left to right.'''
    characters = list(s.upper())
    return (''.join(window) for window in windows(n, characters))
 
 
# ----------------------- GENERICS -----------------------
 
def insertWith(f, k, x, dct):
    '''A new dictionary updated with a
       (key, f(value, x)) tuple.
       Where there is no existing value for the key,
       the supplied x is used as the default.

       The input dictionary is left unchanged. Any hashable key is
       accepted: the previous keyword-expansion form, dict(dct, **{k: v}),
       raised TypeError for keys that are not strings.
    '''
    updated = dict(dct)
    updated[k] = f(dct[k], x) if k in dct else x
    return updated
 
 
def tails(xs):
    '''Lazily, every suffix of xs from the whole of xs down to the empty suffix.'''
    suffix_count = len(xs) + 1
    return (xs[start:] for start in range(suffix_count))
 
 
def windows(n, xs):
    '''Tuples of n adjacent items of xs, one per start position.

    Built by zipping the first n suffixes of xs, so the result ends
    with the last position that still has n items.
    '''
    leading_suffixes = list(islice(tails(xs), n))
    return zip(*leading_suffixes)
 
 
 
# ------------------------- TEST -------------------------
if __name__ == "__main__":
import pprint
 
EXAMPLE = "Live and let live"
 
for dimension in range(1, 5):
result = sorted(nGramCounts(dimension, EXAMPLE).items())
print(
f"{len(result)} {dimension}-grams of {EXAMPLE!r}:\n",
pprint.pformat(result),
end="\n\n",
)</syntaxhighlight>
{{Out}}
<pre>9 1-grams of 'Live and let live':
[(' ', 3),
('A', 1),
('D', 1),
('E', 3),
('I', 2),
('L', 3),
('N', 1),
('T', 1),
('V', 2)]
 
12 2-grams of 'Live and let live':
[(' A', 1),
(' L', 2),
('AN', 1),
('D ', 1),
('E ', 1),
('ET', 1),
('IV', 2),
('LE', 1),
('LI', 2),
('ND', 1),
('T ', 1),
('VE', 2)]
 
13 3-grams of 'Live and let live':
[(' AN', 1),
(' LE', 1),
(' LI', 1),
('AND', 1),
('D L', 1),
('E A', 1),
('ET ', 1),
('IVE', 2),
('LET', 1),
('LIV', 2),
('ND ', 1),
('T L', 1),
('VE ', 1)]
 
13 4-grams of 'Live and let live':
[(' AND', 1),
(' LET', 1),
(' LIV', 1),
('AND ', 1),
('D LE', 1),
('E AN', 1),
('ET L', 1),
('IVE ', 1),
('LET ', 1),
('LIVE', 2),
('ND L', 1),
('T LI', 1),
('VE A', 1)]</pre>
 
=={{header|Raku}}==
Line 759 ⟶ 1,640:
<pre>("IV"=>2,"T "=>1,"VE"=>2,"E "=>1,"LE"=>1,"AN"=>1,"LI"=>2,"ND"=>1,"ET"=>1," L"=>2," A"=>1,"D "=>1).Bag
("ET "=>1,"AND"=>1,"LIV"=>2," LI"=>1,"ND "=>1," LE"=>1,"IVE"=>2,"E A"=>1,"VE "=>1,"T L"=>1,"D L"=>1,"LET"=>1," AN"=>1).Bag</pre>
 
=={{header|Refal}}==
<syntaxhighlight lang="refal">$ENTRY Go {
/* Entry point: bind the sample sentence to e.Str and show its 2-, 3- and 4-grams. */
, 'LIVE AND LET LIVE': e.Str
= <ShowNgrams 2 e.Str>
<ShowNgrams 3 e.Str>
<ShowNgrams 4 e.Str>;
};
 
/* Print a heading, the n-gram table of e.Str five entries per line, and a blank line. */
ShowNgrams {
s.N e.Str =
<Prout <Symb s.N> '-grams of "' e.Str '":'>
<ShowLines 5 <Ngrams s.N e.Str>>
<Prout>;
};
 
/* Print the entries e.X in lines of s.N entries each: First splits off the
   next s.N entries (e.L), which are printed; the remainder (e.R) is handled
   recursively until nothing is left. */
ShowLines {
s.N = ;
s.N e.X, <First s.N e.X>: (e.L) e.R =
<Prout <Each DispNgram e.L>> <ShowLines s.N e.R>;
};
 
/* Apply function s.F (through Mu) to every term of the list, in order. */
Each {
s.F = ;
s.F t.I e.Is = <Mu s.F t.I> <Each s.F e.Is>;
};
 
/* Format one entry ((text) count) as "(text) - count" followed by a space. */
DispNgram {
((e.S) s.C) = '(' e.S ') - ' <Symb s.C> ' ';
};
 
/* The n-grams of e.Str as a list of ((text) count) terms: split into groups, then tally them. */
Ngrams {
s.N e.Str = <Count () <Groups s.N e.Str>>;
};
 
/* Every window of s.N consecutive symbols of e.X, each wrapped in parentheses.
   Lenw supplies the length s.L; when Compare reports '-' (fewer than s.N
   symbols remain) the result is empty. Otherwise the first s.N symbols form
   a group and the process repeats on e.X without its first symbol. */
Groups {
s.N e.X, <Lenw e.X>: s.L e.X, <Compare s.L s.N>: {
'-' = ;
s.C, <First s.N e.X>: (e.G) e.R, e.X: s.Z e.Y =
(e.G) <Groups s.N e.Y>;
}
};
 
/* Tally a list of terms: (e.Cs) is the table built so far; each remaining
   term is folded into it with Inc, and the table is returned at the end. */
Count {
(e.Cs) = e.Cs;
(e.Cs) t.I e.Is = <Count (<Inc (e.Cs) t.I>) e.Is>;
};
 
/* Add one occurrence of t.I to the table: if an entry (t.I count) exists its
   count is incremented in place, otherwise (t.I 1) is appended at the end. */
Inc {
(e.X (t.I s.C) e.Y) t.I = e.X (t.I <+ 1 s.C>) e.Y;
(e.X) t.I = e.X (t.I 1);
};</syntaxhighlight>
{{out}}
<pre>2-grams of "LIVE AND LET LIVE":
(LI) - 2 (IV) - 2 (VE) - 2 (E ) - 1 ( A) - 1
(AN) - 1 (ND) - 1 (D ) - 1 ( L) - 2 (LE) - 1
(ET) - 1 (T ) - 1
 
3-grams of "LIVE AND LET LIVE":
(LIV) - 2 (IVE) - 2 (VE ) - 1 (E A) - 1 ( AN) - 1
(AND) - 1 (ND ) - 1 (D L) - 1 ( LE) - 1 (LET) - 1
(ET ) - 1 (T L) - 1 ( LI) - 1
 
4-grams of "LIVE AND LET LIVE":
(LIVE) - 2 (IVE ) - 1 (VE A) - 1 (E AN) - 1 ( AND) - 1
(AND ) - 1 (ND L) - 1 (D LE) - 1 ( LET) - 1 (LET ) - 1
(ET L) - 1 (T LI) - 1 ( LIV) - 1</pre>
 
=={{header|RPL}}==
Line 808 ⟶ 1,756:
2: { "LIV=2" "IVE=2" "VE =1" "E A=1" " AN=1" "AND=1" "ND =1" "D L=1" " LE=1" "LET=1" "ET =1" "T L=1" " LI=1" }
1: { "LIVE=2" "IVE =1" "VE A=1" "E AN=1" " AND=1" "AND =1" "ND L=1" "D LE=1" " LET=1" "LET =1" "ET L=1" "T LI=1" " LIV=1" }
</pre>
 
=={{header|SETL}}==
<syntaxhighlight lang="setl">program find_ngrams;
-- Print the 2-, 3- and 4-gram tables of a sample sentence, eight entries per line.
input := "LIVE AND LET LIVE";
loop for size in [2,3,4] do
print(str size+"-grams of '"+input+"':");
ng := ngrams(input, size);
col := 0;
-- iterate over the map: ngram is the key, count the value stored for it
loop for count = ng(ngram) do
-- each entry is padded on the right to 10 characters
nprint(rpad("'" + ngram + "': " + str count, 10));
-- col is incremented first, so the line ends after every eighth entry
if (col +:= 1) mod 8 = 0 then print; end if;
end loop;
print;
print;
end loop;

-- Return a map from each substring of the given size in input to its number of occurrences.
proc ngrams(input, size);
ng := {};
-- i visits every start position that leaves room for size characters
loop for i in [1..#input-size+1] do
-- NOTE(review): relies on +:= treating a missing map entry as 0 - confirm for the SETL dialect in use
ng(input(i..i+size-1)) +:= 1;
end loop;
return ng;
end proc;
end program;</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
' A': 1 ' L': 2 'AN': 1 'D ': 1 'E ': 1 'ET': 1 'IV': 2 'LE': 1
'LI': 2 'ND': 1 'T ': 1 'VE': 2
 
3-grams of 'LIVE AND LET LIVE':
' AN': 1 ' LE': 1 ' LI': 1 'AND': 1 'D L': 1 'E A': 1 'ET ': 1 'IVE': 2
'LET': 1 'LIV': 2 'ND ': 1 'T L': 1 'VE ': 1
 
4-grams of 'LIVE AND LET LIVE':
' AND': 1 ' LET': 1 ' LIV': 1 'AND ': 1 'D LE': 1 'E AN': 1 'ET L': 1 'IVE ': 1
'LET ': 1 'LIVE': 2 'ND L': 1 'T LI': 1 'VE A': 1
 
</pre>
 
2,115

edits