N-grams: Difference between revisions

22,681 bytes added ,  1 month ago
Add ABC
(→‎{{header|Python}}: Add simpler (and faster) implementation)
(Add ABC)
 
(12 intermediate revisions by the same user not shown)
Line 23:
;* [[Sorensen–Dice_coefficient|Related task: Sorensen–Dice coefficient]]
 
 
=={{header|ABC}}==
<syntaxhighlight lang="abc">HOW TO RETURN n grams str:
\ Build a table that maps every n-character slice of str to the number
\ of times that slice occurs.
PUT {} IN grams
\ i visits every start position that still leaves n characters
FOR i IN {1..#str-n+1}:
\ str@i is str from position i onward; |n keeps its first n characters
PUT str@i|n IN part
SELECT:
part in keys grams:
PUT grams[part]+1 IN grams[part]
ELSE:
PUT 1 IN grams[part]
RETURN grams
HOW TO SHOW n GRAMS FOR str:
\ Write a heading, then every n-gram of str with its count, five per row.
PUT n grams str IN grams
\ col counts the entries written so far and decides where rows break
PUT 0 IN col
WRITE "`n`-grams for '`str`':"/
FOR gr IN keys grams:
\ << 12 left-justifies each entry in a field of 12 characters
WRITE "'`gr`' - `grams[gr]`" << 12
\ start a new row after every fifth entry
IF col mod 5 = 4: WRITE /
PUT col+1 IN col
WRITE /
\ Demonstration: 2-, 3- and 4-grams of the sample sentence, a blank line after each.
FOR n IN {2;3;4}:
SHOW n GRAMS FOR "LIVE AND LET LIVE"
WRITE /</syntaxhighlight>
{{out}}
<pre>2-grams for 'LIVE AND LET LIVE':
' A' - 1 ' L' - 2 'AN' - 1 'D ' - 1 'E ' - 1
'ET' - 1 'IV' - 2 'LE' - 1 'LI' - 2 'ND' - 1
'T ' - 1 'VE' - 2
 
3-grams for 'LIVE AND LET LIVE':
' AN' - 1 ' LE' - 1 ' LI' - 1 'AND' - 1 'D L' - 1
'E A' - 1 'ET ' - 1 'IVE' - 2 'LET' - 1 'LIV' - 2
'ND ' - 1 'T L' - 1 'VE ' - 1
 
4-grams for 'LIVE AND LET LIVE':
' AND' - 1 ' LET' - 1 ' LIV' - 1 'AND ' - 1 'D LE' - 1
'E AN' - 1 'ET L' - 1 'IVE ' - 1 'LET ' - 1 'LIVE' - 2
'ND L' - 1 'T LI' - 1 'VE A' - 1</pre>
 
=={{header|ALGOL 68}}==
Line 125 ⟶ 166:
" LIV": 1
</pre>
 
=={{header|APL}}==
{{works with|Dyalog APL}}
<syntaxhighlight lang="apl">⍝ n ngrams s: the windowed reduction n ,/ s yields every length-n slice of s;
⍝ Key (⌸) then pairs each distinct slice (⊣) with the tally of its positions (≢⊢).
ngrams ← (⊣,(≢⊢))⌸,/</syntaxhighlight>
{{out}}
<pre> 2 3 4 ngrams¨ ⊂'LIVE AND LET LIVE'
LI 2 LIV 2 LIVE 2
IV 2 IVE 2 IVE 1
VE 2 VE 1 VE A 1
E 1 E A 1 E AN 1
A 1 AN 1 AND 1
AN 1 AND 1 AND 1
ND 1 ND 1 ND L 1
D 1 D L 1 D LE 1
L 2 LE 1 LET 1
LE 1 LET 1 LET 1
ET 1 ET 1 ET L 1
T 1 T L 1 T LI 1
LI 1 LIV 1</pre>
 
=={{header|Arturo}}==
Line 184 ⟶ 244:
"T LI" 1
" LIV" 1</pre>
 
=={{header|BASIC}}==
<syntaxhighlight lang="basic">10 DEFINT A-Z
15 REM MAIN PROGRAM: PRINT THE 2-, 3- AND 4-GRAMS OF S$ VIA THE SUBROUTINE AT 100
20 S$ = "LIVE AND LET LIVE"
30 FOR N=2 TO 4: GOSUB 100: NEXT N
40 END
100 REM PRINT N-GRAMS OF S$
101 REM INPUT: S$ = TEXT, N = GRAM LENGTH
102 REM P$() HOLDS THE DISTINCT GRAMS, C() THEIR COUNTS, FD = HOW MANY ARE STORED
103 REM NOTE(REVIEW): THE "-" AFTER "#" IN THE PRINT USING FORMAT IS PRESUMABLY TAKEN
104 REM AS A TRAILING-SIGN FIELD RATHER THAN A LITERAL HYPHEN - CONFIRM
105 PRINT USING "#-grams of '";N;: PRINT S$;"':"
110 DIM P$(LEN(S$)-N+1), C(LEN(S$)-N+1)
120 FD = 0
130 FOR I=1 TO LEN(S$)-N+1
140 PA$ = MID$(S$,I,N)
145 REM NOTHING STORED YET: SKIP THE SEARCH AND STORE THE FIRST GRAM
150 IF FD = 0 THEN 190
155 REM LINEAR SEARCH; ON A MATCH BUMP THE COUNT AND GO ON TO THE NEXT POSITION
160 FOR J=1 TO FD
170 IF P$(J) = PA$ THEN C(J) = C(J)+1: GOTO 210
180 NEXT J
185 REM NOT SEEN BEFORE: APPEND IT WITH A COUNT OF 1
190 FD = FD+1
200 P$(FD) = PA$ : C(FD) = 1
210 NEXT I
215 REM LIST THE RESULTS IN ORDER OF FIRST APPEARANCE
220 FOR I=1 TO FD
230 PRINT "'";P$(I);"': ";C(I),
240 NEXT I
250 PRINT: PRINT
255 REM DROP THE ARRAYS SO LINE 110 CAN DIMENSION THEM AGAIN ON THE NEXT CALL
260 ERASE P$, C
270 RETURN</syntaxhighlight>
{{out}}
<pre>2 grams of 'LIVE AND LET LIVE':
'LI': 2 'IV': 2 'VE': 2 'E ': 1 ' A': 1
'AN': 1 'ND': 1 'D ': 1 ' L': 2 'LE': 1
'ET': 1 'T ': 1
 
3 grams of 'LIVE AND LET LIVE':
'LIV': 2 'IVE': 2 'VE ': 1 'E A': 1 ' AN': 1
'AND': 1 'ND ': 1 'D L': 1 ' LE': 1 'LET': 1
'ET ': 1 'T L': 1 ' LI': 1
 
4 grams of 'LIVE AND LET LIVE':
'LIVE': 2 'IVE ': 1 'VE A': 1 'E AN': 1 ' AND': 1
'AND ': 1 'ND L': 1 'D LE': 1 ' LET': 1 'LET ': 1
'ET L': 1 'T LI': 1 ' LIV': 1</pre>
 
=={{header|BCPL}}==
<syntaxhighlight lang="bcpl">get "libhdr"
 
// Compare the n characters of str starting at index i with the n characters
// starting at index j. Yields true when all match, false at the first mismatch.
let equal(str, n, i, j) = valof
$( for k=0 to n-1
unless str%(i+k) = str%(j+k) resultis false
resultis true
$)
 
// Collect the distinct n-grams of str into res and yield how many were found.
// res receives two words per n-gram: res!(2*k) is the index in str where the
// n-gram first starts, res!(2*k+1) is its occurrence count.
let findngrams(n, str, res) = valof
$( let found = 0

// str%0 is used as the string length; i visits every start that leaves n characters
for i=1 to str%0-n+1
$( for j=0 to found-1
$( if equal(str, n, i, res!(2*j))
$( res!(2*j+1) := res!(2*j+1) + 1
// already recorded: skip the "new entry" code below
goto nextitem
$)
$)
// no earlier occurrence matched: record this position with a count of 1
res!(2*found) := i
res!(2*found+1) := 1
found := found + 1
nextitem: loop
$)
resultis found
$)
 
// Print a heading, then every distinct n-gram of str with its count,
// five entries per line.
let showngrams(n, str) be
$( let res = vec 64
// amt is the number of (start index, count) pairs findngrams stored in res
let amt = findngrams(n, str, res)
writef("%N-grams of '%S':*N", n, str)
for i=0 to amt-1
$( wrch('*'')
// res!(2*i) is where this n-gram starts inside str; write its n characters
for j=res!(2*i) to res!(2*i)+n-1 do wrch(str%j)
writef("' - %N",res!(2*i+1))
// newline after every fifth entry, tab otherwise
wrch(i rem 5=4 -> '*N', '*T')
$)
wrch('*N')
$)
 
// Entry point: show the 2-, 3- and 4-grams of the sample sentence.
let start() be
for n=2 to 4 do showngrams(n, "LIVE AND LET LIVE")</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|BQN}}==
<syntaxhighlight lang="bqn"># n Ngrams s: one ⟨n-gram, count⟩ pair per distinct length-n window of s.
#   <˘∘↕    take the length-n windows of s and enclose each one
#   ⊐⊸⊔     group equal windows together
#   (⊏∾≠)¨  for each group, join its first member with the group's length
Ngrams ← (⊏∾≠)¨ ∘ (⊐⊸⊔) ∘ (<˘∘↕)
# Show: format each pair as 'gram': count and merge the lines into one array.
Show ← > ("'" ∾ ⊣ ∾ "': " ∾ (•Fmt⊢))´¨

# Demonstration for n = 2, 3 and 4 on the sample sentence.
2‿3‿4 Show∘Ngrams¨ <"LIVE AND LET LIVE"</syntaxhighlight>
{{out}}
<pre>┌─
· ┌─ ┌─ ┌─
╵"'LI': 2 ╵"'LIV': 2 ╵"'LIVE': 2
'IV': 2 'IVE': 2 'IVE ': 1
'VE': 2 'VE ': 1 'VE A': 1
'E ': 1 'E A': 1 'E AN': 1
' A': 1 ' AN': 1 ' AND': 1
'AN': 1 'AND': 1 'AND ': 1
'ND': 1 'ND ': 1 'ND L': 1
'D ': 1 'D L': 1 'D LE': 1
' L': 2 ' LE': 1 ' LET': 1
'LE': 1 'LET': 1 'LET ': 1
'ET': 1 'ET ': 1 'ET L': 1
'T ': 1" 'T L': 1 'T LI': 1
┘ ' LI': 1" ' LIV': 1"
┘ ┘
┘</pre>
 
=={{header|C}}==
Line 265 ⟶ 446:
("ET L": 1) ("T LI": 1) (" LIV": 1)
</pre>
 
=={{header|C++}}==
<syntaxhighlight lang="cpp">#include <iostream>
#include <map>
#include <string>
 
// Count every contiguous substring of length n in s.
//
// Returns a map from each distinct n-gram to its number of occurrences
// (ordered lexicographically, as std::map always is).
// When n is not positive or is larger than s.length() there are no n-grams
// and an empty map is returned.
std::map<std::string, int> find_ngrams(int n, const std::string& s)
{
    std::map<std::string, int> ngrams;

    // Guard before subtracting: s.length() is unsigned, so length - n would
    // wrap to a huge value for n > length and the loop would run until
    // substr() throws std::out_of_range.
    if (n <= 0 || static_cast<size_t>(n) > s.length())
        return ngrams;

    const size_t width = static_cast<size_t>(n);
    const size_t max_loc = s.length() - width;  // last valid start index
    for (size_t i = 0; i <= max_loc; i++)
        ngrams[s.substr(i, width)]++;
    return ngrams;
}
 
// Write every n-gram and its count to std::cout as 'gram' - count.
// Entries are laid out five to a row: a tab follows each entry except the
// fifth of a row, which is followed by a line break. A final line break
// closes the listing.
void print_ngrams(const std::map<std::string, int>& ngrams)
{
    int printed = 0;
    for (auto it = ngrams.begin(); it != ngrams.end(); ++it, ++printed) {
        std::cout << "'" << it->first << "' - " << it->second;
        const bool row_full = (printed % 5 == 4);
        if (row_full) {
            std::cout << std::endl;
        } else {
            std::cout << '\t';
        }
    }
    std::cout << std::endl;
}
 
// Print the 2-, 3- and 4-grams of the sample sentence, each list under
// its own heading line.
int main()
{
    const std::string s("LIVE AND LET LIVE");
    for (int n = 2; n <= 4; n++) {
        // "':" closes the quote opened in front of the sentence; the heading
        // previously ended in ":" only, leaving the quote unbalanced.
        std::cout << n << "-grams of '" << s << "':" << std::endl;
        print_ngrams(find_ngrams(n, s));
    }
    return 0;
}</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE:
' A' - 1 ' L' - 2 'AN' - 1 'D ' - 1 'E ' - 1
'ET' - 1 'IV' - 2 'LE' - 1 'LI' - 2 'ND' - 1
'T ' - 1 'VE' - 2
3-grams of 'LIVE AND LET LIVE:
' AN' - 1 ' LE' - 1 ' LI' - 1 'AND' - 1 'D L' - 1
'E A' - 1 'ET ' - 1 'IVE' - 2 'LET' - 1 'LIV' - 2
'ND ' - 1 'T L' - 1 'VE ' - 1
4-grams of 'LIVE AND LET LIVE:
' AND' - 1 ' LET' - 1 ' LIV' - 1 'AND ' - 1 'D LE' - 1
'E AN' - 1 'ET L' - 1 'IVE ' - 1 'LET ' - 1 'LIVE' - 2
'ND L' - 1 'T LI' - 1 'VE A' - 1</pre>
 
=={{header|CLU}}==
<syntaxhighlight lang="clu">% Iterator: yields each n-character substring of s, from start position 1
% up to the last position that still leaves n characters.
parts = iter (n: int, s: string) yields (string)
for i: int in int$from_to(1, string$size(s)-n+1) do
yield(string$substr(s, i, n))
end
end parts
 
% One result entry: an n-gram (str) and how often it occurs (count).
ngram = struct[str: string, count: int]
 
% Return the distinct n-character substrings of s, each paired with its
% occurrence count, in order of first appearance.
find_ngrams = proc (n: int, s: string) returns (sequence[ngram])
ng: array[ngram] := array[ngram]$[]
for part: string in parts(n, s) do
begin
% look for an existing entry; leaving through exit found(i) skips the addh below
for i: int in array[ngram]$indexes(ng) do
if ng[i].str = part then exit found(i) end
end
% not seen before: append it with a count of 1
array[ngram]$addh(ng, ngram${str: part, count: 1})
end
except when found(i: int):
% entry i matched: store a replacement record whose count is one higher
ng[i] := ngram${str: ng[i].str, count: ng[i].count + 1}
end
end
return(sequence[ngram]$a2s(ng))
end find_ngrams
 
% Write the n-grams of str and their counts to stream s, five per line,
% each entry left-justified in a 15-character field.
show_ngrams = proc (s: stream, n: int, str: string)
ngrams: sequence[ngram] := find_ngrams(n, str)
% col counts the entries written so far and decides where lines break
col: int := 0
for ng: ngram in sequence[ngram]$elements(ngrams) do
stream$putleft(s, "'" || ng.str || "' - " ||
int$unparse(ng.count), 15)
% end the line after every fifth entry
if col // 5 = 4 then stream$putl(s, "") end
col := col + 1
end
stream$putl(s, "")
end show_ngrams
 
% Entry point: print the 2-, 3- and 4-grams of the sample sentence to the
% primary output, each list preceded by a heading and followed by a blank line.
start_up = proc ()
po: stream := stream$primary_output()
s: string := "LIVE AND LET LIVE"
for n: int in int$from_to(2, 4) do
stream$putl(po, int$unparse(n) || "-grams of '" || s || "':")
show_ngrams(po, n, s)
stream$putl(po, "")
end
end start_up</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
 
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
 
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|Common Lisp}}==
Line 290 ⟶ 587:
("ND" . 1) ("D " . 1) ("LE" . 1) ("ET" . 1) ("T " . 1))
</syntaxhighlight>
 
=={{header|Cowgol}}==
<syntaxhighlight lang="cowgol">include "cowgol.coh";
include "strings.coh";
 
# One distinct n-gram: where its characters start (ptr), how many characters
# it has (size) and how many times it has been seen (count).
record Ngram is
ptr: [uint8];
size: intptr;
count: intptr;
end record;
 
# Print one entry in the form 'text' - count.
sub PrintNgram(ngram: [Ngram]) is
print_char('\'');
var ptr := ngram.ptr;
var n := ngram.size;
# emit exactly `size` characters starting at ptr
while n > 0 loop
print_char([ptr]);
ptr := @next ptr;
n := n - 1;
end loop;
print("' - ");
print_i32(ngram.count as uint32);
end sub;
 
# Compare n bytes at a with n bytes at b.
# eq is 1 when all n bytes are equal and 0 at the first difference.
sub MemCmp(n: intptr, a: [uint8], b: [uint8]): (eq: uint8) is
eq := 1;
while n>0 loop
if [a] != [b] then
eq := 0;
return;
end if;
a := @next a;
b := @next b;
n := n - 1;
end loop;
end sub;
 
# Fill the array starting at `result` with one Ngram record per distinct
# n-character substring of str and return how many records were written.
# Records point into str itself; no characters are copied.
sub FindNgrams(n: intptr, str: [uint8], result: [Ngram]): (amount: intptr) is
# nextres always points at the first unused record
var nextres := result;
amount := 0;
# Append a new record for the n-gram starting at pos, with a count of 1.
sub NewNgram(pos: [uint8]) is
nextres.ptr := pos;
nextres.size := n;
nextres.count := 1;
nextres := @next nextres;
amount := amount + 1;
end sub;
# Count one occurrence of the n-gram at pos: bump the matching record
# if one exists, otherwise append a new one.
sub IncNgram(pos: [uint8]) is
if amount == 0 then
NewNgram(pos);
return;
end if;
# linear search over the `amount` records written so far
var curres := result;
var left := amount;
while left > 0 loop
if MemCmp(n, pos, curres.ptr) != 0 then
curres.count := curres.count + 1;
return;
end if;
left := left - 1;
curres := @next curres;
end loop;
NewNgram(pos);
end sub;
# number of start positions that still leave n characters
# NOTE(review): if n can exceed the string length by more than one and intptr
# is unsigned, this would wrap instead of going negative - confirm
var charsleft := StrLen(str) - n + 1;
while charsleft > 0 loop
IncNgram(str);
str := @next str;
charsleft := charsleft - 1;
end loop;
end sub;
 
# Print a heading, then every distinct n-gram of str with its count,
# five entries per line, followed by a blank line.
sub ShowNgrams(n: intptr, str: [uint8]) is
# fixed-size result buffer: room for 128 distinct n-grams
var ngrams: Ngram[128];
print_i32(n as uint32);
print("-grams of '");
print(str);
print("':\n");
var amount := FindNgrams(n, str, &ngrams[0]) as @indexof ngrams;
var i: @indexof ngrams := 0;
while i < amount loop
PrintNgram(&ngrams[i]);
# newline after every fifth entry, tab otherwise
if i % 5 == 4
then print_nl();
else print_char('\t');
end if;
i := i + 1;
end loop;
print_nl();
print_nl();
end sub;
 
# Demonstration: 2-, 3- and 4-grams of the sample sentence.
var str := "LIVE AND LET LIVE";
ShowNgrams(2, str);
ShowNgrams(3, str);
ShowNgrams(4, str);</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
 
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
 
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|Draco}}==
<syntaxhighlight lang="draco">\util.g
 
/* Linked-list node for one distinct n-gram: where its characters start
 * (nptr), how many characters it has (length), how often it was seen
 * (amount) and the following node (next). */
type Ngram = struct {
*char nptr;
word length;
word amount;
*Ngram next;
};
 
/* True when the n characters at a equal the n characters at b. */
proc equal_n(word n; *char a, b) bool:
/* advance while characters agree; n reaches 0 only if all n agreed */
while n>0 and a* = b* do
a := a+1;
b := b+1;
n := n-1
od;
n = 0
corp
 
/* Write exactly n characters starting at ptr. */
proc write_nchars(word n; *char ptr) void:
word i;
for i from 1 upto n do
write(ptr*);
ptr := ptr + 1;
od
corp
 
/* Walk the list starting at ngram and write each entry as 'text' - amount,
 * five entries per line. */
proc write_ngrams(*Ngram ngram) void:
word i;
i := 0;
while ngram /= nil do
write("'");
write_nchars(ngram*.length, ngram*.nptr);
write("' - ", ngram*.amount);
/* newline after every fifth entry, tab otherwise */
if i % 5=4
then writeln()
else write('\t')
fi;
i := i+1;
ngram := ngram*.next
od
corp
 
/* Allocate a list node for the n characters at ptr with an amount of 1
 * and no successor; the characters themselves are not copied. */
proc new_ngram(word n; *char ptr) *Ngram:
*Ngram ngram;
ngram := new(Ngram);
ngram*.length := n;
ngram*.nptr := ptr;
ngram*.amount := 1;
ngram*.next := nil;
ngram
corp;
 
/* Count one occurrence of the n characters at ptr in the list headed by
 * ngram. Yields the list head: a fresh single node when the list was empty,
 * otherwise the head that was passed in. */
proc inc_ngram(*Ngram ngram; word n; *char ptr) *Ngram:
*Ngram begin, lastn;
begin := ngram;
if begin = nil then
new_ngram(n, ptr)
else
/* walk until a matching node is found or the list ends;
 * lastn trails one node behind so a new node can be linked after it */
while
ngram /= nil and not equal_n(n, ptr, ngram*.nptr)
do
lastn := ngram;
ngram := ngram*.next
od;
if ngram /= nil then
ngram*.amount := ngram*.amount + 1
else
lastn*.next := new_ngram(n, ptr)
fi;
begin
fi
corp
 
/* Build and return the list of distinct n-grams of string with their counts. */
proc find_ngrams(word n; *char string) *Ngram:
*Ngram ngrams;
word maxpos, i;
ngrams := nil;
/* last start offset that still leaves n characters
 * NOTE(review): if n can exceed the string length and word is unsigned,
 * this subtraction would wrap - confirm */
maxpos := CharsLen(string) - n;
for i from 0 upto maxpos do
ngrams := inc_ngram(ngrams, n, string + i)
od;
ngrams
corp
 
/* Entry point: print the 2-, 3- and 4-grams of the sample sentence,
 * each list under its own heading. */
proc main() void:
*char string = "LIVE AND LET LIVE";
word n;
for n from 2 upto 4 do
writeln(n, "-grams of '", string, "':");
write_ngrams(find_ngrams(n, string));
writeln();
od;
corp</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|F_Sharp|F#}}==
Line 312 ⟶ 836:
[|'T'; ' '|] 1
</pre>
 
=={{header|Factor}}==
{{works with|Factor|0.99 2022-04-03}}
Line 530 ⟶ 1,055:
(" l", 2) ("le", 1) ("et", 1) ("t ", 1) ("li", 1)
</pre>
 
=={{header|Miranda}}==
<syntaxhighlight lang="miranda">|| Program output: the reports for n = 2, 3 and 4 on the sample sentence,
|| concatenated into one list of messages.
main :: [sys_message]
main = concat (map (testcase s) [2,3,4])
where s = "LIVE AND LET LIVE"
 
|| Messages for one report on string s with gram length n:
|| a heading line, the formatted n-gram table, and a blank line.
|| The heading closes its quote directly after the sentence ('...':);
|| the colon used to sit inside the quotes.
testcase :: [char]->num->[sys_message]
testcase s n = [Stdout (show n ++ "-grams of '" ++ s ++ "':\n"),
                Stdout (showngrams n s),
                Stdout "\n"]
 
|| Format the n-grams of s as text: each entry left-justified in a field of
|| 12 characters, six entries per line.
showngrams :: num->[char]->[char]
showngrams n s = lay (map concat (splitn 6 cols))
where ng = ngrams n s
cols = [ljustify 12 (showngram ng') | ng'<-ng]
 
|| Render one (n-gram, count) pair as "gram": count.
showngram :: ([char],num)->[char]
showngram (s,i) = concat ["\"", s, "\": ", show i]
 
|| Cut a list into consecutive chunks of n elements; the last chunk may be shorter.
splitn :: num->[*]->[[*]]
splitn n [] = []
splitn n ls = take n ls:splitn n (drop n ls)
 
|| All distinct length-n sublists of a list, each paired with its number of occurrences.
ngrams :: num->[*]->[([*],num)]
ngrams n = count . group n
 
|| Every contiguous length-n sublist of ls, in order; empty once fewer than
|| n elements remain.
group :: num->[*]->[[*]]
group n ls = [], if #ls < n
group n ls = take n ls : group n (tl ls)
 
|| Tally the elements of a list into (element, occurrences) pairs by folding
|| incelem over it from the left.
count :: [*]->[(*,num)]
count = foldl incelem []
 
|| Add one occurrence of el to a tally list: raise the count of the pair that
|| already holds el, or append (el, 1) when the end of the list is reached.
|| The repeated variable el in the second equation makes it apply only when
|| the head pair's element equals the one being added.
incelem :: [(*,num)]->*->[(*,num)]
incelem [] el = [(el, 1)]
incelem ((el,n):cs) el = (el,n+1):cs
incelem (c:cs) el = c:incelem cs el</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE:'
"LI": 2 "IV": 2 "VE": 2 "E ": 1 " A": 1 "AN": 1
"ND": 1 "D ": 1 " L": 2 "LE": 1 "ET": 1 "T ": 1
 
3-grams of 'LIVE AND LET LIVE:'
"LIV": 2 "IVE": 2 "VE ": 1 "E A": 1 " AN": 1 "AND": 1
"ND ": 1 "D L": 1 " LE": 1 "LET": 1 "ET ": 1 "T L": 1
" LI": 1
 
4-grams of 'LIVE AND LET LIVE:'
"LIVE": 2 "IVE ": 1 "VE A": 1 "E AN": 1 " AND": 1 "AND ": 1
"ND L": 1 "D LE": 1 " LET": 1 "LET ": 1 "ET L": 1 "T LI": 1
" LIV": 1</pre>
 
=={{header|Nim}}==
Line 715 ⟶ 1,291:
alpha/freq: LIVE 2, AND 1, LET 1, LIV 1, AND 1, D LE 1, E AN 1, ET L 1, IVE 1, LET 1, ND L 1, T LI 1, VE A 1
</pre>
 
=={{header|PL/M}}==
<syntaxhighlight lang="plm">100H:
/* SYSTEM INTERFACE. BDOS JUMPS TO ADDRESS 5 WITH A FUNCTION NUMBER F AND AN
   ARGUMENT A; EXIT JUMPS TO ADDRESS 0.
   NOTE(REVIEW): PRESUMABLY THE CP/M BDOS ENTRY AND WARM BOOT - CONFIRM.
   PR$CH SENDS ONE CHARACTER THROUGH FUNCTION 2, PR$STR SENDS THE STRING AT
   ADDRESS S THROUGH FUNCTION 9 (STRINGS IN THIS PROGRAM END IN '$'). */
BDOS: PROCEDURE (F,A); DECLARE F BYTE, A ADDRESS; GO TO 5; END BDOS;
EXIT: PROCEDURE; GO TO 0; END EXIT;
PR$CH: PROCEDURE (C); DECLARE C BYTE; CALL BDOS(2, C); END PR$CH;
PR$STR: PROCEDURE (S); DECLARE S ADDRESS; CALL BDOS(9, S); END PR$STR;
 
/* PRINT N IN DECIMAL. DIGITS ARE WRITTEN RIGHT TO LEFT INTO S, IN FRONT OF
   THE '$' TERMINATOR AT S(5), AND THE STRING IS PRINTED FROM THE FIRST DIGIT. */
PR$NUM: PROCEDURE (N);
DECLARE N ADDRESS;
DECLARE S (6) BYTE INITIAL ('.....$');
DECLARE I BYTE;
I = 5;
DIGIT:
I = I - 1;
S(I) = N MOD 10 + '0';
/* DIVIDE N BY 10 IN PLACE AND LOOP WHILE DIGITS REMAIN */
IF (N := N / 10) > 0 THEN GO TO DIGIT;
CALL PR$STR(.S(I));
END PR$NUM;
 
/* PRINT EXACTLY N CHARACTERS STARTING AT ADDRESS STR. */
PR$NSTR: PROCEDURE (N, STR);
DECLARE (STR, N) ADDRESS, CH BASED STR BYTE;
DO WHILE N>0;
CALL PR$CH(CH);
STR = STR+1;
N = N-1;
END;
END PR$NSTR;
 
/* COMPARE N BYTES AT STRA WITH N BYTES AT STRB.
   RETURNS 0FFH WHEN ALL N ARE EQUAL, 0 AT THE FIRST DIFFERENCE. */
CMP$NSTR: PROCEDURE (N, STRA, STRB) BYTE;
DECLARE (STRA, STRB, N, I) ADDRESS;
DECLARE A BASED STRA BYTE;
DECLARE B BASED STRB BYTE;
DO I=0 TO N-1;
IF A(I) <> B(I) THEN RETURN 0;
END;
RETURN 0FFH;
END CMP$NSTR;
 
/* RETURN THE NUMBER OF CHARACTERS IN FRONT OF THE '$' TERMINATOR AT STR. */
STR$LEN: PROCEDURE (STR) ADDRESS;
DECLARE (N, STR) ADDRESS, S BASED STR BYTE;
N = 0;
DO WHILE S(N) <> '$';
N = N+1;
END;
RETURN N;
END STR$LEN;
 
/* COLLECT THE DISTINCT N-CHARACTER SUBSTRINGS OF THE '$'-TERMINATED
   STRING AT STR.
   RSLT RECEIVES TWO ADDRESS WORDS PER N-GRAM:
     ITEM(2*K)   = ADDRESS OF ITS FIRST OCCURRENCE INSIDE THE STRING
     ITEM(2*K+1) = NUMBER OF OCCURRENCES
   RETURNS THE NUMBER OF DISTINCT N-GRAMS STORED. */
FIND$NGRAMS: PROCEDURE (N, STR, RSLT) ADDRESS;
    DECLARE (N, I, J, STR, RSLT, FOUND) ADDRESS;
    DECLARE S BASED STR BYTE;
    DECLARE ITEM BASED RSLT ADDRESS;
    DECLARE MAXPOS ADDRESS;
    /* LAST START OFFSET THAT STILL LEAVES N CHARACTERS */
    MAXPOS = STR$LEN(STR) - N;
    FOUND = 0;
    DO I = 0 TO MAXPOS;
        /* NOTHING STORED YET: SKIP THE SEARCH. THIS ALSO KEEPS FOUND - 1
           BELOW FROM BEING EVALUATED WITH FOUND = 0. */
        IF FOUND = 0 THEN GO TO NOT$FOUND;
        /* SEARCH ONLY THE ENTRIES FILLED SO FAR, 0 .. FOUND-1. THE BOUND
           USED TO BE FOUND, WHICH ALSO COMPARED AGAINST THE NOT-YET-WRITTEN
           SLOT ITEM(2*FOUND); WHEN THAT SLOT HELD A LEFTOVER ADDRESS THE
           N-GRAM WAS TAKEN AS ALREADY PRESENT AND WAS NEVER RECORDED. */
        DO J = 0 TO FOUND - 1;
            IF CMP$NSTR(N, .S(I), ITEM(2*J)) THEN DO;
                ITEM(2*J+1) = ITEM(2*J+1) + 1;
                GO TO NEXT$ITEM;
            END;
        END;
    NOT$FOUND:
        /* FIRST OCCURRENCE: RECORD ITS ADDRESS WITH A COUNT OF 1 */
        ITEM(2*FOUND) = .S(I);
        ITEM(2*FOUND+1) = 1;
        FOUND = FOUND + 1;
    NEXT$ITEM:
    END;
    RETURN FOUND;
END FIND$NGRAMS;
 
/* PRINT A HEADING, THEN EVERY DISTINCT N-GRAM OF THE STRING AT STR WITH ITS
   COUNT, FIVE ENTRIES PER LINE. */
PRINT$NGRAMS: PROCEDURE (N, STR);
DECLARE (N, I, STR) ADDRESS;
/* RESULT BUFFER: 64 WORDS, TWO PER N-GRAM (ADDRESS, COUNT) */
DECLARE RESULT (64) ADDRESS;
DECLARE AMOUNT ADDRESS;
/* N IS PRINTED AS A SINGLE CHARACTER, SO ONLY ONE-DIGIT VALUES DISPLAY CORRECTLY */
CALL PR$CH(N + '0');
CALL PR$STR(.'-GRAMS OF ''$');
CALL PR$STR(STR);
CALL PR$STR(.(''': ', 13, 10, '$'));

AMOUNT = FIND$NGRAMS(N, STR, .RESULT);
DO I = 0 TO AMOUNT - 1;
CALL PR$CH('''');
/* RESULT(2*I) IS THE ADDRESS OF THE N-GRAM, RESULT(2*I+1) ITS COUNT */
CALL PR$NSTR(N, RESULT(2*I));
CALL PR$STR(.''' - $');
CALL PR$NUM(RESULT(2*I+1));
/* CR/LF AFTER EVERY FIFTH ENTRY, TAB (9) OTHERWISE */
IF I MOD 5 = 4
THEN CALL PR$STR(.(13,10,'$'));
ELSE CALL PR$CH(9);
END;
CALL PR$STR(.(13,10,'$'));
END PRINT$NGRAMS;
 
/* MAIN PROGRAM: PRINT THE 2-, 3- AND 4-GRAMS OF THE SAMPLE SENTENCE, THEN EXIT. */
DECLARE STRING DATA ('LIVE AND LET LIVE$');
DECLARE N BYTE;

DO N = 2 TO 4;
CALL PRINT$NGRAMS(N, .STRING);
END;
CALL EXIT;
EOF</syntaxhighlight>
{{out}}
<pre>2-GRAMS OF 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
3-GRAMS OF 'LIVE AND LET LIVE':
'LIV' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1 'AND' - 1
'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1 'ET ' - 1
'T L' - 1 ' LI' - 1 'IVE' - 1
4-GRAMS OF 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|Python}}==
Line 946 ⟶ 1,640:
<pre>("IV"=>2,"T "=>1,"VE"=>2,"E "=>1,"LE"=>1,"AN"=>1,"LI"=>2,"ND"=>1,"ET"=>1," L"=>2," A"=>1,"D "=>1).Bag
("ET "=>1,"AND"=>1,"LIV"=>2," LI"=>1,"ND "=>1," LE"=>1,"IVE"=>2,"E A"=>1,"VE "=>1,"T L"=>1,"D L"=>1,"LET"=>1," AN"=>1).Bag</pre>
 
=={{header|Refal}}==
<syntaxhighlight lang="refal">/* Entry point: bind the sample sentence to e.Str and show its
   2-, 3- and 4-grams. */
$ENTRY Go {
, 'LIVE AND LET LIVE': e.Str
= <ShowNgrams 2 e.Str>
<ShowNgrams 3 e.Str>
<ShowNgrams 4 e.Str>;
};
 
/* Print a heading, the n-grams of e.Str five per line, and a blank line. */
ShowNgrams {
s.N e.Str =
<Prout <Symb s.N> '-grams of "' e.Str '":'>
<ShowLines 5 <Ngrams s.N e.Str>>
<Prout>;
};
 
/* Print the given entries s.N per line: split off the first s.N terms,
   print them on one line, and recurse on the rest until nothing is left. */
ShowLines {
s.N = ;
s.N e.X, <First s.N e.X>: (e.L) e.R =
<Prout <Each DispNgram e.L>> <ShowLines s.N e.R>;
};
 
/* Apply the function named s.F to every term in turn and concatenate
   the results. */
Each {
s.F = ;
s.F t.I e.Is = <Mu s.F t.I> <Each s.F e.Is>;
};
 
/* Render one ((gram) count) entry as (gram) - count followed by a space. */
DispNgram {
((e.S) s.C) = '(' e.S ') - ' <Symb s.C> ' ';
};
 
/* All distinct length-s.N groups of e.Str, each paired with its count. */
Ngrams {
s.N e.Str = <Count () <Groups s.N e.Str>>;
};
 
/* Produce every contiguous group of s.N terms of e.X as a bracketed term. */
Groups {
s.N e.X, <Lenw e.X>: s.L e.X, <Compare s.L s.N>: {
/* fewer than s.N terms remain: no more groups */
'-' = ;
/* emit the first s.N terms as one group, drop one term, continue */
s.C, <First s.N e.X>: (e.G) e.R, e.X: s.Z e.Y =
(e.G) <Groups s.N e.Y>;
}
};
 
/* Fold the items into the tally held in the bracketed first argument,
   one item at a time via Inc; yields the finished tally. */
Count {
(e.Cs) = e.Cs;
(e.Cs) t.I e.Is = <Count (<Inc (e.Cs) t.I>) e.Is>;
};
 
/* Add one occurrence of t.I to the tally: raise the count of an existing
   (t.I count) entry, or append (t.I 1) when there is none. */
Inc {
(e.X (t.I s.C) e.Y) t.I = e.X (t.I <+ 1 s.C>) e.Y;
(e.X) t.I = e.X (t.I 1);
};</syntaxhighlight>
{{out}}
<pre>2-grams of "LIVE AND LET LIVE":
(LI) - 2 (IV) - 2 (VE) - 2 (E ) - 1 ( A) - 1
(AN) - 1 (ND) - 1 (D ) - 1 ( L) - 2 (LE) - 1
(ET) - 1 (T ) - 1
 
3-grams of "LIVE AND LET LIVE":
(LIV) - 2 (IVE) - 2 (VE ) - 1 (E A) - 1 ( AN) - 1
(AND) - 1 (ND ) - 1 (D L) - 1 ( LE) - 1 (LET) - 1
(ET ) - 1 (T L) - 1 ( LI) - 1
 
4-grams of "LIVE AND LET LIVE":
(LIVE) - 2 (IVE ) - 1 (VE A) - 1 (E AN) - 1 ( AND) - 1
(AND ) - 1 (ND L) - 1 (D LE) - 1 ( LET) - 1 (LET ) - 1
(ET L) - 1 (T LI) - 1 ( LIV) - 1</pre>
 
=={{header|RPL}}==
Line 995 ⟶ 1,756:
2: { "LIV=2" "IVE=2" "VE =1" "E A=1" " AN=1" "AND=1" "ND =1" "D L=1" " LE=1" "LET=1" "ET =1" "T L=1" " LI=1" }
1: { "LIVE=2" "IVE =1" "VE A=1" "E AN=1" " AND=1" "AND =1" "ND L=1" "D LE=1" " LET=1" "LET =1" "ET L=1" "T LI=1" " LIV=1" }
</pre>
 
=={{header|SETL}}==
<syntaxhighlight lang="setl">-- Print the 2-, 3- and 4-grams of a sample sentence with their counts.
program find_ngrams;
input := "LIVE AND LET LIVE";
loop for size in [2,3,4] do
print(str size+"-grams of '"+input+"':");
ng := ngrams(input, size);
-- col counts the entries printed so far and decides where lines break
col := 0;
-- iterate over the map: ngram is the key, count the value stored for it
loop for count = ng(ngram) do
-- each entry is padded to 10 characters and printed without a line break
nprint(rpad("'" + ngram + "': " + str count, 10));
-- start a new line after every eighth entry
if (col +:= 1) mod 8 = 0 then print; end if;
end loop;
print;
print;
end loop;

-- Return a map from every substring of `input` that is `size` characters
-- long to the number of times it occurs.
proc ngrams(input, size);
ng := {};
loop for i in [1..#input-size+1] do
-- NOTE(review): relies on +:= accepting an entry that is not in the map yet - confirm
ng(input(i..i+size-1)) +:= 1;
end loop;
return ng;
end proc;
end program;</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
' A': 1 ' L': 2 'AN': 1 'D ': 1 'E ': 1 'ET': 1 'IV': 2 'LE': 1
'LI': 2 'ND': 1 'T ': 1 'VE': 2
 
3-grams of 'LIVE AND LET LIVE':
' AN': 1 ' LE': 1 ' LI': 1 'AND': 1 'D L': 1 'E A': 1 'ET ': 1 'IVE': 2
'LET': 1 'LIV': 2 'ND ': 1 'T L': 1 'VE ': 1
 
4-grams of 'LIVE AND LET LIVE':
' AND': 1 ' LET': 1 ' LIV': 1 'AND ': 1 'D LE': 1 'E AN': 1 'ET L': 1 'IVE ': 1
'LET ': 1 'LIVE': 2 'ND L': 1 'T LI': 1 'VE A': 1
 
</pre>
 
2,115

edits