N-grams: Difference between revisions

9,279 bytes added ,  1 month ago
Add ABC
(Add BQN)
(Add ABC)
 
(4 intermediate revisions by the same user not shown)
Line 23:
;* [[Sorensen–Dice_coefficient|Related task: Sorensen–Dice coefficient]]
 
 
=={{header|ABC}}==
<syntaxhighlight lang="abc">HOW TO RETURN n grams str:
   \ Tally every run of n consecutive characters of str.
   \ Returns a table mapping each distinct n-gram to its occurrence count.
   PUT {} IN tally
   FOR start IN {1..#str-n+1}:
      PUT str@start|n IN piece
      \ First sighting: seed the entry so the increment below is uniform.
      IF piece not.in keys tally: PUT 0 IN tally[piece]
      PUT tally[piece]+1 IN tally[piece]
   RETURN tally
HOW TO SHOW n GRAMS FOR str:
   \ Print a heading, then every n-gram of str with its count,
   \ left-justified in 12-character cells, five cells per row.
   PUT n grams str IN counts
   PUT 0 IN shown
   WRITE "`n`-grams for '`str`':"/
   FOR key IN keys counts:
      WRITE "'`key`' - `counts[key]`" << 12
      PUT shown+1 IN shown
      \ Break the row after every fifth cell.
      IF shown mod 5 = 0: WRITE /
   WRITE /
\ Demonstrate 2-, 3- and 4-grams of the sample sentence, one blank line after each.
FOR width IN {2..4}:
   SHOW width GRAMS FOR "LIVE AND LET LIVE"
   WRITE /</syntaxhighlight>
{{out}}
<pre>2-grams for 'LIVE AND LET LIVE':
' A' - 1 ' L' - 2 'AN' - 1 'D ' - 1 'E ' - 1
'ET' - 1 'IV' - 2 'LE' - 1 'LI' - 2 'ND' - 1
'T ' - 1 'VE' - 2
 
3-grams for 'LIVE AND LET LIVE':
' AN' - 1 ' LE' - 1 ' LI' - 1 'AND' - 1 'D L' - 1
'E A' - 1 'ET ' - 1 'IVE' - 2 'LET' - 1 'LIV' - 2
'ND ' - 1 'T L' - 1 'VE ' - 1
 
4-grams for 'LIVE AND LET LIVE':
' AND' - 1 ' LET' - 1 ' LIV' - 1 'AND ' - 1 'D LE' - 1
'E AN' - 1 'ET L' - 1 'IVE ' - 1 'LET ' - 1 'LIVE' - 2
'ND L' - 1 'T LI' - 1 'VE A' - 1</pre>
 
=={{header|ALGOL 68}}==
Line 243 ⟶ 284:
'AND ': 1 'ND L': 1 'D LE': 1 ' LET': 1 'LET ': 1
'ET L': 1 'T LI': 1 ' LIV': 1</pre>
 
=={{header|BCPL}}==
<syntaxhighlight lang="bcpl">get "libhdr"
 
// Compare the n characters of str starting at offset i with the n
// characters starting at offset j; true only when all n agree.
let equal(str, n, i, j) = valof
$(  for offset = 0 to n-1 do
        if str%(i+offset) ~= str%(j+offset) then resultis false
    resultis true
$)
 
// Collect the distinct n-grams of str into res as pairs of words:
//   res!(2*k)   = start offset in str of the k-th distinct n-gram
//   res!(2*k+1) = number of times that n-gram occurs
// The result is the number of pairs stored. No bound is checked on res.
let findngrams(n, str, res) = valof
$(  let found = 0

    for i = 1 to str%0-n+1 do
    $(  let seen = false

        // Look for an earlier n-gram with the same characters.
        for j = 0 to found-1 do
            if equal(str, n, i, res!(2*j)) then
            $(  res!(2*j+1) := res!(2*j+1) + 1
                seen := true
                break
            $)

        // Not recorded yet: append a new pair with a count of one.
        unless seen do
        $(  res!(2*found) := i
            res!(2*found+1) := 1
            found := found + 1
        $)
    $)
    resultis found
$)
 
// Print a heading followed by every distinct n-gram of str with its count,
// five entries per line separated by tabs.
let showngrams(n, str) be
$(  // NOTE(review): vec 64 leaves room for 32 (offset, count) pairs and
    // findngrams does not check that limit - fine for the demo string,
    // confirm before reusing with longer text.
    let res = vec 64
    let amt = findngrams(n, str, res)

    writef("%N-grams of '%S':*N", n, str)
    for i = 0 to amt-1 do
    $(  let first = res!(2*i)
        wrch('*'')
        for j = first to first+n-1 do wrch(str%j)
        writef("' - %N", res!(2*i+1))
        // Newline after every fifth entry, tab otherwise.
        test i rem 5 = 4 then wrch('*N') else wrch('*T')
    $)
    wrch('*N')
$)
 
// Entry point: show the 2-, 3- and 4-grams of the sample sentence.
let start() be
$(  for width = 2 to 4 do
        showngrams(width, "LIVE AND LET LIVE")
$)</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|BQN}}==
Line 348 ⟶ 446:
("ET L": 1) ("T LI": 1) (" LIV": 1)
</pre>
 
=={{header|C++}}==
<syntaxhighlight lang="cpp">#include <iostream>
#include <map>
#include <string>
 
// Count every substring of s that is exactly n characters long.
//
// Returns a map from each distinct n-gram to its number of occurrences,
// ordered by key. When n is not positive or exceeds the length of s there
// are no n-grams, so the map is empty.
std::map<std::string, int> find_ngrams(int n, const std::string& s)
{
    std::map<std::string, int> ngrams;

    // Guard first: s.length() - n is computed in size_t, so an oversized n
    // would wrap to a huge bound and substr() would eventually throw
    // std::out_of_range; a non-positive n has no meaningful n-grams either.
    if (n <= 0 || static_cast<size_t>(n) > s.length())
        return ngrams;

    const size_t width = static_cast<size_t>(n);
    const size_t max_loc = s.length() - width;
    for (size_t i = 0; i <= max_loc; i++)
        ngrams[s.substr(i, width)]++;
    return ngrams;
}
 
// Write every entry of ngrams to standard output as 'text' - count.
// Entries are separated by tabs, a line break follows every fifth entry,
// and one more line break is written after the last entry.
void print_ngrams(const std::map<std::string, int>& ngrams)
{
    int printed = 0;
    for (auto it = ngrams.begin(); it != ngrams.end(); ++it) {
        std::cout << "'" << it->first << "' - " << it->second;
        ++printed;
        if (printed % 5 == 0)
            std::cout << std::endl;
        else
            std::cout << '\t';
    }
    std::cout << std::endl;
}
 
// Print the 2-, 3- and 4-grams of the sample sentence, each group under a
// heading that shows the sentence in single quotes.
int main(void)
{
    std::string s("LIVE AND LET LIVE");
    for (int n = 2; n <= 4; n++) {
        // Close the quote opened before the sentence (it was left unbalanced).
        std::cout << n << "-grams of '" << s << "':" << std::endl;
        print_ngrams(find_ngrams(n, s));
    }
    return 0;
}</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
' A' - 1 ' L' - 2 'AN' - 1 'D ' - 1 'E ' - 1
'ET' - 1 'IV' - 2 'LE' - 1 'LI' - 2 'ND' - 1
'T ' - 1 'VE' - 2
3-grams of 'LIVE AND LET LIVE':
' AN' - 1 ' LE' - 1 ' LI' - 1 'AND' - 1 'D L' - 1
'E A' - 1 'ET ' - 1 'IVE' - 2 'LET' - 1 'LIV' - 2
'ND ' - 1 'T L' - 1 'VE ' - 1
4-grams of 'LIVE AND LET LIVE':
' AND' - 1 ' LET' - 1 ' LIV' - 1 'AND ' - 1 'D LE' - 1
'E AN' - 1 'ET L' - 1 'IVE ' - 1 'LET ' - 1 'LIVE' - 2
'ND L' - 1 'T LI' - 1 'VE A' - 1</pre>
 
=={{header|CLU}}==
<syntaxhighlight lang="clu">% Yield, from left to right, every substring of s that is n characters long.
parts = iter (n: int, s: string) yields (string)
    last: int := string$size(s) - n + 1
    at: int := 1
    while at <= last do
        yield(string$substr(s, at, n))
        at := at + 1
    end
end parts
 
% One tallied n-gram: the substring (str) and how many times it was seen (count).
ngram = struct[str: string, count: int]
 
% Tally the n-character substrings of s.
% Returns one ngram per distinct substring, in order of first appearance,
% each carrying the number of times it occurs.
find_ngrams = proc (n: int, s: string) returns (sequence[ngram])
    tally: array[ngram] := array[ngram]$[]
    for part: string in parts(n, s) do
        known: bool := false
        % Structs are immutable, so a repeat replaces the stored entry
        % with a copy whose count is one higher.
        for i: int in array[ngram]$indexes(tally) do
            if tally[i].str = part then
                tally[i] := ngram${str: tally[i].str,
                                   count: tally[i].count + 1}
                known := true
                break
            end
        end
        if ~known then
            array[ngram]$addh(tally, ngram${str: part, count: 1})
        end
    end
    return(sequence[ngram]$a2s(tally))
end find_ngrams
 
% Write every n-gram of str with its count to stream s, left-justified in
% 15-character cells, five cells per line, ending with a line break.
show_ngrams = proc (s: stream, n: int, str: string)
    shown: int := 0
    for entry: ngram in sequence[ngram]$elements(find_ngrams(n, str)) do
        cell: string := "'" || entry.str || "' - " || int$unparse(entry.count)
        stream$putleft(s, cell, 15)
        shown := shown + 1
        % End the line after every fifth cell.
        if shown // 5 = 0 then stream$putl(s, "") end
    end
    stream$putl(s, "")
end show_ngrams
 
% Entry point: print the 2-, 3- and 4-grams of the sample sentence to the
% primary output, each group under its own heading and followed by a blank line.
start_up = proc ()
    dest: stream := stream$primary_output()
    text: string := "LIVE AND LET LIVE"
    for width: int in int$from_to(2, 4) do
        heading: string := int$unparse(width) || "-grams of '" || text || "':"
        stream$putl(dest, heading)
        show_ngrams(dest, width, text)
        stream$putl(dest, "")
    end
end start_up</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
 
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
 
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|Common Lisp}}==
Line 486 ⟶ 700:
'ET ' - 1 'T L' - 1 ' LI' - 1
 
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|Draco}}==
<syntaxhighlight lang="draco">\util.g
 
/* Node of a singly linked list holding one distinct n-gram and its count. */
type Ngram = struct {
*char nptr;     /* start of the n-gram inside the scanned string (not a copy) */
word length;    /* number of characters in the n-gram */
word amount;    /* occurrences counted so far */
*Ngram next;    /* following node, or nil at the end of the list */
};
 
/* True when the first n characters at a and at b are identical. */
proc equal_n(word n; *char a, b) bool:
    word left;
    left := n;
    /* Stop at the first mismatch or once all n characters have matched. */
    while left /= 0 and a* = b* do
        a := a + 1;
        b := b + 1;
        left := left - 1
    od;
    left = 0
corp
 
/* Write the n characters starting at ptr to standard output. */
proc write_nchars(word n; *char ptr) void:
    word left;
    left := n;
    while left /= 0 do
        write(ptr*);
        ptr := ptr + 1;
        left := left - 1
    od
corp
 
/* Print every node of the list as 'text' - count, tab separated,
 * starting a new line after every fifth entry. */
proc write_ngrams(*Ngram ngram) void:
    word shown;
    shown := 0;
    while ngram /= nil do
        write("'");
        write_nchars(ngram*.length, ngram*.nptr);
        write("' - ", ngram*.amount);
        shown := shown + 1;
        if shown % 5 = 0
            then writeln()
            else write('\t')
        fi;
        ngram := ngram*.next
    od
corp
 
/* Allocate a list node for the n-gram of n characters starting at ptr,
 * with a count of one and no successor. The node keeps ptr itself, so the
 * characters are shared with the caller's string rather than copied. */
proc new_ngram(word n; *char ptr) *Ngram:
*Ngram ngram;
ngram := new(Ngram);
ngram*.length := n;
ngram*.nptr := ptr;
ngram*.amount := 1;
ngram*.next := nil;
ngram
corp;
 
/* Record one occurrence of the n-gram of n characters at ptr in the list
 * headed by ngram. Returns the head of the list, which is a freshly
 * allocated node when the list was empty. */
proc inc_ngram(*Ngram ngram; word n; *char ptr) *Ngram:
*Ngram begin, lastn;
/* remember the head: ngram is reused below as the walking cursor */
begin := ngram;
if begin = nil then
/* empty list: the new node is the whole list */
new_ngram(n, ptr)
else
/* advance until a node with the same n characters is found or the list ends */
while
ngram /= nil and not equal_n(n, ptr, ngram*.nptr)
do
lastn := ngram;
ngram := ngram*.next
od;
if ngram /= nil then
/* already present: bump its count */
ngram*.amount := ngram*.amount + 1
else
/* not present: lastn is the last node visited, so append after it */
lastn*.next := new_ngram(n, ptr)
fi;
begin
fi
corp
 
/* Build the list of distinct n-grams of string, in order of first
 * appearance, each with its occurrence count.
 * Returns nil when n is zero or larger than the length of string, since
 * the text then contains no n-grams. */
proc find_ngrams(word n; *char string) *Ngram:
    *Ngram ngrams;
    word len, i;
    ngrams := nil;
    len := CharsLen(string);
    /* word is unsigned, so len - n would wrap around for n > len and the
     * loop would scan far past the end of the string; check before
     * subtracting. */
    if n /= 0 and n <= len then
        for i from 0 upto len - n do
            ngrams := inc_ngram(ngrams, n, string + i)
        od
    fi;
    ngrams
corp
 
/* Entry point: print the 2-, 3- and 4-grams of the sample sentence,
 * each group under its own heading. */
proc main() void:
    *char string = "LIVE AND LET LIVE";
    word width;
    for width from 2 upto 4 do
        writeln(width, "-grams of '", string, "':");
        write_ngrams(find_ngrams(width, string));
        writeln();
    od;
corp</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
2,115

edits