N-grams: Difference between revisions

3,384 bytes added ,  1 month ago
Add ABC
(Add C++)
(Add ABC)
 
(One intermediate revision by the same user not shown)
Line 23:
;* [[Sorensen–Dice_coefficient|Related task: Sorensen–Dice coefficient]]
 
 
=={{header|ABC}}==
<syntaxhighlight lang="abc">HOW TO RETURN n grams str:
    \ Build a table whose keys are the length-n substrings of str and
    \ whose items are how many times each one occurs (overlaps count).
    PUT {} IN counts
    FOR start IN {1..#str-n+1}:
        PUT str@start|n IN piece
        \ First sighting: seed the entry so the increment below applies
        \ uniformly to new and existing keys.
        IF piece not.in keys counts:
            PUT 0 IN counts[piece]
        PUT counts[piece]+1 IN counts[piece]
    RETURN counts
HOW TO SHOW n GRAMS FOR str:
    \ Print a heading, then every n-gram of str with its count,
    \ left-adjusted in 12-character cells, five cells per row.
    WRITE "`n`-grams for '`str`':"/
    PUT n grams str IN grams
    PUT 0 IN shown
    FOR gr IN keys grams:
        WRITE "'`gr`' - `grams[gr]`" << 12
        PUT shown+1 IN shown
        \ Break the row after every fifth cell.
        IF shown mod 5 = 0:
            WRITE /
    WRITE /
\ Demonstration: list the 2-, 3- and 4-grams of the sample text.
PUT "LIVE AND LET LIVE" IN text
FOR size IN {2..4}:
    SHOW size GRAMS FOR text
    WRITE /</syntaxhighlight>
{{out}}
<pre>2-grams for 'LIVE AND LET LIVE':
' A' - 1 ' L' - 2 'AN' - 1 'D ' - 1 'E ' - 1
'ET' - 1 'IV' - 2 'LE' - 1 'LI' - 2 'ND' - 1
'T ' - 1 'VE' - 2
 
3-grams for 'LIVE AND LET LIVE':
' AN' - 1 ' LE' - 1 ' LI' - 1 'AND' - 1 'D L' - 1
'E A' - 1 'ET ' - 1 'IVE' - 2 'LET' - 1 'LIV' - 2
'ND ' - 1 'T L' - 1 'VE ' - 1
 
4-grams for 'LIVE AND LET LIVE':
' AND' - 1 ' LET' - 1 ' LIV' - 1 'AND ' - 1 'D LE' - 1
'E AN' - 1 'ET L' - 1 'IVE ' - 1 'LET ' - 1 'LIVE' - 2
'ND L' - 1 'T LI' - 1 'VE A' - 1</pre>
 
=={{header|ALGOL 68}}==
Line 457 ⟶ 498:
'E AN' - 1 'ET L' - 1 'IVE ' - 1 'LET ' - 1 'LIVE' - 2
'ND L' - 1 'T LI' - 1 'VE A' - 1</pre>
 
=={{header|CLU}}==
<syntaxhighlight lang="clu">parts = iter (n: int, s: string) yields (string)
    % Yield each length-n substring of s in left-to-right order,
    % one per starting position.
    last: int := string$size(s) - n + 1
    for start: int in int$from_to(1, last) do
        yield(string$substr(s, start, n))
    end
end parts
 
% An n-gram's text (str) paired with the number of times it has been seen (count).
ngram = struct[str: string, count: int]
 
% Count the length-n substrings of s.  The result lists each distinct
% substring once, in order of first appearance, with its occurrence count.
find_ngrams = proc (n: int, s: string) returns (sequence[ngram])
    counts: array[ngram] := array[ngram]$[]
    for piece: string in parts(n, s) do
        % Linear search for an existing entry with the same text.
        seen: bool := false
        slot: int := 0
        for idx: int in array[ngram]$indexes(counts) do
            if counts[idx].str = piece then
                seen := true
                slot := idx
                break
            end
        end
        if seen then
            % Entries are replaced wholesale with the count bumped by one.
            counts[slot] := ngram${str: counts[slot].str,
                                   count: counts[slot].count + 1}
        else
            array[ngram]$addh(counts, ngram${str: piece, count: 1})
        end
    end
    return(sequence[ngram]$a2s(counts))
end find_ngrams
 
% Write every n-gram of str and its count to stream s, left-justified in
% 15-character cells, five cells per row, followed by a line break.
show_ngrams = proc (s: stream, n: int, str: string)
    found: sequence[ngram] := find_ngrams(n, str)
    shown: int := 0
    for entry: ngram in sequence[ngram]$elements(found) do
        cell: string := "'" || entry.str || "' - " || int$unparse(entry.count)
        stream$putleft(s, cell, 15)
        shown := shown + 1
        % End the row after every fifth cell.
        if shown // 5 = 0 then
            stream$putl(s, "")
        end
    end
    stream$putl(s, "")
end show_ngrams
 
% Entry point: print the 2-, 3- and 4-grams of the sample text to the
% primary output stream, each under its own heading.
start_up = proc ()
    out: stream := stream$primary_output()
    text: string := "LIVE AND LET LIVE"
    for size: int in int$from_to(2, 4) do
        heading: string := int$unparse(size) || "-grams of '" || text || "':"
        stream$putl(out, heading)
        show_ngrams(out, size, text)
        stream$putl(out, "")
    end
end start_up</syntaxhighlight>
{{out}}
<pre>2-grams of 'LIVE AND LET LIVE':
'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1
'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1
'ET' - 1 'T ' - 1
 
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1
'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1
'ET ' - 1 'T L' - 1 ' LI' - 1
 
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1
'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1
'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre>
 
=={{header|Common Lisp}}==
2,115

edits