N-grams: Difference between revisions

← Older edit

N-grams (view source)

Revision as of 18:11, 10 May 2024

60,261 bytes added, 28 days ago

Add ABC

Not a robot

2,114

edits

Revision as of 17:53, 21 April 2023 (view source) PureFox (talk \| contribs) (Added Wren) ← Older edit		Latest revision as of 18:11, 10 May 2024 (view source) Not a robot (talk \| contribs) (Add ABC)
(44 intermediate revisions by 15 users not shown)
Line 23: ;* [[Sorensen–Dice_coefficient\|Related task: Sorensen–Dice coefficient]] =={{header\|ABC}}== <syntaxhighlight lang="abc">HOW TO RETURN n grams str: PUT {} IN grams FOR i IN {1..#str-n+1}: PUT str@i\|n IN part SELECT: part in keys grams: PUT grams[part]+1 IN grams[part] ELSE: PUT 1 IN grams[part] RETURN grams HOW TO SHOW n GRAMS FOR str: PUT n grams str IN grams PUT 0 IN col WRITE "`n`-grams for '`str`':"/ FOR gr IN keys grams: WRITE "'`gr`' - `grams[gr]`" << 12 IF col mod 5 = 4: WRITE / PUT col+1 IN col WRITE / FOR n IN {2;3;4}: SHOW n GRAMS FOR "LIVE AND LET LIVE" WRITE /</syntaxhighlight> {{out}} <pre>2-grams for 'LIVE AND LET LIVE': ' A' - 1 ' L' - 2 'AN' - 1 'D ' - 1 'E ' - 1 'ET' - 1 'IV' - 2 'LE' - 1 'LI' - 2 'ND' - 1 'T ' - 1 'VE' - 2 3-grams for 'LIVE AND LET LIVE': ' AN' - 1 ' LE' - 1 ' LI' - 1 'AND' - 1 'D L' - 1 'E A' - 1 'ET ' - 1 'IVE' - 2 'LET' - 1 'LIV' - 2 'ND ' - 1 'T L' - 1 'VE ' - 1 4-grams for 'LIVE AND LET LIVE': ' AND' - 1 ' LET' - 1 ' LIV' - 1 'AND ' - 1 'D LE' - 1 'E AN' - 1 'ET L' - 1 'IVE ' - 1 'LET ' - 1 'LIVE' - 2 'ND L' - 1 'T LI' - 1 'VE A' - 1</pre> =={{header\|ALGOL 68}}== <syntaxhighlight lang="algol68"> BEGIN # split text into n-grams - n character substrings # MODE NGRAM = STRUCT( STRING gram, INT count ); OP UCASE = ( CHAR c )CHAR: # return c converted to upper case # IF c >= "a" AND c <= "z" THEN REPR( ( ABS c - ABS "a" ) + ABS "A" ) ELSE c FI; OP UCASE = ( STRING s )STRING: # return s converted to upper case # BEGIN STRING uc := s; FOR i FROM LWB uc TO UPB uc DO uc[ i ] := UCASE uc[ i ] OD; uc END # UCASE # ; OP LENGTH = ( STRING s )INT: ( UPB s + 1 ) - LWB s; # returns the n-grams of text - using a simple array to contain the # # n-grams - for longer strings, an associative array might be better # PRIO GRAMS = 1; OP GRAMS = ( INT n, STRING text )[]NGRAM: BEGIN [ 1 : ( LENGTH text + 1 ) - n ]NGRAM result; # initially assume # INT count := 0; # all the n-grams will be unique # FOR pos FROM LWB text TO ( UPB text + 1 )- 
n DO STRING ng = UCASE text[ pos : pos + ( n - 1 ) ]; BOOL found := FALSE; INT g pos := 0; FOR g FROM 1 TO count WHILE g pos := g; NOT ( found := ng = ( gram OF result )[ g ] ) DO SKIP OD; IF NOT found THEN result[ count +:= 1 ] := ( ng, 1 ) ELSE ( count OF result )[ g pos ] +:= 1 FI OD; result[ 1 : count ] END # NGRAMS # ; # prints the ngrams in ngrams # PROC print ngrams = ( STRING title, text, []NGRAM ngrams )VOID: BEGIN print( ( title, "-grams of """, text, """:", newline ) ); FOR g FROM LWB ngrams TO UPB ngrams DO print( ( " """, ( gram OF ngrams )[ g ] ) ); print( ( """: ", whole( ( count OF ngrams )[ g ], 0 ), newline ) ) OD END # print ngrams # ; STRING test = "Live and let live"; print ngrams( "bi", test, 2 GRAMS test ); print ngrams( "tri", test, 3 GRAMS test ); print ngrams( "quad", test, 4 GRAMS test ) END </syntaxhighlight> {{out}} <pre> bi-grams of "Live and let live": "LI": 2 "IV": 2 "VE": 2 "E ": 1 " A": 1 "AN": 1 "ND": 1 "D ": 1 " L": 2 "LE": 1 "ET": 1 "T ": 1 tri-grams of "Live and let live": "LIV": 2 "IVE": 2 "VE ": 1 "E A": 1 " AN": 1 "AND": 1 "ND ": 1 "D L": 1 " LE": 1 "LET": 1 "ET ": 1 "T L": 1 " LI": 1 quad-grams of "Live and let live": "LIVE": 2 "IVE ": 1 "VE A": 1 "E AN": 1 " AND": 1 "AND ": 1 "ND L": 1 "D LE": 1 " LET": 1 "LET ": 1 "ET L": 1 "T LI": 1 " LIV": 1 </pre> =={{header\|APL}}== {{works with\|Dyalog APL}} <syntaxhighlight lang="apl">ngrams ← (⊣,(≢⊢))⌸,/</syntaxhighlight> {{out}} <pre> 2 3 4 ngrams¨ ⊂'LIVE AND LET LIVE' LI 2 LIV 2 LIVE 2 IV 2 IVE 2 IVE 1 VE 2 VE 1 VE A 1 E 1 E A 1 E AN 1 A 1 AN 1 AND 1 AN 1 AND 1 AND 1 ND 1 ND 1 ND L 1 D 1 D L 1 D LE 1 L 2 LE 1 LET 1 LE 1 LET 1 LET 1 ET 1 ET 1 ET L 1 T 1 T L 1 T LI 1 LI 1 LIV 1</pre> =={{header\|Arturo}}== <syntaxhighlight lang="arturo">ngrams: function [s :string n :integer][ 0..sub size s n \| map 'i -> slice upper s i i+n-1 \| tally ] loop [2 3 4] 'n [ print ~"\|n\|-grams:" loop ngrams "Live and let live" n [k v] -> print [~{"\|k\|"} v] print "" ]</syntaxhighlight> {{out}} 
<pre>2-grams: "LI" 2 "IV" 2 "VE" 2 "E " 1 " A" 1 "AN" 1 "ND" 1 "D " 1 " L" 2 "LE" 1 "ET" 1 "T " 1 3-grams: "LIV" 2 "IVE" 2 "VE " 1 "E A" 1 " AN" 1 "AND" 1 "ND " 1 "D L" 1 " LE" 1 "LET" 1 "ET " 1 "T L" 1 " LI" 1 4-grams: "LIVE" 2 "IVE " 1 "VE A" 1 "E AN" 1 " AND" 1 "AND " 1 "ND L" 1 "D LE" 1 " LET" 1 "LET " 1 "ET L" 1 "T LI" 1 " LIV" 1</pre> =={{header\|BASIC}}== <syntaxhighlight lang="basic">10 DEFINT A-Z 20 S$ = "LIVE AND LET LIVE" 30 FOR N=2 TO 4: GOSUB 100: NEXT N 40 END 100 REM PRINT N-GRAMS OF S$ 105 PRINT USING "#-grams of '";N;: PRINT S$;"':" 110 DIM P$(LEN(S$)-N+1), C(LEN(S$)-N+1) 120 FD = 0 130 FOR I=1 TO LEN(S$)-N+1 140 PA$ = MID$(S$,I,N) 150 IF FD = 0 THEN 190 160 FOR J=1 TO FD 170 IF P$(J) = PA$ THEN C(J) = C(J)+1: GOTO 210 180 NEXT J 190 FD = FD+1 200 P$(FD) = PA$ : C(FD) = 1 210 NEXT I 220 FOR I=1 TO FD 230 PRINT "'";P$(I);"': ";C(I), 240 NEXT I 250 PRINT: PRINT 260 ERASE P$, C 270 RETURN</syntaxhighlight> {{out}} <pre>2 grams of 'LIVE AND LET LIVE': 'LI': 2 'IV': 2 'VE': 2 'E ': 1 ' A': 1 'AN': 1 'ND': 1 'D ': 1 ' L': 2 'LE': 1 'ET': 1 'T ': 1 3 grams of 'LIVE AND LET LIVE': 'LIV': 2 'IVE': 2 'VE ': 1 'E A': 1 ' AN': 1 'AND': 1 'ND ': 1 'D L': 1 ' LE': 1 'LET': 1 'ET ': 1 'T L': 1 ' LI': 1 4 grams of 'LIVE AND LET LIVE': 'LIVE': 2 'IVE ': 1 'VE A': 1 'E AN': 1 ' AND': 1 'AND ': 1 'ND L': 1 'D LE': 1 ' LET': 1 'LET ': 1 'ET L': 1 'T LI': 1 ' LIV': 1</pre> =={{header\|BCPL}}== <syntaxhighlight lang="bcpl">get "libhdr" let equal(str, n, i, j) = valof $( for k=0 to n-1 unless str%(i+k) = str%(j+k) resultis false resultis true $) let findngrams(n, str, res) = valof $( let found = 0 for i=1 to str%0-n+1 $( for j=0 to found-1 $( if equal(str, n, i, res!(2j)) $( res!(2j+1) := res!(2j+1) + 1 goto nextitem $) $) res!(2found) := i res!(2found+1) := 1 found := found + 1 nextitem: loop $) resultis found $) let showngrams(n, str) be $( let res = vec 64 let amt = findngrams(n, str, res) writef("%N-grams of '%S':N", n, str) for i=0 to amt-1 $( wrch(''') for 
j=res!(2i) to res!(2i)+n-1 do wrch(str%j) writef("' - %N",res!(2i+1)) wrch(i rem 5=4 -> 'N', 'T') $) wrch('N') $) let start() be for n=2 to 4 do showngrams(n, "LIVE AND LET LIVE")</syntaxhighlight> {{out}} <pre>2-grams of 'LIVE AND LET LIVE': 'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1 'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1 'ET' - 1 'T ' - 1 3-grams of 'LIVE AND LET LIVE': 'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1 'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1 'ET ' - 1 'T L' - 1 ' LI' - 1 4-grams of 'LIVE AND LET LIVE': 'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1 'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1 'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre> =={{header\|BQN}}== <syntaxhighlight lang="bqn">Ngrams ← (⊏∾≠)¨ ∘ (⊐⊸⊔) ∘ (<˘∘↕) Show ← > ("'" ∾ ⊣ ∾ "': " ∾ (•Fmt⊢))´¨ 2‿3‿4 Show∘Ngrams¨ <"LIVE AND LET LIVE"</syntaxhighlight> {{out}} <pre>┌─ · ┌─ ┌─ ┌─ ╵"'LI': 2 ╵"'LIV': 2 ╵"'LIVE': 2 'IV': 2 'IVE': 2 'IVE ': 1 'VE': 2 'VE ': 1 'VE A': 1 'E ': 1 'E A': 1 'E AN': 1 ' A': 1 ' AN': 1 ' AND': 1 'AN': 1 'AND': 1 'AND ': 1 'ND': 1 'ND ': 1 'ND L': 1 'D ': 1 'D L': 1 'D LE': 1 ' L': 2 ' LE': 1 ' LET': 1 'LE': 1 'LET': 1 'LET ': 1 'ET': 1 'ET ': 1 'ET L': 1 'T ': 1" 'T L': 1 'T LI': 1 ┘ ' LI': 1" ' LIV': 1" ┘ ┘ ┘</pre> =={{header\|C}}== <syntaxhighlight lang="c">#include <stdio.h> #include <stdbool.h> #include <ctype.h> #include <string.h> #define MAX_N 4 #define MAX_NGRAMS 20 typedef struct { char str[MAX_N+1]; int freq; } ngram; void strUpper(char s) { while (s) { s = toupper(s); s++; } } void ngrams(int n, char text) { int i, j, count = 0; size_t len = strlen(text); bool found; char temp[MAX_N+1] = {'\0'}; ngram ng, ngrams[MAX_NGRAMS]; char s[len+1]; strcpy(s, text); strUpper(s); for (i = 0; i <= len-n; ++i) { strncpy(temp, s + i, n); found = false; for (j = 0; j < count; ++j) { if (!strcmp(ngrams[j].str, temp)) { ngrams[j].freq++; found = true; break; } } if (!found) { strncpy(ng.str, temp, n); ng.freq = 1; ngrams[count++] = ng; } } 
for (i = 0; i < count; ++i) { printf("(\"%s\": %d) ", ngrams[i].str, ngrams[i].freq); if (!((i+1)%5)) printf("\n"); } printf("\n\n"); } int main() { int n; char text = "Live and let live"; for (n = 2; n <= MAX_N; ++n) { printf("All %d-grams of '%s' and their frequencies:\n", n, text); ngrams(n, text); } return 0; }</syntaxhighlight> {{out}} <pre> All 2-grams of 'Live and let live' and their frequencies: ("LI": 2) ("IV": 2) ("VE": 2) ("E ": 1) (" A": 1) ("AN": 1) ("ND": 1) ("D ": 1) (" L": 2) ("LE": 1) ("ET": 1) ("T ": 1) All 3-grams of 'Live and let live' and their frequencies: ("LIV": 2) ("IVE": 2) ("VE ": 1) ("E A": 1) (" AN": 1) ("AND": 1) ("ND ": 1) ("D L": 1) (" LE": 1) ("LET": 1) ("ET ": 1) ("T L": 1) (" LI": 1) All 4-grams of 'Live and let live' and their frequencies: ("LIVE": 2) ("IVE ": 1) ("VE A": 1) ("E AN": 1) (" AND": 1) ("AND ": 1) ("ND L": 1) ("D LE": 1) (" LET": 1) ("LET ": 1) ("ET L": 1) ("T LI": 1) (" LIV": 1) </pre> =={{header\|C++}}== <syntaxhighlight lang="cpp">#include <iostream> #include <map> #include <string> std::map<std::string, int> find_ngrams(int n, const std::string& s) { std::map<std::string, int> ngrams; size_t max_loc = s.length() - n; for (size_t i = 0; i <= max_loc; i++) ngrams[s.substr(i, n)]++; return ngrams; } void print_ngrams(const std::map<std::string, int>& ngrams) { int col = 0; for (const auto& [ngram, count] : ngrams) { std::cout << "'" << ngram << "' - " << count; if (col++ % 5 == 4) std::cout << std::endl; else std::cout << '\t'; } std::cout << std::endl; } int main(void) { std::string s("LIVE AND LET LIVE"); for (int n=2; n<=4; n++) { std::cout << n << "-grams of '" << s << ":" << std::endl; print_ngrams(find_ngrams(n, s)); } return 0; }</syntaxhighlight> {{out}} <pre>2-grams of 'LIVE AND LET LIVE: ' A' - 1 ' L' - 2 'AN' - 1 'D ' - 1 'E ' - 1 'ET' - 1 'IV' - 2 'LE' - 1 'LI' - 2 'ND' - 1 'T ' - 1 'VE' - 2 3-grams of 'LIVE AND LET LIVE: ' AN' - 1 ' LE' - 1 ' LI' - 1 'AND' - 1 'D L' - 1 'E A' - 1 'ET ' - 1 'IVE' - 2 
'LET' - 1 'LIV' - 2 'ND ' - 1 'T L' - 1 'VE ' - 1 4-grams of 'LIVE AND LET LIVE: ' AND' - 1 ' LET' - 1 ' LIV' - 1 'AND ' - 1 'D LE' - 1 'E AN' - 1 'ET L' - 1 'IVE ' - 1 'LET ' - 1 'LIVE' - 2 'ND L' - 1 'T LI' - 1 'VE A' - 1</pre> =={{header\|CLU}}== <syntaxhighlight lang="clu">parts = iter (n: int, s: string) yields (string) for i: int in int$from_to(1, string$size(s)-n+1) do yield(string$substr(s, i, n)) end end parts ngram = struct[str: string, count: int] find_ngrams = proc (n: int, s: string) returns (sequence[ngram]) ng: array[ngram] := array[ngram]$[] for part: string in parts(n, s) do begin for i: int in array[ngram]$indexes(ng) do if ng[i].str = part then exit found(i) end end array[ngram]$addh(ng, ngram${str: part, count: 1}) end except when found(i: int): ng[i] := ngram${str: ng[i].str, count: ng[i].count + 1} end end return(sequence[ngram]$a2s(ng)) end find_ngrams show_ngrams = proc (s: stream, n: int, str: string) ngrams: sequence[ngram] := find_ngrams(n, str) col: int := 0 for ng: ngram in sequence[ngram]$elements(ngrams) do stream$putleft(s, "'" \|\| ng.str \|\| "' - " \|\| int$unparse(ng.count), 15) if col // 5 = 4 then stream$putl(s, "") end col := col + 1 end stream$putl(s, "") end show_ngrams start_up = proc () po: stream := stream$primary_output() s: string := "LIVE AND LET LIVE" for n: int in int$from_to(2, 4) do stream$putl(po, int$unparse(n) \|\| "-grams of '" \|\| s \|\| "':") show_ngrams(po, n, s) stream$putl(po, "") end end start_up</syntaxhighlight> {{out}} <pre>2-grams of 'LIVE AND LET LIVE': 'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1 'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1 'ET' - 1 'T ' - 1 3-grams of 'LIVE AND LET LIVE': 'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1 'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1 'ET ' - 1 'T L' - 1 ' LI' - 1 4-grams of 'LIVE AND LET LIVE': 'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1 'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1 'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre> 
=={{header\|Common Lisp}}== Line 48 ⟶ 587: ("ND" . 1) ("D " . 1) ("LE" . 1) ("ET" . 1) ("T " . 1)) </syntaxhighlight> =={{header\|Cowgol}}== <syntaxhighlight lang="cowgol">include "cowgol.coh"; include "strings.coh"; record Ngram is ptr: [uint8]; size: intptr; count: intptr; end record; sub PrintNgram(ngram: [Ngram]) is print_char('\''); var ptr := ngram.ptr; var n := ngram.size; while n > 0 loop print_char([ptr]); ptr := @next ptr; n := n - 1; end loop; print("' - "); print_i32(ngram.count as uint32); end sub; sub MemCmp(n: intptr, a: [uint8], b: [uint8]): (eq: uint8) is eq := 1; while n>0 loop if [a] != [b] then eq := 0; return; end if; a := @next a; b := @next b; n := n - 1; end loop; end sub; sub FindNgrams(n: intptr, str: [uint8], result: [Ngram]): (amount: intptr) is var nextres := result; amount := 0; sub NewNgram(pos: [uint8]) is nextres.ptr := pos; nextres.size := n; nextres.count := 1; nextres := @next nextres; amount := amount + 1; end sub; sub IncNgram(pos: [uint8]) is if amount == 0 then NewNgram(pos); return; end if; var curres := result; var left := amount; while left > 0 loop if MemCmp(n, pos, curres.ptr) != 0 then curres.count := curres.count + 1; return; end if; left := left - 1; curres := @next curres; end loop; NewNgram(pos); end sub; var charsleft := StrLen(str) - n + 1; while charsleft > 0 loop IncNgram(str); str := @next str; charsleft := charsleft - 1; end loop; end sub; sub ShowNgrams(n: intptr, str: [uint8]) is var ngrams: Ngram[128]; print_i32(n as uint32); print("-grams of '"); print(str); print("':\n"); var amount := FindNgrams(n, str, &ngrams[0]) as @indexof ngrams; var i: @indexof ngrams := 0; while i < amount loop PrintNgram(&ngrams[i]); if i % 5 == 4 then print_nl(); else print_char('\t'); end if; i := i + 1; end loop; print_nl(); print_nl(); end sub; var str := "LIVE AND LET LIVE"; ShowNgrams(2, str); ShowNgrams(3, str); ShowNgrams(4, str);</syntaxhighlight> {{out}} <pre>2-grams of 'LIVE AND LET LIVE': 'LI' - 2 'IV' - 2 'VE' - 2 'E 
' - 1 ' A' - 1 'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1 'ET' - 1 'T ' - 1 3-grams of 'LIVE AND LET LIVE': 'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1 'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1 'ET ' - 1 'T L' - 1 ' LI' - 1 4-grams of 'LIVE AND LET LIVE': 'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1 'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1 'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre> =={{header\|Draco}}== <syntaxhighlight lang="draco">\util.g type Ngram = struct { char nptr; word length; word amount; Ngram next; }; proc equal_n(word n; char a, b) bool: while n>0 and a = b* do a := a+1; b := b+1; n := n-1 od; n = 0 corp proc write_nchars(word n; char ptr) void: word i; for i from 1 upto n do write(ptr); ptr := ptr + 1; od corp proc write_ngrams(Ngram ngram) void: word i; i := 0; while ngram /= nil do write("'"); write_nchars(ngram.length, ngram.nptr); write("' - ", ngram.amount); if i % 5=4 then writeln() else write('\t') fi; i := i+1; ngram := ngram.next od corp proc new_ngram(word n; char ptr) Ngram: Ngram ngram; ngram := new(Ngram); ngram.length := n; ngram.nptr := ptr; ngram.amount := 1; ngram.next := nil; ngram corp; proc inc_ngram(Ngram ngram; word n; char ptr) Ngram: Ngram begin, lastn; begin := ngram; if begin = nil then new_ngram(n, ptr) else while ngram /= nil and not equal_n(n, ptr, ngram.nptr) do lastn := ngram; ngram := ngram.next od; if ngram /= nil then ngram.amount := ngram.amount + 1 else lastn.next := new_ngram(n, ptr) fi; begin fi corp proc find_ngrams(word n; char string) Ngram: Ngram ngrams; word maxpos, i; ngrams := nil; maxpos := CharsLen(string) - n; for i from 0 upto maxpos do ngrams := inc_ngram(ngrams, n, string + i) od; ngrams corp proc main() void: char string = "LIVE AND LET LIVE"; word n; for n from 2 upto 4 do writeln(n, "-grams of '", string, "':"); write_ngrams(find_ngrams(n, string)); writeln(); od; corp</syntaxhighlight> {{out}} <pre>2-grams of 'LIVE AND LET LIVE': 'LI' - 2 'IV' - 2 'VE' - 2 'E ' 
- 1 ' A' - 1 'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1 'ET' - 1 'T ' - 1 3-grams of 'LIVE AND LET LIVE': 'LIV' - 2 'IVE' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1 'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1 'ET ' - 1 'T L' - 1 ' LI' - 1 4-grams of 'LIVE AND LET LIVE': 'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1 'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1 'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre> =={{header\|F_Sharp\|F#}}== <syntaxhighlight lang="fsharp"> // N-grams. Nigel Galloway: April 2nd., 2024 let gram (n:string) g=let n=n.ToUpper() in n\|>Seq.windowed g\|>Seq.countBy id for n,g in (gram "Live and let live" 2) do printfn "%A %d" n g </syntaxhighlight> {{out}} <pre> [\|'L'; 'I'\|] 2 [\|'I'; 'V'\|] 2 [\|'V'; 'E'\|] 2 [\|'E'; ' '\|] 1 [\|' '; 'A'\|] 1 [\|'A'; 'N'\|] 1 [\|'N'; 'D'\|] 1 [\|'D'; ' '\|] 1 [\|' '; 'L'\|] 2 [\|'L'; 'E'\|] 1 [\|'E'; 'T'\|] 1 [\|'T'; ' '\|] 1 </pre> =={{header\|Factor}}== {{works with\|Factor\|0.99 2022-04-03}} <syntaxhighlight lang=factor>USING: ascii grouping kernel math.statistics prettyprint ; : n-grams ( str n -- assoc ) [ >upper ] dip clump histogram ; "Live and let live" 2 n-grams .</syntaxhighlight> {{out}} <pre> H{ { "ET" 1 } { "IV" 2 } { "T " 1 } { " A" 1 } { "VE" 2 } { "LI" 2 } { "E " 1 } { "D " 1 } { " L" 2 } { "ND" 1 } { "LE" 1 } { "AN" 1 } } </pre> =={{header\|Haskell}}== <syntaxhighlight lang=haskell>import Control.Applicative (ZipList (ZipList, getZipList)) import Data.Char (toUpper) import Data.List (tails) import qualified Data.Map.Strict as M ------------------- MAP OF N-GRAM COUNTS ----------------- nGramCounts :: Int -> String -> M.Map String Int nGramCounts n = foldr (flip (M.insertWith (+)) 1) M.empty . windows n ------------------------- GENERIC ------------------------ windows :: Int -> [a] -> [[a]] windows n = transpose . take n . 
tails transpose :: [[a]] -> [[a]] transpose [] = [] transpose xs = getZipList (traverse ZipList xs) --------------------------- TEST ------------------------- main :: IO () main = let sample = toUpper <$> "Live and let live" in mapM_ ( \n -> putStrLn (show n <> "-GRAMS:") >> mapM_ print ((M.assocs . nGramCounts n) sample) >> putStrLn "" ) [0 .. 4]</syntaxhighlight> {{Out}} <pre>0-GRAMS: 1-GRAMS: (" ",3) ("A",1) ("D",1) ("E",3) ("I",2) ("L",3) ("N",1) ("T",1) ("V",2) 2-GRAMS: (" A",1) (" L",2) ("AN",1) ("D ",1) ("E ",1) ("ET",1) ("IV",2) ("LE",1) ("LI",2) ("ND",1) ("T ",1) ("VE",2) 3-GRAMS: (" AN",1) (" LE",1) (" LI",1) ("AND",1) ("D L",1) ("E A",1) ("ET ",1) ("IVE",2) ("LET",1) ("LIV",2) ("ND ",1) ("T L",1) ("VE ",1) 4-GRAMS: (" AND",1) (" LET",1) (" LIV",1) ("AND ",1) ("D LE",1) ("E AN",1) ("ET L",1) ("IVE ",1) ("LET ",1) ("LIVE",2) ("ND L",1) ("T LI",1) ("VE A",1)</pre> =={{header\|jq}}== '''Works with jq and gojq, that is, the C and Go implementations of jq.''' <syntaxhighlight lang=jq> # Generic "bag of words" utility: def bow(stream): reduce stream as $word ({}; .[($word\|tostring)] += 1); # The ngrams as a bow def ngrams($n): ascii_upcase as $text \| bow( range(0;$text\|1+ length - $n) as $i \| $text[$i:$i+$n]); # The task # Sort by increasing frequency, then by lexicographical order def ngrams($text; $n): ($text\|ngrams($n)) as $ngrams \| "\nAll \($n)-grams of '\($text)' and their frequencies:", ($ngrams\|to_entries\|sort_by(.value,.key)[] \| "\(.key): \(.value)" ) ; ngrams("Live and let live"; 2,3,4) </syntaxhighlight> {{output}} <pre> All 2-grams of 'Live and let live' and their frequencies: A: 1 AN: 1 D : 1 E : 1 ET: 1 LE: 1 ND: 1 T : 1 L: 2 IV: 2 LI: 2 VE: 2 All 3-grams of 'Live and let live' and their frequencies: AN: 1 LE: 1 LI: 1 AND: 1 D L: 1 E A: 1 ET : 1 LET: 1 ND : 1 T L: 1 VE : 1 IVE: 2 LIV: 2 All 4-grams of 'Live and let live' and their frequencies: AND: 1 LET: 1 LIV: 1 AND : 1 D LE: 1 E AN: 1 ET L: 1 IVE : 1 LET : 1 ND L: 1 T LI: 1 VE A: 1 
LIVE: 2 </pre> =={{header\|Julia}}== <syntaxhighlight lang="julia">function ngrams(str::AbstractString, n; uppercaseinput = true) s = uppercaseinput ? uppercase(str) : str unique([(ng, count(ng, s)) for ng in [SubString(s, i:i+n-1) for i=1:length(s)-n+1]]) end function eightcolumns(arr) for (i, elem) in pairs(arr) print(lpad(elem, 10), i % 8 == 0 ? "\n" : "") end println("\n") end const s = "Live and let live" ngrams(s, 1) \|> eightcolumns ngrams(s, 2) \|> eightcolumns ngrams(s, 2, uppercaseinput = false) \|> eightcolumns </syntaxhighlight>{{out}} <pre> ("L", 3) ("I", 2) ("V", 2) ("E", 3) (" ", 3) ("A", 1) ("N", 1) ("D", 1) ("T", 1) ("LI", 2) ("IV", 2) ("VE", 2) ("E ", 1) (" A", 1) ("AN", 1) ("ND", 1) ("D ", 1) (" L", 2) ("LE", 1) ("ET", 1) ("T ", 1) ("Li", 1) ("iv", 2) ("ve", 2) ("e ", 1) (" a", 1) ("an", 1) ("nd", 1) ("d ", 1) (" l", 2) ("le", 1) ("et", 1) ("t ", 1) ("li", 1) </pre> =={{header\|Miranda}}== <syntaxhighlight lang="miranda">main :: [sys_message] main = concat (map (testcase s) [2,3,4]) where s = "LIVE AND LET LIVE" testcase :: [char]->num->[sys_message] testcase s n = [Stdout (show n ++ "-grams of '" ++ s ++ ":'\n"), Stdout (showngrams n s), Stdout "\n"] showngrams :: num->[char]->[char] showngrams n s = lay (map concat (splitn 6 cols)) where ng = ngrams n s cols = [ljustify 12 (showngram ng') \| ng'<-ng] showngram :: ([char],num)->[char] showngram (s,i) = concat ["\"", s, "\": ", show i] splitn :: num->[]->[[]] splitn n [] = [] splitn n ls = take n ls:splitn n (drop n ls) ngrams :: num->[]->[([],num)] ngrams n = count . 
group n group :: num->[]->[[]] group n ls = [], if #ls < n group n ls = take n ls : group n (tl ls) count :: []->[(,num)] count = foldl incelem [] incelem :: [(,num)]->->[(,num)] incelem [] el = [(el, 1)] incelem ((el,n):cs) el = (el,n+1):cs incelem (c:cs) el = c:incelem cs el</syntaxhighlight> {{out}} <pre>2-grams of 'LIVE AND LET LIVE:' "LI": 2 "IV": 2 "VE": 2 "E ": 1 " A": 1 "AN": 1 "ND": 1 "D ": 1 " L": 2 "LE": 1 "ET": 1 "T ": 1 3-grams of 'LIVE AND LET LIVE:' "LIV": 2 "IVE": 2 "VE ": 1 "E A": 1 " AN": 1 "AND": 1 "ND ": 1 "D L": 1 " LE": 1 "LET": 1 "ET ": 1 "T L": 1 " LI": 1 4-grams of 'LIVE AND LET LIVE:' "LIVE": 2 "IVE ": 1 "VE A": 1 "E AN": 1 " AND": 1 "AND ": 1 "ND L": 1 "D LE": 1 " LET": 1 "LET ": 1 "ET L": 1 "T LI": 1 " LIV": 1</pre> =={{header\|Nim}}== <syntaxhighlight lang="Nim">import std/[strutils, tables] type NGrams = CountTable[string] func ngrams(text: string; n: Positive): NGrams = for i in 0..(text.len - n): result.inc(text[i..<(i + n)].toLowerAscii) const Text = "Live and let live" for n in 2..4: echo n, "-grams:" var ng = Text.ngrams(n) ng.sort() # To display n-grams with higher score first. 
for key, count in ng: echo "“$1”: $2".format(key, count) echo() </syntaxhighlight> {{out}} <pre>2-grams: “ve”: 2 “li”: 2 “iv”: 2 “ l”: 2 “d ”: 1 “et”: 1 “t ”: 1 “an”: 1 “nd”: 1 “e ”: 1 “le”: 1 “ a”: 1 3-grams: “ive”: 2 “liv”: 2 “ le”: 1 “nd ”: 1 “and”: 1 “et ”: 1 “ve ”: 1 “t l”: 1 “ an”: 1 “d l”: 1 “e a”: 1 “let”: 1 “ li”: 1 4-grams: “live”: 2 “ liv”: 1 “ and”: 1 “e an”: 1 “ let”: 1 “and ”: 1 “d le”: 1 “t li”: 1 “nd l”: 1 “et l”: 1 “ive ”: 1 “let ”: 1 “ve a”: 1 </pre> =={{header\|Perl}}== <syntaxhighlight lang="perl" line>use v5.36; sub n_gram ($n, $line) { my %N; map { $N{substr lc($line),$_,$n}++ } 0..length($line)-$n; %N } my %bi_grams = n_gram 2, 'Live and let live'; say qq\|'$_' - $bi_grams{$_}\| for sort keys %bi_grams; say ''; my %tri_grams = n_gram 3, 'Live and let live'; say qq\|'$_' - $tri_grams{$_}\| for sort keys %tri_grams;</syntaxhighlight> {{out}} <pre>' a' - 1 ' l' - 2 'an' - 1 'd ' - 1 'e ' - 1 'et' - 1 'iv' - 2 'le' - 1 'li' - 2 'nd' - 1 't ' - 1 've' - 2 ' an' - 1 ' le' - 1 ' li' - 1 'and' - 1 'd l' - 1 'e a' - 1 'et ' - 1 'ive' - 2 'let' - 1 'liv' - 2 'nd ' - 1 't l' - 1 've ' - 1</pre> =={{header\|Phix}}== A dictionary is used to find the index of already-seen n-grams, even though a simpler find() would be good enough for this task.<br> I have replicated most orderings found on this page, the task description order corresponds to orig/freq,<br> and jq is alpha/freq with high last, but there is no equivalent for the Factor or Raku orderings here ;-). 
<!--<syntaxhighlight lang="phix">(phixonline)--> <span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> <span style="color: #008080;">function</span> <span style="color: #000000;">n_grams</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">len</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">string</span> <span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">orders</span><span style="color: #0000FF;">)</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">ng</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span> <span style="color: #000000;">ngc</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">d</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">new_dict</span><span style="color: #0000FF;">()</span> <span style="color: #000000;">txt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">upper</span><span style="color: #0000FF;">(</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">len</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span> <span style="color: #004080;">string</span> <span style="color: #000000;">tn</span> <span 
style="color: #0000FF;">=</span> <span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">len</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">ndx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">getdd</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tn</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">if</span> <span style="color: #000000;">ndx</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #000000;">ng</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ng</span><span style="color: #0000FF;">,</span><span style="color: #000000;">tn</span><span style="color: #0000FF;">)</span> <span style="color: #000000;">ngc</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ngc</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span> <span style="color: #000000;">ndx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ng</span><span style="color: #0000FF;">)</span> <span style="color: #7060A8;">setd</span><span style="color: 
#0000FF;">(</span><span style="color: #000000;">tn</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ndx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">else</span> <span style="color: #000000;">ngc</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ndx</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> <span style="color: #008080;">end</span> <span style="color: #008080;">for</span> <span style="color: #7060A8;">destroy_dict</span><span style="color: #0000FF;">(</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ng</span><span style="color: #0000FF;">)</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">ares</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">columnize</span><span style="color: #0000FF;">({</span><span style="color: #000000;">ng</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ngc</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #000000;">l</span><span style="color: #0000FF;">)}),</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span> <span style="color: #008080;">for</span> <span style="color: #000000;">c</span> <span style="color: #008080;">in</span> <span style="color: #000000;">orders</span> <span style="color: #008080;">do</span> <span 
style="color: #008080;">if</span> <span style="color: #000000;">c</span><span style="color: #0000FF;">=</span><span style="color: #008000;">"original"</span> <span style="color: #008080;">then</span> <span style="color: #000080;font-style:italic;">-- original/first found order</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ares</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">elsif</span> <span style="color: #000000;">c</span><span style="color: #0000FF;">=</span><span style="color: #008000;">"orig/freq"</span> <span style="color: #008080;">then</span> <span style="color: #000080;font-style:italic;">-- "" but higher freq first</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">sort_columns</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ares</span><span style="color: #0000FF;">,{-</span><span style="color: #000000;">2</span><span style="color: #0000FF;">,</span><span style="color: #000000;">3</span><span style="color: #0000FF;">}))</span> <span style="color: #008080;">elsif</span> <span style="color: #000000;">c</span><span style="color: #0000FF;">=</span><span style="color: #008000;">"alphabetic"</span> <span style="color: #008080;">then</span> <span style="color: #000080;font-style:italic;">-- alphabetical order</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span 
style="color: #7060A8;">sort</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">deep_copy</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ares</span><span style="color: #0000FF;">)))</span> <span style="color: #008080;">elsif</span> <span style="color: #000000;">c</span><span style="color: #0000FF;">=</span><span style="color: #008000;">"alpha/freq"</span> <span style="color: #008080;">then</span> <span style="color: #000080;font-style:italic;">-- "" but higher freq first</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">sort_columns</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ares</span><span style="color: #0000FF;">,{-</span><span style="color: #000000;">2</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1</span><span style="color: #0000FF;">}))</span> <span style="color: #008080;">else</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #000080;font-style:italic;">-- (unknown ordering requested)</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> <span style="color: #008080;">end</span> <span style="color: #008080;">for</span> <span style="color: #008080;">return</span> <span style="color: #000000;">res</span> <span style="color: #008080;">end</span> <span style="color: #008080;">function</span> <span style="color: #008080;">constant</span> <span style="color: #000000;">src</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"Live and let live"</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">orders</span> <span style="color: 
#0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"original"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"orig/freq"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"alphabetic"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"alpha/freq"</span><span style="color: #0000FF;">}</span> <span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"For \"%s\":\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">src</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">for</span> <span style="color: #000000;">l</span><span style="color: #0000FF;">=</span><span style="color: #000000;">2</span> <span style="color: #008080;">to</span> <span style="color: #000000;">4</span> <span style="color: #008080;">do</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">n_grams</span><span style="color: #0000FF;">(</span><span style="color: #000000;">l</span><span style="color: #0000FF;">,</span><span style="color: #000000;">src</span><span style="color: #0000FF;">,</span><span style="color: #000000;">orders</span><span style="color: #0000FF;">)</span> <span style="color: #004080;">string</span> <span style="color: #000000;">count</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">ordinal</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]),</span><span style="color: #004600;">true</span><span style="color: #0000FF;">)</span> <span style="color: 
#7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"There are %s unique %d-grams:\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">count</span><span style="color: #0000FF;">,</span><span style="color: #000000;">l</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">,</span><span style="color: #000000;">r</span> <span style="color: #008080;">in</span> <span style="color: #000000;">res</span> <span style="color: #008080;">do</span> <span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%12s: %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">orders</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">r</span><span style="color: #0000FF;">,</span><span style="color: #008000;">", "</span><span style="color: #0000FF;">,</span><span style="color: #000000;">fmt</span><span style="color: #0000FF;">:=</span><span style="color: #008000;">"%s %d"</span><span style="color: #0000FF;">)})</span> <span style="color: #008080;">end</span> <span style="color: #008080;">for</span> <span style="color: #008080;">end</span> <span style="color: #008080;">for</span> <!--</syntaxhighlight>--> {{out}} <pre> For "Live and let live": There are twelve unique 2-grams: original: LI 2, IV 2, VE 2, E 1, A 1, AN 1, ND 1, D 1, L 2, LE 1, ET 1, T 1 orig/freq: LI 2, IV 2, VE 2, L 2, E 1, A 1, AN 1, ND 1, D 1, LE 1, ET 1, T 1 alphabetic: A 1, L 2, AN 1, D 1, E 1, ET 1, IV 2, LE 1, LI 2, ND 1, T 1, VE 2 alpha/freq: L 2, IV 2, 
LI 2, VE 2, A 1, AN 1, D 1, E 1, ET 1, LE 1, ND 1, T 1 There are thirteen unique 3-grams: original: LIV 2, IVE 2, VE 1, E A 1, AN 1, AND 1, ND 1, D L 1, LE 1, LET 1, ET 1, T L 1, LI 1 orig/freq: LIV 2, IVE 2, VE 1, E A 1, AN 1, AND 1, ND 1, D L 1, LE 1, LET 1, ET 1, T L 1, LI 1 alphabetic: AN 1, LE 1, LI 1, AND 1, D L 1, E A 1, ET 1, IVE 2, LET 1, LIV 2, ND 1, T L 1, VE 1 alpha/freq: IVE 2, LIV 2, AN 1, LE 1, LI 1, AND 1, D L 1, E A 1, ET 1, LET 1, ND 1, T L 1, VE 1 There are thirteen unique 4-grams: original: LIVE 2, IVE 1, VE A 1, E AN 1, AND 1, AND 1, ND L 1, D LE 1, LET 1, LET 1, ET L 1, T LI 1, LIV 1 orig/freq: LIVE 2, IVE 1, VE A 1, E AN 1, AND 1, AND 1, ND L 1, D LE 1, LET 1, LET 1, ET L 1, T LI 1, LIV 1 alphabetic: AND 1, LET 1, LIV 1, AND 1, D LE 1, E AN 1, ET L 1, IVE 1, LET 1, LIVE 2, ND L 1, T LI 1, VE A 1 alpha/freq: LIVE 2, AND 1, LET 1, LIV 1, AND 1, D LE 1, E AN 1, ET L 1, IVE 1, LET 1, ND L 1, T LI 1, VE A 1 </pre> =={{header\|PL/M}}== <syntaxhighlight lang="plm">100H: BDOS: PROCEDURE (F,A); DECLARE F BYTE, A ADDRESS; GO TO 5; END BDOS; EXIT: PROCEDURE; GO TO 0; END EXIT; PR$CH: PROCEDURE (C); DECLARE C BYTE; CALL BDOS(2, C); END PR$CH; PR$STR: PROCEDURE (S); DECLARE S ADDRESS; CALL BDOS(9, S); END PR$STR; PR$NUM: PROCEDURE (N); DECLARE N ADDRESS; DECLARE S (6) BYTE INITIAL ('.....$'); DECLARE I BYTE; I = 5; DIGIT: I = I - 1; S(I) = N MOD 10 + '0'; IF (N := N / 10) > 0 THEN GO TO DIGIT; CALL PR$STR(.S(I)); END PR$NUM; PR$NSTR: PROCEDURE (N, STR); DECLARE (STR, N) ADDRESS, CH BASED STR BYTE; DO WHILE N>0; CALL PR$CH(CH); STR = STR+1; N = N-1; END; END PR$NSTR; CMP$NSTR: PROCEDURE (N, STRA, STRB) BYTE; DECLARE (STRA, STRB, N, I) ADDRESS; DECLARE A BASED STRA BYTE; DECLARE B BASED STRB BYTE; DO I=0 TO N-1; IF A(I) <> B(I) THEN RETURN 0; END; RETURN 0FFH; END CMP$NSTR; STR$LEN: PROCEDURE (STR) ADDRESS; DECLARE (N, STR) ADDRESS, S BASED STR BYTE; N = 0; DO WHILE S(N) <> '$'; N = N+1; END; RETURN N; END STR$LEN; FIND$NGRAMS: PROCEDURE (N, STR, RSLT) 
ADDRESS; DECLARE (N, I, J, STR, RSLT, FOUND) ADDRESS; DECLARE S BASED STR BYTE; DECLARE ITEM BASED RSLT ADDRESS; DECLARE MAXPOS ADDRESS; MAXPOS = STR$LEN(STR) - N; FOUND = 0; DO I = 0 TO MAXPOS; IF FOUND = 0 THEN GO TO NOT$FOUND; DO J = 0 TO FOUND; IF CMP$NSTR(N, .S(I), ITEM(2*J)) THEN DO; ITEM(2*J+1) = ITEM(2*J+1) + 1; GO TO NEXT$ITEM; END; END; NOT$FOUND: ITEM(2*FOUND) = .S(I); ITEM(2*FOUND+1) = 1; FOUND = FOUND + 1; NEXT$ITEM: END; RETURN FOUND; END FIND$NGRAMS; PRINT$NGRAMS: PROCEDURE (N, STR); DECLARE (N, I, STR) ADDRESS; DECLARE RESULT (64) ADDRESS; DECLARE AMOUNT ADDRESS; CALL PR$CH(N + '0'); CALL PR$STR(.'-GRAMS OF ''$'); CALL PR$STR(STR); CALL PR$STR(.(''': ', 13, 10, '$')); AMOUNT = FIND$NGRAMS(N, STR, .RESULT); DO I = 0 TO AMOUNT - 1; CALL PR$CH(''''); CALL PR$NSTR(N, RESULT(2*I)); CALL PR$STR(.''' - $'); CALL PR$NUM(RESULT(2*I+1)); IF I MOD 5 = 4 THEN CALL PR$STR(.(13,10,'$')); ELSE CALL PR$CH(9); END; CALL PR$STR(.(13,10,'$')); END PRINT$NGRAMS; DECLARE STRING DATA ('LIVE AND LET LIVE$'); DECLARE N BYTE; DO N = 2 TO 4; CALL PRINT$NGRAMS(N, .STRING); END; CALL EXIT; EOF</syntaxhighlight> {{out}} <pre>2-GRAMS OF 'LIVE AND LET LIVE': 'LI' - 2 'IV' - 2 'VE' - 2 'E ' - 1 ' A' - 1 'AN' - 1 'ND' - 1 'D ' - 1 ' L' - 2 'LE' - 1 'ET' - 1 'T ' - 1 3-GRAMS OF 'LIVE AND LET LIVE': 'LIV' - 2 'VE ' - 1 'E A' - 1 ' AN' - 1 'AND' - 1 'ND ' - 1 'D L' - 1 ' LE' - 1 'LET' - 1 'ET ' - 1 'T L' - 1 ' LI' - 1 'IVE' - 1 4-GRAMS OF 'LIVE AND LET LIVE': 'LIVE' - 2 'IVE ' - 1 'VE A' - 1 'E AN' - 1 ' AND' - 1 'AND ' - 1 'ND L' - 1 'D LE' - 1 ' LET' - 1 'LET ' - 1 'ET L' - 1 'T LI' - 1 ' LIV' - 1</pre> =={{header\|Python}}== <syntaxhighlight lang="python"> import pprint from collections import Counter from typing import Iterable def n_grams(text: str, n: int) -> Iterable[str]: """Generate contiguous sequences of _n_ characters from _text_.""" if n < 1: raise ValueError("n must be an integer > 0") text = text.upper() return (text[i : (i + n)] for i in range(len(text) - n + 1)) def main() 
-> None: example_text = "Live and let live" for n in range(2, 5): counts = Counter(n_grams(example_text, n)).most_common() print( f"{len(counts)} {n}-grams of {example_text!r}:\n", pprint.pformat(counts, compact=True), end="\n\n", ) if __name__ == "__main__": main() </syntaxhighlight> {{out}} <pre> 12 2-grams of 'Live and let live': [('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1), ('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)] 13 3-grams of 'Live and let live': [('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1), ('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1), (' LI', 1)] 13 4-grams of 'Live and let live': [('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1), ('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1), (' LIV', 1)] </pre> ===Sliding window=== This example takes inspiration from the [https://docs.python.org/3/library/itertools.html#itertools-recipes sliding_window recipe] found in Python's itertools docs. 
<syntaxhighlight lang="python"> import pprint from collections import Counter from collections import deque from itertools import islice from typing import Iterable def n_grams(text: str, n: int) -> Iterable[str]: """Generate contiguous sequences of _n_ characters from _text_.""" it = iter(text.upper()) n_gram = deque(islice(it, n), maxlen=n) if len(n_gram) == n: yield "".join(n_gram) for x in it: n_gram.append(x) yield "".join(n_gram) def main() -> None: example_text = "Live and let live" for n in range(2, 5): counts = Counter(n_grams(example_text, n)).most_common() print( f"{len(counts)} {n}-grams of {example_text!r}:\n", pprint.pformat(counts, compact=True), end="\n\n", ) if __name__ == "__main__": main() </syntaxhighlight> {{out}} <pre> 12 2-grams of 'Live and let live': [('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1), ('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)] 13 3-grams of 'Live and let live': [('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1), ('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1), (' LI', 1)] 13 4-grams of 'Live and let live': [('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1), ('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1), (' LIV', 1)] </pre> And a strict variant, compositionally assembled from some basics: <syntaxhighlight lang="python">from itertools import (islice) from functools import (reduce) from operator import (add) def nGramCounts(n, s): '''A dictionary of all nGrams of dimension n in s, with the frequency of their occurrence. ''' return reduce( lambda a, gram: insertWith(add, gram, 1, a), nGrams(n, s), {} ) def nGrams(n, s): '''All case-insensitive sequences of length n in the string s.''' return (''.join(t) for t in windows(n, list(s.upper()))) # ----------------------- GENERICS ----------------------- def insertWith(f, k, x, dct): '''A new dictionary updated with a (key, f(value, x)) tuple. 
Where there is no existing value for the key, the supplied x is used as the default. ''' return dict(dct, **{k: f(dct[k], x) if k in dct else x}) def tails(xs): '''All final segments of xs, longest first.''' return (xs[i:] for i in range(0, 1 + len(xs))) def windows(n, xs): '''Sliding windows of dimension n.''' return zip(*islice(tails(xs), n)) # ------------------------- TEST ------------------------- if __name__ == "__main__": import pprint EXAMPLE = "Live and let live" for dimension in range(1, 5): result = sorted(nGramCounts(dimension, EXAMPLE).items()) print( f"{len(result)} {dimension}-grams of {EXAMPLE!r}:\n", pprint.pformat(result), end="\n\n", )</syntaxhighlight> {{Out}} <pre>9 1-grams of 'Live and let live': [(' ', 3), ('A', 1), ('D', 1), ('E', 3), ('I', 2), ('L', 3), ('N', 1), ('T', 1), ('V', 2)] 12 2-grams of 'Live and let live': [(' A', 1), (' L', 2), ('AN', 1), ('D ', 1), ('E ', 1), ('ET', 1), ('IV', 2), ('LE', 1), ('LI', 2), ('ND', 1), ('T ', 1), ('VE', 2)] 13 3-grams of 'Live and let live': [(' AN', 1), (' LE', 1), (' LI', 1), ('AND', 1), ('D L', 1), ('E A', 1), ('ET ', 1), ('IVE', 2), ('LET', 1), ('LIV', 2), ('ND ', 1), ('T L', 1), ('VE ', 1)] 13 4-grams of 'Live and let live': [(' AND', 1), (' LET', 1), (' LIV', 1), ('AND ', 1), ('D LE', 1), ('E AN', 1), ('ET L', 1), ('IVE ', 1), ('LET ', 1), ('LIVE', 2), ('ND L', 1), ('T LI', 1), ('VE A', 1)]</pre> =={{header\|Raku}}== Line 57 ⟶ 1,640: <pre>("IV"=>2,"T "=>1,"VE"=>2,"E "=>1,"LE"=>1,"AN"=>1,"LI"=>2,"ND"=>1,"ET"=>1," L"=>2," A"=>1,"D "=>1).Bag ("ET "=>1,"AND"=>1,"LIV"=>2," LI"=>1,"ND "=>1," LE"=>1,"IVE"=>2,"E A"=>1,"VE "=>1,"T L"=>1,"D L"=>1,"LET"=>1," AN"=>1).Bag</pre> =={{header\|Refal}}== <syntaxhighlight lang="refal">$ENTRY Go { , 'LIVE AND LET LIVE': e.Str = <ShowNgrams 2 e.Str> <ShowNgrams 3 e.Str> <ShowNgrams 4 e.Str>; }; ShowNgrams { s.N e.Str = <Prout <Symb s.N> '-grams of "' e.Str '":'> <ShowLines 5 <Ngrams s.N e.Str>> <Prout>; }; ShowLines { s.N = ; s.N e.X, <First s.N e.X>: (e.L) e.R = 
<Prout <Each DispNgram e.L>> <ShowLines s.N e.R>; }; Each { s.F = ; s.F t.I e.Is = <Mu s.F t.I> <Each s.F e.Is>; }; DispNgram { ((e.S) s.C) = '(' e.S ') - ' <Symb s.C> ' '; }; Ngrams { s.N e.Str = <Count () <Groups s.N e.Str>>; }; Groups { s.N e.X, <Lenw e.X>: s.L e.X, <Compare s.L s.N>: { '-' = ; s.C, <First s.N e.X>: (e.G) e.R, e.X: s.Z e.Y = (e.G) <Groups s.N e.Y>; } }; Count { (e.Cs) = e.Cs; (e.Cs) t.I e.Is = <Count (<Inc (e.Cs) t.I>) e.Is>; }; Inc { (e.X (t.I s.C) e.Y) t.I = e.X (t.I <+ 1 s.C>) e.Y; (e.X) t.I = e.X (t.I 1); };</syntaxhighlight> {{out}} <pre>2-grams of "LIVE AND LET LIVE": (LI) - 2 (IV) - 2 (VE) - 2 (E ) - 1 ( A) - 1 (AN) - 1 (ND) - 1 (D ) - 1 ( L) - 2 (LE) - 1 (ET) - 1 (T ) - 1 3-grams of "LIVE AND LET LIVE": (LIV) - 2 (IVE) - 2 (VE ) - 1 (E A) - 1 ( AN) - 1 (AND) - 1 (ND ) - 1 (D L) - 1 ( LE) - 1 (LET) - 1 (ET ) - 1 (T L) - 1 ( LI) - 1 4-grams of "LIVE AND LET LIVE": (LIVE) - 2 (IVE ) - 1 (VE A) - 1 (E AN) - 1 ( AND) - 1 (AND ) - 1 (ND L) - 1 (D LE) - 1 ( LET) - 1 (LET ) - 1 (ET L) - 1 (T LI) - 1 ( LIV) - 1</pre> =={{header\|RPL}}== {{works with\|Halcyon Calc\|4.2.8}} {\| class="wikitable" ! RPL code ! Comment \|- \| ≪ → text n ≪ { } DUP n text SIZE '''FOR''' j text j n - 1 + j SUB '''IF''' DUP2 POS '''THEN''' LAST 4 ROLL SWAP DUP2 GET 1 + PUT SWAP DROP SWAP '''ELSE''' + SWAP 1 + SWAP '''END''' '''NEXT SHOWG''' ≫ ≫ ‘'''-GRAMS'''’ STO ≪ { } 1 3 PICK SIZE '''FOR''' j OVER j GET "=" + 4 PICK j GET →STR + + '''NEXT''' ROT ROT DROP2 ≫ ‘'''SHOWG'''’ STO \| '''-GRAMS''' ''( text n -- { "ngram=count".. } ) '' Initialize 2 empty lists; for j = n to length(text): ngram = text[j-n+1..j] if ngram already in ngram list increase counter in other list get rid of ngram else add to ngram list and set counter at 1 on the other list Show results '''SHOWG''' ''( { "ngram".. } { counts.. } -- { "ngram=count".. 
} ) '' \|} {{in}} <pre> "LIVE AND LET LIVE" 2 -GRAMS "LIVE AND LET LIVE" 3 -GRAMS "LIVE AND LET LIVE" 4 -GRAMS </pre> {{out}} <pre> 3: { "LI=2" "IV=2" "VE=2" "E =1" " A=1" "AN=1" "ND=1" "D =1" " L=2" "LE=1" "ET=1" "T =1" } 2: { "LIV=2" "IVE=2" "VE =1" "E A=1" " AN=1" "AND=1" "ND =1" "D L=1" " LE=1" "LET=1" "ET =1" "T L=1" " LI=1" } 1: { "LIVE=2" "IVE =1" "VE A=1" "E AN=1" " AND=1" "AND =1" "ND L=1" "D LE=1" " LET=1" "LET =1" "ET L=1" "T LI=1" " LIV=1" } </pre> =={{header\|SETL}}== <syntaxhighlight lang="setl">program find_ngrams; input := "LIVE AND LET LIVE"; loop for size in [2,3,4] do print(str size+"-grams of '"+input+"':"); ng := ngrams(input, size); col := 0; loop for count = ng(ngram) do nprint(rpad("'" + ngram + "': " + str count, 10)); if (col +:= 1) mod 8 = 0 then print; end if; end loop; print; print; end loop; proc ngrams(input, size); ng := {}; loop for i in [1..#input-size+1] do ng(input(i..i+size-1)) +:= 1; end loop; return ng; end proc; end program;</syntaxhighlight> {{out}} <pre>2-grams of 'LIVE AND LET LIVE': ' A': 1 ' L': 2 'AN': 1 'D ': 1 'E ': 1 'ET': 1 'IV': 2 'LE': 1 'LI': 2 'ND': 1 'T ': 1 'VE': 2 3-grams of 'LIVE AND LET LIVE': ' AN': 1 ' LE': 1 ' LI': 1 'AND': 1 'D L': 1 'E A': 1 'ET ': 1 'IVE': 2 'LET': 1 'LIV': 2 'ND ': 1 'T L': 1 'VE ': 1 4-grams of 'LIVE AND LET LIVE': ' AND': 1 ' LET': 1 ' LIV': 1 'AND ': 1 'D LE': 1 'E AN': 1 'ET L': 1 'IVE ': 1 'LET ': 1 'LIVE': 2 'ND L': 1 'T LI': 1 'VE A': 1 </pre> =={{header\|Wren}}== ===Version 1 (Sorted order)=== {{libheader\|Wren-str}} {{libheader\|Wren-maputil}} {{libheader\|Wren-fmt}} <syntaxhighlight lang="~~ecmascript~~wren">import "./str" for Str import "./maputil" for ~~MapUtil~~MultiSet import "./fmt" for Fmt Line 71 ⟶ 1,811: for (i in 0..text.count-n) { var ngram = text[i...i+n] ~~MapUtil~~MultiSet.~~increase~~add(ngrams, ngram) } return ngrams Line 107 ⟶ 1,847: ("D LE" : 1) ("E AN" : 1) ("ET L" : 1) ("IVE " : 1) ("LET " : 1) ("ND L" : 1) ("T LI" : 1) ("VE A" : 1) </pre> ===Version 2 
(Original order)=== {{libheader\|Wren-ordered}} The iteration order of 'Map' objects in Wren is undefined though they can subsequently be sorted into a particular order as the first version shows. However, to maintain the original order of insertion we need to use one of the classes in the above module which automatically keep track of such order when items are added or removed. <syntaxhighlight lang="wren">import "./str" for Str import "./ordered" for OrderedBag import "./fmt" for Fmt var findNgrams = Fn.new { \|n, text\| text = Str.upper(text) var ngrams = OrderedBag.new() for (i in 0..text.count-n) { var ngram = text[i...i+n] ngrams.add(ngram) } return ngrams } var text = "Live and let live" for (n in [2, 3, 4]) { var ngrams = findNgrams.call(n, text) System.print("All %(n)-grams of '%(text)' and their frequencies:") var ng = ngrams.toList.map { \|me\| "(\"%(me.key)\" : %(me.value))"} Fmt.tprint("$s ", ng, 5) System.print() }</syntaxhighlight> {{out}} <pre> All 2-grams of 'Live and let live' and their frequencies: ("LI" : 2) ("IV" : 2) ("VE" : 2) ("E " : 1) (" A" : 1) ("AN" : 1) ("ND" : 1) ("D " : 1) (" L" : 2) ("LE" : 1) ("ET" : 1) ("T " : 1) All 3-grams of 'Live and let live' and their frequencies: ("LIV" : 2) ("IVE" : 2) ("VE " : 1) ("E A" : 1) (" AN" : 1) ("AND" : 1) ("ND " : 1) ("D L" : 1) (" LE" : 1) ("LET" : 1) ("ET " : 1) ("T L" : 1) (" LI" : 1) All 4-grams of 'Live and let live' and their frequencies: ("LIVE" : 2) ("IVE " : 1) ("VE A" : 1) ("E AN" : 1) (" AND" : 1) ("AND " : 1) ("ND L" : 1) ("D LE" : 1) (" LET" : 1) ("LET " : 1) ("ET L" : 1) ("T LI" : 1) (" LIV" : 1) </pre> =={{header\|XPL0}}== <syntaxhighlight lang="XPL0">int Dict(100), Count(100), Size; proc LookUp(Wd); \Add word to dictionary, or increment its count int Wd, I; [for I:= 0 to Size-1 do if Dict(I) = Wd then [Count(I):= Count(I)+1; return; ]; Dict(Size):= Wd; Count(Size):= 1; Size:= Size+1; ]; proc ShowNGram(N, Str); \Show N-grams for string char N, Str; int I, J, Wd, Ch; [IntOut(0, N); 
Text(0, "-grams:^m^j"); Size:= 0; I:= 0; loop [Wd:= 0; for J:= 0 to N-1 do [Ch:= Str(I+J); if Ch = $A0 then quit; \terminating space if Ch>=^a and Ch<=^z then Ch:= Ch & ~$20; Wd:= Wd<<8 + Ch; ]; I:= I+1; LookUp(Wd); ]; for I:= 0 to Size-1 do [Wd:= Dict(I); for J:= N-1 downto 0 do ChOut(0, Wd>>(J*8)); ChOut(0, ^ ); IntOut(0, Count(I)); if rem(I/5) = 4 then CrLf(0) else ChOut(0, 9\tab\); ]; CrLf(0); ]; int N; for N:= 2 to 4 do ShowNGram(N, "Live and let live ")</syntaxhighlight> {{out}} <pre> 2-grams: LI 2 IV 2 VE 2 E 1 A 1 AN 1 ND 1 D 1 L 2 LE 1 ET 1 T 1 3-grams: LIV 2 IVE 2 VE 1 E A 1 AN 1 AND 1 ND 1 D L 1 LE 1 LET 1 ET 1 T L 1 LI 1 4-grams: LIVE 2 IVE 1 VE A 1 E AN 1 AND 1 AND 1 ND L 1 D LE 1 LET 1 LET 1 ET L 1 T LI 1 LIV 1 </pre>