Jump to content

N-grams

From Rosetta Code
N-grams is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

An N-gram is a sequence of N contiguous elements of a given text. Although N-grams refer sometimes to words or syllables, in this task we will consider only sequences of characters. The task consists in, given a text and an integer size of the desired N-grams, find all the different contiguous sequences of N characters, together with the number of times they appear in the text. For example, the 2-grams of the text "Live and let live" are:

 "LI" - 2
 "IV" - 2
 "VE" - 2
 " L" - 2
 "E " - 1
 " A" - 1
 "AN" - 1
 "ND" - 1
 "D " - 1
 "LE" - 1
 "ET" - 1
 "T " - 1

Note that space and other non-alphanumeric characters are taken into account.


See also


ABC

HOW TO RETURN n grams str:
    PUT {} IN grams
    FOR i IN {1..#str-n+1}:
        PUT str@i|n IN part
        SELECT:
            part in keys grams:
                PUT grams[part]+1 IN grams[part]
            ELSE:
                PUT 1 IN grams[part]
    RETURN grams
    
HOW TO SHOW n GRAMS FOR str:
    PUT n grams str IN grams
    PUT 0 IN col
    WRITE "`n`-grams for '`str`':"/
    FOR gr IN keys grams:
        WRITE "'`gr`' - `grams[gr]`" << 12
        IF col mod 5 = 4: WRITE /
        PUT col+1 IN col
    WRITE /
    
FOR n IN {2;3;4}:
    SHOW n GRAMS FOR "LIVE AND LET LIVE"
    WRITE /
Output:
2-grams for 'LIVE AND LET LIVE':
' A' - 1    ' L' - 2    'AN' - 1    'D ' - 1    'E ' - 1
'ET' - 1    'IV' - 2    'LE' - 1    'LI' - 2    'ND' - 1
'T ' - 1    'VE' - 2

3-grams for 'LIVE AND LET LIVE':
' AN' - 1   ' LE' - 1   ' LI' - 1   'AND' - 1   'D L' - 1
'E A' - 1   'ET ' - 1   'IVE' - 2   'LET' - 1   'LIV' - 2
'ND ' - 1   'T L' - 1   'VE ' - 1

4-grams for 'LIVE AND LET LIVE':
' AND' - 1  ' LET' - 1  ' LIV' - 1  'AND ' - 1  'D LE' - 1
'E AN' - 1  'ET L' - 1  'IVE ' - 1  'LET ' - 1  'LIVE' - 2
'ND L' - 1  'T LI' - 1  'VE A' - 1

ALGOL 68

BEGIN # split text into n-grams - n character substrings                     #

    MODE NGRAM = STRUCT( STRING gram, INT count );

    OP   UCASE = ( CHAR c )CHAR:          # return c converted to upper case #
         IF c >= "a" AND c <= "z" THEN REPR( ( ABS c - ABS "a" ) + ABS "A" ) ELSE c FI;
    OP   UCASE = ( STRING s )STRING:      # return s converted to upper case #
         BEGIN
             STRING uc := s;
             FOR i FROM LWB uc TO UPB uc DO uc[ i ] := UCASE uc[ i ] OD;
             uc
         END # UCASE # ;
    OP   LENGTH = ( STRING s )INT: ( UPB s + 1 ) - LWB s;

    # returns the n-grams of text - using a simple array to contain the      #
    # n-grams - for longer strings, an associative array might be better     #
    PRIO GRAMS = 1;
    OP   GRAMS = ( INT n, STRING text )[]NGRAM:
         BEGIN
            [ 1 : ( LENGTH text + 1 ) - n ]NGRAM result;  # initially assume #
            INT count := 0;                 # all the n-grams will be unique #
            FOR pos FROM LWB text TO ( UPB text + 1 )- n DO
                STRING ng     = UCASE text[ pos : pos + ( n - 1 ) ];
                BOOL   found := FALSE;
                INT    g pos := 0;
                FOR g FROM 1 TO count
                WHILE g pos := g;
                      NOT ( found := ng = ( gram OF result )[ g ] )
                DO SKIP OD;
                IF NOT found THEN
                    result[ count +:= 1 ] := ( ng, 1 )
                ELSE
                    ( count OF result )[ g pos ] +:= 1
                FI
            OD;
            result[ 1 : count ]
         END # NGRAMS # ;

    # prints the ngrams in ngrams                                           #
    PROC print ngrams = ( STRING title, text, []NGRAM ngrams )VOID:
         BEGIN
            print( ( title, "-grams of """, text, """:", newline ) );
            FOR g FROM LWB ngrams TO UPB ngrams DO
                print( ( "    """, ( gram OF ngrams )[ g ] ) );
                print( ( """: ", whole( ( count OF ngrams )[ g ], 0 ), newline ) )
            OD
         END # print ngrams # ;

    STRING  test       = "Live and let live";
    print ngrams( "bi",   test, 2 GRAMS test );
    print ngrams( "tri",  test, 3 GRAMS test );
    print ngrams( "quad", test, 4 GRAMS test )

END
Output:
bi-grams of "Live and let live":
    "LI": 2
    "IV": 2
    "VE": 2
    "E ": 1
    " A": 1
    "AN": 1
    "ND": 1
    "D ": 1
    " L": 2
    "LE": 1
    "ET": 1
    "T ": 1
tri-grams of "Live and let live":
    "LIV": 2
    "IVE": 2
    "VE ": 1
    "E A": 1
    " AN": 1
    "AND": 1
    "ND ": 1
    "D L": 1
    " LE": 1
    "LET": 1
    "ET ": 1
    "T L": 1
    " LI": 1
quad-grams of "Live and let live":
    "LIVE": 2
    "IVE ": 1
    "VE A": 1
    "E AN": 1
    " AND": 1
    "AND ": 1
    "ND L": 1
    "D LE": 1
    " LET": 1
    "LET ": 1
    "ET L": 1
    "T LI": 1
    " LIV": 1

APL

Works with: Dyalog APL
ngrams  (⊣,(≢⊢)),/
Output:
      2 3 4 ngrams¨ ⊂'LIVE AND LET LIVE'
  LI  2   LIV  2   LIVE  2 
  IV  2   IVE  2   IVE   1 
  VE  2   VE   1   VE A  1 
  E   1   E A  1   E AN  1 
   A  1    AN  1    AND  1 
  AN  1   AND  1   AND   1 
  ND  1   ND   1   ND L  1 
  D   1   D L  1   D LE  1 
   L  2    LE  1    LET  1 
  LE  1   LET  1   LET   1 
  ET  1   ET   1   ET L  1 
  T   1   T L  1   T LI  1 
           LI  1    LIV  1

Arturo

ngrams: function [s :string n :integer][
    0..sub size s n | map 'i -> slice upper s i i+n-1
                    | tally
]

loop [2 3 4] 'n [
    print ~"|n|-grams:"
    loop ngrams "Live and let live" n [k v] -> print [~{"|k|"} v]
    print ""
]
Output:
2-grams:
"LI" 2 
"IV" 2 
"VE" 2 
"E " 1 
" A" 1 
"AN" 1 
"ND" 1 
"D " 1 
" L" 2 
"LE" 1 
"ET" 1 
"T " 1 

3-grams:
"LIV" 2 
"IVE" 2 
"VE " 1 
"E A" 1 
" AN" 1 
"AND" 1 
"ND " 1 
"D L" 1 
" LE" 1 
"LET" 1 
"ET " 1 
"T L" 1 
" LI" 1 

4-grams:
"LIVE" 2 
"IVE " 1 
"VE A" 1 
"E AN" 1 
" AND" 1 
"AND " 1 
"ND L" 1 
"D LE" 1 
" LET" 1 
"LET " 1 
"ET L" 1 
"T LI" 1 
" LIV" 1

BASIC

10 DEFINT A-Z
20 S$ = "LIVE AND LET LIVE"
30 FOR N=2 TO 4: GOSUB 100: NEXT N
40 END
100 REM PRINT N-GRAMS OF S$
105 PRINT USING "#-grams of '";N;: PRINT S$;"':"
110 DIM P$(LEN(S$)-N+1), C(LEN(S$)-N+1)
120 FD = 0
130 FOR I=1 TO LEN(S$)-N+1
140 PA$ = MID$(S$,I,N)
150 IF FD = 0 THEN 190
160 FOR J=1 TO FD
170 IF P$(J) = PA$ THEN C(J) = C(J)+1: GOTO 210
180 NEXT J
190 FD = FD+1
200 P$(FD) = PA$ : C(FD) = 1
210 NEXT I
220 FOR I=1 TO FD
230 PRINT "'";P$(I);"': ";C(I),
240 NEXT I
250 PRINT: PRINT
260 ERASE P$, C
270 RETURN
Output:
2 grams of 'LIVE AND LET LIVE':
'LI':  2      'IV':  2      'VE':  2      'E ':  1      ' A':  1
'AN':  1      'ND':  1      'D ':  1      ' L':  2      'LE':  1
'ET':  1      'T ':  1

3 grams of 'LIVE AND LET LIVE':
'LIV':  2     'IVE':  2     'VE ':  1     'E A':  1     ' AN':  1
'AND':  1     'ND ':  1     'D L':  1     ' LE':  1     'LET':  1
'ET ':  1     'T L':  1     ' LI':  1

4 grams of 'LIVE AND LET LIVE':
'LIVE':  2    'IVE ':  1    'VE A':  1    'E AN':  1    ' AND':  1
'AND ':  1    'ND L':  1    'D LE':  1    ' LET':  1    'LET ':  1
'ET L':  1    'T LI':  1    ' LIV':  1

BCPL

get "libhdr"

let equal(str, n, i, j) = valof
$(  for k=0 to n-1
        unless str%(i+k) = str%(j+k) resultis false
    resultis true
$)

let findngrams(n, str, res) = valof 
$(  let found = 0

    for i=1 to str%0-n+1
    $(  for j=0 to found-1
        $(  if equal(str, n, i, res!(2*j))
            $(  res!(2*j+1) := res!(2*j+1) + 1
                goto nextitem
            $)
        $)
        res!(2*found) := i
        res!(2*found+1) := 1
        found := found + 1
        nextitem: loop
    $)
    resultis found
$)

let showngrams(n, str) be
$(  let res = vec 64
    let amt = findngrams(n, str, res)
    
    writef("%N-grams of '%S':*N", n, str)
    for i=0 to amt-1
    $(  wrch('*'')
        for j=res!(2*i) to res!(2*i)+n-1 do wrch(str%j)
        writef("' - %N",res!(2*i+1))
        wrch(i rem 5=4 -> '*N', '*T')
    $)
    wrch('*N')
$)

let start() be
    for n=2 to 4 do showngrams(n, "LIVE AND LET LIVE")
Output:
2-grams of 'LIVE AND LET LIVE':
'LI' - 2        'IV' - 2        'VE' - 2        'E ' - 1        ' A' - 1
'AN' - 1        'ND' - 1        'D ' - 1        ' L' - 2        'LE' - 1
'ET' - 1        'T ' - 1
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2       'IVE' - 2       'VE ' - 1       'E A' - 1       ' AN' - 1
'AND' - 1       'ND ' - 1       'D L' - 1       ' LE' - 1       'LET' - 1
'ET ' - 1       'T L' - 1       ' LI' - 1
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2      'IVE ' - 1      'VE A' - 1      'E AN' - 1      ' AND' - 1
'AND ' - 1      'ND L' - 1      'D LE' - 1      ' LET' - 1      'LET ' - 1
'ET L' - 1      'T LI' - 1      ' LIV' - 1

BQN

Ngrams  (⊏∾≠)¨  ()  (<˘)
Show  > ("'"    "': "  (•Fmt⊢))´¨

234 ShowNgrams¨ <"LIVE AND LET LIVE"
Output:
┌─                                        
· ┌─          ┌─           ┌─             
  ╵"'LI': 2   ╵"'LIV': 2   ╵"'LIVE': 2    
    'IV': 2     'IVE': 2     'IVE ': 1    
    'VE': 2     'VE ': 1     'VE A': 1    
    'E ': 1     'E A': 1     'E AN': 1    
    ' A': 1     ' AN': 1     ' AND': 1    
    'AN': 1     'AND': 1     'AND ': 1    
    'ND': 1     'ND ': 1     'ND L': 1    
    'D ': 1     'D L': 1     'D LE': 1    
    ' L': 2     ' LE': 1     ' LET': 1    
    'LE': 1     'LET': 1     'LET ': 1    
    'ET': 1     'ET ': 1     'ET L': 1    
    'T ': 1"    'T L': 1     'T LI': 1    
            ┘   ' LI': 1"    ' LIV': 1"   
                         ┘             ┘  
                                         ┘

C

#include <stdio.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>

#define MAX_N 4
#define MAX_NGRAMS 20

typedef struct {
    char str[MAX_N+1];
    int freq;
} ngram;

void *strUpper(char *s) {
    while (*s) {
        *s = toupper(*s);
        s++;
    }
}

void ngrams(int n, char *text) {
    int i, j, count = 0;
    size_t len = strlen(text);
    bool found;
    char temp[MAX_N+1] = {'\0'};
    ngram ng, ngrams[MAX_NGRAMS];
    char s[len+1];
    strcpy(s, text);
    strUpper(s);
    for (i = 0; i <= len-n; ++i) {
        strncpy(temp, s + i, n);
        found = false;
        for (j = 0; j < count; ++j) {    
            if (!strcmp(ngrams[j].str, temp)) {
                ngrams[j].freq++;
                found = true;
                break;
            }
        }
        if (!found) {
            strncpy(ng.str, temp, n);
            ng.freq = 1;
            ngrams[count++] = ng;
        }
    }
    for (i = 0; i < count; ++i) {
        printf("(\"%s\": %d)  ", ngrams[i].str, ngrams[i].freq);
        if (!((i+1)%5)) printf("\n");
    }
    printf("\n\n");
}

int main() {
    int n;
    char *text = "Live and let live";
    for (n = 2; n <= MAX_N; ++n) {
        printf("All %d-grams of '%s' and their frequencies:\n", n, text);
        ngrams(n, text);
    }
    return 0;
}
Output:
All 2-grams of 'Live and let live' and their frequencies:
("LI": 2)  ("IV": 2)  ("VE": 2)  ("E ": 1)  (" A": 1)  
("AN": 1)  ("ND": 1)  ("D ": 1)  (" L": 2)  ("LE": 1)  
("ET": 1)  ("T ": 1)  

All 3-grams of 'Live and let live' and their frequencies:
("LIV": 2)  ("IVE": 2)  ("VE ": 1)  ("E A": 1)  (" AN": 1)  
("AND": 1)  ("ND ": 1)  ("D L": 1)  (" LE": 1)  ("LET": 1)  
("ET ": 1)  ("T L": 1)  (" LI": 1)  

All 4-grams of 'Live and let live' and their frequencies:
("LIVE": 2)  ("IVE ": 1)  ("VE A": 1)  ("E AN": 1)  (" AND": 1)  
("AND ": 1)  ("ND L": 1)  ("D LE": 1)  (" LET": 1)  ("LET ": 1)  
("ET L": 1)  ("T LI": 1)  (" LIV": 1)  

C++

#include <iostream>
#include <map>
#include <string>

std::map<std::string, int> find_ngrams(int n, const std::string& s)
{
    std::map<std::string, int> ngrams;
    size_t max_loc = s.length() - n;
    for (size_t i = 0; i <= max_loc; i++)
        ngrams[s.substr(i, n)]++;
    return ngrams;
}

void print_ngrams(const std::map<std::string, int>& ngrams) 
{
    int col = 0;
    for (const auto& [ngram, count] : ngrams) {
        std::cout << "'" << ngram << "' - " << count;
        if (col++ % 5 == 4)
            std::cout << std::endl;
        else
            std::cout << '\t';
    }
    std::cout << std::endl;
}

int main(void) 
{
    std::string s("LIVE AND LET LIVE");
    
    for (int n=2; n<=4; n++) {
        std::cout << n << "-grams of '" << s << ":" << std::endl;
        print_ngrams(find_ngrams(n, s));
    }
    
    return 0;
}
Output:
2-grams of 'LIVE AND LET LIVE:
' A' - 1        ' L' - 2        'AN' - 1        'D ' - 1        'E ' - 1
'ET' - 1        'IV' - 2        'LE' - 1        'LI' - 2        'ND' - 1
'T ' - 1        'VE' - 2
3-grams of 'LIVE AND LET LIVE:
' AN' - 1       ' LE' - 1       ' LI' - 1       'AND' - 1       'D L' - 1
'E A' - 1       'ET ' - 1       'IVE' - 2       'LET' - 1       'LIV' - 2
'ND ' - 1       'T L' - 1       'VE ' - 1
4-grams of 'LIVE AND LET LIVE:
' AND' - 1      ' LET' - 1      ' LIV' - 1      'AND ' - 1      'D LE' - 1
'E AN' - 1      'ET L' - 1      'IVE ' - 1      'LET ' - 1      'LIVE' - 2
'ND L' - 1      'T LI' - 1      'VE A' - 1

CLU

parts = iter (n: int, s: string) yields (string) 
    for i: int in int$from_to(1, string$size(s)-n+1) do
        yield(string$substr(s, i, n))
    end
end parts

ngram = struct[str: string, count: int]

find_ngrams = proc (n: int, s: string) returns (sequence[ngram])
    ng: array[ngram] := array[ngram]$[]
    for part: string in parts(n, s) do
        begin 
            for i: int in array[ngram]$indexes(ng) do
                if ng[i].str = part then exit found(i) end
            end
            array[ngram]$addh(ng, ngram${str: part, count: 1})
        end
        except when found(i: int):
            ng[i] := ngram${str: ng[i].str, count: ng[i].count + 1}
        end
    end
    return(sequence[ngram]$a2s(ng))
end find_ngrams 

show_ngrams = proc (s: stream, n: int, str: string)
    ngrams: sequence[ngram] := find_ngrams(n, str)
    col: int := 0
    
    for ng: ngram in sequence[ngram]$elements(ngrams) do
        stream$putleft(s, "'" || ng.str || "' - " || 
                          int$unparse(ng.count), 15)
        if col // 5 = 4 then stream$putl(s, "") end
        col := col + 1
    end
    stream$putl(s, "")
end show_ngrams

start_up = proc ()
    po: stream := stream$primary_output()
    s: string := "LIVE AND LET LIVE"
    
    for n: int in int$from_to(2, 4) do
        stream$putl(po, int$unparse(n) || "-grams of '" || s || "':")
        show_ngrams(po, n, s)
        stream$putl(po, "")
    end
end start_up
Output:
2-grams of 'LIVE AND LET LIVE':
'LI' - 2       'IV' - 2       'VE' - 2       'E ' - 1       ' A' - 1
'AN' - 1       'ND' - 1       'D ' - 1       ' L' - 2       'LE' - 1
'ET' - 1       'T ' - 1

3-grams of 'LIVE AND LET LIVE':
'LIV' - 2      'IVE' - 2      'VE ' - 1      'E A' - 1      ' AN' - 1
'AND' - 1      'ND ' - 1      'D L' - 1      ' LE' - 1      'LET' - 1
'ET ' - 1      'T L' - 1      ' LI' - 1

4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2     'IVE ' - 1     'VE A' - 1     'E AN' - 1     ' AND' - 1
'AND ' - 1     'ND L' - 1     'D LE' - 1     ' LET' - 1     'LET ' - 1
'ET L' - 1     'T LI' - 1     ' LIV' - 1

Common Lisp

A hash table is used to store and retrieve the n-grams fast.

(defun n-grams (text n)
"Return a list of all the N-grams of length n in the text, together with their frequency"
	(let* (res (*ht-n-grams* (make-hash-table :test 'equal)) )
		(loop for i from 0 to (- (length text) n) do
			(let* ((n-gram (string-upcase (subseq text i (+ i n))))
			       (freq (gethash n-gram *ht-n-grams*)))
		  		(setf (gethash n-gram *ht-n-grams*) (if (null freq) 1 (1+ freq))) ))
			(maphash #'(lambda (key val)
									(push (cons key val) res) )
				*ht-n-grams* )
			(sort res #'> :key #'cdr) ))
Output:
> (n-grams "Live and let live" 2)

(("LI" . 2) ("IV" . 2) ("VE" . 2) (" L" . 2) ("E " . 1) (" A" . 1) ("AN" . 1)
 ("ND" . 1) ("D " . 1) ("LE" . 1) ("ET" . 1) ("T " . 1))

Cowgol

include "cowgol.coh";
include "strings.coh";

record Ngram is
    ptr: [uint8];
    size: intptr;
    count: intptr;
end record;

sub PrintNgram(ngram: [Ngram]) is
    print_char('\'');
    var ptr := ngram.ptr;
    var n := ngram.size;
    while n > 0 loop
        print_char([ptr]);
        ptr := @next ptr;
        n := n - 1;
    end loop;
    print("' - ");
    print_i32(ngram.count as uint32);
end sub;

sub MemCmp(n: intptr, a: [uint8], b: [uint8]): (eq: uint8) is
    eq := 1;
    while n>0 loop
        if [a] != [b] then 
            eq := 0;
            return;
        end if;
        a := @next a;
        b := @next b;
        n := n - 1;
    end loop;
end sub;

sub FindNgrams(n: intptr, str: [uint8], result: [Ngram]): (amount: intptr) is
    var nextres := result;
    amount := 0;
    
    sub NewNgram(pos: [uint8]) is
        nextres.ptr := pos;
        nextres.size := n;
        nextres.count := 1;
        nextres := @next nextres;
        amount := amount + 1;
    end sub;
    
    sub IncNgram(pos: [uint8]) is
        if amount == 0 then
            NewNgram(pos);
            return;
        end if;
        
        var curres := result;
        var left := amount;
        while left > 0 loop
            if MemCmp(n, pos, curres.ptr) != 0 then
                curres.count := curres.count + 1;
                return;
            end if;
            left := left - 1;
            curres := @next curres;
        end loop;
        NewNgram(pos);
    end sub;
    
    var charsleft := StrLen(str) - n + 1;
    while charsleft > 0 loop
        IncNgram(str);
        str := @next str;
        charsleft := charsleft - 1;
    end loop;
end sub;

sub ShowNgrams(n: intptr, str: [uint8]) is
    var ngrams: Ngram[128];
    
    print_i32(n as uint32);
    print("-grams of '");
    print(str);
    print("':\n");
    
    var amount := FindNgrams(n, str, &ngrams[0]) as @indexof ngrams;
    var i: @indexof ngrams := 0;
    while i < amount loop
        PrintNgram(&ngrams[i]);
        if i % 5 == 4
            then print_nl();
            else print_char('\t');
        end if;
        i := i + 1;
    end loop;
    print_nl();
    print_nl();
end sub;

var str := "LIVE AND LET LIVE";
ShowNgrams(2, str);
ShowNgrams(3, str);
ShowNgrams(4, str);
Output:
2-grams of 'LIVE AND LET LIVE':
'LI' - 2        'IV' - 2        'VE' - 2        'E ' - 1        ' A' - 1
'AN' - 1        'ND' - 1        'D ' - 1        ' L' - 2        'LE' - 1
'ET' - 1        'T ' - 1

3-grams of 'LIVE AND LET LIVE':
'LIV' - 2       'IVE' - 2       'VE ' - 1       'E A' - 1       ' AN' - 1
'AND' - 1       'ND ' - 1       'D L' - 1       ' LE' - 1       'LET' - 1
'ET ' - 1       'T L' - 1       ' LI' - 1

4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2      'IVE ' - 1      'VE A' - 1      'E AN' - 1      ' AND' - 1
'AND ' - 1      'ND L' - 1      'D LE' - 1      ' LET' - 1      'LET ' - 1
'ET L' - 1      'T LI' - 1      ' LIV' - 1

Draco

\util.g

type Ngram = struct {
    *char nptr;
    word length;
    word amount;
    *Ngram next;
};

proc equal_n(word n; *char a, b) bool:
    while n>0 and a* = b* do
        a := a+1;
        b := b+1;
        n := n-1
    od;
    n = 0
corp

proc write_nchars(word n; *char ptr) void:
    word i;
    for i from 1 upto n do
        write(ptr*);
        ptr := ptr + 1;
    od
corp

proc write_ngrams(*Ngram ngram) void:
    word i;
    i := 0;
    while ngram /= nil do
        write("'");
        write_nchars(ngram*.length, ngram*.nptr);
        write("' - ", ngram*.amount);
        if i % 5=4 
            then writeln()
            else write('\t')
        fi;
        i := i+1;
        ngram := ngram*.next
    od
corp

proc new_ngram(word n; *char ptr) *Ngram:
    *Ngram ngram;
    ngram := new(Ngram);
    ngram*.length := n;
    ngram*.nptr := ptr;
    ngram*.amount := 1;
    ngram*.next := nil;
    ngram
corp;

proc inc_ngram(*Ngram ngram; word n; *char ptr) *Ngram:
    *Ngram begin, lastn;
    begin := ngram;
    
    if begin = nil then
        new_ngram(n, ptr)
    else
        while
            ngram /= nil and not equal_n(n, ptr, ngram*.nptr)
        do
            lastn := ngram;
            ngram := ngram*.next
        od;
        if ngram /= nil then
            ngram*.amount := ngram*.amount + 1
        else
            lastn*.next := new_ngram(n, ptr)
        fi;
        begin
    fi
corp

proc find_ngrams(word n; *char string) *Ngram:
    *Ngram ngrams;
    word maxpos, i;
    
    ngrams := nil;
    maxpos := CharsLen(string) - n;
    for i from 0 upto maxpos do
        ngrams := inc_ngram(ngrams, n, string + i)
    od;
    ngrams
corp 

proc main() void:
    *char string = "LIVE AND LET LIVE";
    word n;
    for n from 2 upto 4 do
        writeln(n, "-grams of '", string, "':");
        write_ngrams(find_ngrams(n, string));
        writeln();
    od;
corp
Output:
2-grams of 'LIVE AND LET LIVE':
'LI' - 2        'IV' - 2        'VE' - 2        'E ' - 1        ' A' - 1
'AN' - 1        'ND' - 1        'D ' - 1        ' L' - 2        'LE' - 1
'ET' - 1        'T ' - 1
3-grams of 'LIVE AND LET LIVE':
'LIV' - 2       'IVE' - 2       'VE ' - 1       'E A' - 1       ' AN' - 1
'AND' - 1       'ND ' - 1       'D L' - 1       ' LE' - 1       'LET' - 1
'ET ' - 1       'T L' - 1       ' LI' - 1
4-grams of 'LIVE AND LET LIVE':
'LIVE' - 2      'IVE ' - 1      'VE A' - 1      'E AN' - 1      ' AND' - 1
'AND ' - 1      'ND L' - 1      'D LE' - 1      ' LET' - 1      'LET ' - 1
'ET L' - 1      'T LI' - 1      ' LIV' - 1

DuckDB

Works with: DuckDB version V1.0
Works with: DuckDB version V1.1

Part 1 of this entry focuses on the tabular representation of the "bag of words" as it is convenient for further processing and graphical displays.

Part 2 showcases DuckDB's `histogram` scalar function, which computes the "bag of words" of the values in a column of a table. It returns a DuckDB STRUCT, which is compact and useful when it is convenient to view the histogram as a single value.

Part 3 showcases DuckDB's `histogram` table-valued function, which was introduced in DuckDB V1.1.

To avoid repetitiveness and to save space, we'll begin by defining a table-valued function for generating the n-grams of a string without any aggregation, because such a table is the starting point for all three approaches.

create or replace function ngrams_table(str, len) as table (
  with recursive cte(n,g) as (
    select 2 as n, upper(substr(str, 1, len)) as g
    union all
    select n+1, upper(substr(str, n, len))
    from cte
    where g != '' )
    select g as gram from cte where length(g) = len
  );

Tabular representation

create or replace function ngrams(str, len) as table (
    with ng as (from ngrams_table(str, len))
    select count(*) as count, gram
    from ng group by gram
    order by count desc, gram asc
  );

select count, gram as "4-gram" from ngrams('Live and let live', 4);
Output:
┌───────┬─────────┐
│ count │ 4-gram  │
│ int64 │ varchar │
├───────┼─────────┤
│     2 │ LIVE    │
│     1 │  AND    │
│     1 │  LET    │
│     1 │  LIV    │
│     1 │ AND     │
│     1 │ D LE    │
│     1 │ E AN    │
│     1 │ ET L    │
│     1 │ IVE     │
│     1 │ LET     │
│     1 │ ND L    │
│     1 │ T LI    │
│     1 │ VE A    │
├───────┴─────────┤
│     13 rows     │
└─────────────────┘

Histogram as a STRUCT

# The histogram of the n-grams of upper(str) as a STRUCT
create or replace function ngrams(str, len) as (
  select histogram(gram) from ngrams_table(str,len)
);

select n, ngrams('Live and let live', n) from range(2,5) t(n);
Output:
┌───────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│   n   │                                      ngrams('Live and let live', n)                                      │
│ int64 │                                          map(varchar, ubigint)                                           │
├───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│     2 │ { A=1,  L=2, AN=1, D =1, E =1, ET=1, IV=2, LE=1, LI=2, ND=1, T =1, VE=2}                                 │
│     3 │ { AN=1,  LE=1,  LI=1, AND=1, D L=1, E A=1, ET =1, IVE=2, LET=1, LIV=2, ND =1, T L=1, VE =1}              │
│     4 │ { AND=1,  LET=1,  LIV=1, AND =1, D LE=1, E AN=1, ET L=1, IVE =1, LET =1, LIVE=2, ND L=1, T LI=1, VE A=1} │
└───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────┘

HISTOGRAM() table-valued function

The number of 'bins' used by the HISTOGRAM() table-valued function is configurable, but the default value is used in the following illustration.

with cte as (from ngrams_table('Live and let live', 2))
from histogram(cte, gram) order by count desc, bin asc;
Output:
┌────────────────┬────────┬──────────────────────────────────────────────────────────────────────────────────┐
│      bin       │ count  │                                       bar                                        │
│    varchar     │ uint64 │                                     varchar                                      │
├────────────────┼────────┼──────────────────────────────────────────────────────────────────────────────────┤
│  L             │      2 │ ████████████████████████████████████████████████████████████████████████████████ │
│ (other values) │      2 │ ████████████████████████████████████████████████████████████████████████████████ │
│ IV             │      2 │ ████████████████████████████████████████████████████████████████████████████████ │
│ LI             │      2 │ ████████████████████████████████████████████████████████████████████████████████ │
│ VE             │      2 │ ████████████████████████████████████████████████████████████████████████████████ │
│  A             │      1 │ ████████████████████████████████████████                                         │
│ AN             │      1 │ ████████████████████████████████████████                                         │
│ D              │      1 │ ████████████████████████████████████████                                         │
│ E              │      1 │ ████████████████████████████████████████                                         │
│ LE             │      1 │ ████████████████████████████████████████                                         │
│ ND             │      1 │ ████████████████████████████████████████                                         │
├────────────────┴────────┴──────────────────────────────────────────────────────────────────────────────────┤
│ 11 rows                                                                                          3 columns │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

EasyLang

global gram$[] cnt[] .
proc add g$ . .
   for i to len gram$[]
      if gram$[i] = g$
         cnt[i] += 1
         return
      .
   .
   gram$[] &= g$
   cnt[] &= 1
.
proc ngram s$ n . .
   gram$[] = [ ]
   cnt[] = [ ]
   for i = 1 to len s$ - n + 1
      add substr s$ i n
   .
   for i to len gram$[]
      write "('" & gram$[i] & "':" & cnt[i] & ") "
   .
   print ""
.
for k = 2 to 4
   ngram "LIVE AND LET LIVE" k
   print ""
.
Output:
('LI':2) ('IV':2) ('VE':2) ('E ':1) (' A':1) ('AN':1) ('ND':1) ('D ':1) (' L':2) ('LE':1) ('ET':1) ('T ':1) 

('LIV':2) ('IVE':2) ('VE ':1) ('E A':1) (' AN':1) ('AND':1) ('ND ':1) ('D L':1) (' LE':1) ('LET':1) ('ET ':1) ('T L':1) (' LI':1) 

('LIVE':2) ('IVE ':1) ('VE A':1) ('E AN':1) (' AND':1) ('AND ':1) ('ND L':1) ('D LE':1) (' LET':1) ('LET ':1) ('ET L':1) ('T LI':1) (' LIV':1) 

F#

// N-grams. Nigel Galloway: April 2nd., 2024
let gram (n:string) g=let n=n.ToUpper() in n|>Seq.windowed g|>Seq.countBy id
for n,g in (gram "Live and let live" 2) do printfn "%A %d" n g
Output:
[|'L'; 'I'|] 2
[|'I'; 'V'|] 2
[|'V'; 'E'|] 2
[|'E'; ' '|] 1
[|' '; 'A'|] 1
[|'A'; 'N'|] 1
[|'N'; 'D'|] 1
[|'D'; ' '|] 1
[|' '; 'L'|] 2
[|'L'; 'E'|] 1
[|'E'; 'T'|] 1
[|'T'; ' '|] 1

Factor

Works with: Factor version 0.99 2022-04-03
USING: ascii grouping kernel math.statistics prettyprint ;

: n-grams ( str n -- assoc ) [ >upper ] dip clump histogram ;

"Live and let live" 2 n-grams .
Output:
H{
    { "ET" 1 }
    { "IV" 2 }
    { "T " 1 }
    { " A" 1 }
    { "VE" 2 }
    { "LI" 2 }
    { "E " 1 }
    { "D " 1 }
    { " L" 2 }
    { "ND" 1 }
    { "LE" 1 }
    { "AN" 1 }
}

FreeBASIC

Translation of: XPL0
Const As Integer MAX_SIZE = 100

Dim Shared As Integer dict(MAX_SIZE), cnt(MAX_SIZE)
Dim Shared As Integer size

Sub LookUp(wd As Integer)        'Add word to dictionary, or increment its count
    For i As Integer = 0 To size - 1
        If dict(i) = wd Then
            cnt(i) += 1
            Return
        End If
    Next
    dict(size) = wd
    cnt(size) = 1
    size += 1
End Sub

Sub ShowNGram(n As Integer, text As String) 'Show n-grams for texting
    Dim As Integer i, j, wd, ch
    Print n; "-grams:"
    size = 0
    i = 0
    Do
        wd = 0
        For j = 0 To n - 1
            If i + j >= Len(text) Then Exit Do
            ch = Asc(Mid(text, i + j + 1, 1))
            If ch = &HA0 Then Exit Do       'terminating space
            If ch >= Asc("a") And ch <= Asc("z") Then ch = ch And Not &H20
            wd = (wd Shl 8) Or ch
        Next
        i += 1
        LookUp(wd)
    Loop
    
    For i = 0 To size - 1
        wd = dict(i)
        For j = n - 1 To 0 Step -1
            Print Chr((wd Shr (j * 8)) And &HFF);
        Next
        Print " "; cnt(i);
		If (i Mod 5) = 4 Then Print Else Print Chr(9);
    Next
    Print
End Sub

For n As Integer = 2 To 4
    ShowNGram(n, "Live and let live ")
Next

Sleep
Output:
 2-grams:
LI  2   IV  2   VE  2   E   2    A  1
AN  1   ND  1   D   1    L  2   LE  1
ET  1   T   1
 3-grams:
LIV  2  IVE  2  VE   2  E A  1   AN  1
AND  1  ND   1  D L  1   LE  1  LET  1
ET   1  T L  1   LI  1
 4-grams:
LIVE  2 IVE   2 VE A  1 E AN  1  AND  1
AND   1 ND L  1 D LE  1  LET  1 LET   1
ET L  1 T LI  1  LIV  1

Haskell

import Control.Applicative (ZipList (ZipList, getZipList))
import Data.Char (toUpper)
import Data.List (tails)
import qualified Data.Map.Strict as M

------------------- MAP OF N-GRAM COUNTS -----------------

nGramCounts :: Int -> String -> M.Map String Int
nGramCounts n =
  foldr (flip (M.insertWith (+)) 1) M.empty . windows n


------------------------- GENERIC ------------------------

windows :: Int -> [a] -> [[a]]
windows n = transpose . take n . tails

transpose :: [[a]] -> [[a]]
transpose [] = []
transpose xs = getZipList (traverse ZipList xs)


--------------------------- TEST -------------------------
main :: IO ()
main =
  let sample = toUpper <$> "Live and let live"
   in mapM_
        ( \n ->
            putStrLn (show n <> "-GRAMS:")
              >> mapM_ print ((M.assocs . nGramCounts n) sample)
              >> putStrLn ""
        )
        [0 .. 4]
Output:
0-GRAMS:

1-GRAMS:
(" ",3)
("A",1)
("D",1)
("E",3)
("I",2)
("L",3)
("N",1)
("T",1)
("V",2)

2-GRAMS:
(" A",1)
(" L",2)
("AN",1)
("D ",1)
("E ",1)
("ET",1)
("IV",2)
("LE",1)
("LI",2)
("ND",1)
("T ",1)
("VE",2)

3-GRAMS:
(" AN",1)
(" LE",1)
(" LI",1)
("AND",1)
("D L",1)
("E A",1)
("ET ",1)
("IVE",2)
("LET",1)
("LIV",2)
("ND ",1)
("T L",1)
("VE ",1)

4-GRAMS:
(" AND",1)
(" LET",1)
(" LIV",1)
("AND ",1)
("D LE",1)
("E AN",1)
("ET L",1)
("IVE ",1)
("LET ",1)
("LIVE",2)
("ND L",1)
("T LI",1)
("VE A",1)

Java

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public final class Ngrams {

	public static void main(String[] args) {
		final String text = "Live and let live".toUpperCase();
		for ( int letterCount : List.of( 2, 3, 4 ) ) {
			Map<String, Integer> ngrams = findNgrams(text, letterCount);		
			System.out.println("All " + letterCount + "-grams of " + text + " and their frequencies:");
			for ( Map.Entry<String, Integer> entry : ngrams.entrySet() ) {
				System.out.println("(\"" + entry.getKey() + "\" : " + entry.getValue() + ")");
			}
			System.out.println();
		}
	}	
	
	private static Map<String, Integer> findNgrams(String text, int letterCount) {
	    Map<String, Integer> ngrams = new HashMap<String, Integer>();
	    for ( int i = 0; i <= text.length() - letterCount; i++ ) {
	        String ngram = text.substring(i, i + letterCount);
	        ngrams.merge(ngram, 1, Integer::sum);
	    }	    
	    return sort(ngrams);
	}
	
	private static Map<String, Integer> sort(Map<String, Integer> map) {
		return map.entrySet().stream().sorted( (one, two) -> {
			final int comparison = two.getValue().compareTo(one.getValue());
			return ( comparison == 0 ) ? one.getKey().compareTo(two.getKey()) : comparison;
		} ).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, 
		       (oldValue, newValue) -> oldValue, LinkedHashMap::new));
	}

}
 
All 2-grams of LIVE AND LET LIVE and their frequencies:
(" L" : 2)
("IV" : 2)
("LI" : 2)
("VE" : 2)
(" A" : 1)
("AN" : 1)
("D " : 1)
("E " : 1)
("ET" : 1)
("LE" : 1)
("ND" : 1)
("T " : 1)

All 3-grams of LIVE AND LET LIVE and their frequencies:
("IVE" : 2)
("LIV" : 2)
(" AN" : 1)
(" LE" : 1)
(" LI" : 1)
("AND" : 1)
("D L" : 1)
("E A" : 1)
("ET " : 1)
("LET" : 1)
("ND " : 1)
("T L" : 1)
("VE " : 1)

All 4-grams of LIVE AND LET LIVE and their frequencies:
("LIVE" : 2)
(" AND" : 1)
(" LET" : 1)
(" LIV" : 1)
("AND " : 1)
("D LE" : 1)
("E AN" : 1)
("ET L" : 1)
("IVE " : 1)
("LET " : 1)
("ND L" : 1)
("T LI" : 1)
("VE A" : 1)

jq

Works with jq and gojq, that is, the C and Go implementations of jq.

# Generic "bag of words" utility:
def bow(stream): 
  reduce stream as $word ({}; .[($word|tostring)] += 1);

# The ngrams as a bow
def ngrams($n):
  ascii_upcase as $text
  | bow( range(0;$text|1+ length - $n) as $i | $text[$i:$i+$n]);

# The task
# Sort by increasing frequency, then by lexicographical order
def ngrams($text; $n):
  ($text|ngrams($n)) as $ngrams
  | "\nAll \($n)-grams of '\($text)' and their frequencies:",
    ($ngrams|to_entries|sort_by(.value,.key)[] | "\(.key): \(.value)" ) ;

ngrams("Live and let live"; 2,3,4)
Output:

All 2-grams of 'Live and let live' and their frequencies:
 A: 1
AN: 1
D : 1
E : 1
ET: 1
LE: 1
ND: 1
T : 1
 L: 2
IV: 2
LI: 2
VE: 2

All 3-grams of 'Live and let live' and their frequencies:
 AN: 1
 LE: 1
 LI: 1
AND: 1
D L: 1
E A: 1
ET : 1
LET: 1
ND : 1
T L: 1
VE : 1
IVE: 2
LIV: 2

All 4-grams of 'Live and let live' and their frequencies:
 AND: 1
 LET: 1
 LIV: 1
AND : 1
D LE: 1
E AN: 1
ET L: 1
IVE : 1
LET : 1
ND L: 1
T LI: 1
VE A: 1
LIVE: 2

Julia

function ngrams(str::AbstractString, n; uppercaseinput = true)
    s = uppercaseinput ? uppercase(str) : str
    unique([(ng, count(ng, s)) for ng in [SubString(s, i:i+n-1) for i=1:length(s)-n+1]])
end

function eightcolumns(arr)
    for (i, elem) in pairs(arr)
        print(lpad(elem, 10), i % 8 == 0 ? "\n" : "")
    end
    println("\n")
end
        
const s = "Live and let live"

ngrams(s, 1) |> eightcolumns
ngrams(s, 2) |> eightcolumns
ngrams(s, 2, uppercaseinput = false) |> eightcolumns
Output:
  ("L", 3)  ("I", 2)  ("V", 2)  ("E", 3)  (" ", 3)  ("A", 1)  ("N", 1)  ("D", 1)
  ("T", 1)

 ("LI", 2) ("IV", 2) ("VE", 2) ("E ", 1) (" A", 1) ("AN", 1) ("ND", 1) ("D ", 1)
 (" L", 2) ("LE", 1) ("ET", 1) ("T ", 1)

 ("Li", 1) ("iv", 2) ("ve", 2) ("e ", 1) (" a", 1) ("an", 1) ("nd", 1) ("d ", 1)
 (" l", 2) ("le", 1) ("et", 1) ("t ", 1) ("li", 1)

Miranda

main :: [sys_message]
main = concat (map (testcase s) [2,3,4])
       where s = "LIVE AND LET LIVE"

testcase :: [char]->num->[sys_message]
testcase s n = [Stdout (show n ++ "-grams of '" ++ s ++ ":'\n"),
                Stdout (showngrams n s),
                Stdout "\n"]

showngrams :: num->[char]->[char]
showngrams n s = lay (map concat (splitn 6 cols))
                 where ng    = ngrams n s
                       cols  = [ljustify 12 (showngram ng') | ng'<-ng]

showngram :: ([char],num)->[char]
showngram (s,i) = concat ["\"", s, "\": ", show i]

splitn :: num->[*]->[[*]]
splitn n [] = []
splitn n ls = take n ls:splitn n (drop n ls)

ngrams :: num->[*]->[([*],num)]
ngrams n = count . group n

group :: num->[*]->[[*]]
group n ls = [], if #ls < n
group n ls = take n ls : group n (tl ls)

count :: [*]->[(*,num)]
count = foldl incelem []

incelem :: [(*,num)]->*->[(*,num)]
incelem []          el = [(el, 1)]
incelem ((el,n):cs) el = (el,n+1):cs
incelem (c:cs)      el = c:incelem cs el
Output:
2-grams of 'LIVE AND LET LIVE:'
"LI": 2     "IV": 2     "VE": 2     "E ": 1     " A": 1     "AN": 1
"ND": 1     "D ": 1     " L": 2     "LE": 1     "ET": 1     "T ": 1

3-grams of 'LIVE AND LET LIVE:'
"LIV": 2    "IVE": 2    "VE ": 1    "E A": 1    " AN": 1    "AND": 1
"ND ": 1    "D L": 1    " LE": 1    "LET": 1    "ET ": 1    "T L": 1
" LI": 1

4-grams of 'LIVE AND LET LIVE:'
"LIVE": 2   "IVE ": 1   "VE A": 1   "E AN": 1   " AND": 1   "AND ": 1
"ND L": 1   "D LE": 1   " LET": 1   "LET ": 1   "ET L": 1   "T LI": 1
" LIV": 1

Nim

import std/[strutils, tables]

type NGrams = CountTable[string]

func ngrams(text: string; n: Positive): NGrams =
  for i in 0..(text.len - n):
    result.inc(text[i..<(i + n)].toLowerAscii)

const Text = "Live and let live"

for n in 2..4:
  echo n, "-grams:"
  var ng = Text.ngrams(n)
  ng.sort()               # To display n-grams with higher score first.
  for key, count in ng:
    echo "“$1”: $2".format(key, count)
  echo()
Output:
2-grams:
“ve”: 2
“li”: 2
“iv”: 2
“ l”: 2
“d ”: 1
“et”: 1
“t ”: 1
“an”: 1
“nd”: 1
“e ”: 1
“le”: 1
“ a”: 1

3-grams:
“ive”: 2
“liv”: 2
“ le”: 1
“nd ”: 1
“and”: 1
“et ”: 1
“ve ”: 1
“t l”: 1
“ an”: 1
“d l”: 1
“e a”: 1
“let”: 1
“ li”: 1

4-grams:
“live”: 2
“ liv”: 1
“ and”: 1
“e an”: 1
“ let”: 1
“and ”: 1
“d le”: 1
“t li”: 1
“nd l”: 1
“et l”: 1
“ive ”: 1
“let ”: 1
“ve a”: 1

Perl

use v5.36;

sub n_gram ($n, $line) {
    my %N;
    map { $N{substr lc($line),$_,$n}++ } 0..length($line)-$n;
    %N
}

my %bi_grams = n_gram 2, 'Live and let live';
say qq|'$_' - $bi_grams{$_}| for sort keys %bi_grams;

say '';

my %tri_grams = n_gram 3, 'Live and let live';
say qq|'$_' - $tri_grams{$_}| for sort keys %tri_grams;
Output:
' a' - 1
' l' - 2
'an' - 1
'd ' - 1
'e ' - 1
'et' - 1
'iv' - 2
'le' - 1
'li' - 2
'nd' - 1
't ' - 1
've' - 2

' an' - 1
' le' - 1
' li' - 1
'and' - 1
'd l' - 1
'e a' - 1
'et ' - 1
'ive' - 2
'let' - 1
'liv' - 2
'nd ' - 1
't l' - 1
've ' - 1

Phix

A dictionary is used to find the index of already-seen n-grams, even though a simpler find() would be good enough for this task.
I have replicated most orderings found on this page, the task description order corresponds to orig/freq,
and jq is alpha/freq with high last, but there is no equivalent for the Factor or Raku orderings here ;-).

with javascript_semantics
function n_grams(integer len, string txt, sequence orders)
    sequence ng = {}, ngc = {}
    integer d = new_dict()
    txt = upper(txt)
    for i=1 to length(txt)-len+1 do
        string tn = txt[i..i+len-1]
        integer ndx = getdd(tn,0,d)
        if ndx=0 then
            ng = append(ng,tn)
            ngc = append(ngc,1)
            ndx = length(ng)
            setd(tn,ndx,d)
        else
            ngc[ndx] += 1
        end if
    end for
    destroy_dict(d)
    integer l = length(ng)
    sequence ares = columnize({ng,ngc,tagset(l)}),
              res = {}
    for c in orders do
        if c="original" then -- original/first found order
            res = append(res,ares)
        elsif c="orig/freq" then -- "" but higher freq first
            res = append(res,sort_columns(ares,{-2,3}))
        elsif c="alphabetic" then -- alphabetical order
            res = append(res,sort(deep_copy(ares)))
        elsif c="alpha/freq" then -- "" but higher freq first
            res = append(res,sort_columns(ares,{-2,1}))
        else
            ?9/0 -- (unknown ordering requested)
        end if
    end for
    return res
end function

constant src = "Live and let live",
         orders = {"original","orig/freq",
                 "alphabetic","alpha/freq"}
printf(1,"For \"%s\":\n",src)
for l=2 to 4 do
    sequence res = n_grams(l,src,orders)
    string count = ordinal(length(res[1]),true)
    printf(1,"There are %s unique %d-grams:\n",{count,l})
    for i,r in res do
        printf(1,"%12s: %s\n",{orders[i],join(r,", ",fmt:="%s %d")})
    end for
end for
Output:
For "Live and let live":
There are twelve unique 2-grams:
    original: LI 2, IV 2, VE 2, E  1,  A 1, AN 1, ND 1, D  1,  L 2, LE 1, ET 1, T  1
   orig/freq: LI 2, IV 2, VE 2,  L 2, E  1,  A 1, AN 1, ND 1, D  1, LE 1, ET 1, T  1
  alphabetic:  A 1,  L 2, AN 1, D  1, E  1, ET 1, IV 2, LE 1, LI 2, ND 1, T  1, VE 2
  alpha/freq:  L 2, IV 2, LI 2, VE 2,  A 1, AN 1, D  1, E  1, ET 1, LE 1, ND 1, T  1
There are thirteen unique 3-grams:
    original: LIV 2, IVE 2, VE  1, E A 1,  AN 1, AND 1, ND  1, D L 1,  LE 1, LET 1, ET  1, T L 1,  LI 1
   orig/freq: LIV 2, IVE 2, VE  1, E A 1,  AN 1, AND 1, ND  1, D L 1,  LE 1, LET 1, ET  1, T L 1,  LI 1
  alphabetic:  AN 1,  LE 1,  LI 1, AND 1, D L 1, E A 1, ET  1, IVE 2, LET 1, LIV 2, ND  1, T L 1, VE  1
  alpha/freq: IVE 2, LIV 2,  AN 1,  LE 1,  LI 1, AND 1, D L 1, E A 1, ET  1, LET 1, ND  1, T L 1, VE  1
There are thirteen unique 4-grams:
    original: LIVE 2, IVE  1, VE A 1, E AN 1,  AND 1, AND  1, ND L 1, D LE 1,  LET 1, LET  1, ET L 1, T LI 1,  LIV 1
   orig/freq: LIVE 2, IVE  1, VE A 1, E AN 1,  AND 1, AND  1, ND L 1, D LE 1,  LET 1, LET  1, ET L 1, T LI 1,  LIV 1
  alphabetic:  AND 1,  LET 1,  LIV 1, AND  1, D LE 1, E AN 1, ET L 1, IVE  1, LET  1, LIVE 2, ND L 1, T LI 1, VE A 1
  alpha/freq: LIVE 2,  AND 1,  LET 1,  LIV 1, AND  1, D LE 1, E AN 1, ET L 1, IVE  1, LET  1, ND L 1, T LI 1, VE A 1

PL/M

100H:
BDOS: PROCEDURE (F,A); DECLARE F BYTE, A ADDRESS; GO TO 5; END BDOS;
EXIT: PROCEDURE; GO TO 0; END EXIT;
PR$CH: PROCEDURE (C); DECLARE C BYTE; CALL BDOS(2, C); END PR$CH;
PR$STR: PROCEDURE (S); DECLARE S ADDRESS; CALL BDOS(9, S); END PR$STR;

PR$NUM: PROCEDURE (N);
    DECLARE N ADDRESS;
    DECLARE S (6) BYTE INITIAL ('.....$');
    DECLARE I BYTE;
    I = 5;
DIGIT:
    I = I - 1;
    S(I) = N MOD 10 + '0';
    IF (N := N / 10) > 0 THEN GO TO DIGIT;
    CALL PR$STR(.S(I));
END PR$NUM;

PR$NSTR: PROCEDURE (N, STR);
    DECLARE (STR, N) ADDRESS, CH BASED STR BYTE;
    DO WHILE N>0;
        CALL PR$CH(CH);
        STR = STR+1;
        N = N-1;
    END;
END PR$NSTR;

CMP$NSTR: PROCEDURE (N, STRA, STRB) BYTE;
    DECLARE (STRA, STRB, N, I) ADDRESS;
    DECLARE A BASED STRA BYTE;
    DECLARE B BASED STRB BYTE;
    DO I=0 TO N-1;
        IF A(I) <> B(I) THEN RETURN 0;
    END;
    RETURN 0FFH;
END CMP$NSTR;

STR$LEN: PROCEDURE (STR) ADDRESS;
    DECLARE (N, STR) ADDRESS, S BASED STR BYTE;
    N = 0;
    DO WHILE S(N) <> '$';
        N = N+1;
    END;
    RETURN N;
END STR$LEN;

FIND$NGRAMS: PROCEDURE (N, STR, RSLT) ADDRESS;
    DECLARE (N, I, J, STR, RSLT, FOUND) ADDRESS; 
    DECLARE S BASED STR BYTE;
    DECLARE ITEM BASED RSLT ADDRESS;
    DECLARE MAXPOS ADDRESS;
    MAXPOS = STR$LEN(STR) - N;
    
    FOUND = 0;
    DO I = 0 TO MAXPOS;
        IF FOUND = 0 THEN GO TO NOT$FOUND;
        DO J = 0 TO FOUND;
            IF CMP$NSTR(N, .S(I), ITEM(2*J)) THEN DO;
                ITEM(2*J+1) = ITEM(2*J+1) + 1;
                GO TO NEXT$ITEM;
            END;
        END;
        NOT$FOUND:
        ITEM(2*FOUND) = .S(I);
        ITEM(2*FOUND+1) = 1;
        FOUND = FOUND + 1;
        NEXT$ITEM:
    END;
    RETURN FOUND;
END FIND$NGRAMS;

PRINT$NGRAMS: PROCEDURE (N, STR);
    DECLARE (N, I, STR) ADDRESS;
    DECLARE RESULT (64) ADDRESS;
    DECLARE AMOUNT ADDRESS;
    
    CALL PR$CH(N + '0');
    CALL PR$STR(.'-GRAMS OF ''$');
    CALL PR$STR(STR);
    CALL PR$STR(.(''': ', 13, 10, '$'));

    AMOUNT = FIND$NGRAMS(N, STR, .RESULT);
    DO I = 0 TO AMOUNT - 1;
        CALL PR$CH('''');
        CALL PR$NSTR(N, RESULT(2*I));
        CALL PR$STR(.''' - $');
        CALL PR$NUM(RESULT(2*I+1));
        IF I MOD 5 = 4 
            THEN CALL PR$STR(.(13,10,'$'));
            ELSE CALL PR$CH(9);
    END;
    
    CALL PR$STR(.(13,10,'$'));
END PRINT$NGRAMS;

DECLARE STRING DATA ('LIVE AND LET LIVE$');
DECLARE N BYTE;

DO N = 2 TO 4;
    CALL PRINT$NGRAMS(N, .STRING);
END;
CALL EXIT;
EOF
Output:
2-GRAMS OF 'LIVE AND LET LIVE':
'LI' - 2        'IV' - 2        'VE' - 2        'E ' - 1        ' A' - 1
'AN' - 1        'ND' - 1        'D ' - 1        ' L' - 2        'LE' - 1
'ET' - 1        'T ' - 1
3-GRAMS OF 'LIVE AND LET LIVE':
'LIV' - 2       'VE ' - 1       'E A' - 1       ' AN' - 1       'AND' - 1
'ND ' - 1       'D L' - 1       ' LE' - 1       'LET' - 1       'ET ' - 1
'T L' - 1       ' LI' - 1       'IVE' - 1
4-GRAMS OF 'LIVE AND LET LIVE':
'LIVE' - 2      'IVE ' - 1      'VE A' - 1      'E AN' - 1      ' AND' - 1
'AND ' - 1      'ND L' - 1      'D LE' - 1      ' LET' - 1      'LET ' - 1
'ET L' - 1      'T LI' - 1      ' LIV' - 1

Python

import pprint
from collections import Counter
from typing import Iterable


def n_grams(text: str, n: int) -> Iterable[str]:
    """Generate contiguous sequences of _n_ characters from _text_."""
    if n < 1:
        raise ValueError("n must be an integer > 0")

    text = text.upper()
    return (text[i : (i + n)] for i in range(len(text) - n + 1))


def main() -> None:
    example_text = "Live and let live"

    for n in range(2, 5):
        counts = Counter(n_grams(example_text, n)).most_common()
        print(
            f"{len(counts)} {n}-grams of {example_text!r}:\n",
            pprint.pformat(counts, compact=True),
            end="\n\n",
        )


if __name__ == "__main__":
    main()
Output:
12 2-grams of 'Live and let live':
 [('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1),
 ('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)]

13 3-grams of 'Live and let live':
 [('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1),
 ('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1),
 (' LI', 1)]

13 4-grams of 'Live and let live':
 [('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1),
 ('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1),
 (' LIV', 1)]


Sliding window

This example takes inspiration from the sliding_window recipe found in Python's itertools docs.

import pprint
from collections import Counter
from collections import deque
from itertools import islice
from typing import Iterable


def n_grams(text: str, n: int) -> Iterable[str]:
    """Generate contiguous sequences of _n_ characters from _text_."""
    it = iter(text.upper())
    n_gram = deque(islice(it, n), maxlen=n)
    if len(n_gram) == n:
        yield "".join(n_gram)
    for x in it:
        n_gram.append(x)
        yield "".join(n_gram)


def main() -> None:
    example_text = "Live and let live"

    for n in range(2, 5):
        counts = Counter(n_grams(example_text, n)).most_common()
        print(
            f"{len(counts)} {n}-grams of {example_text!r}:\n",
            pprint.pformat(counts, compact=True),
            end="\n\n",
        )


if __name__ == "__main__":
    main()
Output:
12 2-grams of 'Live and let live':
 [('LI', 2), ('IV', 2), ('VE', 2), (' L', 2), ('E ', 1), (' A', 1), ('AN', 1),
 ('ND', 1), ('D ', 1), ('LE', 1), ('ET', 1), ('T ', 1)]

13 3-grams of 'Live and let live':
 [('LIV', 2), ('IVE', 2), ('VE ', 1), ('E A', 1), (' AN', 1), ('AND', 1),
 ('ND ', 1), ('D L', 1), (' LE', 1), ('LET', 1), ('ET ', 1), ('T L', 1),
 (' LI', 1)]

13 4-grams of 'Live and let live':
 [('LIVE', 2), ('IVE ', 1), ('VE A', 1), ('E AN', 1), (' AND', 1), ('AND ', 1),
 ('ND L', 1), ('D LE', 1), (' LET', 1), ('LET ', 1), ('ET L', 1), ('T LI', 1),
 (' LIV', 1)]


And a strict variant, compositionally assembled from some basics:

from itertools import (islice)
from functools import (reduce)
from operator import (add)


def nGramCounts(n, s):
    '''A dictionary of all nGrams of dimension n in s,
       with the frequency of their occurrence.
    '''
    return reduce(
        lambda a, gram: insertWith(add, gram, 1, a),
        nGrams(n, s),
        {}
    )


def nGrams(n, s):
    '''All case-insensitive sequences of length n in the string s.'''
    return (''.join(t) for t in windows(n, list(s.upper())))


# ----------------------- GENERICS -----------------------

def insertWith(f, k, x, dct):
    '''A new dictionary updated with a 
       (key, f(value, x)) tuple.
       Where there is no existing value for the key, 
       the supplied x is used as the default.
    '''
    return dict(dct, **{k: f(dct[k], x) if k in dct else x})


def tails(xs):
    '''All final segments of xs, longest first.'''
    return (xs[i:] for i in range(0, 1 + len(xs)))


def windows(n, xs):
    '''Sliding windows of dimension n.'''
    return zip(*islice(tails(xs), n))



# ------------------------- TEST -------------------------
if __name__ == "__main__":
    import pprint

    EXAMPLE = "Live and let live"

    for dimension in range(1, 5):
        result = sorted(nGramCounts(dimension, EXAMPLE).items())
        print(
            f"{len(result)} {dimension}-grams of {EXAMPLE!r}:\n",
            pprint.pformat(result),
            end="\n\n",
        )
Output:
9 1-grams of 'Live and let live':
 [(' ', 3),
 ('A', 1),
 ('D', 1),
 ('E', 3),
 ('I', 2),
 ('L', 3),
 ('N', 1),
 ('T', 1),
 ('V', 2)]

12 2-grams of 'Live and let live':
 [(' A', 1),
 (' L', 2),
 ('AN', 1),
 ('D ', 1),
 ('E ', 1),
 ('ET', 1),
 ('IV', 2),
 ('LE', 1),
 ('LI', 2),
 ('ND', 1),
 ('T ', 1),
 ('VE', 2)]

13 3-grams of 'Live and let live':
 [(' AN', 1),
 (' LE', 1),
 (' LI', 1),
 ('AND', 1),
 ('D L', 1),
 ('E A', 1),
 ('ET ', 1),
 ('IVE', 2),
 ('LET', 1),
 ('LIV', 2),
 ('ND ', 1),
 ('T L', 1),
 ('VE ', 1)]

13 4-grams of 'Live and let live':
 [(' AND', 1),
 (' LET', 1),
 (' LIV', 1),
 ('AND ', 1),
 ('D LE', 1),
 ('E AN', 1),
 ('ET L', 1),
 ('IVE ', 1),
 ('LET ', 1),
 ('LIVE', 2),
 ('ND L', 1),
 ('T LI', 1),
 ('VE A', 1)]

Raku

sub n-gram ($this, $N=2) { Bag.new( flat $this.uc.map: { .comb.rotor($N => -($N-1))».join } ) }
dd 'Live and let live'.&n-gram; # bi-gram
dd 'Live and let live'.&n-gram(3); # tri-gram
Output:
("IV"=>2,"T "=>1,"VE"=>2,"E "=>1,"LE"=>1,"AN"=>1,"LI"=>2,"ND"=>1,"ET"=>1," L"=>2," A"=>1,"D "=>1).Bag
("ET "=>1,"AND"=>1,"LIV"=>2," LI"=>1,"ND "=>1," LE"=>1,"IVE"=>2,"E A"=>1,"VE "=>1,"T L"=>1,"D L"=>1,"LET"=>1," AN"=>1).Bag

Refal

$ENTRY Go {
    , 'LIVE AND LET LIVE': e.Str
    = <ShowNgrams 2 e.Str>
      <ShowNgrams 3 e.Str>
      <ShowNgrams 4 e.Str>;
};

ShowNgrams {
    s.N e.Str = 
        <Prout <Symb s.N> '-grams of "' e.Str '":'>
        <ShowLines 5 <Ngrams s.N e.Str>>
        <Prout>;
};

ShowLines {
    s.N = ;
    s.N e.X, <First s.N e.X>: (e.L) e.R =
        <Prout <Each DispNgram e.L>> <ShowLines s.N e.R>;
};

Each {
    s.F = ;
    s.F t.I e.Is = <Mu s.F t.I> <Each s.F e.Is>;
};

DispNgram {
    ((e.S) s.C) = '(' e.S ') - ' <Symb s.C> '  ';
};

Ngrams {
    s.N e.Str = <Count () <Groups s.N e.Str>>;
};

Groups {
    s.N e.X, <Lenw e.X>: s.L e.X, <Compare s.L s.N>: {
        '-' = ;
        s.C, <First s.N e.X>: (e.G) e.R, e.X: s.Z e.Y = 
            (e.G) <Groups s.N e.Y>;
    }
};

Count {
    (e.Cs) = e.Cs;
    (e.Cs) t.I e.Is = <Count (<Inc (e.Cs) t.I>) e.Is>;
};

Inc {
    (e.X (t.I s.C) e.Y) t.I = e.X (t.I <+ 1 s.C>) e.Y;
    (e.X) t.I = e.X (t.I 1);
};
Output:
2-grams of "LIVE AND LET LIVE":
(LI) - 2  (IV) - 2  (VE) - 2  (E ) - 1  ( A) - 1
(AN) - 1  (ND) - 1  (D ) - 1  ( L) - 2  (LE) - 1
(ET) - 1  (T ) - 1

3-grams of "LIVE AND LET LIVE":
(LIV) - 2  (IVE) - 2  (VE ) - 1  (E A) - 1  ( AN) - 1
(AND) - 1  (ND ) - 1  (D L) - 1  ( LE) - 1  (LET) - 1
(ET ) - 1  (T L) - 1  ( LI) - 1

4-grams of "LIVE AND LET LIVE":
(LIVE) - 2  (IVE ) - 1  (VE A) - 1  (E AN) - 1  ( AND) - 1
(AND ) - 1  (ND L) - 1  (D LE) - 1  ( LET) - 1  (LET ) - 1
(ET L) - 1  (T LI) - 1  ( LIV) - 1

RPL

Works with: Halcyon Calc version 4.2.8
RPL code Comment
≪ → text n 
   ≪ { } DUP n text SIZE FOR j 
       text j n - 1 + j SUB 
       IF DUP2 POS THEN 
          LAST 4 ROLL SWAP DUP2 GET 1 + PUT 
          SWAP DROP SWAP 
       ELSE + SWAP 1 + SWAP END 
     NEXT SHOWG 
≫ ≫ ‘-GRAMS’ STO

≪ { } 1 3 PICK SIZE FOR j 
      OVER j GET "=" + 4 PICK j GET →STR + + NEXT 
   ROT ROT DROP2
 ≫ ‘SHOWG’ STO
-GRAMS ( text n -- { "ngram=count".. } ) 
Initialize 2 empty lists; for j = n to length(text):
  ngram = text[j-n+1..j]
  if ngram already in ngram list
    increase counter in other list
    get rid of ngram
  else add to ngram list and set counter at 1 on the other list
Show results


SHOWG ( { "ngram".. } { counts.. } -- { "ngram=count".. } ) 



Input:
"LIVE AND LET LIVE" 2 -GRAMS
"LIVE AND LET LIVE" 3 -GRAMS
"LIVE AND LET LIVE" 4 -GRAMS
Output:
3: { "LI=2" "IV=2" "VE=2" "E =1" " A=1" "AN=1" "ND=1" "D =1" " L=2" "LE=1" "ET=1" "T =1" }
2: { "LIV=2" "IVE=2" "VE =1" "E A=1" " AN=1" "AND=1" "ND =1" "D L=1" " LE=1" "LET=1" "ET =1" "T L=1" " LI=1" }
1: { "LIVE=2" "IVE =1" "VE A=1" "E AN=1" " AND=1" "AND =1" "ND L=1" "D LE=1" " LET=1" "LET =1" "ET L=1" "T LI=1" " LIV=1" }

SETL

program find_ngrams;
    input := "LIVE AND LET LIVE";
    loop for size in [2,3,4] do
        print(str size+"-grams of '"+input+"':");
        
        ng := ngrams(input, size);
        col := 0;
        loop for count = ng(ngram) do
            nprint(rpad("'" + ngram + "': " + str count, 10));
            if (col +:= 1) mod 8 = 0 then print; end if;
        end loop;
        print;
        print;
    end loop;

    proc ngrams(input, size);
        ng := {};
        loop for i in [1..#input-size+1] do
            ng(input(i..i+size-1)) +:= 1;
        end loop;
        return ng;
    end proc;
end program;
Output:
2-grams of 'LIVE AND LET LIVE':
' A': 1   ' L': 2   'AN': 1   'D ': 1   'E ': 1   'ET': 1   'IV': 2   'LE': 1
'LI': 2   'ND': 1   'T ': 1   'VE': 2

3-grams of 'LIVE AND LET LIVE':
' AN': 1  ' LE': 1  ' LI': 1  'AND': 1  'D L': 1  'E A': 1  'ET ': 1  'IVE': 2
'LET': 1  'LIV': 2  'ND ': 1  'T L': 1  'VE ': 1

4-grams of 'LIVE AND LET LIVE':
' AND': 1 ' LET': 1 ' LIV': 1 'AND ': 1 'D LE': 1 'E AN': 1 'ET L': 1 'IVE ': 1
'LET ': 1 'LIVE': 2 'ND L': 1 'T LI': 1 'VE A': 1

Wren

Version 1 (Sorted order)

Library: Wren-str
Library: Wren-maputil
Library: Wren-fmt
import "./str" for Str
import "./maputil" for MultiSet
import "./fmt" for Fmt

var findNgrams = Fn.new { |n, text|
    text = Str.upper(text)
    var ngrams = {}
    for (i in 0..text.count-n) {
        var ngram = text[i...i+n]
        MultiSet.add(ngrams, ngram)
    }
    return ngrams
}

// sort by decreasing frequency, then by lexicographical order
var comparer = Fn.new { |i, j|
    if (i.value != j.value) return j.value < i.value
    return Str.lt(i.key, j.key)
}

var text = "Live and let live"
for (n in [2, 3, 4]) {
    var ngrams = findNgrams.call(n, text)
    System.print("All %(n)-grams of '%(text)' and their frequencies:")
    var ng = ngrams.toList.sort(comparer).map { |me| "(\"%(me.key)\" : %(me.value))"}
    Fmt.tprint("$s  ", ng, 5)
    System.print()
}
Output:
All 2-grams of 'Live and let live' and their frequencies:
(" L" : 2)   ("IV" : 2)   ("LI" : 2)   ("VE" : 2)   (" A" : 1)   
("AN" : 1)   ("D " : 1)   ("E " : 1)   ("ET" : 1)   ("LE" : 1)   
("ND" : 1)   ("T " : 1)   

All 3-grams of 'Live and let live' and their frequencies:
("IVE" : 2)   ("LIV" : 2)   (" AN" : 1)   (" LE" : 1)   (" LI" : 1)   
("AND" : 1)   ("D L" : 1)   ("E A" : 1)   ("ET " : 1)   ("LET" : 1)   
("ND " : 1)   ("T L" : 1)   ("VE " : 1)   

All 4-grams of 'Live and let live' and their frequencies:
("LIVE" : 2)   (" AND" : 1)   (" LET" : 1)   (" LIV" : 1)   ("AND " : 1)   
("D LE" : 1)   ("E AN" : 1)   ("ET L" : 1)   ("IVE " : 1)   ("LET " : 1)   
("ND L" : 1)   ("T LI" : 1)   ("VE A" : 1)   

Version 2 (Original order)

Library: Wren-ordered

The iteration order of 'Map' objects in Wren is undefined though they can subsequently be sorted into a particular order as the first version shows. However, to maintain the original order of insertion we need to use one of the classes in the above module which automatically keep track of such order when items are added or removed.

import "./str" for Str
import "./ordered" for OrderedBag
import "./fmt" for Fmt

var findNgrams = Fn.new { |n, text|
    text = Str.upper(text)
    var ngrams = OrderedBag.new()
    for (i in 0..text.count-n) {
        var ngram = text[i...i+n]
        ngrams.add(ngram)
    }
    return ngrams
}

var text = "Live and let live"
for (n in [2, 3, 4]) {
    var ngrams = findNgrams.call(n, text)
    System.print("All %(n)-grams of '%(text)' and their frequencies:")
    var ng = ngrams.toList.map { |me| "(\"%(me.key)\" : %(me.value))"}
    Fmt.tprint("$s  ", ng, 5)
    System.print()
}
Output:
All 2-grams of 'Live and let live' and their frequencies:
("LI" : 2)   ("IV" : 2)   ("VE" : 2)   ("E " : 1)   (" A" : 1)  
("AN" : 1)   ("ND" : 1)   ("D " : 1)   (" L" : 2)   ("LE" : 1)  
("ET" : 1)   ("T " : 1)  

All 3-grams of 'Live and let live' and their frequencies:
("LIV" : 2)   ("IVE" : 2)   ("VE " : 1)   ("E A" : 1)   (" AN" : 1)  
("AND" : 1)   ("ND " : 1)   ("D L" : 1)   (" LE" : 1)   ("LET" : 1)  
("ET " : 1)   ("T L" : 1)   (" LI" : 1)  

All 4-grams of 'Live and let live' and their frequencies:
("LIVE" : 2)   ("IVE " : 1)   ("VE A" : 1)   ("E AN" : 1)   (" AND" : 1)  
("AND " : 1)   ("ND L" : 1)   ("D LE" : 1)   (" LET" : 1)   ("LET " : 1)  
("ET L" : 1)   ("T LI" : 1)   (" LIV" : 1)

XPL0

int Dict(100), Count(100), Size;

proc LookUp(Wd);        \Add word to dictionary, or increment its count
int  Wd, I;
[for I:= 0 to Size-1 do
    if Dict(I) = Wd then
        [Count(I):= Count(I)+1;
        return;
        ];
Dict(Size):= Wd;
Count(Size):= 1;
Size:= Size+1;
];

proc ShowNGram(N, Str); \Show N-grams for string
char N, Str;
int  I, J, Wd, Ch;
[IntOut(0, N);  Text(0, "-grams:^m^j");
Size:= 0;  I:= 0;
loop    [Wd:= 0;
        for J:= 0 to N-1 do
            [Ch:= Str(I+J);
            if Ch = $A0 then quit;      \terminating space
            if Ch>=^a and Ch<=^z then Ch:= Ch & ~$20;
            Wd:= Wd<<8 + Ch;
            ];
        I:= I+1;
        LookUp(Wd);
        ];
for I:= 0 to Size-1 do
    [Wd:= Dict(I);
    for J:= N-1 downto 0 do
        ChOut(0, Wd>>(J*8));
    ChOut(0, ^ );
    IntOut(0, Count(I));
    if rem(I/5) = 4 then CrLf(0) else ChOut(0, 9\tab\);
    ];
CrLf(0);
];

int N;
for N:= 2 to 4 do ShowNGram(N, "Live and let live ")
Output:
2-grams:
LI 2    IV 2    VE 2    E  1     A 1
AN 1    ND 1    D  1     L 2    LE 1
ET 1    T  1    
3-grams:
LIV 2   IVE 2   VE  1   E A 1    AN 1
AND 1   ND  1   D L 1    LE 1   LET 1
ET  1   T L 1    LI 1   
4-grams:
LIVE 2  IVE  1  VE A 1  E AN 1   AND 1
AND  1  ND L 1  D LE 1   LET 1  LET  1
ET L 1  T LI 1   LIV 1  
Cookies help us deliver our services. By using our services, you agree to our use of cookies.