Text completion

From Rosetta Code
(Redirected from Text Completion)
Text completion is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Write a program that takes in a user inputted word and prints out possible words that are valid in the English dictionary. Please state any dictionaries or files/binaries/dependencies used in your program. Do show the similarity of the inputted word and outcome as a percentage. Any algorithm can be used to accomplish this task.

Resources

Github Repo
Raw Text, Save as .txt file
Hamming Distance
Jaro-Winkler Distance
SoundEx Algorithm
SoundEx Algorithm Wiki
Dice's Coefficient
Dice Coefficient Wiki

Possible Output
Input word: 
complition

compaction : 80.00% similar.
completion : 90.00% similar.
completions : 81.82% similar.
complexion : 80.00% similar.
Extension
  1. How can you make the accuracy of your program higher?



C++

Translation of: Julia
#include <algorithm>
#include <fstream>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

// See https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows
int levenshtein_distance(const std::string& str1, const std::string& str2) {
    size_t m = str1.size(), n = str2.size();
    std::vector<int> cost(n + 1);
    std::iota(cost.begin(), cost.end(), 0);
    for (size_t i = 0; i < m; ++i) {
        cost[0] = i + 1;
        int prev = i;
        for (size_t j = 0; j < n; ++j) {
            int c = (str1[i] == str2[j]) ? prev
                : 1 + std::min(std::min(cost[j + 1], cost[j]), prev);
            prev = cost[j + 1];
            cost[j + 1] = c;
        }
    }
    return cost[n];
}

template <typename T>
void print_vector(const std::vector<T>& vec) {
    auto i = vec.begin();
    if (i == vec.end())
        return;
    std::cout << *i++;
    for (; i != vec.end(); ++i)
        std::cout << ", " << *i;
}

int main(int argc, char** argv) {
    if (argc != 3) {
        std::cerr << "usage: " << argv[0] << " dictionary word\n";
        return EXIT_FAILURE;
    }
    std::ifstream in(argv[1]);
    if (!in) {
        std::cerr << "Cannot open file " << argv[1] << '\n';
        return EXIT_FAILURE;
    }
    std::string word(argv[2]);
    if (word.empty()) {
        std::cerr << "Word must not be empty\n";
        return EXIT_FAILURE;
    }
    constexpr size_t max_dist = 4;
    std::vector<std::string> matches[max_dist + 1];
    std::string match;
    while (getline(in, match)) {
        int distance = levenshtein_distance(word, match);
        if (distance <= max_dist)
            matches[distance].push_back(match);
    }
    for (size_t dist = 0; dist <= max_dist; ++dist) {
        if (matches[dist].empty())
            continue;
        std::cout << "Words at Levenshtein distance of " << dist
            << " (" << 100 - (100 * dist)/word.size()
            << "% similarity) from '" << word << "':\n";
        print_vector(matches[dist]);
        std::cout << "\n\n";
    }
    return EXIT_SUCCESS;
}
Output:

Using the same dictionary as the Julia example:

Words at Levenshtein distance of 1 (90% similarity) from 'complition':
completion

Words at Levenshtein distance of 2 (80% similarity) from 'complition':
coalition, competition, compilation, composition

Words at Levenshtein distance of 3 (70% similarity) from 'complition':
companion, competitions, completing, complications, computation, condition

Words at Levenshtein distance of 4 (60% similarity) from 'complition':
collection, combination, commission, comparison, compensation, competing, competitive, complaint, complete, completed, completely, complexity, compliance, compliant, compression, computing, conclusion, conditions, connection, convention, conviction, cooperation, corporation, correction, correlation, corruption, nomination, opinion, opposition, option, pollution, population, position, simulation, solution

Delphi

Library: System.Math
Translation of: Wren

Using unixdict.txt.

program Text_Completion;

{$APPTYPE CONSOLE}

{$R *.res}

uses
  System.SysUtils,
  System.Classes,
  System.Math,
  System.Generics.Collections;

type
  TSujestions = TDictionary<Integer, string>;

function Levenshtein(s, t: string): integer;
var
  d: array of array of integer;
  i, j, cost: integer;
begin
  SetLength(d, Length(s) + 1);
  for i := Low(d) to High(d) do
  begin
    SetLength(d[i], Length(t) + 1);
  end;

  for i := Low(d) to High(d) do
  begin
    d[i, 0] := i;
    for j := Low(d[i]) to High(d[i]) do
    begin
      d[0, j] := j;
    end;
  end;

  for i := Low(d) + 1 to High(d) do
  begin
    for j := Low(d[i]) + 1 to High(d[i]) do
    begin
      if s[i] = t[j] then
      begin
        cost := 0;
      end
      else
      begin
        cost := 1;
      end;

      d[i, j] := Min(Min(d[i - 1, j] + 1,      //deletion
        d[i, j - 1] + 1),     //insertion
        d[i - 1, j - 1] + cost  //substitution
      );
    end;
  end;
  Result := d[Length(s), Length(t)];
end;

function FindSujestions(Search: string; dict: TStringList): TSujestions;
var
  I, ld: Integer;
  w: string;
begin
  Result := TSujestions.Create;
  for I := 0 to 3 do
    Result.Add(I, '');

  for I := 0 to dict.Count - 1 do
  begin
    w := dict[I];
    ld := Levenshtein(Search, w);
    if ld < 4 then
      Result[ld] := Result[ld] + ' ' + w;
  end;
end;

function Similarity(Search: string; Distance: Integer): Double;
var
  ALength: Double;
begin
  ALength := Search.Length;
  Result := (ALength - Distance) * 100 / ALength;
end;

var
  dict: TStringList;
  Search: string;
  i: Integer;
  Sujestions: TSujestions;

begin
  dict := TStringList.Create;
  dict.LoadFromFile('unixdict.txt');
  Search := 'complition';

  Sujestions := FindSujestions(Search, dict);

  Writeln('Input word: '#10, Search);

  for i := 1 to 3 do
  begin
    Writeln(Format('Words which are %4.1f%% similar:', [Similarity(Search, i)]));
    Writeln(sujestions[i], #10);
  end;

  Sujestions.Free;
  dict.Free;

  Readln;
end.
Output:
Input word:
complition
Words which are 90,0% similar:
 completion

Words which are 80,0% similar:
 coalition competition compilation complexion composition

Words which are 70,0% similar:
 cognition collision combustion commotion companion compassion complain complicity compton compulsion compunction computation condition contrition demolition incompletion volition

Factor

Translation of: Julia
Works with: Factor version 0.99 2020-07-03
USING: formatting fry http.client io kernel lcs literals math
math.ranges namespaces prettyprint.config sequences splitting ;

CONSTANT: words $[
    "https://www.mit.edu/~ecprice/wordlist.10000" http-get nip
    "\n" split harvest
]
CONSTANT: word "complition"

: lev-dist-of ( str n -- n )
    [ words ] 2dip '[ _ levenshtein _ = ] filter ;

: similarity ( n -- x ) word length / 100 * 100 swap - ;

10000 margin set ! Prevent prettyprinter from wrapping sequences
4 [1,b] [
    dup [ similarity ] [ drop word ] [ word swap lev-dist-of ] tri
    "Words at Levenshtein distance of %d (%.1f%% similarity) from %u:\n%u\n\n" printf
] each
Output:
Words at Levenshtein distance of 1 (90.0% similarity) from "complition":
{ "completion" }

Words at Levenshtein distance of 2 (80.0% similarity) from "complition":
{ "coalition" "competition" "compilation" "composition" }

Words at Levenshtein distance of 3 (70.0% similarity) from "complition":
{ "companion" "competitions" "completing" "complications" "computation" "condition" }

Words at Levenshtein distance of 4 (60.0% similarity) from "complition":
{ "collection" "combination" "commission" "comparison" "compensation" "competing" "competitive" "complaint" "complete" "completed" "completely" "complexity" "compliance" "compliant" "compression" "computing" "conclusion" "conditions" "connection" "convention" "conviction" "cooperation" "corporation" "correction" "correlation" "corruption" "nomination" "opinion" "opposition" "option" "pollution" "population" "position" "simulation" "solution" }


FreeBASIC

This uses 'unixdict' and the Levenshtein distance algorithm to test for similarity.

#define MIN(a, b) iif((a) < (b), (a), (b))

Dim As String palabra = "complition"
Dim As Integer longitud = Len(palabra)
Open "unixdict.txt" For Input As #1
Dim As String cStr, wordList()
Dim As Integer n, p = 0, posic = 0
Do While Not Eof(1)
    Line Input #1, cStr
    p += 1
    If Len(cStr) > 8 Then
        posic += 1
        Redim Preserve wordList(posic)
        wordList(posic) = cStr
    End If
Loop
Close #1

Dim As String lev(4)
For n = Lbound(wordList) To Ubound(wordList)
    Dim As Integer ld = levenshtein(palabra, wordList(n))
    If ld < 4 Then lev(ld) &= wordList(n) & "  "
Next n

Print "Input word: "; palabra
For n = 1 To 3
    Dim As Integer semejanza = (longitud - n) * 100 / longitud
    Print Using !"\nWords at Levenshtein distance of # (##% similarity):"; n; semejanza
    Print lev(n); " ";
    Print
Next n
Sleep
Output:
Input word: complition

Words at Levenshtein distance of 1 (90% similarity):
completion

Words at Levenshtein distance of 2 (80% similarity):
coalition  competition  compilation  complexion  composition

Words at Levenshtein distance of 3 (70% similarity):
cognition  collision  combustion  commotion  companion  compassion  complicity  compulsion  compunction  computation  condition  contrition  demolition  incompletion


Go

Translation of: Wren
package main

import (
    "bytes"
    "fmt"
    "io/ioutil"
    "log"
)

func levenshtein(s, t string) int {
    d := make([][]int, len(s)+1)
    for i := range d {
        d[i] = make([]int, len(t)+1)
    }
    for i := range d {
        d[i][0] = i
    }
    for j := range d[0] {
        d[0][j] = j
    }
    for j := 1; j <= len(t); j++ {
        for i := 1; i <= len(s); i++ {
            if s[i-1] == t[j-1] {
                d[i][j] = d[i-1][j-1]
            } else {
                min := d[i-1][j]
                if d[i][j-1] < min {
                    min = d[i][j-1]
                }
                if d[i-1][j-1] < min {
                    min = d[i-1][j-1]
                }
                d[i][j] = min + 1
            }
        }

    }
    return d[len(s)][len(t)]
}

func main() {
    search := "complition"
    b, err := ioutil.ReadFile("unixdict.txt")
    if err != nil {
        log.Fatal("Error reading file")
    }
    words := bytes.Fields(b)
    var lev [4][]string
    for _, word := range words {
        s := string(word)
        ld := levenshtein(search, s)
        if ld < 4 {
            lev[ld] = append(lev[ld], s)
        }
    }
    fmt.Printf("Input word: %s\n\n", search)
    for i := 1; i < 4; i++ {
        length := float64(len(search))
        similarity := (length - float64(i)) * 100 / length
        fmt.Printf("Words which are %4.1f%% similar:\n", similarity)
        fmt.Println(lev[i])
        fmt.Println()
    }
}
Output:
Input word: complition

Words which are 90.0% similar:
[completion]

Words which are 80.0% similar:
[coalition competition compilation complexion composition]

Words which are 70.0% similar:
[cognition collision combustion commotion companion compassion complain complicity compton compulsion compunction computation condition contrition demolition incompletion volition]

Java

Github Repo Uses dependencies given.

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Scanner;

//uses https://github.com/dwyl/english-words

public class textCompletionConcept {
    public static int correct = 0;
    public static ArrayList<String> listed = new ArrayList<>();
    public static void main(String[]args) throws IOException, URISyntaxException {
        Scanner input = new Scanner(System.in);
        System.out.println("Input word: ");
        String errorRode = input.next();
        File file = new File(new 
        File(textCompletionConcept.class.getProtectionDomain().getCodeSource().getLocation().toURI()).getPath() + File.separator + "words.txt");
        Scanner reader = new Scanner(file);
        while(reader.hasNext()){
            double percent;
            String compareToThis = reader.nextLine();
                    char[] s1 = errorRode.toCharArray();
                    char[] s2 = compareToThis.toCharArray();
                    int maxlen = Math.min(s1.length, s2.length);
                    for (int index = 0; index < maxlen; index++) {
                        String x = String.valueOf(s1[index]);
                        String y = String.valueOf(s2[index]);
                        if (x.equals(y)) {
                            correct++;
                        }
                    }
                    double length = Math.max(s1.length, s2.length);
                    percent = correct / length;
                    percent *= 100;
                    boolean perfect = false;
                    if (percent >= 80 && compareToThis.charAt(0) == errorRode.charAt(0)) {
                        if(String.valueOf(percent).equals("100.00")){
                            perfect = true;
                        }
                        String addtoit = compareToThis + " : " + String.format("%.2f", percent) + "% similar.";
                        listed.add(addtoit);
                    }
                    if(compareToThis.contains(errorRode) && !perfect && errorRode.length() * 2 > compareToThis.length()){
                        String addtoit = compareToThis + " : 80.00% similar.";
                        listed.add(addtoit);
                    }
            correct = 0;
        }

        for(String x : listed){
            if(x.contains("100.00% similar.")){
                System.out.println(x);
                listed.clear();
                break;
            }
        }

        for(String x : listed){
            System.out.println(x);
        }
    }
}
Output
Input word: 
complition
compaction : 80.00% similar.
completion : 90.00% similar.
completions : 81.82% similar.
complexion : 80.00% similar.

Process finished with exit code 0

jq

Works with: jq

This entry uses unixdict.txt, the dictionary used frequently on other RosettaCode pages.

Since the title of this particular page is "Text completion", the solution offered here includes both a straightforward lookup of the dictionary to check for possible completions, and a check for equally good matches based on the Levenshtein distance.

The Levenshtein module used is the collection of function definitions on the RC page https://rosettacode.org/wiki/Levenshtein_distance and is thus not repeated here.

The "percentage" reported is computed as described in the comments.

The "debug" statements for showing the number of words under consideration for best Levenshtein matches is retained.

include "levenshtein-distance" {search: "."};  # https://rosettacode.org/wiki/Levenshtein_distance#jq

# input: a dictionary
# output an array of {word, d, p} objects showing the best matches as measured
# by the Levenshtein distance, d; p is an indicator of similarity expressed as
# 100 * (max(0, ($word|length) - d) / ($word|length) ) rounded to the nearest integer
def closest($word):
  if .[$word] then {$word, d:0, percentage: 100}
  else
  "Levenshtein-closest words to \($word):",
  (($word|length) as $length
   | (keys_unsorted | map(select(length | (. > $length-3) and (. < $length + 3)))) as $candidates
   | $candidates
   | (length|debug) as $debug
   | map( {word: ., d: levenshteinDistance($word; .)} )
   | minima(.d)
   | map( .p = (100 * ([0, ($length - .d) ] | max / $length) | round )))
  end ;  

# Input: a dictionary
# Output: an array of possible completions of $word
def completion($word):
  "Possible completions of \($word):",
    (keys_unsorted | map(select(startswith(word))));

def task:
  INDEX(inputs; .)  # the dictionary
  | completion("compli"),
    closest("complition"),
    closest("compxxxxtion")
;

task
Output:

Invocation: jq -nR -f text-completion.jq unixdict.txt

This assumes levenshtein-distance.jq is in the pwd. See the comments above.

Possible completions of compli:
[
  "compliant",
  "complicate",
  "complicity",
  "compliment",
  "complimentary",
  "compline"
]
Levenshtein-closest words to complition:
["DEBUG:",10396]
[
  {
    "word": "completion",
    "d": 1,
    "p": 90
  }
]
"Levenshtein-closest words to compxxxxtion:"
["DEBUG:",4082]
[
  {
    "word": "competition",
    "d": 4,
    "p": 67
  },
  {
    "word": "compilation",
    "d": 4,
    "p": 67
  },
  {
    "word": "completion",
    "d": 4,
    "p": 67
  },
  {
    "word": "complexion",
    "d": 4,
    "p": 67
  },
  {
    "word": "composition",
    "d": 4,
    "p": 67
  },
  {
    "word": "compunction",
    "d": 4,
    "p": 67
  },
  {
    "word": "computation",
    "d": 4,
    "p": 67
  }
]

Julia

See https://en.wikipedia.org/wiki/Levenshtein_distance, the number of one character edits to obtain one word from another.

using StringDistances

const fname = download("https://www.mit.edu/~ecprice/wordlist.10000", "wordlist10000.txt")
const words = read(fname, String) |> split .|> strip .|> string
const wrd = "complition"

levdistof(n, string) = filter(w -> Levenshtein()(string, w) == n, words)

for n in 1:4
    println("Words at Levenshtein distance of $n (",
        100 - Int(round(100 * n / length(wrd))), "% similarity) from \"$wrd\": \n",
        levdistof(n, wrd), "\n")
end
Output:
Words at Levenshtein distance of 1 (90% similarity) from "complition":
["completion"]

Words at Levenshtein distance of 2 (80% similarity) from "complition":
["coalition", "competition", "compilation", "composition"]

Words at Levenshtein distance of 3 (70% similarity) from "complition":
["companion", "competitions", "completing", "complications", "computation", "condition"]

Words at Levenshtein distance of 4 (60% similarity) from "complition":
["collection", "combination", "commission", "comparison", "compensation", "competing", "competitive", "complaint", "complete", "completed", "completely", "complexity", "compliance", "compliant", "compression", "computing", "conclusion", "conditions", "connection", "convention", "conviction", "cooperation", "corporation", "correction", "correlation", "corruption", "nomination", "opinion", "opposition", "option", "pollution", "population", "position", "simulation", "solution"]

Mathematica

Module[
 {word = "complition"},
 Map[
  {#, PercentForm@N[1 - EditDistance[#, word]/StringLength@word]} &,
  SpellingCorrectionList[word]
  ]
 ] // Grid
Output:
completion	90%
complication	80%
competition	80%
composition	80%
compilation	80%
compulsion	70%
contemplation	60%

Nim

Translation of: Go

We use the function editDistance from the std/editdistance module to get the Levenshtein distance (computed by considering in Unicode codepoints).

import std/editdistance, sequtils, strformat, strutils

let search = "complition"
let words = toSeq("unixdict.txt".lines)
var lev: array[4, seq[string]]
for word in words:
  let ld = editDistance(search, word)
  if ld < 4:
    lev[ld].add word
echo &"Input word: {search}\n"
let length = float(search.len)
for i in 0..3:
  if lev[i].len == 0: continue  # No result.
  let similarity = (length - float(i)) * 100 / length
  echo &"Words which are {similarity:4.1f}% similar:"
  echo lev[i].join(" ")
  echo()
Output:
Input word: complition

Words which are 90.0% similar:
completion

Words which are 80.0% similar:
coalition competition compilation complexion composition

Words which are 70.0% similar:
cognition collision combustion commotion companion compassion complain complicity compton compulsion compunction computation condition contrition demolition incompletion volition

Perl

Inspired by Raku Sorenson-Dice implementation (doesn't handle Unicode, but module Text::Dice can).

use strict;
use warnings;
use feature 'say';
use Path::Tiny;
use List::Util <uniq head>;

# sub bi_gram { (lc shift) =~ /(?<=\K.)./g } ## doesn't work in recent versions of Perl

sub bi_gram {
    my $line = lc shift;
    uniq map { substr $line,$_,2 } 0 .. length($line)-2;
}

sub score {
    my($phrase, $word) = @_;
    my %count;

    my @match = bi_gram $phrase;
    $count{$_}++ for @match, @$word;
    2 * (grep { $count{$_} > 1 } keys %count) / (@match + @$word);
}

sub sorenson {
    my($dict,$word,$cutoff) = @_; $cutoff //= 0.55;
    my(%matches,$s);

    ($s = score($word, $$dict{$_})) > $cutoff and $matches{$_} = $s for keys %$dict;
    %matches;
}

my %dict = map { $_ => [ bi_gram($_) ] } path('unixdict.txt')->slurp =~ /.{3,}/gm;

for my $word (<complition inconsqual>) {
    my(%scored,@ranked);

    %scored = sorenson(\%dict,$word);
    push @ranked, sprintf "%.3f $_", $scored{$_} for sort { $scored{$b} <=> $scored{$a} } keys %scored;
    say "\n$word:\n" . join("\n", head 10, @ranked);
}
Output:
complition:
0.778 completion
0.737 competition
0.737 composition
0.706 coalition
0.700 incompletion
0.667 complexion
0.667 complicity
0.667 decomposition
0.632 compilation
0.632 compunction

inconsqual:
0.609 inconsequential
0.588 continual
0.571 squall
0.556 conceptual
0.556 continuant
0.556 inconstant

Phix

uses levenshtein() from Levenshtein_distance#Phix (reproduced below for your convenience) and the standard unix_dict().

function levenshtein(string a, b)
    integer n = length(a),
            m = length(b)
    if n=0 then return m end if
    if m=0 then return n end if
    sequence d = repeat(repeat(0, m+1), n+1)
    for i=1 to n do
        d[i+1][1] = i
        for j=1 to m do
            d[1][j+1] = j
            --   next  := min({ prev  +substitution,  deletion, or insertion }):
            d[i+1][j+1] = min({d[i][j]+(a[i]!=b[j]), d[i][j+1]+1, d[i+1][j]+1})
        end for
    end for
    return d[$][$]
end function

with javascript_semantics
requires("1.0.1")   -- (this needs apply() to be properly transpiled!)
string word = "complition"
sequence words = unix_dict(),
         leven = apply(words,levenshtein,word)
function ln(string /*w*/, integer i, n) return leven[i]=n end function
for n=1 to 4 do
    printf(1,"Words at Levenshtein distance of %d (%g%% similarity) from \"%s\": \n%s\n",
             {n,100-round(100*n/length(word)),word,join_by(filter(words,ln,n),1,6)})
end for
Output:

(matches Delphi/Go/Wren)

Words at Levenshtein distance of 1 (90% similarity) from "complition":
completion

Words at Levenshtein distance of 2 (80% similarity) from "complition":
coalition   competition   compilation   complexion   composition

Words at Levenshtein distance of 3 (70% similarity) from "complition":
cognition   collision   combustion   commotion   companion   compassion
complain   complicity   compton   compulsion   compunction   computation
condition   contrition   demolition   incompletion   volition

Words at Levenshtein distance of 4 (60% similarity) from "complition":
abolition   admonition   ambition   ammunition   campion   caption
collusion   combination   commission   communion   compactify   comparison
compatriot   competitive   competitor   complaint   complete   compliant
complicate   compliment   compline   compositor   compression   conception
concision   conclusion   concretion   congestion   consolation   contention
convention   convolution   corruption   cotillion   decomposition   depletion
dominion   gumption   implosion   imposition   munition   oblivion
opinion   opposition   option   pollution   position   simpleton
soliton   solution   sorption   templeton   temptation   tomlinson

Raku

(formerly Perl 6)

Hamming distance

Translation of: Java
sub MAIN ( Str $user_word = 'complition', Str $filename = 'words.txt' ) {
    my @s1 = $user_word.comb;
    my @listed = gather for $filename.IO.lines -> $line {
        my @s2 = $line.comb;

        my $correct = 100 * sum( @s1 Zeq @s2)
                          / max(+@s1,   +@s2);

        my $score = ( $correct >= 100            and @s1[0] eq @s2[0] ) ?? 100
                 !! ( $correct >=  80            and @s1[0] eq @s2[0] ) ?? $correct
                 !! ( $line.contains($user_word) and @s1 * 2 > @s2    ) ?? 80
                 !!                                                        0;
        take [$score, $line] if $score;
    }

    @listed = @listed[$_] with @listed.first: :k, { .[0] == 100 };

    say "{.[0].fmt('%.2f')}% {.[1]}" for @listed;
}
Output:
80.00% compaction
90.00% completion
81.82% completions
80.00% complexion

Sorenson-Dice

Sorenson-Dice tends to return relatively low percentages even for small differences, especially for short words. We need to "lower the bar" to get any results at all. Different variations of the algorithm do or don't regularize case. This one does, though it doesn't much matter for the tested words.

Using unixdict.txt from www.puzzlers.org

sub sorenson ($phrase, %hash) {
    my $match = bigram $phrase;
    %hash.race.map: { [(2 * ($match ∩ .value) / ($match + .value)).round(.001), .key] }
}

sub bigram (\these) { Bag.new( flat these.fc.words.map: { .comb.rotor(2 => -1)».join } ) }

# Load the dictionary
my %hash = './unixdict.txt'.IO.slurp.words.race.map: { $_ => .&bigram };

# Testing
for <complition inconsqual Sørenson> -> $w {
    say "\n$w:";
    .say for sorenson($w, %hash).grep(*.[0] >= .55).sort({-.[0],~.[1]}).head(10);
}
Output:
complition:
[0.778 completion]
[0.737 competition]
[0.737 composition]
[0.706 coalition]
[0.7 incompletion]
[0.667 complexion]
[0.667 complicity]
[0.667 decomposition]
[0.632 compilation]
[0.632 compunction]

inconsqual:
[0.609 inconsequential]
[0.588 continual]
[0.571 squall]
[0.556 conceptual]
[0.556 inconstant]

Sørenson:
[0.714 sorenson]
[0.667 benson]
[0.615 swenson]
[0.571 evensong]
[0.571 sorensen]

REXX

The method used to find a word   (that is contained in a specified dictionary)   closest to a specified string of letters is:

Perform any of   (three methods of actions):

  •    delete  any letter
  •    insert  any letter
  •   (any letter is inserted at any location)
  •   (this includes prefixing any letter)
  •   (this includes suffixing any letter)
  •    substitute  any letter at any location


(Only lowercase letters were used for the [above] three methods).

Perform any of the above along with any combination of any method.

This will, in effect, delete/insert/substitute any two letters:

  •    delete  +  delete 
  •    delete  +  insert 
  •    delete  +  substitute 
  •    insert  +  delete 
  •    insert  +  insert 
  •    insert  +  substitute 
  •    substitute  +  delete 
  •    substitute  +  insert 
  •    substitute  +  substitute 

No attempt was made to change (by any method) three letters or more.

/*REXX pgm finds (dictionary) words which can be found in a specified word wheel (grid).*/
parse arg what iFID .                            /*obtain optional arguments from the CL*/
if what==''|what==","  then what= 'complition'   /*Not specified?  Then use the default.*/
if iFID==''|iFID==","  then iFID= 'UNIXDICT.TXT' /* "      "         "   "   "     "    */
@abc= 'abcdefghijklmnopqrstuvwxyz'               /*(Latin) lowercase letters to be used.*/
L= length(@abc)                                  /*   "      "    " the Latin letters.  */
wrds= 0                                          /*# words that are in the dictionary.  */
dups= 0                                          /*"   "     "   "  duplicates.         */
ills= 0                                          /*"   "     "   contain  "not" letters.*/
say '                                Reading the file: ' iFID         /*align the text. */
@.= .                                            /*non─duplicated dictionary words.     */
$=                                               /*the list of dictionary words in grid.*/
     do recs=0  while lines(iFID)\==0            /*process all words in the dictionary. */
     x= space( linein(iFID), 0)                  /*elide any blanks in the dictinary.   */
     if @.x\==.           then do; dups= dups+1; iterate; end  /*is this a duplicate?   */
     if \datatype(x,'M')  then do; ills= ills+1; iterate; end  /*has word non─letters?  */
     @.x=                                        /*signify that  X  is a dictionary word*/
     wrds= wrds + 1                              /*bump the number of "good" dist. words*/
     end   /*recs*/
a=
say '    number of  records (words) in the dictionary: '   right( commas(recs), 9)
say '    number of ill─formed words in the dictionary: '   right( commas(ills), 9)
say '    number of  duplicate words in the dictionary: '   right( commas(dups), 9)
say '    number of acceptable words in the dictionary: '   right( commas(wrds), 9)
say '    the  "word"  to be used for  text completion: '   what
say
call del what;  a= a result;        call del what,1;  a= a result
call ins what;  a= a result;        call ins what,1;  a= a result
call sub what;  a= a result;        call sub what,1;  a= a result
call prune
#= words($)
say commas(#) ' similar words found:'
       do j=1  for #;  _= word($, j);    say right( count(_,what), 24)  _
       end   /*j*/
exit #                                           /*stick a fork in it,  we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
commas: parse arg _;  do ?=length(_)-3  to 1  by -3; _= insert(',', _, ?); end;   return _
prune:  do k=1  for words(a); _= word(a,k); if wordpos(_,$)==0  then $= $ _; end; return
recur:  $= $ del(z);          $= $ ins(z);           $= $ sub(z);                 return
/*──────────────────────────────────────────────────────────────────────────────────────*/
count: procedure; parse arg x,y;  cnt= 0;                            w= length(x)
         do j=1  for w;           p= pos( substr(x, j, 1), y);       if p==0  then iterate
         y= overlay(., y, p);     cnt= cnt + 1
         end   /*j*/
       return '     ' left("("format(cnt/w*100,,2)/1'%)', 9)     /*express as a percent.*/
/*──────────────────────────────────────────────────────────────────────────────────────*/
del:   procedure expose @. @abc L; parse arg y,r;      $=
         do j=1  for length(y);       z= space(left(y,j-1) ||           substr(y,j+1), 0)
         if @.z\==.  then $= $ z;     if r==1  then call recur
         end     /*j*/;               return space($)
/*──────────────────────────────────────────────────────────────────────────────────────*/
ins:   procedure expose @. @abc L; parse arg y,r;      $=
         do j=1  for length(y)
           do k=1  for L;   z= space(left(y,j-1) || substr(@abc,k,1) || substr(y,j),   0)
           if @.z\==.  then $= $ z;   if r==1  then call recur
           end   /*k*/
         end     /*j*/;               return space($)
/*──────────────────────────────────────────────────────────────────────────────────────*/
sub:   procedure expose @. @abc L; parse arg y,r;      $=
         do j=1  for length(y)
           do k=1  for L;   z= space(left(y,j-1) || substr(@abc,k,1) || substr(y,j+1), 0)
           if @.z\==.  then $= $ z;   if r==1  then call recur
           end   /*k*/
         end     /*j*/;               return space($)
output   when using the default inputs:
                                Reading the file:  UNIXDICT.TXT
    number of  records (words) in the dictionary:     25,104
    number of ill─formed words in the dictionary:        126
    number of  duplicate words in the dictionary:          0
    number of acceptable words in the dictionary:     24,978
    the  "word"  to be used for  text completion:  complition

6  similar words found:
               (88.89%)  coalition
               (90%)     completion
               (81.82%)  competition
               (90.91%)  compilation
               (81.82%)  composition
               (80%)     complexion

The input file is the same dictionary that the Java entry used.

output   when using the inputs of:     ,   GitHub.dict
                                Reading the file:  GitHub.dict
    number of  records (words) in the dictionary:    466,551
    number of ill─formed words in the dictionary:     50,254
    number of  duplicate words in the dictionary:          0
    number of acceptable words in the dictionary:    416,297
    the  "word"  to be used for  text completion:  complition

11  similar words found:
               (88.89%)  coalition
               (90%)     completion
               (81.82%)  commolition
               (81.82%)  comparition
               (81.82%)  competition
               (90.91%)  compilation
               (81.82%)  composition
               (81.82%)  complection
               (83.33%)  complication
               (80%)     compaction
               (80%)     complexion
output   when using the inputs of:     ,   my.dict

This is my personal dictionary.

                                Reading the file:  my.dict
    number of  records (words) in the dictionary:    947,364
    number of ill─formed words in the dictionary:    192,683
    number of  duplicate words in the dictionary:          6
    number of acceptable words in the dictionary:    754,675
    the  "word"  to be used for  text completion:  complition

12  similar words found:
               (88.89%)  coalition
               (90%)     completion
               (81.82%)  commolition
               (81.82%)  comparition
               (81.82%)  competition
               (90.91%)  compilation
               (90.91%)  compiletion
               (81.82%)  composition
               (81.82%)  complection
               (83.33%)  complication
               (80%)     compaction
               (80%)     complexion

V (Vlang)

Translation of: Go
import os

fn levenshtein(s string, t string) int {
    mut d := [][]int{len:s.len+1, init: []int{len:t.len+1}}
    for i,_ in d {
        d[i][0] = i
    }
    for j in d[0] {
        d[0][j] = j
    }
    for j := 1; j <= t.len; j++ {
        for i := 1; i <= s.len; i++ {
            if s[i-1] == t[j-1] {
                d[i][j] = d[i-1][j-1]
            } else {
                mut min := d[i-1][j]
                if d[i][j-1] < min {
                    min = d[i][j-1]
                }
                if d[i-1][j-1] < min {
                    min = d[i-1][j-1]
                }
                d[i][j] = min + 1
            }
        }
 
    }
    return d[s.len][t.len]
}
fn main() {
    search := "complition"
    filename := "unixdict.txt"
    words := os.read_lines(filename) or { panic('FAILED to read file: $filename')}
    mut lev := [][]string{len:4}
    for word in words {
        s := word
        ld := levenshtein(search, s)
        if ld < 4 {
            lev[ld] << s
        }
    }
    println("Input word: $search\n")
    for i in 1..4 {
        length := f64(search.len)
        similarity := (length - f64(i)) * 100 / length
        println("Words which are ${similarity:4.1f}% similar:", )
        println(lev[i])
        println('')
    }
}}
Output:
Input word: complition

Words which are 90.0% similar:
['completion', 'incompletion']

Words which are 80.0% similar:
['coalition', 'competition', 'compilation', 'complexion', 'composition', 'decomposition']

Words which are 70.0% similar:
['abolition', 'cognition', 'collision', 'combustion', 'commotion', 'companion', 'compassion', 'complain', 'complicity', 'compton', 'compulsion', 'compunction', 'computation', 'condition', 'contrition', 'demolition', 'locomotion', 'postcondition', 'volition']

Wren

Library: Wren-fmt

This uses 'unixdict' and the Levenshtein distance algorithm to test for similarity.

import "io" for File
import "./fmt" for Fmt

var levenshtein = Fn.new { |s, t|
    var ls = s.count
    var lt = t.count
    var d = List.filled(ls + 1, null)
    for (i in 0..ls) {
        d[i] = List.filled(lt + 1, 0)
        d[i][0] = i
    }
    for (j in 0..lt) d[0][j] = j
    for (j in 1..lt) {
        for (i in 1..ls) {
            if (s[i-1] == t[j-1]) {
                d[i][j] = d[i-1][j-1]
            } else {
                var min = d[i-1][j]
                if (d[i][j-1] < min) min = d[i][j-1]
                if (d[i-1][j-1] < min) min = d[i-1][j-1]
                d[i][j] = min + 1
            }
        }
    }
    return d[-1][-1]
}

var search = "complition"
var words = File.read("unixdict.txt").split("\n").map { |w| w.trim() }.where { |w| w != "" }
var lev = [[], [], [], []]
var ld
for (word in words) {
    if ((ld = levenshtein.call(search, word)) < 4) {
        lev[ld].add(word)
    }
}

System.print("Input word: %(search)\n")
for (i in 1..3) {
    var count = search.count
    var similarity = (count - i) * 100 / count
    Fmt.print("Words which are $4.1f\% similar:", similarity)
    System.print(lev[i])
    System.print()
}
Output:
Words which are 90.0% similar:
[completion]

Words which are 80.0% similar:
[coalition, competition, compilation, complexion, composition]

Words which are 70.0% similar:
[cognition, collision, combustion, commotion, companion, compassion, complain, complicity, compton, compulsion, compunction, computation, condition, contrition, demolition, incompletion, volition]