Text completion: Difference between revisions

m
m (Improved task description)
m (→‎{{header|Wren}}: Minor tidy)
 
(44 intermediate revisions by 17 users not shown)
Line 7:
;Resources:
[https://github.com/dwyl/english-words Github Repo]<br>
[https://raw.githubusercontent.com/dwyl/english-words/master/words.txt Raw Text, Save as .txt file]<br>
[[wp:Hamming_distance|Hamming Distance]]<br>
 
[[wp:Jaro%E2%80%93Winkler_distance|Jaro-Winkler Distance]]<br>
[https://dev.to/lefebvre/the-soundex-algorithm-5el1 SoundEx Algorithm]<br>
[[wp:Soundex|SoundEx Algorithm Wiki]]<br>
[http://www.catalysoft.com/articles/StrikeAMatch.html Dice's Coefficient]<br>
[[wp:S%C3%B8rensen%E2%80%93Dice_coefficient|Dice Coefficient Wiki]]
;Possible Output:
<pre>
Line 22 ⟶ 27:
;Extension
# How can you make the accuracy of your program higher?
<br><br>
 
=={{header|C++}}==
{{trans|Julia}}
<syntaxhighlight lang="cpp">#include <algorithm>
#include <fstream>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
 
// See https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows
int levenshtein_distance(const std::string& str1, const std::string& str2) {
size_t m = str1.size(), n = str2.size();
std::vector<int> cost(n + 1);
std::iota(cost.begin(), cost.end(), 0);
for (size_t i = 0; i < m; ++i) {
cost[0] = i + 1;
int prev = i;
for (size_t j = 0; j < n; ++j) {
int c = (str1[i] == str2[j]) ? prev
: 1 + std::min(std::min(cost[j + 1], cost[j]), prev);
prev = cost[j + 1];
cost[j + 1] = c;
}
}
return cost[n];
}
 
template <typename T>
void print_vector(const std::vector<T>& vec) {
auto i = vec.begin();
if (i == vec.end())
return;
std::cout << *i++;
for (; i != vec.end(); ++i)
std::cout << ", " << *i;
}
 
int main(int argc, char** argv) {
if (argc != 3) {
std::cerr << "usage: " << argv[0] << " dictionary word\n";
return EXIT_FAILURE;
}
std::ifstream in(argv[1]);
if (!in) {
std::cerr << "Cannot open file " << argv[1] << '\n';
return EXIT_FAILURE;
}
std::string word(argv[2]);
if (word.empty()) {
std::cerr << "Word must not be empty\n";
return EXIT_FAILURE;
}
constexpr size_t max_dist = 4;
std::vector<std::string> matches[max_dist + 1];
std::string match;
while (getline(in, match)) {
int distance = levenshtein_distance(word, match);
if (distance <= max_dist)
matches[distance].push_back(match);
}
for (size_t dist = 0; dist <= max_dist; ++dist) {
if (matches[dist].empty())
continue;
std::cout << "Words at Levenshtein distance of " << dist
<< " (" << 100 - (100 * dist)/word.size()
<< "% similarity) from '" << word << "':\n";
print_vector(matches[dist]);
std::cout << "\n\n";
}
return EXIT_SUCCESS;
}</syntaxhighlight>
 
{{out}}
Using the same dictionary as the Julia example:
<pre>
Words at Levenshtein distance of 1 (90% similarity) from 'complition':
completion
 
Words at Levenshtein distance of 2 (80% similarity) from 'complition':
coalition, competition, compilation, composition
 
Words at Levenshtein distance of 3 (70% similarity) from 'complition':
companion, competitions, completing, complications, computation, condition
 
Words at Levenshtein distance of 4 (60% similarity) from 'complition':
collection, combination, commission, comparison, compensation, competing, competitive, complaint, complete, completed, completely, complexity, compliance, compliant, compression, computing, conclusion, conditions, connection, convention, conviction, cooperation, corporation, correction, correlation, corruption, nomination, opinion, opposition, option, pollution, population, position, simulation, solution
 
</pre>
 
=={{header|Delphi}}==
{{libheader| System.SysUtils}}
{{libheader| System.Classes}}
{{libheader| System.Math}}
{{libheader| System.Generics.Collections}}
{{Trans|Wren}}
Using '''unixdict.txt'''.
<syntaxhighlight lang="delphi">
program Text_Completion;
 
{$APPTYPE CONSOLE}
 
{$R *.res}
 
uses
System.SysUtils,
System.Classes,
System.Math,
System.Generics.Collections;
 
type
TSujestions = TDictionary<Integer, string>;
 
function Levenshtein(s, t: string): integer;
var
d: array of array of integer;
i, j, cost: integer;
begin
SetLength(d, Length(s) + 1);
for i := Low(d) to High(d) do
begin
SetLength(d[i], Length(t) + 1);
end;
 
for i := Low(d) to High(d) do
begin
d[i, 0] := i;
for j := Low(d[i]) to High(d[i]) do
begin
d[0, j] := j;
end;
end;
 
for i := Low(d) + 1 to High(d) do
begin
for j := Low(d[i]) + 1 to High(d[i]) do
begin
if s[i] = t[j] then
begin
cost := 0;
end
else
begin
cost := 1;
end;
 
d[i, j] := Min(Min(d[i - 1, j] + 1, //deletion
d[i, j - 1] + 1), //insertion
d[i - 1, j - 1] + cost //substitution
);
end;
end;
Result := d[Length(s), Length(t)];
end;
 
function FindSujestions(Search: string; dict: TStringList): TSujestions;
var
I, ld: Integer;
w: string;
begin
Result := TSujestions.Create;
for I := 0 to 3 do
Result.Add(I, '');
 
for I := 0 to dict.Count - 1 do
begin
w := dict[I];
ld := Levenshtein(Search, w);
if ld < 4 then
Result[ld] := Result[ld] + ' ' + w;
end;
end;
 
function Similarity(Search: string; Distance: Integer): Double;
var
ALength: Double;
begin
ALength := Search.Length;
Result := (ALength - Distance) * 100 / ALength;
end;
 
var
dict: TStringList;
Search: string;
i: Integer;
Sujestions: TSujestions;
 
begin
dict := TStringList.Create;
dict.LoadFromFile('unixdict.txt');
Search := 'complition';
 
Sujestions := FindSujestions(Search, dict);
 
Writeln('Input word: '#10, Search);
 
for i := 1 to 3 do
begin
Writeln(Format('Words which are %4.1f%% similar:', [Similarity(Search, i)]));
Writeln(sujestions[i], #10);
end;
 
Sujestions.Free;
dict.Free;
 
Readln;
end.
 
</syntaxhighlight>
 
{{out}}
<pre>
Input word:
complition
Words which are 90,0% similar:
completion
 
Words which are 80,0% similar:
coalition competition compilation complexion composition
 
Words which are 70,0% similar:
cognition collision combustion commotion companion compassion complain complicity compton compulsion compunction computation condition contrition demolition incompletion volition
</pre>
 
=={{header|Factor}}==
{{trans|Julia}}
{{works with|Factor|0.99 2020-07-03}}
<syntaxhighlight lang="factor">USING: formatting fry http.client io kernel lcs literals math
math.ranges namespaces prettyprint.config sequences splitting ;
 
CONSTANT: words $[
"https://www.mit.edu/~ecprice/wordlist.10000" http-get nip
"\n" split harvest
]
CONSTANT: word "complition"
 
: lev-dist-of ( str n -- n )
[ words ] 2dip '[ _ levenshtein _ = ] filter ;
 
: similarity ( n -- x ) word length / 100 * 100 swap - ;
 
10000 margin set ! Prevent prettyprinter from wrapping sequences
4 [1,b] [
dup [ similarity ] [ drop word ] [ word swap lev-dist-of ] tri
"Words at Levenshtein distance of %d (%.1f%% similarity) from %u:\n%u\n\n" printf
] each</syntaxhighlight>
{{out}}
<pre>
Words at Levenshtein distance of 1 (90.0% similarity) from "complition":
{ "completion" }
 
Words at Levenshtein distance of 2 (80.0% similarity) from "complition":
{ "coalition" "competition" "compilation" "composition" }
 
Words at Levenshtein distance of 3 (70.0% similarity) from "complition":
{ "companion" "competitions" "completing" "complications" "computation" "condition" }
 
Words at Levenshtein distance of 4 (60.0% similarity) from "complition":
{ "collection" "combination" "commission" "comparison" "compensation" "competing" "competitive" "complaint" "complete" "completed" "completely" "complexity" "compliance" "compliant" "compression" "computing" "conclusion" "conditions" "connection" "convention" "conviction" "cooperation" "corporation" "correction" "correlation" "corruption" "nomination" "opinion" "opposition" "option" "pollution" "population" "position" "simulation" "solution" }
</pre>
 
 
=={{header|FreeBASIC}}==
This uses '[http://wiki.puzzlers.org/pub/wordlists/unixdict.txt unixdict]' and the [https://www.rosettacode.org/wiki/Levenshtein_distance#FreeBASIC Levenshtein distance] algorithm to test for similarity.
<syntaxhighlight lang="freebasic">#define MIN(a, b) iif((a) < (b), (a), (b))
 
Dim As String palabra = "complition"
Dim As Integer longitud = Len(palabra)
Open "unixdict.txt" For Input As #1
Dim As String cStr, wordList()
Dim As Integer n, p = 0, posic = 0
Do While Not Eof(1)
Line Input #1, cStr
p += 1
If Len(cStr) > 8 Then
posic += 1
Redim Preserve wordList(posic)
wordList(posic) = cStr
End If
Loop
Close #1
 
Dim As String lev(4)
For n = Lbound(wordList) To Ubound(wordList)
Dim As Integer ld = levenshtein(palabra, wordList(n))
If ld < 4 Then lev(ld) &= wordList(n) & " "
Next n
 
Print "Input word: "; palabra
For n = 1 To 3
Dim As Integer semejanza = (longitud - n) * 100 / longitud
Print Using !"\nWords at Levenshtein distance of # (##% similarity):"; n; semejanza
Print lev(n); " ";
Print
Next n
Sleep</syntaxhighlight>
{{out}}
<pre>Input word: complition
 
Words at Levenshtein distance of 1 (90% similarity):
completion
 
Words at Levenshtein distance of 2 (80% similarity):
coalition competition compilation complexion composition
 
Words at Levenshtein distance of 3 (70% similarity):
cognition collision combustion commotion companion compassion complicity compulsion compunction computation condition contrition demolition incompletion</pre>
 
 
=={{header|Go}}==
{{trans|Wren}}
<syntaxhighlight lang="go">package main
 
import (
"bytes"
"fmt"
"io/ioutil"
"log"
)
 
func levenshtein(s, t string) int {
d := make([][]int, len(s)+1)
for i := range d {
d[i] = make([]int, len(t)+1)
}
for i := range d {
d[i][0] = i
}
for j := range d[0] {
d[0][j] = j
}
for j := 1; j <= len(t); j++ {
for i := 1; i <= len(s); i++ {
if s[i-1] == t[j-1] {
d[i][j] = d[i-1][j-1]
} else {
min := d[i-1][j]
if d[i][j-1] < min {
min = d[i][j-1]
}
if d[i-1][j-1] < min {
min = d[i-1][j-1]
}
d[i][j] = min + 1
}
}
 
}
return d[len(s)][len(t)]
}
 
func main() {
search := "complition"
b, err := ioutil.ReadFile("unixdict.txt")
if err != nil {
log.Fatal("Error reading file")
}
words := bytes.Fields(b)
var lev [4][]string
for _, word := range words {
s := string(word)
ld := levenshtein(search, s)
if ld < 4 {
lev[ld] = append(lev[ld], s)
}
}
fmt.Printf("Input word: %s\n\n", search)
for i := 1; i < 4; i++ {
length := float64(len(search))
similarity := (length - float64(i)) * 100 / length
fmt.Printf("Words which are %4.1f%% similar:\n", similarity)
fmt.Println(lev[i])
fmt.Println()
}
}</syntaxhighlight>
 
{{out}}
<pre>
Input word: complition
 
Words which are 90.0% similar:
[completion]
 
Words which are 80.0% similar:
[coalition competition compilation complexion composition]
 
Words which are 70.0% similar:
[cognition collision combustion commotion companion compassion complain complicity compton compulsion compunction computation condition contrition demolition incompletion volition]
</pre>
 
=={{header|Java}}==
[https://github.com/dwyl/english-words Github Repo Uses dependencies given].
<syntaxhighlight lang="java">
<lang Java>
import java.io.File;
import java.io.IOException;
Line 89 ⟶ 483:
}
}
</syntaxhighlight>
</lang>
;Output
<pre>
Line 100 ⟶ 494:
 
Process finished with exit code 0
</pre>
 
=={{header|jq}}==
{{works with|jq}}
This entry uses unixdict.txt, the dictionary used frequently on other RosettaCode pages.
 
Since the title of this particular page is "Text completion", the
solution offered here includes both a straightforward lookup of the dictionary to
check for possible completions, and a check for equally good matches
based on the Levenshtein distance.
 
The Levenshtein module used is the collection of function definitions on
the RC page https://rosettacode.org/wiki/Levenshtein_distance
and is thus not repeated here.
 
The "percentage" reported is computed as described in the comments.
 
The "debug" statements for showing the number of words under consideration for best Levenshtein matches is retained.
<syntaxhighlight lang="jq">
include "levenshtein-distance" {search: "."}; # https://rosettacode.org/wiki/Levenshtein_distance#jq
 
# input: a dictionary
# output an array of {word, d, p} objects showing the best matches as measured
# by the Levenshtein distance, d; p is an indicator of similarity expressed as
# 100 * (max(0, ($word|length) - d) / ($word|length) ) rounded to the nearest integer
def closest($word):
if .[$word] then {$word, d:0, percentage: 100}
else
"Levenshtein-closest words to \($word):",
(($word|length) as $length
| (keys_unsorted | map(select(length | (. > $length-3) and (. < $length + 3)))) as $candidates
| $candidates
| (length|debug) as $debug
| map( {word: ., d: levenshteinDistance($word; .)} )
| minima(.d)
| map( .p = (100 * ([0, ($length - .d) ] | max / $length) | round )))
end ;
 
# Input: a dictionary
# Output: an array of possible completions of $word
def completion($word):
"Possible completions of \($word):",
(keys_unsorted | map(select(startswith(word))));
 
def task:
INDEX(inputs; .) # the dictionary
| completion("compli"),
closest("complition"),
closest("compxxxxtion")
;
 
task
</syntaxhighlight>
{{out}}
Invocation: jq -nR -f text-completion.jq unixdict.txt
 
This assumes levenshtein-distance.jq is in the pwd.
See the comments above.
<pre>
Possible completions of compli:
[
"compliant",
"complicate",
"complicity",
"compliment",
"complimentary",
"compline"
]
Levenshtein-closest words to complition:
["DEBUG:",10396]
[
{
"word": "completion",
"d": 1,
"p": 90
}
]
"Levenshtein-closest words to compxxxxtion:"
["DEBUG:",4082]
[
{
"word": "competition",
"d": 4,
"p": 67
},
{
"word": "compilation",
"d": 4,
"p": 67
},
{
"word": "completion",
"d": 4,
"p": 67
},
{
"word": "complexion",
"d": 4,
"p": 67
},
{
"word": "composition",
"d": 4,
"p": 67
},
{
"word": "compunction",
"d": 4,
"p": 67
},
{
"word": "computation",
"d": 4,
"p": 67
}
]
</pre>
 
=={{header|Julia}}==
See https://en.wikipedia.org/wiki/Levenshtein_distance, the number of one character edits to obtain one word from another.
<syntaxhighlight lang="julia">using StringDistances
 
const fname = download("https://www.mit.edu/~ecprice/wordlist.10000", "wordlist10000.txt")
const words = read(fname, String) |> split .|> strip .|> string
const wrd = "complition"
 
levdistof(n, string) = filter(w -> Levenshtein()(string, w) == n, words)
 
for n in 1:4
println("Words at Levenshtein distance of $n (",
100 - Int(round(100 * n / length(wrd))), "% similarity) from \"$wrd\": \n",
levdistof(n, wrd), "\n")
end
</syntaxhighlight>{{out}}
<pre>
Words at Levenshtein distance of 1 (90% similarity) from "complition":
["completion"]
 
Words at Levenshtein distance of 2 (80% similarity) from "complition":
["coalition", "competition", "compilation", "composition"]
 
Words at Levenshtein distance of 3 (70% similarity) from "complition":
["companion", "competitions", "completing", "complications", "computation", "condition"]
 
Words at Levenshtein distance of 4 (60% similarity) from "complition":
["collection", "combination", "commission", "comparison", "compensation", "competing", "competitive", "complaint", "complete", "completed", "completely", "complexity", "compliance", "compliant", "compression", "computing", "conclusion", "conditions", "connection", "convention", "conviction", "cooperation", "corporation", "correction", "correlation", "corruption", "nomination", "opinion", "opposition", "option", "pollution", "population", "position", "simulation", "solution"]
</pre>
 
=={{header|Mathematica}}==
<syntaxhighlight lang="mathematica">Module[
{word = "complition"},
Map[
{#, PercentForm@N[1 - EditDistance[#, word]/StringLength@word]} &,
SpellingCorrectionList[word]
]
] // Grid</syntaxhighlight>
{{out}}
<pre>completion 90%
complication 80%
competition 80%
composition 80%
compilation 80%
compulsion 70%
contemplation 60%</pre>
 
=={{header|Nim}}==
{{trans|Go}}
We use the function <code>editDistance</code> from the <code>std/editdistance</code> module to get the Levenshtein distance (computed by considering in Unicode codepoints).
 
<syntaxhighlight lang="nim">import std/editdistance, sequtils, strformat, strutils
 
let search = "complition"
let words = toSeq("unixdict.txt".lines)
var lev: array[4, seq[string]]
for word in words:
let ld = editDistance(search, word)
if ld < 4:
lev[ld].add word
echo &"Input word: {search}\n"
let length = float(search.len)
for i in 0..3:
if lev[i].len == 0: continue # No result.
let similarity = (length - float(i)) * 100 / length
echo &"Words which are {similarity:4.1f}% similar:"
echo lev[i].join(" ")
echo()</syntaxhighlight>
 
{{out}}
<pre>Input word: complition
 
Words which are 90.0% similar:
completion
 
Words which are 80.0% similar:
coalition competition compilation complexion composition
 
Words which are 70.0% similar:
cognition collision combustion commotion companion compassion complain complicity compton compulsion compunction computation condition contrition demolition incompletion volition</pre>
 
=={{header|Perl}}==
Inspired by Raku Sorenson-Dice implementation (doesn't handle Unicode, but module <code>Text::Dice</code> can).
<syntaxhighlight lang="perl">use strict;
use warnings;
use feature 'say';
use Path::Tiny;
use List::Util <uniq head>;
 
# sub bi_gram { (lc shift) =~ /(?<=\K.)./g } ## doesn't work in recent versions of Perl
 
sub bi_gram {
my $line = lc shift;
uniq map { substr $line,$_,2 } 0 .. length($line)-2;
}
 
sub score {
my($phrase, $word) = @_;
my %count;
 
my @match = bi_gram $phrase;
$count{$_}++ for @match, @$word;
2 * (grep { $count{$_} > 1 } keys %count) / (@match + @$word);
}
 
sub sorenson {
my($dict,$word,$cutoff) = @_; $cutoff //= 0.55;
my(%matches,$s);
 
($s = score($word, $$dict{$_})) > $cutoff and $matches{$_} = $s for keys %$dict;
%matches;
}
 
my %dict = map { $_ => [ bi_gram($_) ] } path('unixdict.txt')->slurp =~ /.{3,}/gm;
 
for my $word (<complition inconsqual>) {
my(%scored,@ranked);
 
%scored = sorenson(\%dict,$word);
push @ranked, sprintf "%.3f $_", $scored{$_} for sort { $scored{$b} <=> $scored{$a} } keys %scored;
say "\n$word:\n" . join("\n", head 10, @ranked);
}</syntaxhighlight>
{{out}}
<pre>complition:
0.778 completion
0.737 competition
0.737 composition
0.706 coalition
0.700 incompletion
0.667 complexion
0.667 complicity
0.667 decomposition
0.632 compilation
0.632 compunction
 
inconsqual:
0.609 inconsequential
0.588 continual
0.571 squall
0.556 conceptual
0.556 continuant
0.556 inconstant</pre>
 
=={{header|Phix}}==
uses levenshtein() from [[Levenshtein_distance#Phix]] (reproduced below for your convenience) and the standard unix_dict().
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">function</span> <span style="color: #000000;">levenshtein</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #000000;">m</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #000000;">n</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">d</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">),</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">n</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">m</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">j</span>
<span style="color: #000080;font-style:italic;">-- next := min({ prev +substitution, deletion, or insertion }):</span>
<span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">({</span><span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]+(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">b</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]),</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">[$][$]</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #7060A8;">requires</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"1.0.1"</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (this needs apply() to be properly transpiled!)</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">word</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"complition"</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">unix_dict</span><span style="color: #0000FF;">(),</span>
<span style="color: #000000;">leven</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">apply</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">,</span><span style="color: #000000;">levenshtein</span><span style="color: #0000FF;">,</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">ln</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000080;font-style:italic;">/*w*/</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">return</span> <span style="color: #000000;">leven</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">n</span> <span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">4</span> <span style="color: #008080;">do</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Words at Levenshtein distance of %d (%g%% similarity) from \"%s\": \n%s\n"</span><span style="color: #0000FF;">,</span>
<span style="color: #0000FF;">{</span><span style="color: #000000;">n</span><span style="color: #0000FF;">,</span><span style="color: #000000;">100</span><span style="color: #0000FF;">-</span><span style="color: #7060A8;">round</span><span style="color: #0000FF;">(</span><span style="color: #000000;">100</span><span style="color: #0000FF;">*</span><span style="color: #000000;">n</span><span style="color: #0000FF;">/</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)),</span><span style="color: #000000;">word</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">join_by</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">filter</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ln</span><span style="color: #0000FF;">,</span><span style="color: #000000;">n</span><span style="color: #0000FF;">),</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">6</span><span style="color: #0000FF;">)})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</syntaxhighlight>-->
{{out}}
(matches Delphi/Go/Wren)
<pre>
Words at Levenshtein distance of 1 (90% similarity) from "complition":
completion
 
Words at Levenshtein distance of 2 (80% similarity) from "complition":
coalition competition compilation complexion composition
 
Words at Levenshtein distance of 3 (70% similarity) from "complition":
cognition collision combustion commotion companion compassion
complain complicity compton compulsion compunction computation
condition contrition demolition incompletion volition
 
Words at Levenshtein distance of 4 (60% similarity) from "complition":
abolition admonition ambition ammunition campion caption
collusion combination commission communion compactify comparison
compatriot competitive competitor complaint complete compliant
complicate compliment compline compositor compression conception
concision conclusion concretion congestion consolation contention
convention convolution corruption cotillion decomposition depletion
dominion gumption implosion imposition munition oblivion
opinion opposition option pollution position simpleton
soliton solution sorption templeton temptation tomlinson
</pre>
 
=={{header|Raku}}==
(formerly Perl 6)
===Hamming distance===
{{Trans|Java}}
<syntaxhighlight lang="raku" line>sub MAIN ( Str $user_word = 'complition', Str $filename = 'words.txt' ) {
my @s1 = $user_word.comb;
my @listed = gather for $filename.IO.lines -> $line {
my @s2 = $line.comb;
 
my $correct = 100 * sum( @s1 Zeq @s2)
/ max(+@s1, +@s2);
 
my $score = ( $correct >= 100 and @s1[0] eq @s2[0] ) ?? 100
!! ( $correct >= 80 and @s1[0] eq @s2[0] ) ?? $correct
!! ( $line.contains($user_word) and @s1 * 2 > @s2 ) ?? 80
!! 0;
take [$score, $line] if $score;
}
 
@listed = @listed[$_] with @listed.first: :k, { .[0] == 100 };
 
say "{.[0].fmt('%.2f')}% {.[1]}" for @listed;
}</syntaxhighlight>
{{out}}
<pre>80.00% compaction
90.00% completion
81.82% completions
80.00% complexion
</pre>
 
===Sorenson-Dice===
Sorenson-Dice tends to return relatively low percentages even for small differences, especially for short words. We need to "lower the bar" to get any results at all. Different variations of the algorithm do or don't regularize case. This one does, though it doesn't much matter for the tested words.
 
Using unixdict.txt from www.puzzlers.org
 
<syntaxhighlight lang="raku" line>sub sorenson ($phrase, %hash) {
my $match = bigram $phrase;
%hash.race.map: { [(2 * ($match ∩ .value) / ($match + .value)).round(.001), .key] }
}
 
sub bigram (\these) { Bag.new( flat these.fc.words.map: { .comb.rotor(2 => -1)».join } ) }
 
# Load the dictionary
my %hash = './unixdict.txt'.IO.slurp.words.race.map: { $_ => .&bigram };
 
# Testing
for <complition inconsqual Sørenson> -> $w {
say "\n$w:";
.say for sorenson($w, %hash).grep(*.[0] >= .55).sort({-.[0],~.[1]}).head(10);
}</syntaxhighlight>
 
{{out}}
<pre>complition:
[0.778 completion]
[0.737 competition]
[0.737 composition]
[0.706 coalition]
[0.7 incompletion]
[0.667 complexion]
[0.667 complicity]
[0.667 decomposition]
[0.632 compilation]
[0.632 compunction]
 
inconsqual:
[0.609 inconsequential]
[0.588 continual]
[0.571 squall]
[0.556 conceptual]
[0.556 inconstant]
 
Sørenson:
[0.714 sorenson]
[0.667 benson]
[0.615 swenson]
[0.571 evensong]
[0.571 sorensen]</pre>
 
=={{header|REXX}}==
The method used to find a word &nbsp; (that is contained in a specified
dictionary) &nbsp; closest to a specified string of letters is:
 
Perform any of &nbsp; (three methods of actions):
::* &nbsp; &nbsp;''delete''&nbsp; any letter
::* &nbsp; &nbsp;''insert''&nbsp; any letter
:::* &nbsp; (any letter is inserted at any location)
:::* &nbsp; (this includes prefixing any letter)
:::* &nbsp; (this includes suffixing any letter)
::* &nbsp; &nbsp;''substitute''&nbsp; any letter at any location
 
 
<small>(Only lowercase letters were used for the [above] three methods).</small>
 
Perform any of the above along with any combination of any method.
 
This will, in effect, delete/insert/substitute any two letters:
::* &nbsp; &nbsp;''delete''&nbsp; + &nbsp;''delete''&nbsp;
::* &nbsp; &nbsp;''delete''&nbsp; + &nbsp;''insert''&nbsp;
::* &nbsp; &nbsp;''delete''&nbsp; + &nbsp;''substitute''&nbsp;
::* &nbsp; &nbsp;''insert''&nbsp; + &nbsp;''delete''&nbsp;
::* &nbsp; &nbsp;''insert''&nbsp; + &nbsp;''insert''&nbsp;
::* &nbsp; &nbsp;''insert''&nbsp; + &nbsp;''substitute''&nbsp;
::* &nbsp; &nbsp;''substitute''&nbsp; + &nbsp;''delete''&nbsp;
::* &nbsp; &nbsp;''substitute''&nbsp; + &nbsp;''insert''&nbsp;
::* &nbsp; &nbsp;''substitute''&nbsp; + &nbsp;''substitute''&nbsp;
 
No attempt was made to change (by any method) three letters or more.
<syntaxhighlight lang="rexx">/*REXX pgm finds (dictionary) words which can be found in a specified word wheel (grid).*/
parse arg what iFID . /*obtain optional arguments from the CL*/
if what==''|what=="," then what= 'complition' /*Not specified? Then use the default.*/
if iFID==''|iFID=="," then iFID= 'UNIXDICT.TXT' /* " " " " " " */
@abc= 'abcdefghijklmnopqrstuvwxyz' /*(Latin) lowercase letters to be used.*/
L= length(@abc) /* " " " the Latin letters. */
wrds= 0 /*# words that are in the dictionary. */
dups= 0 /*" " " " duplicates. */
ills= 0 /*" " " contain "not" letters.*/
say ' Reading the file: ' iFID /*align the text. */
@.= . /*non─duplicated dictionary words. */
$= /*the list of dictionary words in grid.*/
do recs=0 while lines(iFID)\==0 /*process all words in the dictionary. */
x= space( linein(iFID), 0) /*elide any blanks in the dictinary. */
if @.x\==. then do; dups= dups+1; iterate; end /*is this a duplicate? */
if \datatype(x,'M') then do; ills= ills+1; iterate; end /*has word non─letters? */
@.x= /*signify that X is a dictionary word*/
wrds= wrds + 1 /*bump the number of "good" dist. words*/
end /*recs*/
a=
say ' number of records (words) in the dictionary: ' right( commas(recs), 9)
say ' number of ill─formed words in the dictionary: ' right( commas(ills), 9)
say ' number of duplicate words in the dictionary: ' right( commas(dups), 9)
say ' number of acceptable words in the dictionary: ' right( commas(wrds), 9)
say ' the "word" to be used for text completion: ' what
say
call del what; a= a result; call del what,1; a= a result
call ins what; a= a result; call ins what,1; a= a result
call sub what; a= a result; call sub what,1; a= a result
call prune
#= words($)
say commas(#) ' similar words found:'
do j=1 for #; _= word($, j); say right( count(_,what), 24) _
end /*j*/
exit # /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
commas: parse arg _; do ?=length(_)-3 to 1 by -3; _= insert(',', _, ?); end; return _
prune: do k=1 for words(a); _= word(a,k); if wordpos(_,$)==0 then $= $ _; end; return
recur: $= $ del(z); $= $ ins(z); $= $ sub(z); return
/*──────────────────────────────────────────────────────────────────────────────────────*/
count: procedure; parse arg x,y; cnt= 0; w= length(x)
do j=1 for w; p= pos( substr(x, j, 1), y); if p==0 then iterate
y= overlay(., y, p); cnt= cnt + 1
end /*j*/
return ' ' left("("format(cnt/w*100,,2)/1'%)', 9) /*express as a percent.*/
/*──────────────────────────────────────────────────────────────────────────────────────*/
del: procedure expose @. @abc L; parse arg y,r; $=
do j=1 for length(y); z= space(left(y,j-1) || substr(y,j+1), 0)
if @.z\==. then $= $ z; if r==1 then call recur
end /*j*/; return space($)
/*──────────────────────────────────────────────────────────────────────────────────────*/
ins: procedure expose @. @abc L; parse arg y,r; $=
do j=1 for length(y)
do k=1 for L; z= space(left(y,j-1) || substr(@abc,k,1) || substr(y,j), 0)
if @.z\==. then $= $ z; if r==1 then call recur
end /*k*/
end /*j*/; return space($)
/*──────────────────────────────────────────────────────────────────────────────────────*/
sub: procedure expose @. @abc L; parse arg y,r; $=
do j=1 for length(y)
do k=1 for L; z= space(left(y,j-1) || substr(@abc,k,1) || substr(y,j+1), 0)
if @.z\==. then $= $ z; if r==1 then call recur
end /*k*/
end /*j*/; return space($)</syntaxhighlight>
{{out|output|text=&nbsp; when using the default inputs:}}
<pre>
Reading the file: UNIXDICT.TXT
number of records (words) in the dictionary: 25,104
number of ill─formed words in the dictionary: 126
number of duplicate words in the dictionary: 0
number of acceptable words in the dictionary: 24,978
the "word" to be used for text completion: complition
 
6 similar words found:
(88.89%) coalition
(90%) completion
(81.82%) competition
(90.91%) compilation
(81.82%) composition
(80%) complexion
</pre>
The input file is the same dictionary that the '''Java''' entry used.
 
{{out|output|text=&nbsp; when using the inputs of: &nbsp; &nbsp; <tt> , &nbsp; GitHub.dict </tt>}}
<pre>
Reading the file: GitHub.dict
number of records (words) in the dictionary: 466,551
number of ill─formed words in the dictionary: 50,254
number of duplicate words in the dictionary: 0
number of acceptable words in the dictionary: 416,297
the "word" to be used for text completion: complition
 
11 similar words found:
(88.89%) coalition
(90%) completion
(81.82%) commolition
(81.82%) comparition
(81.82%) competition
(90.91%) compilation
(81.82%) composition
(81.82%) complection
(83.33%) complication
(80%) compaction
(80%) complexion
</pre>
 
{{out|output|text=&nbsp; when using the inputs of: &nbsp; &nbsp; <tt> , &nbsp; my.dict </tt>}}
 
This is my personal dictionary.
<pre>
Reading the file: my.dict
number of records (words) in the dictionary: 947,364
number of ill─formed words in the dictionary: 192,683
number of duplicate words in the dictionary: 6
number of acceptable words in the dictionary: 754,675
the "word" to be used for text completion: complition
 
12 similar words found:
(88.89%) coalition
(90%) completion
(81.82%) commolition
(81.82%) comparition
(81.82%) competition
(90.91%) compilation
(90.91%) compiletion
(81.82%) composition
(81.82%) complection
(83.33%) complication
(80%) compaction
(80%) complexion
</pre>
 
=={{header|V (Vlang)}}==
{{trans|Go}}
<syntaxhighlight lang="v (vlang)">import os
 
fn levenshtein(s string, t string) int {
mut d := [][]int{len:s.len+1, init: []int{len:t.len+1}}
for i,_ in d {
d[i][0] = i
}
for j in d[0] {
d[0][j] = j
}
for j := 1; j <= t.len; j++ {
for i := 1; i <= s.len; i++ {
if s[i-1] == t[j-1] {
d[i][j] = d[i-1][j-1]
} else {
mut min := d[i-1][j]
if d[i][j-1] < min {
min = d[i][j-1]
}
if d[i-1][j-1] < min {
min = d[i-1][j-1]
}
d[i][j] = min + 1
}
}
}
return d[s.len][t.len]
}
fn main() {
search := "complition"
filename := "unixdict.txt"
words := os.read_lines(filename) or { panic('FAILED to read file: $filename')}
mut lev := [][]string{len:4}
for word in words {
s := word
ld := levenshtein(search, s)
if ld < 4 {
lev[ld] << s
}
}
println("Input word: $search\n")
for i in 1..4 {
length := f64(search.len)
similarity := (length - f64(i)) * 100 / length
println("Words which are ${similarity:4.1f}% similar:", )
println(lev[i])
println('')
}
}}</syntaxhighlight>
 
{{out}}
<pre>
Input word: complition
 
Words which are 90.0% similar:
['completion', 'incompletion']
 
Words which are 80.0% similar:
['coalition', 'competition', 'compilation', 'complexion', 'composition', 'decomposition']
 
Words which are 70.0% similar:
['abolition', 'cognition', 'collision', 'combustion', 'commotion', 'companion', 'compassion', 'complain', 'complicity', 'compton', 'compulsion', 'compunction', 'computation', 'condition', 'contrition', 'demolition', 'locomotion', 'postcondition', 'volition']
</pre>
 
=={{header|Wren}}==
{{libheader|Wren-fmt}}
This uses 'unixdict' and the Levenshtein distance algorithm to test for similarity.
<syntaxhighlight lang="wren">import "io" for File
import "./fmt" for Fmt
 
var levenshtein = Fn.new { |s, t|
var ls = s.count
var lt = t.count
var d = List.filled(ls + 1, null)
for (i in 0..ls) {
d[i] = List.filled(lt + 1, 0)
d[i][0] = i
}
for (j in 0..lt) d[0][j] = j
for (j in 1..lt) {
for (i in 1..ls) {
if (s[i-1] == t[j-1]) {
d[i][j] = d[i-1][j-1]
} else {
var min = d[i-1][j]
if (d[i][j-1] < min) min = d[i][j-1]
if (d[i-1][j-1] < min) min = d[i-1][j-1]
d[i][j] = min + 1
}
}
}
return d[-1][-1]
}
 
var search = "complition"
var words = File.read("unixdict.txt").split("\n").map { |w| w.trim() }.where { |w| w != "" }
var lev = [[], [], [], []]
var ld
for (word in words) {
if ((ld = levenshtein.call(search, word)) < 4) {
lev[ld].add(word)
}
}
 
System.print("Input word: %(search)\n")
for (i in 1..3) {
var count = search.count
var similarity = (count - i) * 100 / count
Fmt.print("Words which are $4.1f\% similar:", similarity)
System.print(lev[i])
System.print()
} </syntaxhighlight>
 
{{out}}
<pre>
Words which are 90.0% similar:
[completion]
 
Words which are 80.0% similar:
[coalition, competition, compilation, complexion, composition]
 
Words which are 70.0% similar:
[cognition, collision, combustion, commotion, companion, compassion, complain, complicity, compton, compulsion, compunction, computation, condition, contrition, demolition, incompletion, volition]
</pre>
9,476

edits