Text completion: Difference between revisions
m
→{{header|Wren}}: Minor tidy
Thundergnat (talk | contribs) m (Thundergnat moved page Text Completion to Text completion: Follow normal task title capitalization policy) |
m (→{{header|Wren}}: Minor tidy) |
||
(12 intermediate revisions by 9 users not shown) | |||
Line 8:
[https://github.com/dwyl/english-words Github Repo]<br>
[https://raw.githubusercontent.com/dwyl/english-words/master/words.txt Raw Text, Save as .txt file]<br>
[
[
[https://dev.to/lefebvre/the-soundex-algorithm-5el1 SoundEx Algorithm]<br>
[
[http://www.catalysoft.com/articles/StrikeAMatch.html Dice's Coefficient]<br>
[
;Possible Output:
<pre>
Line 31:
=={{header|C++}}==
{{trans|Julia}}
<
#include <fstream>
#include <iostream>
Line 99:
}
return EXIT_SUCCESS;
}</
{{out}}
Line 125:
{{Trans|Wren}}
Using '''unixdict.txt'''.
<syntaxhighlight lang="delphi">
program Text_Completion;
Line 236:
end.
</syntaxhighlight>
{{out}}
Line 255:
{{trans|Julia}}
{{works with|Factor|0.99 2020-07-03}}
<
math.ranges namespaces prettyprint.config sequences splitting ;
Line 273:
dup [ similarity ] [ drop word ] [ word swap lev-dist-of ] tri
"Words at Levenshtein distance of %d (%.1f%% similarity) from %u:\n%u\n\n" printf
] each</
{{out}}
<pre>
Line 288:
{ "collection" "combination" "commission" "comparison" "compensation" "competing" "competitive" "complaint" "complete" "completed" "completely" "complexity" "compliance" "compliant" "compression" "computing" "conclusion" "conditions" "connection" "convention" "conviction" "cooperation" "corporation" "correction" "correlation" "corruption" "nomination" "opinion" "opposition" "option" "pollution" "population" "position" "simulation" "solution" }
</pre>
=={{header|FreeBASIC}}==
This uses '[http://wiki.puzzlers.org/pub/wordlists/unixdict.txt unixdict]' and the [https://www.rosettacode.org/wiki/Levenshtein_distance#FreeBASIC Levenshtein distance] algorithm to test for similarity.
<syntaxhighlight lang="freebasic">#define MIN(a, b) iif((a) < (b), (a), (b))
Dim As String palabra = "complition"
Dim As Integer longitud = Len(palabra)
Open "unixdict.txt" For Input As #1
Dim As String cStr, wordList()
Dim As Integer n, p = 0, posic = 0
Do While Not Eof(1)
Line Input #1, cStr
p += 1
If Len(cStr) > 8 Then
posic += 1
Redim Preserve wordList(posic)
wordList(posic) = cStr
End If
Loop
Close #1
Dim As String lev(4)
For n = Lbound(wordList) To Ubound(wordList)
Dim As Integer ld = levenshtein(palabra, wordList(n))
If ld < 4 Then lev(ld) &= wordList(n) & " "
Next n
Print "Input word: "; palabra
For n = 1 To 3
Dim As Integer semejanza = (longitud - n) * 100 / longitud
Print Using !"\nWords at Levenshtein distance of # (##% similarity):"; n; semejanza
Print lev(n); " ";
Print
Next n
Sleep</syntaxhighlight>
{{out}}
<pre>Input word: complition
Words at Levenshtein distance of 1 (90% similarity):
completion
Words at Levenshtein distance of 2 (80% similarity):
coalition competition compilation complexion composition
Words at Levenshtein distance of 3 (70% similarity):
cognition collision combustion commotion companion compassion complicity compulsion compunction computation condition contrition demolition incompletion</pre>
=={{header|Go}}==
{{trans|Wren}}
<
import (
Line 354 ⟶ 402:
fmt.Println()
}
}</
{{out}}
Line 372 ⟶ 420:
=={{header|Java}}==
[https://github.com/dwyl/english-words Github Repo Uses dependencies given].
<syntaxhighlight lang="java">
import java.io.File;
import java.io.IOException;
Line 435 ⟶ 483:
}
}
</syntaxhighlight>
;Output
<pre>
Line 446 ⟶ 494:
Process finished with exit code 0
</pre>
=={{header|jq}}==
{{works with|jq}}
This entry uses unixdict.txt, the dictionary used frequently on other RosettaCode pages.
Since the title of this particular page is "Text completion", the
solution offered here includes both a straightforward lookup of the dictionary to
check for possible completions, and a check for equally good matches
based on the Levenshtein distance.
The Levenshtein module used is the collection of function definitions on
the RC page https://rosettacode.org/wiki/Levenshtein_distance
and is thus not repeated here.
The "percentage" reported is computed as described in the comments.
The "debug" statements for showing the number of words under consideration for best Levenshtein matches is retained.
<syntaxhighlight lang="jq">
include "levenshtein-distance" {search: "."}; # https://rosettacode.org/wiki/Levenshtein_distance#jq
# input: a dictionary
# output an array of {word, d, p} objects showing the best matches as measured
# by the Levenshtein distance, d; p is an indicator of similarity expressed as
# 100 * (max(0, ($word|length) - d) / ($word|length) ) rounded to the nearest integer
def closest($word):
if .[$word] then {$word, d:0, percentage: 100}
else
"Levenshtein-closest words to \($word):",
(($word|length) as $length
| (keys_unsorted | map(select(length | (. > $length-3) and (. < $length + 3)))) as $candidates
| $candidates
| (length|debug) as $debug
| map( {word: ., d: levenshteinDistance($word; .)} )
| minima(.d)
| map( .p = (100 * ([0, ($length - .d) ] | max / $length) | round )))
end ;
# Input: a dictionary
# Output: an array of possible completions of $word
def completion($word):
"Possible completions of \($word):",
(keys_unsorted | map(select(startswith(word))));
def task:
INDEX(inputs; .) # the dictionary
| completion("compli"),
closest("complition"),
closest("compxxxxtion")
;
task
</syntaxhighlight>
{{out}}
Invocation: jq -nR -f text-completion.jq unixdict.txt
This assumes levenshtein-distance.jq is in the pwd.
See the comments above.
<pre>
Possible completions of compli:
[
"compliant",
"complicate",
"complicity",
"compliment",
"complimentary",
"compline"
]
Levenshtein-closest words to complition:
["DEBUG:",10396]
[
{
"word": "completion",
"d": 1,
"p": 90
}
]
"Levenshtein-closest words to compxxxxtion:"
["DEBUG:",4082]
[
{
"word": "competition",
"d": 4,
"p": 67
},
{
"word": "compilation",
"d": 4,
"p": 67
},
{
"word": "completion",
"d": 4,
"p": 67
},
{
"word": "complexion",
"d": 4,
"p": 67
},
{
"word": "composition",
"d": 4,
"p": 67
},
{
"word": "compunction",
"d": 4,
"p": 67
},
{
"word": "computation",
"d": 4,
"p": 67
}
]
</pre>
=={{header|Julia}}==
See https://en.wikipedia.org/wiki/Levenshtein_distance, the number of one character edits to obtain one word from another.
<
const fname = download("https://www.mit.edu/~ecprice/wordlist.10000", "wordlist10000.txt")
Line 463 ⟶ 627:
levdistof(n, wrd), "\n")
end
</
<pre>
Words at Levenshtein distance of 1 (90% similarity) from "complition":
Line 477 ⟶ 641:
["collection", "combination", "commission", "comparison", "compensation", "competing", "competitive", "complaint", "complete", "completed", "completely", "complexity", "compliance", "compliant", "compression", "computing", "conclusion", "conditions", "connection", "convention", "conviction", "cooperation", "corporation", "correction", "correlation", "corruption", "nomination", "opinion", "opposition", "option", "pollution", "population", "position", "simulation", "solution"]
</pre>
=={{header|Mathematica}}==
<syntaxhighlight lang="mathematica">Module[
{word = "complition"},
Map[
{#, PercentForm@N[1 - EditDistance[#, word]/StringLength@word]} &,
SpellingCorrectionList[word]
]
] // Grid</syntaxhighlight>
{{out}}
<pre>completion 90%
complication 80%
competition 80%
composition 80%
compilation 80%
compulsion 70%
contemplation 60%</pre>
=={{header|Nim}}==
{{trans|Go}}
We use the function <code>editDistance</code> from the <code>std/editdistance</code> module to get the Levenshtein distance (computed by considering in Unicode codepoints).
<syntaxhighlight lang="nim">import std/editdistance, sequtils, strformat, strutils
let search = "complition"
let words = toSeq("unixdict.txt".lines)
var lev: array[4, seq[string]]
for word in words:
let ld = editDistance(search, word)
if ld < 4:
lev[ld].add word
echo &"Input word: {search}\n"
let length = float(search.len)
for i in 0..3:
if lev[i].len == 0: continue # No result.
let similarity = (length - float(i)) * 100 / length
echo &"Words which are {similarity:4.1f}% similar:"
echo lev[i].join(" ")
echo()</syntaxhighlight>
{{out}}
<pre>Input word: complition
Words which are 90.0% similar:
completion
Words which are 80.0% similar:
coalition competition compilation complexion composition
Words which are 70.0% similar:
cognition collision combustion commotion companion compassion complain complicity compton compulsion compunction computation condition contrition demolition incompletion volition</pre>
=={{header|Perl}}==
Inspired by Raku Sorenson-Dice implementation (doesn't handle Unicode, but module <code>Text::Dice</code> can).
<
use warnings;
use feature 'say';
use Path::Tiny;
use List::Util
# sub bi_gram { (lc shift) =~ /(?<=\K.)./g } ## doesn't work in recent versions of Perl
sub bi_gram {
my $line = lc shift;
uniq map { substr $line,$_,2 } 0 .. length($line)-2;
}
sub score {
Line 513 ⟶ 733:
push @ranked, sprintf "%.3f $_", $scored{$_} for sort { $scored{$b} <=> $scored{$a} } keys %scored;
say "\n$word:\n" . join("\n", head 10, @ranked);
}</
{{out}}
<pre>complition:
Line 536 ⟶ 756:
=={{header|Phix}}==
uses levenshtein() from [[Levenshtein_distance#Phix]] (reproduced below for your convenience) and
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">function</span> <span style="color: #000000;">levenshtein</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #000000;">m</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #000000;">n</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">d</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">),</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">n</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">m</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">j</span>
<span style="color: #000080;font-style:italic;">-- next := min({ prev +substitution, deletion, or insertion }):</span>
<span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">({</span><span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]+(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">b</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]),</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">[$][$]</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #7060A8;">requires</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"1.0.1"</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (this needs apply() to be properly transpiled!)</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">word</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"complition"</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">unix_dict</span><span style="color: #0000FF;">(),</span>
<span style="color: #000000;">leven</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">apply</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">,</span><span style="color: #000000;">levenshtein</span><span style="color: #0000FF;">,</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">ln</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000080;font-style:italic;">/*w*/</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">return</span> <span style="color: #000000;">leven</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">n</span> <span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">4</span> <span style="color: #008080;">do</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Words at Levenshtein distance of %d (%g%% similarity) from \"%s\": \n%s\n"</span><span style="color: #0000FF;">,</span>
<span style="color: #0000FF;">{</span><span style="color: #000000;">n</span><span style="color: #0000FF;">,</span><span style="color: #000000;">100</span><span style="color: #0000FF;">-</span><span style="color: #7060A8;">round</span><span style="color: #0000FF;">(</span><span style="color: #000000;">100</span><span style="color: #0000FF;">*</span><span style="color: #000000;">n</span><span style="color: #0000FF;">/</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)),</span><span style="color: #000000;">word</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">join_by</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">filter</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ln</span><span style="color: #0000FF;">,</span><span style="color: #000000;">n</span><span style="color: #0000FF;">),</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">6</span><span style="color: #0000FF;">)})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</syntaxhighlight>-->
{{out}}
(matches Delphi/Go/Wren)
Line 575 ⟶ 816:
===Hamming distance===
{{Trans|Java}}
<syntaxhighlight lang="raku"
my @s1 = $user_word.comb;
my @listed = gather for $filename.IO.lines -> $line {
Line 593 ⟶ 834:
say "{.[0].fmt('%.2f')}% {.[1]}" for @listed;
}</
{{out}}
<pre>80.00% compaction
Line 606 ⟶ 847:
Using unixdict.txt from www.puzzlers.org
<syntaxhighlight lang="raku"
my $match = bigram $phrase;
%hash.race.map: { [(2 * ($match ∩ .value) / ($match + .value)).round(.001), .key] }
Line 620 ⟶ 861:
say "\n$w:";
.say for sorenson($w, %hash).grep(*.[0] >= .55).sort({-.[0],~.[1]}).head(10);
}</
{{out}}
Line 678 ⟶ 919:
No attempt was made to change (by any method) three letters or more.
<
parse arg what iFID . /*obtain optional arguments from the CL*/
if what==''|what=="," then what= 'complition' /*Not specified? Then use the default.*/
Line 741 ⟶ 982:
if @.z\==. then $= $ z; if r==1 then call recur
end /*k*/
end /*j*/; return space($)</
{{out|output|text= when using the default inputs:}}
<pre>
Line 808 ⟶ 1,049:
(80%) compaction
(80%) complexion
</pre>
=={{header|V (Vlang)}}==
{{trans|Go}}
<syntaxhighlight lang="v (vlang)">import os
fn levenshtein(s string, t string) int {
mut d := [][]int{len:s.len+1, init: []int{len:t.len+1}}
for i,_ in d {
d[i][0] = i
}
for j in d[0] {
d[0][j] = j
}
for j := 1; j <= t.len; j++ {
for i := 1; i <= s.len; i++ {
if s[i-1] == t[j-1] {
d[i][j] = d[i-1][j-1]
} else {
mut min := d[i-1][j]
if d[i][j-1] < min {
min = d[i][j-1]
}
if d[i-1][j-1] < min {
min = d[i-1][j-1]
}
d[i][j] = min + 1
}
}
}
return d[s.len][t.len]
}
fn main() {
search := "complition"
filename := "unixdict.txt"
words := os.read_lines(filename) or { panic('FAILED to read file: $filename')}
mut lev := [][]string{len:4}
for word in words {
s := word
ld := levenshtein(search, s)
if ld < 4 {
lev[ld] << s
}
}
println("Input word: $search\n")
for i in 1..4 {
length := f64(search.len)
similarity := (length - f64(i)) * 100 / length
println("Words which are ${similarity:4.1f}% similar:", )
println(lev[i])
println('')
}
}}</syntaxhighlight>
{{out}}
<pre>
Input word: complition
Words which are 90.0% similar:
['completion', 'incompletion']
Words which are 80.0% similar:
['coalition', 'competition', 'compilation', 'complexion', 'composition', 'decomposition']
Words which are 70.0% similar:
['abolition', 'cognition', 'collision', 'combustion', 'commotion', 'companion', 'compassion', 'complain', 'complicity', 'compton', 'compulsion', 'compunction', 'computation', 'condition', 'contrition', 'demolition', 'locomotion', 'postcondition', 'volition']
</pre>
Line 813 ⟶ 1,121:
{{libheader|Wren-fmt}}
This uses 'unixdict' and the Levenshtein distance algorithm to test for similarity.
<
import "./fmt" for Fmt
var levenshtein = Fn.new { |s, t|
Line 857 ⟶ 1,165:
System.print(lev[i])
System.print()
} </
{{out}}
|