Text completion: Difference between revisions

Added Perl example
m (→‎Sorenson-Dice: stabilize the returned sort order)
(Added Perl example)
Line 477:
["collection", "combination", "commission", "comparison", "compensation", "competing", "competitive", "complaint", "complete", "completed", "completely", "complexity", "compliance", "compliant", "compression", "computing", "conclusion", "conditions", "connection", "convention", "conviction", "cooperation", "corporation", "correction", "correlation", "corruption", "nomination", "opinion", "opposition", "option", "pollution", "population", "position", "simulation", "solution"]
</pre>
 
=={{header|Perl}}==
Inspired by the Raku Sorenson-Dice implementation (this version doesn't handle Unicode, but the module <code>Text::Dice</code> can).
<lang perl>use strict;
use warnings;
use feature 'say';
use Path::Tiny;
use List::Util 'head';
 
# Return the overlapping two-character substrings (bigrams) of the
# lower-cased argument, in order: 'Perl' yields ('pe', 'er', 'rl').
# Strings shorter than two characters yield an empty list.
# A capture inside a zero-width lookahead is used instead of \K inside a
# lookbehind, because Perl 5.32 and later reject the latter at compile time.
# Meant to be called in list context; '.' does not match a newline, so no
# bigram ever spans one.
sub bi_gram { (lc shift) =~ /(?=(..))/g }
 
# Similarity between a phrase and a word whose bigrams are already computed.
#   $phrase - string; its bigrams are derived here via bi_gram
#   $word   - array ref of bigrams
# Returns twice the number of distinct bigrams seen more than once across
# both lists, divided by the combined length of the two lists.
# Note: a bigram that repeats inside just one of the inputs also has a
# tally above one, so it is counted as if it were shared.
sub score {
    my ($phrase, $word) = @_;

    my @phrase_grams = bi_gram($phrase);

    # Tally every bigram occurrence from both sides in a single table.
    my %tally;
    for my $gram (@phrase_grams, @$word) {
        $tally{$gram}++;
    }

    my $shared = grep { $tally{$_} > 1 } keys %tally;
    my $total  = @phrase_grams + @$word;

    return 2 * $shared / $total;
}
 
# Score $word against every entry of a dictionary and keep the close ones.
#   $dict   - hash ref mapping each dictionary word to an array ref of its bigrams
#   $word   - the string to look up
#   $cutoff - optional; only scores strictly greater than this are kept
#             (defaults to 0.55 when undefined)
# Returns a hash (as a key/value list) of dictionary word => score.
sub sorenson {
    my ($dict, $word, $cutoff) = @_;
    $cutoff = 0.55 unless defined $cutoff;

    my %matches;
    for my $candidate (keys %$dict) {
        my $similarity = score($word, $dict->{$candidate});
        $matches{$candidate} = $similarity if $similarity > $cutoff;
    }

    return %matches;
}
 
# Load the word list: every run of three or more characters on a line of
# unixdict.txt becomes a key, mapped to an array ref of its bigrams.
my %dict = map { $_ => [ bi_gram($_) ] } path('unixdict.txt')->slurp =~ /.{3,}/gm;

# For each misspelled input, print up to ten of the best-scoring entries.
for my $word (qw(complition inconsqual)) {
    my %scored = sorenson(\%dict, $word);

    # Highest score first. Equal scores fall back to alphabetical order so
    # that the listing, and which tied words survive the cut to ten, is
    # repeatable despite Perl's randomized hash key order.
    # The word is passed as a sprintf argument rather than interpolated into
    # the format, so a '%' in a word cannot be taken as a conversion.
    my @ranked = map  { sprintf '%.3f %s', $scored{$_}, $_ }
                 sort { $scored{$b} <=> $scored{$a} or $a cmp $b }
                 keys %scored;

    say "\n$word:\n" . join("\n", head 10, @ranked);
}</lang>
{{out}}
<pre>complition:
0.778 completion
0.737 competition
0.737 composition
0.706 coalition
0.700 incompletion
0.667 complexion
0.667 complicity
0.667 decomposition
0.632 compilation
0.632 compunction
 
inconsqual:
0.609 inconsequential
0.588 continual
0.571 squall
0.556 conceptual
0.556 continuant
0.556 inconstant</pre>
 
=={{header|Phix}}==
2,392

edits