Text completion: Difference between revisions
Content added Content deleted
Thundergnat (talk | contribs) m (→Sorenson-Dice: stabilize the returned sort order) |
SqrtNegInf (talk | contribs) (Added Perl example) |
||
Line 477: | Line 477: | ||
["collection", "combination", "commission", "comparison", "compensation", "competing", "competitive", "complaint", "complete", "completed", "completely", "complexity", "compliance", "compliant", "compression", "computing", "conclusion", "conditions", "connection", "convention", "conviction", "cooperation", "corporation", "correction", "correlation", "corruption", "nomination", "opinion", "opposition", "option", "pollution", "population", "position", "simulation", "solution"] |
["collection", "combination", "commission", "comparison", "compensation", "competing", "competitive", "complaint", "complete", "completed", "completely", "complexity", "compliance", "compliant", "compression", "computing", "conclusion", "conditions", "connection", "convention", "conviction", "cooperation", "corporation", "correction", "correlation", "corruption", "nomination", "opinion", "opposition", "option", "pollution", "population", "position", "simulation", "solution"] |
||
</pre> |
</pre> |
||
=={{header|Perl}}== |
|||
Inspired by the Raku Sorenson-Dice implementation (this version doesn't handle Unicode, but the module <code>Text::Dice</code> can). |
|||
<lang perl>use strict; |
|||
use warnings; |
|||
use feature 'say'; |
|||
use Path::Tiny; |
|||
use List::Util 'head'; |
|||
# Split a string into its overlapping two-character sequences (bi-grams),
# lower-cased: 'Cat' yields ('ca', 'at'). A string shorter than two
# characters yields an empty list.
#
# The capture sits inside a zero-width look-ahead so that successive /g
# matches may overlap. The previous pattern, /(?<=\K.)./, placed \K inside a
# look-behind, which perlre documents as not well defined and which
# Perl 5.32 and later refuse to compile.
sub bi_gram {
    my ($text) = @_;
    return lc($text) =~ /(?=(..))/g;
}
|||
# Similarity score between a phrase and a pre-computed bi-gram list.
#
#   $phrase - string to compare; it is split into bi-grams here
#   $word   - array reference holding the bi-grams of the candidate word
#
# Returns 2 * N / T, where T is the total number of bi-grams in both lists
# and N is the number of distinct bi-grams occurring more than once in the
# combined lists (a bi-gram repeated within a single list also counts).
# Returns 0 when both lists are empty.
sub score {
    my ($phrase, $word) = @_;

    my @match = bi_gram($phrase);
    my $total = @match + @$word;
    return 0 unless $total;    # nothing to compare; avoids dividing by zero

    my %count;
    $count{$_}++ for @match, @$word;

    # grep in scalar context gives the number of qualifying bi-grams.
    my $shared = grep { $count{$_} > 1 } keys %count;

    return 2 * $shared / $total;
}
|||
# Score $word against every entry of the dictionary and keep the good ones.
#
#   $dict   - hash reference: candidate word => array ref of its bi-grams
#   $word   - the (possibly misspelled) word to look up
#   $cutoff - minimum score, exclusive; defaults to 0.55 when undefined
#
# Returns a hash (as a flat list) of candidate word => score for every
# candidate whose score is strictly greater than $cutoff.
sub sorenson {
    my ($dict, $word, $cutoff) = @_;
    $cutoff //= 0.55;

    my %matches;
    for my $candidate (keys %$dict) {
        my $similarity = score($word, $dict->{$candidate});
        $matches{$candidate} = $similarity if $similarity > $cutoff;
    }

    return %matches;
}
|||
# Map every dictionary word of three or more characters to its bi-gram list.
my %dict = map { $_ => [ bi_gram($_) ] } path('unixdict.txt')->slurp =~ /.{3,}/gm;

for my $word (qw(complition inconsqual)) {
    my %scored = sorenson(\%dict, $word);

    # Best score first. Equal scores fall back to alphabetical order so that
    # neither the listing nor the 10-entry cut-off depends on Perl's
    # randomized hash ordering. The word is passed to sprintf as an argument
    # rather than interpolated into the format string.
    my @ranked = map  { sprintf '%.3f %s', $scored{$_}, $_ }
                 sort { $scored{$b} <=> $scored{$a} or $a cmp $b }
                 keys %scored;

    say "\n$word:\n" . join("\n", head 10, @ranked);
}</lang>
|||
{{out}} |
|||
<pre>complition: |
|||
0.778 completion |
|||
0.737 competition |
|||
0.737 composition |
|||
0.706 coalition |
|||
0.700 incompletion |
|||
0.667 complexion |
|||
0.667 complicity |
|||
0.667 decomposition |
|||
0.632 compilation |
|||
0.632 compunction |
|||
inconsqual: |
|||
0.609 inconsequential |
|||
0.588 continual |
|||
0.571 squall |
|||
0.556 conceptual |
|||
0.556 continuant |
|||
0.556 inconstant</pre> |
|||
=={{header|Phix}}== |
=={{header|Phix}}== |