Text completion: Difference between revisions

Content added Content deleted

Inline

@@ Line 477: / Line 477: @@
 ["collection", "combination", "commission", "comparison", "compensation", "competing", "competitive", "complaint", "complete", "completed", "completely", "complexity", "compliance", "compliant", "compression", "computing", "conclusion", "conditions", "connection", "convention", "conviction", "cooperation", "corporation", "correction", "correlation", "corruption", "nomination", "opinion", "opposition", "option", "pollution", "population", "position", "simulation", "solution"]
 </pre>
+=={{header|Perl}}==
+Inspired by the Raku Sørensen–Dice implementation (this version doesn't handle Unicode, but the module <code>Text::Dice</code> can).
+<lang perl>use strict;
+use warnings;
+use feature 'say';
+use Path::Tiny;
+use List::Util 'head';
+# Return the overlapping two-character substrings (bigrams) of the
+# lower-cased argument, in order of appearance. A string shorter than two
+# characters yields an empty list.
+# A capturing lookahead is used rather than \K inside a lookbehind, because
+# perl 5.32 and later refuse to compile \K within a lookaround assertion.
+sub bi_gram { (lc shift) =~ /(?=(..))/g }
+# Similarity of $phrase (a string) to $word (an array ref of bigrams, as
+# built by bi_gram): twice the number of shared bigrams divided by the
+# total number of bigrams on both sides. Returns 0 when neither side has
+# any bigrams.
+sub score {
+    my($phrase, $word) = @_;
+    my @match = bi_gram $phrase;
+    my $total = @match + @$word;
+    return 0 unless $total;    # nothing to compare; avoid dividing by zero
+    my %count;
+    $count{$_}++ for @match, @$word;
+    # A bigram tallied more than once is treated as shared; note that this
+    # also counts a bigram repeated within just one of the two sides.
+    my $shared = grep { $count{$_} > 1 } keys %count;
+    return 2 * $shared / $total;
+}
+# Score $word against every entry of the hash ref $dict (word => array ref
+# of bigrams) and return a hash of entry => score containing only the
+# entries whose score is strictly greater than $cutoff (0.55 when omitted).
+sub sorenson {
+    my($dict,$word,$cutoff) = @_;
+    $cutoff = 0.55 unless defined $cutoff;
+    my %matches;
+    for my $entry (keys %$dict) {
+        my $s = score($word, $dict->{$entry});
+        $matches{$entry} = $s if $s > $cutoff;
+    }
+    return %matches;
+}
+# Map every dictionary line of three or more characters to its bigram list.
+my %dict = map { $_ => [ bi_gram($_) ] } path('unixdict.txt')->slurp =~ /.{3,}/gm;
+# qw() instead of <...>: the angle-bracket form is a glob() call, which
+# would perform filename expansion if a word ever held a wildcard character.
+for my $word (qw(complition inconsqual)) {
+    my %scored = sorenson(\%dict,$word);
+    # Best score first; equal scores fall back to alphabetical order so the
+    # listing does not depend on hash iteration order.
+    my @ranked = map  { sprintf "%.3f $_", $scored{$_} }
+                 sort { $scored{$b} <=> $scored{$a} or $a cmp $b } keys %scored;
+    # Show at most the ten best candidates under a heading for the input.
+    say "\n$word:\n" . join("\n", head 10, @ranked);
+}</lang>
+{{out}}
+<pre>complition:
+0.778 completion
+0.737 competition
+0.737 composition
+0.706 coalition
+0.700 incompletion
+0.667 complexion
+0.667 complicity
+0.667 decomposition
+0.632 compilation
+0.632 compunction
+inconsqual:
+0.609 inconsequential
+0.588 continual
+0.571 squall
+0.556 conceptual
+0.556 continuant
+0.556 inconstant</pre>
 =={{header|Phix}}==