Word frequency: Difference between revisions
Content added Content deleted
(→{{header|Raku}}: wikipedia link to diacritics) |
(→{{header|Raku}}: use % operator, add type constraint and properly align output) |
||
Line 3,978: | Line 3,978: | ||
=={{header|Raku}}== |
=={{header|Raku}}== |
||
(formerly Perl 6) |
(formerly Perl 6) |
||
{{works with|Rakudo}} |
{{works with|Rakudo|2022.07}} |
||
Note: much of the following exposition is no longer critical to the task as the requirements have been updated, but is left here for historical and informational reasons. |
Note: much of the following exposition is no longer critical to the task as the requirements have been updated, but is left here for historical and informational reasons. |
||
Line 3,991: | Line 3,991: | ||
Here is a sample that shows the results produced by several different matchers. |
Here is a sample that shows the results produced by several different matchers. |
||
<syntaxhighlight lang="raku" line>sub MAIN ($filename, $top = 10) { |
<syntaxhighlight lang="raku" line>sub MAIN ($filename, UInt $top = 10) { |
||
my $file = $filename.IO.slurp.lc.subst(/ (<[\w]-[_]>'-')\n(<[\w]-[_]>) /, {$0 ~ $1}, :g ); |
my $file = $filename.IO.slurp.lc.subst(/ (<[\w]-[_]>'-')\n(<[\w]-[_]>) /, {$0 ~ $1}, :g ); |
||
my @matcher = |
my @matcher = |
||
rx/ <[a..z]>+ /, # simple 7-bit ASCII |
rx/ <[a..z]>+ /, # simple 7-bit ASCII |
||
rx/ \w+ /, # word characters with underscore |
rx/ \w+ /, # word characters with underscore |
||
rx/ <[\w]-[_]>+ /, # word characters without underscore |
rx/ <[\w]-[_]>+ /, # word characters without underscore |
||
rx/ <[\w]-[_]>+ |
rx/ [<[\w]-[_]>+]+ % < ' - '- > / # word characters without underscore but with hyphens and contractions |
||
; |
|||
for @matcher -> $reg { |
for @matcher -> $reg { |
||
say "\nTop $top using regex: ", $reg.raku; |
say "\nTop $top using regex: ", $reg.raku; |
||
my @words = $file.comb($reg).Bag.sort(-*.value)[^$top]; |
|||
my $length = max @words».key».chars; |
|||
printf "%-{$length}s %d\n", .key, .value for @words; |
|||
} |
} |
||
}</syntaxhighlight> |
}</syntaxhighlight> |