Anagrams: Difference between revisions

Content deleted Content added

Inline

Revision as of 14:06, 23 June 2009

Two or more words can be composed of the same characters, but in a different order. Using the word list at http://www.puzzlers.org/pub/wordlists/unixdict.txt, find the sets of words that share the same characters that contain the most words in them.

Ada

<lang ada> with Ada.Text_IO; use Ada.Text_IO;

with Ada.Containers.Indefinite_Ordered_Maps; with Ada.Containers.Indefinite_Ordered_Sets;

procedure Words_Of_Equal_Characters is

  package Set_Of_Words is new Ada.Containers.Indefinite_Ordered_Sets (String);
  use Ada.Containers, Set_Of_Words;
  package Anagrams is new Ada.Containers.Indefinite_Ordered_Maps (String, Set);
  use Anagrams;

  File   : File_Type;
  Result : Map;
  Max    : Count_Type := 1;

  procedure Put (Position : Anagrams.Cursor) is
     First : Boolean := True;
     List  : Set renames Element (Position);
     procedure Put (Position : Set_Of_Words.Cursor) is
     begin
        if First then
           First := False;
        else
           Put (',');
        end if;
        Put (Element (Position));
     end Put;
  begin
     if List.Length = Max then
        Iterate (List, Put'Access);
        New_Line;
     end if;
  end Put;

begin

  Open (File, In_File, "unixdict.txt");
  loop
     declare
        Word : constant String     := Get_Line (File);
        Key  : String (Word'Range) := (others => Character'Last);
        List : Set;
        Position : Anagrams.Cursor;
     begin
        for I in Word'Range loop
           for J in Word'Range loop
              if Key (J) > Word (I) then
                 Key (J + 1..I) := Key (J..I - 1);
                 Key (J) := Word (I);
                 exit;
              end if;
           end loop;
        end loop;
        Position := Find (Result, Key);
        if Has_Element (Position) then
           List := Element (Position);
           Insert (List, Word);
           Replace_Element (Result, Position, List);
        else
           Insert (List, Word);
           Include (Result, Key, List);
        end if;
        Max := Count_Type'Max (Max, Length (List));
     end;
  end loop;

exception

  when End_Error =>
     Iterate (Result, Put'Access);
     Close (File);

end Words_Of_Equal_Characters; </lang> Sample output:

abel,able,bale,bela,elba
caret,carte,cater,crate,trace
angel,angle,galen,glean,lange
alger,glare,lager,large,regal
elan,lane,lean,lena,neal
evil,levi,live,veil,vile

AutoHotkey

contributed by Laszlo on the ahk forum <lang AutoHotkey> MsgBox % anagrams("able")

anagrams(word) {

  Static dict
  IfEqual dict,, FileRead dict, unixdict.txt ; file in the script directory
  w := sort(word)
  Loop Parse, dict, `n, `r
     If (w = sort(A_LoopField))
        t .= A_LoopField "`n"
  Return t

}

sort(word) {

  a := RegExReplace(word,".","$0`n")
  Sort a
  Return a

}</lang>

C++

<lang cpp>#include <iostream>

include <fstream>
include <string>
include <map>
include <vector>
include <algorithm>
include <iterator>

int main() {

 std::ifstream in("unixdict.txt");
 std::map<std::string, std::vector<std::string> > anagrams;

 std::string word;
 size_t count = 0;
 while (std::getline(in, word)) {
   std::string key = word;
   std::sort(key.begin(), key.end());
   // note: the [] operator automatically inserts a new value if key does not exist
   anagrams[key].push_back(word);
   count = std::max(count, anagrams[key].size());
 }

 in.close();

 for (std::map<std::string, std::vector<std::string> >::const_iterator it = anagrams.begin();
      it != anagrams.end();
      it++)
   if (it->second.size() >= count) {
     std::copy(it->second.begin(), it->second.end(),
               std::ostream_iterator<std::string>(std::cout, ", "));
     std::cout << std::endl;
   }
 return 0;

}</lang> Output:

abel, able, bale, bela, elba, 
caret, carte, cater, crate, trace, 
angel, angle, galen, glean, lange, 
alger, glare, lager, large, regal, 
elan, lane, lean, lena, neal, 
evil, levi, live, veil, vile,

D

D 1, using Phobos (to download the word list you need the Tango Std Lib). <lang d> import std.stdio, std.stream;

void main() {

   string[][string] anags;
   foreach (string w; new BufferedFile("unixdict.txt"))
       anags[w.dup.sort] ~= w.dup;
   int lmax;
   foreach (a; anags)
       lmax = lmax < a.length ? a.length : lmax;
   foreach (a; anags)
       if (a.length == lmax)
           writefln(a);

} </lang>

Factor

"resource:unixdict.txt" utf8 file-lines
[ [ natural-sort >string ] keep ] { } map>assoc sort-keys
[ [ first ] compare +eq+ = ] monotonic-split
dup 0 [ length max ] reduce '[ length _ = ] filter [ values ] map .

{
    { "abel" "able" "bale" "bela" "elba" }
    { "caret" "carte" "cater" "crate" "trace" }
    { "angel" "angle" "galen" "glean" "lange" }
    { "alger" "glare" "lager" "large" "regal" }
    { "elan" "lane" "lean" "lena" "neal" }
    { "evil" "levi" "live" "veil" "vile" }
}

Groovy

This program: <lang groovy> def words = new URL('http://www.puzzlers.org/pub/wordlists/unixdict.txt').text.readLines() def groups = words.groupBy{ it.toList().sort() } def bigGroupSize = groups.collect{ it.value.size() }.max() def isBigAnagram = { it.value.size() == bigGroupSize } println groups.findAll(isBigAnagram).collect{ it.value }.collect{ it.join(' ') }.join('\n') </lang> produces this output:

abel able bale bela elba
alger glare lager large regal
angel angle galen glean lange
caret carte cater crate trace
elan lane lean lena neal
evil levi live veil vile

Haskell

import Data.List

groupon f x y = f x == f y

main = do
  f <- readFile "./../Puzzels/Rosetta/unixdict.txt"
  let  words = lines f
       wix = groupBy (groupon fst) . sort $ zip (map sort words) words
       mxl = maximum $ map length wix
  mapM_ (print . map snd) . filter ((==mxl).length) $ wix

Sample output:

*Main> main
["abel","able","bale","bela","elba"]
["caret","carte","cater","crate","trace"]
["angel","angle","galen","glean","lange"]
["alger","glare","lager","large","regal"]
["elan","lane","lean","lena","neal"]
["evil","levi","live","veil","vile"]

J

   (#~a:~:{:"1)(]/.~/:~&>)<;.2]1!:1<'unixdict.txt'
+-----+-----+-----+-----+-----+
|abel |able |bale |bela |elba |
+-----+-----+-----+-----+-----+
|alger|glare|lager|large|regal|
+-----+-----+-----+-----+-----+
|angel|angle|galen|glean|lange|
+-----+-----+-----+-----+-----+
|caret|carte|cater|crate|trace|
+-----+-----+-----+-----+-----+
|elan |lane |lean |lena |neal |
+-----+-----+-----+-----+-----+
|evil |levi |live |veil |vile |
+-----+-----+-----+-----+-----+

Java

The key to this algorithm is the sorting of the characters in each word from the dictionary. The line Arrays.sort(chars); sorts all of the letters in the word in ascending order using a built-in quicksort, so all of the words in the first group in the result end up under the key "aegln" in the anagrams map. <lang java>import java.net.*; import java.io.*; import java.util.*;

public class WordsOfEqChars {

   public static void main(String[] args) throws IOException {
       URL url = new URL("http://www.puzzlers.org/pub/wordlists/unixdict.txt");
       InputStreamReader isr = new InputStreamReader(url.openStream());
       BufferedReader reader = new BufferedReader(isr);

       Map<String, Collection<String>> anagrams = new HashMap<String, Collection<String>>();
       String word;
       int count = 0;
       while ((word = reader.readLine()) != null) {
           char[] chars = word.toCharArray();
           Arrays.sort(chars);
           String key = new String(chars);
           if (!anagrams.containsKey(key))
               anagrams.put(key, new ArrayList<String>());
           anagrams.get(key).add(word);
           count = Math.max(count, anagrams.get(key).size());
       }

       reader.close();

       for (Collection<String> ana : anagrams.values())
           if (ana.size() >= count)
               System.out.println(ana);
   }

}</lang> Output:

[angel, angle, galen, glean, lange]
[elan, lane, lean, lena, neal]
[alger, glare, lager, large, regal]
[abel, able, bale, bela, elba]
[evil, levi, live, veil, vile]
[caret, carte, cater, crate, trace]

Mathematica

Download the dictionary, split the lines, split the word in characters and sort them. Now sort by those words, and find sequences of equal 'letter-hashes'. Return the longest sequences: <lang Mathematica>

text={#,StringJoin@@Sort[Characters[#]]}&/@Import["http://www.puzzlers.org/pub/wordlists/unixdict.txt","Lines"];
text=SortBy[text,#2&];
splits=Split[text,#12==#22&]All,All,1;
maxlen=Max[Length/@splits];
Select[splits,Length[#]==maxlen&]

</lang> gives back: <lang Mathematica>

{{abel,able,bale,bela,elba},{caret,carte,cater,crate,trace},{angel,angle,galen,glean,lange},{alger,glare,lager,large,regal},{elan,lane,lean,lena,neal},{evil,levi,live,veil,vile}}

</lang>

Perl

<lang perl>use LWP::Simple; use List::Util qw(max);

my @words = split(' ', get('http://www.puzzlers.org/pub/wordlists/unixdict.txt')); my %anagram; foreach my $word (@words) {

   push @{ $anagram{join(, sort(split(//, $word)))} }, $word;

}

my $count = max(map {scalar @$_} values %anagram); foreach my $ana (values %anagram) {

   if (@$ana >= $count) {
       print "@$ana\n";
   }

}</lang> Output:

alger glare lager large regal
abel able bale bela elba
evil levi live veil vile
angel angle galen glean lange
elan lane lean lena neal
caret carte cater crate trace

PHP

<lang php><?php $words = explode("\n", file_get_contents('http://www.puzzlers.org/pub/wordlists/unixdict.txt')); foreach ($words as $word) {

   $chars = str_split($word);
   sort($chars);
   $anagram[implode($chars)][] = $word;

}

$best = max(array_map('count', $anagram)); foreach ($anagram as $ana)

   if (count($ana) == $best)
       print_r($ana);

?></lang>

Python

Python 2.5 shell input (IDLE) <lang python>>>> import urllib >>> from collections import defaultdict >>> words = urllib.urlopen('http://www.puzzlers.org/pub/wordlists/unixdict.txt').read().split() >>> len(words) 25104 >>> anagram = defaultdict(list) # map sorted chars to anagrams >>> for word in words: anagram[tuple(sorted(word))].append( word )

>>> count = max(len(ana) for ana in anagram.itervalues()) >>> for ana in anagram.itervalues(): if len(ana) >= count: print ana

['angel', 'angle', 'galen', 'glean', 'lange'] ['alger', 'glare', 'lager', 'large', 'regal'] ['caret', 'carte', 'cater', 'crate', 'trace'] ['evil', 'levi', 'live', 'veil', 'vile'] ['elan', 'lane', 'lean', 'lena', 'neal'] ['abel', 'able', 'bale', 'bela', 'elba'] >>> count 5 >>></lang>

Translation of: Haskell

Works with: Python version 2.6

sort and then group using groupby()

<lang python>>>> import urllib, itertools >>> words = urllib.urlopen('http://www.puzzlers.org/pub/wordlists/unixdict.txt').read().split() >>> len(words) 25104 >>> anagrams = [list(g) for k,g in itertools.groupby(sorted(words, key=sorted), key=sorted)]

>>> count = max(len(ana) for ana in anagrams) >>> for ana in anagrams: if len(ana) >= count: print ana

['abel', 'able', 'bale', 'bela', 'elba'] ['caret', 'carte', 'cater', 'crate', 'trace'] ['angel', 'angle', 'galen', 'glean', 'lange'] ['alger', 'glare', 'lager', 'large', 'regal'] ['elan', 'lane', 'lean', 'lena', 'neal'] ['evil', 'levi', 'live', 'veil', 'vile'] >>> count 5 >>></lang>

Ruby

<lang ruby>require 'open-uri'

anagram = Hash.new {|hash, key| hash[key] = []} # map sorted chars to anagrams

open('http://www.puzzlers.org/pub/wordlists/unixdict.txt') do |f|

 words = f.read.split
 for word in words
   anagram[word.split().sort] << word
 end

end

count = anagram.values.map {|ana| ana.length}.max anagram.each_value do |ana|

 if ana.length >= count
   p ana
 end

end</lang> Output:

["evil", "levi", "live", "veil", "vile"]
["abel", "able", "bale", "bela", "elba"]
["elan", "lane", "lean", "lena", "neal"]
["alger", "glare", "lager", "large", "regal"]
["angel", "angle", "galen", "glean", "lange"]
["caret", "carte", "cater", "crate", "trace"]

Translation of: Haskell

Works with: Ruby version 1.8.7+

sort and then group using group_by

<lang ruby>require 'open-uri'

anagram = nil

open('http://www.puzzlers.org/pub/wordlists/unixdict.txt') do |f|

 anagram = f.read \
            .split \
            .sort_by {|s| s.each_char.sort} \
            .group_by {|s| s.each_char.sort}

end

count = anagram.each_value.map {|ana| ana.length}.max anagram.each_value do |ana|

 if ana.length >= count
   p ana
 end

end</lang>

Tcl

<lang tcl>package require Tcl 8.5 package require http

set url http://www.puzzlers.org/pub/wordlists/unixdict.txt set response [http::geturl $url] set data [http::data $response] http::cleanup $response

set max 0 array set anagrams {}

foreach line [split $data \n] {

   foreach word [split $line] {
       set anagram [join [lsort [split $word ""]] ""]
       lappend anagrams($anagram) $word
       set max [::tcl::mathfunc::max $max [llength $anagrams($anagram)]]
   }

}

foreach key [array names anagrams] {

   if {[llength $anagrams($key)] == $max} {
       puts $anagrams($key)
   }

}</lang> Outputs:

evil levi live veil vile
caret carte cater crate trace
abel able bale bela elba
elan lane lean lena neal
angel angle galen glean lange
alger glare lager large regal

Ursala

Supplying the input file on the command line during compilation makes its contents accessible as a pre-declared identifier. The algorithm is to group the words together that are made from the same unordered lists of letters, then collect the groups together that have the same number of words in them, and then show the collection associated with the highest number. <lang Ursala>

import std

show+

anagrams = mat` * leql$^&h eql|=@rK2tFlSS ^(~&,-<&)* unixdict_dot_txt</lang> output:

evil levi live veil vile
caret carte cater crate trace
alger glare lager large regal
elan lane lean lena neal
angel angle galen glean lange
abel able bale bela elba

Vedit macro language

This implementation first sorts characters of each word using Insertion sort in subroutine SORT_LETTERS.
Then the word list is sorted using built-in Sort function.
Finally, groups of words are analyzed and largest groups are recorded.

The word list is expected to be in the same directory as the script.

<lang vedit> File_Open("|(PATH_ONLY)\unixdict.txt")

Repeat(ALL) {

   Reg_Copy_Block(10, CP, EOL_Pos)     // original word
   Call("SORT_LETTERS")                // sort letters of the word
   EOL
   IC(' ') Reg_Ins(10)                 // add the original word at eol
   Line(1, ERRBREAK)

}

Sort(0, File_Size) // sort list according to anagrams

BOF Search("|F") Search(' ') // first word in the list Reg_Copy_Block(10, BOL_Pos, CP+1) // reg 10 = sorted anagram word Reg_Copy_Block(11, CP, EOL_Pos) // reg 11 = list of words in current group Reg_Empty(12) // reg 12 = list of words in largest groups Reg_Set(13, " ")

1 = 1 // words in this group
2 = 2 // words in largest group found

Repeat(ALL) {

   Line(1, ERRBREAK)
   if (Match(@10, ADVANCE) == 0) {     // same group as previous word?
       Reg_Copy_Block(11, CP-1, EOL_Pos, APPEND)  // add word to this group
       #1++
   } else {                            // different anagram group
       Search(" ", ERRBREAK)
       if (#1 == #2) {                 // same size as the largest?
           Reg_Set(12, @13, APPEND)    // append newline
           Reg_Set(12, @11, APPEND)    // append word list
       }
       if (#1 > #2) {                  // new larger size of group
           Reg_Set(12, @11)            // replace word list
           #2 = #1
       }
       Reg_Copy_Block(10, BOL_Pos, CP+1)
       Reg_Copy_Block(11, CP, EOL_Pos) // first word of new group
       #1 = 1
   }

}

Buf_Quit(OK) // close word list file Buf_Switch(Buf_Free) // output results in a new edit buffer Reg_Ins(12) // display all groups of longest anagram words Return

//////////////////////////////////////////////////////////////////// // // Sort characters in current line using Insertion sort //

SORT_LETTERS:

GP(EOL_pos) #9 = Cur_Col-1 for (#1 = 2; #1 <= #9; #1++) {

   Goto_Col(#1) #8 = Cur_Char
   #2 = #1
   while (#2 > 1) {
       #7 = Cur_Char(-1)
       if (#7 <= #8) { break }
       Ins_Char(#7, OVERWRITE)
       #2--
       Goto_Col(#2)
   }
   Ins_Char(#8, OVERWRITE)

} return </lang>

Output:

abel able bale bela elba
caret carte cater crate trace
angel angle galen glean lange
alger glare lager large regal
elan lane lean lena neal
evil levi live veil vile