Anagrams
![Task](http://static.miraheze.org/rosettacodewiki/thumb/b/ba/Rcode-button-task-crushed.png/64px-Rcode-button-task-crushed.png)
You are encouraged to solve this task according to the task description, using any language you may know.
Two or more words can be composed of the same characters, but in a different order. Using the word list at http://www.puzzlers.org/pub/wordlists/unixdict.txt, find the sets of words that share the same characters that contain the most words in them.
Ada
<lang ada>with Ada.Text_IO; use Ada.Text_IO;
with Ada.Containers.Indefinite_Ordered_Maps;
with Ada.Containers.Indefinite_Ordered_Sets;

--  Groups the words of unixdict.txt by their sorted letters and prints every
--  group whose size equals the largest group size encountered.
procedure Words_Of_Equal_Characters is
   package Set_Of_Words is new Ada.Containers.Indefinite_Ordered_Sets (String);
   use Ada.Containers, Set_Of_Words;
   package Anagrams is new Ada.Containers.Indefinite_Ordered_Maps (String, Set);
   use Anagrams;

   File   : File_Type;
   Result : Map;               --  sorted letters -> set of words
   Max    : Count_Type := 1;   --  size of the largest set seen so far

   --  Prints one map entry as a comma-separated line when it is a largest set.
   procedure Put (Position : Anagrams.Cursor) is
      Is_First : Boolean := True;
      Group    : Set renames Element (Position);

      --  Prints one word, preceded by a comma unless it is the first one.
      procedure Put (Position : Set_Of_Words.Cursor) is
      begin
         if not Is_First then
            Put (',');
         end if;
         Is_First := False;
         Put (Element (Position));
      end Put;
   begin
      if Group.Length /= Max then
         return;
      end if;
      Iterate (Group, Put'Access);
      New_Line;
   end Put;

begin
   Open (File, In_File, "unixdict.txt");
   loop
      declare
         Word     : constant String := Get_Line (File);
         Key      : String (Word'Range) := (others => Character'Last);
         Group    : Set;
         Position : Anagrams.Cursor;
      begin
         --  Insertion sort of the letters of Word into Key
         for I in Word'Range loop
            for J in Word'Range loop
               if Key (J) > Word (I) then
                  Key (J + 1 .. I) := Key (J .. I - 1);
                  Key (J) := Word (I);
                  exit;
               end if;
            end loop;
         end loop;

         --  Start from the existing set (if any), add the word, store it back
         Position := Find (Result, Key);
         if Has_Element (Position) then
            Group := Element (Position);
         end if;
         Insert (Group, Word);
         Include (Result, Key, Group);
         Max := Count_Type'Max (Max, Length (Group));
      end;
   end loop;
exception
   --  Get_Line raises End_Error at end of file: that is the normal exit.
   when End_Error =>
      Iterate (Result, Put'Access);
      Close (File);
end Words_Of_Equal_Characters;</lang> Sample output:
abel,able,bale,bela,elba caret,carte,cater,crate,trace angel,angle,galen,glean,lange alger,glare,lager,large,regal elan,lane,lean,lena,neal evil,levi,live,veil,vile
AutoHotkey
contributed by Laszlo on the ahk forum <lang AutoHotkey>MsgBox % anagrams("able")

; Returns every dictionary word that is an anagram of `word`, one per line.
anagrams(word) {
   Static dict                                 ; dictionary text, loaded once and kept between calls
   IfEqual dict,, FileRead dict, unixdict.txt  ; file in the script directory
   w := sort(word)
   Loop Parse, dict, `n, `r
      If (w = sort(A_LoopField))
         t .= A_LoopField "`n"
   Return t
}

; Returns the characters of `word` in sorted order, one character per line.
sort(word) {
   a := RegExReplace(word,".","$0`n")
   Sort a
   Return a
}</lang>
C
<lang c>#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
/*
 * Copies `word` into `wbuf` and sorts the copy's characters in descending
 * order, so that all anagrams of a word produce the same key.
 *
 * word: NUL-terminated input string (not modified).
 * wbuf: caller-supplied buffer of at least strlen(word)+1 bytes.
 * Returns wbuf.
 */
char *sortedWord(const char *word, char *wbuf)
{
    char *p1, *p2, *endwrd;
    char t;
    int swaps;

    strcpy(wbuf, word);
    endwrd = wbuf + strlen(wbuf);

    /* Nothing to sort; this also avoids forming the pointer endwrd-1, which
     * would point before the buffer when the word is empty. */
    if (endwrd - wbuf < 2)
        return wbuf;

    do {
        swaps = 0;

        /* Pass 1: compare characters pairwise from both ends inward. */
        p1 = wbuf; p2 = endwrd - 1;
        while (p1 < p2) {
            if (*p2 > *p1) {
                t = *p2; *p2 = *p1; *p1 = t;
                swaps = 1;
            }
            p1++; p2--;
        }

        /* Pass 2: ordinary bubble pass over adjacent characters. */
        p1 = wbuf; p2 = p1 + 1;
        while (p2 < endwrd) {
            if (*p2 > *p1) {
                t = *p2; *p2 = *p1; *p1 = t;
                swaps = 1;
            }
            p1++; p2++;
        }
    } while (swaps);
    return wbuf;
}
/* Scrambling table: maps (character code mod table size) to a small value. */
static short cxmap[] = {
0x06, 0x1f, 0x4d, 0x0c, 0x5c, 0x28, 0x5d, 0x0e, 0x09, 0x33, 0x31, 0x56, 0x52, 0x19, 0x29, 0x53, 0x32, 0x48, 0x35, 0x55, 0x5e, 0x14, 0x27, 0x24, 0x02, 0x3e, 0x18, 0x4a, 0x3f, 0x4c, 0x45, 0x30, 0x08, 0x2c, 0x1a, 0x03, 0x0b, 0x0d, 0x4f, 0x07, 0x20, 0x1d, 0x51, 0x3b, 0x11, 0x58, 0x00, 0x49, 0x15, 0x2d, 0x41, 0x17, 0x5f, 0x39, 0x16, 0x42, 0x37, 0x22, 0x1c, 0x0f, 0x43, 0x5b, 0x46, 0x4b, 0x0a, 0x26, 0x2e, 0x40, 0x12, 0x21, 0x3c, 0x36, 0x38, 0x1e, 0x01, 0x1b, 0x05, 0x4e, 0x44, 0x3d, 0x04, 0x10, 0x5a, 0x2a, 0x23, 0x34, 0x25, 0x2f, 0x2b, 0x50, 0x3a, 0x54, 0x47, 0x59, 0x13, 0x57, };
#define CXMAP_SIZE (sizeof(cxmap)/sizeof(short))

/*
 * Hashes the NUL-terminated string `key` into the range [0, ix_max).
 *
 * key:    string to hash.
 * ix_max: number of buckets; must be positive.
 * Returns the bucket index.
 *
 * The mixing is done in unsigned arithmetic: with a signed int,
 * (hash<<1) + (mash<<5) can exceed INT_MAX, which is undefined behaviour.
 * The value is masked to 30 bits after every step, so the results equal
 * those of the usual wrap-around evaluation.
 */
int Str_Hash( const char *key, int ix_max )
{
    const char *cp;
    unsigned int mash;
    unsigned int hash = 33501551u;

    for (cp = key; *cp; cp++) {
        mash = (unsigned int)cxmap[(unsigned char)*cp % CXMAP_SIZE];
        hash = (hash >> 4) ^ 0x5C5CF5Cu ^ ((hash << 1) + (mash << 5));
        hash &= 0x3FFFFFFFu;
    }
    return (int)(hash % (unsigned int)ix_max);
}
/* One dictionary word; words sharing a key are chained through `next`. */
typedef struct sDictWord *DictWord;
struct sDictWord {
    const char *word;
    DictWord next;
};

/* One anagram group, identified by its sorted-letter key. */
typedef struct sHashEntry *HashEntry;
struct sHashEntry {
    const char *key;   /* sorted letters shared by all words of the group */
    HashEntry next;    /* next entry in the same hash bucket */
    DictWord words;    /* list of words in this group */
    HashEntry link;    /* next group in the list headed by mostPerms */
    short wordCount;   /* number of words in `words` */
};

#define HT_SIZE 8192

HashEntry hashTable[HT_SIZE];

/* Head of the list (chained through `link`) of the largest groups found. */
HashEntry mostPerms = NULL;
int buildAnagrams( FILE *fin ) {
char buffer[40]; char bufr2[40]; char *hkey; int hix; HashEntry he, *hep; DictWord we; int maxPC = 2; int numWords = 0; while ( fgets(buffer, 40, fin)) { for(hkey = buffer; *hkey && (*hkey!='\n'); hkey++); *hkey = 0; hkey = sortedWord(buffer, bufr2); hix = Str_Hash(hkey, HT_SIZE); he = hashTable[hix]; hep = &hashTable[hix]; while( he && strcmp(he->key , hkey) ) { hep = &he->next; he = he->next; } if ( ! he ) { he = (HashEntry)malloc(sizeof(struct sHashEntry)); he->next = NULL; he->key = strdup(hkey); he->wordCount = 0; he->words = NULL; he->link = NULL; *hep = he; } we = (DictWord)malloc(sizeof(struct sDictWord)); we->word = strdup(buffer); we->next = he->words; he->words = we; he->wordCount++; if ( maxPC < he->wordCount) { maxPC = he->wordCount; mostPerms = he; he->link = NULL; } else if (maxPC == he->wordCount) { he->link = mostPerms; mostPerms = he; } numWords++; } printf("%d words in dictionary max ana=%d\n", numWords, maxPC); return maxPC;
}
/*
 * Reads unixdict.txt, builds the anagram groups and writes the largest
 * groups to anaout.txt, one group per line prefixed by its word count.
 * Returns 0 on success, 1 if either file cannot be opened.
 */
int main( )
{
    HashEntry he;
    DictWord we;
    FILE *f1;

    f1 = fopen("unixdict.txt","r");
    if (f1 == NULL) {
        perror("unixdict.txt");
        return 1;
    }
    buildAnagrams(f1);
    fclose(f1);

    f1 = fopen("anaout.txt","w");
//    f1 = stdout;
    if (f1 == NULL) {
        perror("anaout.txt");
        return 1;
    }

    for (he = mostPerms; he; he = he->link) {
        fprintf(f1,"%d:", he->wordCount);
        for(we = he->words; we; we = we->next) {
            fprintf(f1,"%s, ", we->word);
        }
        fprintf(f1, "\n");
    }
    fclose(f1);
    return 0;
}</lang> Output: (less than 1 second on old P500)
5:vile, veil, live, levi, evil, 5:trace, crate, cater, carte, caret, 5:regal, large, lager, glare, alger, 5:neal, lena, lean, lane, elan, 5:lange, glean, galen, angle, angel, 5:elba, bela, bale, able, abel,
C++
<lang cpp>#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <vector>
#include <algorithm>
#include <iterator>

// Reads unixdict.txt, groups the words by their sorted letters and prints
// every group that has the maximum number of words.
int main() {
  std::ifstream in("unixdict.txt");
  typedef std::map<std::string, std::vector<std::string> > AnagramMap;
  AnagramMap anagrams;

  std::string word;
  size_t count = 0;   // size of the largest group seen so far
  while (std::getline(in, word)) {
    std::string key = word;
    std::sort(key.begin(), key.end());
    // note: the [] operator automatically inserts a new value if key does not exist
    std::vector<std::string> &group = anagrams[key];
    group.push_back(word);
    count = std::max(count, group.size());
  }

  in.close();

  for (AnagramMap::const_iterator it = anagrams.begin();
       it != anagrams.end();
       ++it)
    if (it->second.size() >= count) {
      std::copy(it->second.begin(), it->second.end(),
                std::ostream_iterator<std::string>(std::cout, ", "));
      std::cout << std::endl;
    }
  return 0;
}</lang> Output:
abel, able, bale, bela, elba, caret, carte, cater, crate, trace, angel, angle, galen, glean, lange, alger, glare, lager, large, regal, elan, lane, lean, lena, neal, evil, levi, live, veil, vile,
Clojure
Assume wordfile is the path of the local file containing the words. This code makes a map (groups) whose keys are sorted letters and values are lists of the key's anagrams. It then determines the length of the longest list, and prints out all the lists of that length. <lang lisp> (import '(java.io FileReader BufferedReader))
;; Lazy sequence of the lines (words) of the file named by wordfile.
(def words (line-seq (BufferedReader. (FileReader. wordfile))))

;; Map from a word's sorted letters to the list of words having those letters.
(def groups
  (reduce (fn [tbl word] (update-in tbl [(sort word)] conj word))
          {}
          words))

;; Print every word list whose length equals that of the longest one.
(let [wordlists (vals groups)
      longest   (apply max (map count wordlists))]
  (doseq [wordlist wordlists
          :when (= (count wordlist) longest)]
    (println wordlist)))
</lang>
Common Lisp
Uses the DRAKMA library to retrieve the wordlist.
<lang lisp>(defun anagrams (&optional (url "http://www.puzzlers.org/pub/wordlists/unixdict.txt"))
  "Read one word per line from URL and group the words by their sorted letters.
Returns two values: the list of the largest word sets and their common size."
  (let ((words (drakma:http-request url :want-stream t))
        (wordsets (make-hash-table :test 'equalp)))
    ;; populate the wordsets and close stream
    ;; each table value is a pair (count . list-of-words)
    (do ((word (read-line words nil nil) (read-line words nil nil)))
        ((null word) (close words))
      (let ((letters (sort (copy-seq word) 'char<)))
        (multiple-value-bind (pair presentp)
            (gethash letters wordsets)
          (if presentp
              (setf (car pair) (1+ (car pair))
                    (cdr pair) (cons word (cdr pair)))
              (setf (gethash letters wordsets)
                    (cons 1 (list word)))))))
    ;; find and return the biggest wordsets
    (loop with maxcount = 0
          with maxwordsets = '()
          for pair being each hash-value of wordsets
          if (> (car pair) maxcount)
            do (setf maxcount (car pair)
                     maxwordsets (list (cdr pair)))
          else if (eql (car pair) maxcount)
            do (push (cdr pair) maxwordsets)
          finally (return (values maxwordsets maxcount)))))</lang>
Evaluating
<lang lisp>(multiple-value-bind (wordsets count) (anagrams)
;; Pretty-print the word sets returned as the first value, then print the second value (their size).
(pprint wordsets) (print count))</lang>
produces the following output.
(("vile" "veil" "live" "levi" "evil") ("regal" "large" "lager" "glare" "alger") ("lange" "glean" "galen" "angle" "angel") ("neal" "lena" "lean" "lane" "elan") ("trace" "crate" "cater" "carte" "caret") ("elba" "bela" "bale" "able" "abel")) 5
D
D 1, using Phobos (to download the word list you need the Tango Std Lib). <lang d>import std.stdio, std.stream;
// Iterates over unixdict.txt through a BufferedFile, stores each word in the
// associative array `anags` under the key obtained by sorting its characters,
// keeps the largest group length in `lmax`, and finally writes every group
// whose length equals `lmax`.
void main() {
string[][string] anags; int lmax; foreach (string w; new BufferedFile("unixdict.txt")) { string wrd = w.dup; string key = wrd.sort; anags[key] ~= wrd; int len = anags[key].length; lmax = lmax < len ? len : lmax; } foreach (a; anags) { if (a.length == lmax) { writefln(a); } }
}</lang>
E
<lang e>println("Downloading...")
when (def wordText := <http://www.puzzlers.org/pub/wordlists/unixdict.txt> <- getText()) -> {
    def words := wordText.split("\n")

    # Map from sorted letters to a growable list of words; asking for a
    # missing key creates and stores an empty list for it.
    def storage := [].asMap().diverge()
    def anagramTable extends storage {
        to get(key) { return storage.fetch(key, fn { storage[key] := [].diverge() }) }
    }

    println("Grouping...")
    var largestGroupSeen := 0
    for word in words {
        def anagramGroup := anagramTable[word.sort()]
        anagramGroup.push(word)
        largestGroupSeen max= anagramGroup.size()
    }

    # Compare against largestGroupSeen, the maximum computed above.
    println("Selecting...")
    for _ => anagramGroup ? (anagramGroup.size() == largestGroupSeen) in anagramTable {
        println(anagramGroup.snapshot())
    }
}</lang>
F#
Read the lines in the dictionary, group by the sorted letters in each word, extract the sequences of words sharing the same letters (i.e. anagrams) and sort to put the largest sets of anagrams first: <lang fsharp> System.IO.File.ReadAllLines @"unixdict.txt" |> Seq.groupBy (Seq.sort >> Seq.toArray) |> Seq.map snd |> Seq.sortBy (fun s -> -Seq.length s) </lang> Note that it is necessary to convert the sorted letters in each word from sequences to arrays because the groupBy function uses the default comparison and sequences do not compare structurally (but arrays do in F#).
Yields: <lang fsharp> val it : seq<seq<string>> =
seq [seq ["abel"; "able"; "bale"; "bela"; ...]; seq ["alger"; "glare"; "lager"; "large"; ...]; seq ["angel"; "angle"; "galen"; "glean"; ...]; seq ["caret"; "carte"; "cater"; "crate"; ...]; ...]
</lang> There is plenty of room for optimization but finding all sets of anagrams in this dictionary takes under 0.5s using this code.
Factor
"resource:unixdict.txt" utf8 file-lines [ [ natural-sort >string ] keep ] { } map>assoc sort-keys [ [ first ] compare +eq+ = ] monotonic-split dup 0 [ length max ] reduce '[ length _ = ] filter [ values ] map .
<lang factor>{
{ "abel" "able" "bale" "bela" "elba" } { "caret" "carte" "cater" "crate" "trace" } { "angel" "angle" "galen" "glean" "lange" } { "alger" "glare" "lager" "large" "regal" } { "elan" "lane" "lean" "lena" "neal" } { "evil" "levi" "live" "veil" "vile" }
}</lang>
Groovy
This program: <lang groovy>def words = new URL('http://www.puzzlers.org/pub/wordlists/unixdict.txt').text.readLines()
// group the words by their sorted list of characters
def groups = words.groupBy{ it.toList().sort() }
def bigGroupSize = groups.collect{ it.value.size() }.max()
def isBigAnagram = { it.value.size() == bigGroupSize }
println groups.findAll(isBigAnagram).collect{ it.value }.collect{ it.join(' ') }.join('\n')</lang> produces this output:
abel able bale bela elba alger glare lager large regal angel angle galen glean lange caret carte cater crate trace elan lane lean lena neal evil levi live veil vile
Haskell
<lang haskell>import Data.List

-- | True when both arguments have the same image under f.
groupon f x y = f x == f y

-- | Reads unixdict.txt from the current directory, pairs every word with its
-- sorted letters, groups the pairs by those letters and prints the groups
-- that contain the most words.
main = do
  f <- readFile "unixdict.txt"
  let ws  = lines f
      wix = groupBy (groupon fst) . sort $ zip (map sort ws) ws
      mxl = maximum $ map length wix
  mapM_ (print . map snd) . filter ((==mxl).length) $ wix</lang>
Sample output: <lang haskell>*Main> main ["abel","able","bale","bela","elba"] ["caret","carte","cater","crate","trace"] ["angel","angle","galen","glean","lange"] ["alger","glare","lager","large","regal"] ["elan","lane","lean","lena","neal"] ["evil","levi","live","veil","vile"]</lang>
J
<lang j>   (#~ a: ~: {:"1) (]/.~ /:~&>) <;.2 ] 1!:1 <'unixdict.txt'
+-----+-----+-----+-----+-----+
|abel |able |bale |bela |elba |
+-----+-----+-----+-----+-----+
|alger|glare|lager|large|regal|
+-----+-----+-----+-----+-----+
|angel|angle|galen|glean|lange|
+-----+-----+-----+-----+-----+
|caret|carte|cater|crate|trace|
+-----+-----+-----+-----+-----+
|elan |lane |lean |lena |neal |
+-----+-----+-----+-----+-----+
|evil |levi |live |veil |vile |
+-----+-----+-----+-----+-----+</lang>
Java
The key to this algorithm is the sorting of the characters in each word from the dictionary. The line Arrays.sort(chars); sorts all of the letters in the word in ascending order using a built-in quicksort, so all of the words in the first group in the result end up under the key "aegln" in the anagrams map. <lang java5>import java.net.*; import java.io.*; import java.util.*;
/**
 * Downloads the word list, groups the words by their sorted letters and
 * prints every group that contains the maximum number of words.
 */
public class WordsOfEqChars {
    public static void main(String[] args) throws IOException {
        URL url = new URL("http://www.puzzlers.org/pub/wordlists/unixdict.txt");
        InputStreamReader isr = new InputStreamReader(url.openStream());
        BufferedReader reader = new BufferedReader(isr);

        Map<String, Collection<String>> anagrams = new HashMap<String, Collection<String>>();
        int count = 0;   // size of the largest group seen so far
        try {
            String word;
            while ((word = reader.readLine()) != null) {
                char[] chars = word.toCharArray();
                Arrays.sort(chars);
                String key = new String(chars);

                // Look the group up once and create it on first use.
                Collection<String> group = anagrams.get(key);
                if (group == null) {
                    group = new ArrayList<String>();
                    anagrams.put(key, group);
                }
                group.add(word);
                count = Math.max(count, group.size());
            }
        } finally {
            // Release the connection even if reading fails part-way.
            reader.close();
        }

        for (Collection<String> ana : anagrams.values())
            if (ana.size() >= count)
                System.out.println(ana);
    }
}</lang> Output:
[angel, angle, galen, glean, lange] [elan, lane, lean, lena, neal] [alger, glare, lager, large, regal] [abel, able, bale, bela, elba] [evil, levi, live, veil, vile] [caret, carte, cater, crate, trace]
Lua
Lua's core library is very small and does not include built-in network functionality. If a networking library were imported, the local file in the following script could be replaced with the remote dictionary file. This may or may not be a good implementation, but I thought the method was interesting. <lang lua>-- Build the word set
local set = {}
local file = io.open("unixdict.txt")
local str = file:read()
while str do
    table.insert(set,str)
    str = file:read()
end
file:close()

-- Sort a string from lowest char to highest (recursive partition around the first char)
local function sortString(str)
    if #str <= 1 then return str end
    local less = ''
    local greater = ''
    local pivot = str:byte(1)
    for i = 2, #str do
        if str:byte(i) <= pivot then
            less = less..(str:sub(i,i))
        else
            greater = greater..(str:sub(i,i))
        end
    end
    return sortString(less)..str:sub(1,1)..sortString(greater)
end

-- Build the anagram tree: first level is the word length, each further level
-- is keyed by the next byte of the sorted word; words are appended at the leaf
local tree = {}
for i,word in next,set do
    local sortchar = sortString(word)
    if not tree[#word] then tree[#word] = {} end
    local node = tree[#word]
    for i = 1,#word do
        if not node[sortchar:byte(i)] then
            node[sortchar:byte(i)] = {}
        end
        node = node[sortchar:byte(i)]
    end
    table.insert(node,word)
end

-- Gather largest groups by gathering all groups of current max size and dropping gathered groups and increasing max when a new largest group is found
local max = 0
local set = {}
local function recurse (tree)
    -- count the words stored directly in this node
    local num = 0
    for i,node in next,tree do
        if type(node) == 'string' then num = num + 1 end
    end
    if num > max then
        set = {}
        max = num
    end
    if num == max then
        local newset = {}
        for i,node in next,tree do
            if type(node) == 'string' then table.insert(newset,node) end
        end
        table.insert(set,newset)
    end
    for i,node in next,tree do
        if type(node) == 'table' then recurse(node) end
    end
end

recurse (tree)
for i,v in next,set do
    io.write (i..':\t')
    for j,u in next,v do
        io.write (u..' ')
    end
    print()
end</lang>
M4
<lang M4>divert(-1) changequote(`[',`]') define([for],
[ifelse($#,0,$0, [ifelse(eval($2<=$3),1, [pushdef([$1],$2)$4[]popdef([$1])$0([$1],incr($2),$3,[$4])])])])
# The for macro above expands its fourth argument once per integer from the second to the third argument, with the first argument defined as that integer.
# _bar is defined from the contents of t.txt. eachlineA, when its second argument is positive, applies its third argument to that many leading characters of the text and continues through eachline with the text after that position.
define([_bar],include(t.txt)) define([eachlineA],
[ifelse(eval($2>0),1, [$3(substr([$1],0,$2))[]eachline(substr([$1],incr($2)),[$3])])])
# eachline applies its second argument to each line of its first argument.
# The separator looked up by index is a literal newline, so the quoted string
# below must span two lines.
define([eachline],[eachlineA([$1],index($1,[
]),[$2])])
# removefirst yields its first argument with the character at the position given by the second argument removed.
define([removefirst],
[substr([$1],0,$2)[]substr([$1],incr($2))])
# checkfirst yields 0 when the first character of its first argument does not occur in the second argument. Otherwise it removes that character from the second argument and applies ispermutation to the remainders.
define([checkfirst],
[ifelse(eval(index([$2],substr([$1],0,1))<0),1, 0, [ispermutation(substr([$1],1), removefirst([$2],index([$2],substr([$1],0,1))))])])
# ispermutation yields 1 when both arguments are identical, 0 when their lengths differ or the first is empty, and otherwise defers to checkfirst.
define([ispermutation],
[ifelse([$1],[$2],1, eval(len([$1])!=len([$2])),1,0, len([$1]),0,0, [checkfirst([$1],[$2])])])
# _set and _get store and fetch values under macro names built from a name and an index. _max starts at 1 and _n at 0.
# matchj increments the count stored at the given index, raises _max by one when that count exceeds it, and appends the word to the list stored at that index.
define([_set],[define($1<$2>,$3)]) define([_get],[defn([$1<$2>])]) define([_max],1) define([_n],0) define([matchj],
[_set([count],$2,incr(_get([count],$2)))[]ifelse(eval(_get([count],$2)>_max), 1,[define([_max],incr(_max))])[]_set([list],$2,[_get([list],$2) $1])])
# checkwordj calls matchj when the word is a permutation of the word stored at the given index, and otherwise tries the next index through addwordj.
define([checkwordj],
[ifelse(ispermutation([$1],_get([word],$2)),1,[matchj([$1],$2)], [addwordj([$1],incr($2))])])
# _append stores the word at index _n with a count of 1 and a list holding only that word, then increments _n.
define([_append],
[_set([word],_n,[$1])[]_set([count],_n,1)[]_set([list],_n, [$1 ])[]define([_n],incr(_n))])
# addwordj appends the word as a new entry once the index reaches _n, and otherwise checks it against the entry at that index.
define([addwordj],
[ifelse($2,_n,[_append([$1])],[checkwordj([$1],$2)])])
# addword starts the search for a matching entry at index 0.
define([addword],
[addwordj([$1],0)])
# The line below re-enables output, feeds every line of _bar to addword, then emits _max and the lists whose count equals _max.
divert eachline(_bar,[addword]) _max for([x],1,_n,[ifelse(_get([count],x),_max,[_get([list],x) ])])</lang>
Memory limitations keep this program from working on the full-sized dictionary. Run against the first 100 words, here is the output:
2 abel able aboard abroad
Mathematica
Download the dictionary, split the lines, split the word in characters and sort them. Now sort by those words, and find sequences of equal 'letter-hashes'. Return the longest sequences: <lang Mathematica>list=Import["http://www.puzzlers.org/pub/wordlists/unixdict.txt","Lines"];
(* pair every word with its sorted letters *)
text={#,StringJoin@@Sort[Characters[#]]}&/@list;
(* sort the pairs by the sorted letters, i.e. part 2 of each pair *)
text=SortBy[text,#[[2]]&];
(* split into runs with equal sorted letters and keep only the words, part 1 *)
splits=Split[text,#1[[2]]==#2[[2]]&][[All,All,1]];
maxlen=Max[Length/@splits];
Select[splits,Length[#]==maxlen&]</lang> gives back: <lang Mathematica>{{abel,able,bale,bela,elba},{caret,carte,cater,crate,trace},{angel,angle,galen,glean,lange},{alger,glare,lager,large,regal},{elan,lane,lean,lena,neal},{evil,levi,live,veil,vile}}</lang>
An alternative is faster, but requires version 7 (for Gather):
<lang Mathematica>splits = Gather[list, Sort[Characters[#]] == Sort[Characters[#2]] &];
(* splits gathers the words of list whose sorted characters compare equal; the lines below keep the groups of maximal length *)
maxlen = Max[Length /@ splits];
Select[splits, Length[#] == maxlen &]</lang>
Also, Mathematica's own word list is available; replacing the list definition with list = WordData[]; and forcing maxlen to 5 yields instead this result:
{{angered,derange,enraged,grandee,grenade}, {anisometric,creationism,miscreation,reactionism,romanticise}, {aper,pare,pear,rape,reap}, {ardeb,barde,bared,beard,bread,debar}, {aril,lair,lari,liar,lira,rail,rial}, {aster,rates,stare,tears,teras}, {caret,carte,cater,crate,react,trace}, {east,eats,sate,seat,seta}, {ester,reset,steer,teres,terse}, {inert,inter,niter,nitre,trine}, {latrine,ratline,reliant,retinal,trenail}, {least,slate,stale,steal,stela,tesla}, {luster,lustre,result,rustle,sutler,ulster}, {merit,miter,mitre,remit,timer}, {part,prat,rapt,tarp,trap}, {resin,rinse,risen,serin,siren}, {respect,scepter,sceptre,specter,spectre}}
OCaml
<lang ocaml>(* Converts a string to the list of its characters, in order. *)
let explode str =
  let l = ref [] in
  let len = String.length str in
  for i = len - 1 downto 0 do
    l := str.[i] :: !l
  done;
  (!l)

(* Converts a list of characters back to a string.
   Built with Buffer because strings cannot be mutated in place in current
   OCaml, where String.create and s.[i] <- c are no longer available. *)
let implode li =
  let buf = Buffer.create (List.length li) in
  List.iter (Buffer.add_char buf) li;
  Buffer.contents buf
(* Reads unixdict.txt, groups the words by their sorted letters in a hash
   table and prints every group having the maximum number of words. *)
let () =
  let h = Hashtbl.create 3571 in
  let ic = open_in "unixdict.txt" in
  (try
    while true do
      let w = input_line ic in
      let k = implode (List.sort compare (explode w)) in
      let l =
        try Hashtbl.find h k
        with Not_found -> []
      in
      (* replace, not add: add would keep every shorter list as a hidden
         binding that fold and iter still visit *)
      Hashtbl.replace h k (w::l)
    done
  with End_of_file -> close_in ic);
  let n = Hashtbl.fold (fun _ lw n -> max n (List.length lw)) h 0 in
  Hashtbl.iter (fun _ lw ->
    if List.length lw >= n then
    ( List.iter (Printf.printf " %s") lw;
      print_newline () )
  ) h
;;</lang>
Oz
<lang oz>declare
  %% Helper function: returns the list of lines of the file Filename
  fun {ReadLines Filename}
     File = {New class $ from Open.file Open.text end init(name:Filename)}
  in
     for collect:C break:B do
        case {File getS($)} of false then {File close} {B}
        [] Line then {C Line}
        end
     end
  end

  %% Groups anagrams by using a mutable dictionary
  %% with sorted words as keys
  WordDict = {Dictionary.new}
  for Word in {ReadLines "unixdict.txt"} do
     Keyword = {String.toAtom {Sort Word Value.'<'}}
  in
     WordDict.Keyword := Word|{CondSelect WordDict Keyword nil}
  end
  Sets = {Dictionary.items WordDict}

  %% Filter such that only the largest sets remain
  MaxSetSize = {FoldL {Map Sets Length} Max 0}
  LargestSets = {Filter Sets fun {$ S} {Length S} == MaxSetSize end}
in
  %% Display result (make sure strings are shown as string, not as number lists)
  {Inspector.object configureEntry(widgetShowStrings true)}
  {Inspect LargestSets}</lang>
Perl
<lang perl>use LWP::Simple;
use List::Util qw(max);

my @words = split(' ', get('http://www.puzzlers.org/pub/wordlists/unixdict.txt'));
my %anagram;   # sorted letters => array ref of the words made of them
foreach my $word (@words) {
    # the key is the word's letters, sorted and joined with the empty string
    push @{ $anagram{join('', sort(split(//, $word)))} }, $word;
}

my $count = max(map {scalar @$_} values %anagram);
foreach my $ana (values %anagram) {
    if (@$ana >= $count) {
        print "@$ana\n";
    }
}</lang> refactor of above: <lang perl>use LWP::Simple;

for (split ' ' => get 'http://www.puzzlers.org/pub/wordlists/unixdict.txt')
    {push @{$anagram{ join '' => sort split // }}, $_}
# first pass finds the largest group size, second pass prints the groups of that size
$max > @$_ or $max = @$_ for values %anagram;
@$_ >= $max and print "@$_\n" for values %anagram;</lang> Output:
alger glare lager large regal abel able bale bela elba evil levi live veil vile angel angle galen glean lange elan lane lean lena neal caret carte cater crate trace
PHP
<lang php><?php
// Download the word list; file_get_contents() returns false on failure.
$text = file_get_contents('http://www.puzzlers.org/pub/wordlists/unixdict.txt');
if ($text === false) {
    exit("Unable to download the word list\n");
}

// Map from a word's sorted letters to the list of words made of those letters.
$anagram = array();
// Split on LF or CRLF and drop empty lines (e.g. after the final newline).
foreach (preg_split('/\r?\n/', $text, -1, PREG_SPLIT_NO_EMPTY) as $word) {
    $chars = str_split($word);
    sort($chars);
    $anagram[implode($chars)][] = $word;
}

// max() must not be given an empty array.
if ($anagram) {
    $best = max(array_map('count', $anagram));
    foreach ($anagram as $ana)
        if (count($ana) == $best)
            print_r($ana);
}
?></lang>
PowerShell
<lang powershell>$c = New-Object Net.WebClient
$words = -split ($c.DownloadString('http://www.puzzlers.org/pub/wordlists/unixdict.txt'))

# Tag every word with its sorted letters, group the words by that tag, then
# group those groups by their size. The outer group's Name is the size, so
# sort on it numerically and take the largest; sorting on Count would instead
# pick the size shared by the fewest groups.
$top_anagrams = $words `
    | ForEach-Object {
          $_ | Add-Member -PassThru NoteProperty Characters `
                   (-join (([char[]] $_) | Sort-Object))
      } `
    | Group-Object Characters `
    | Group-Object Count `
    | Sort-Object { [int] $_.Name } -Descending `
    | Select-Object -First 1

$top_anagrams.Group | ForEach-Object { $_.Group -join ', ' }</lang> Output:
abel, able, bale, bela, elba alger, glare, lager, large, regal angel, angle, galen, glean, lange caret, carte, cater, crate, trace elan, lane, lean, lena, neal evil, levi, live, veil, vile
Python
Python 2.5 shell input (IDLE) <lang python>>>> import urllib >>> from collections import defaultdict >>> words = urllib.urlopen('http://www.puzzlers.org/pub/wordlists/unixdict.txt').read().split() >>> len(words) 25104 >>> anagram = defaultdict(list) # map sorted chars to anagrams >>> for word in words: anagram[tuple(sorted(word))].append( word )
>>> count = max(len(ana) for ana in anagram.itervalues())
>>> for ana in anagram.itervalues():
if len(ana) >= count:
print ana
['angel', 'angle', 'galen', 'glean', 'lange']
['alger', 'glare', 'lager', 'large', 'regal']
['caret', 'carte', 'cater', 'crate', 'trace']
['evil', 'levi', 'live', 'veil', 'vile']
['elan', 'lane', 'lean', 'lena', 'neal']
['abel', 'able', 'bale', 'bela', 'elba']
>>> count
5
>>></lang>
sort and then group using groupby()
<lang python>>>> import urllib, itertools >>> words = urllib.urlopen('http://www.puzzlers.org/pub/wordlists/unixdict.txt').read().split() >>> len(words) 25104 >>> anagrams = [list(g) for k,g in itertools.groupby(sorted(words, key=sorted), key=sorted)]
>>> count = max(len(ana) for ana in anagrams)
>>> for ana in anagrams:
if len(ana) >= count:
print ana
['abel', 'able', 'bale', 'bela', 'elba']
['caret', 'carte', 'cater', 'crate', 'trace']
['angel', 'angle', 'galen', 'glean', 'lange']
['alger', 'glare', 'lager', 'large', 'regal']
['elan', 'lane', 'lean', 'lena', 'neal']
['evil', 'levi', 'live', 'veil', 'vile']
>>> count
5
>>></lang>
Revolution
<lang revolution>on mouseUp
   -- A maps the sorted letters of a word to a comma-terminated list of words
   repeat for each word W in url "http://www.puzzlers.org/pub/wordlists/unixdict.txt"
      put W & comma after A[sortChars(W)]
   end repeat
   put 0 into winningLength
   repeat for each element E in A
      get the number of items in E
      if it < winningLength then next repeat
      if it > winningLength then
         -- a longer list was found: start the result over
         put it into winningLength
         put empty into winningList
      end if
      put (char 1 to -2 of E) & cr after winningList
   end repeat
   put winningList
end mouseUp
-- returns the characters of X in sorted order
function sortChars X
   get charsToItems(X)
   sort items of it
   return itemsToChars(it)
end sortChars
-- returns the characters of X as a comma-separated list
function charsToItems X
   repeat for each char C in X
      put C & comma after R
   end repeat
   return char 1 to -2 of R
end charsToItems
-- returns X with all commas removed
function itemsToChars X
   replace comma with empty in X
   return X
end itemsToChars</lang> Output:
abel,able,bale,bela,elba elan,lane,lean,lena,neal evil,levi,live,veil,vile caret,carte,cater,crate,trace angel,angle,galen,glean,lange alger,glare,lager,large,regal
Ruby
<lang ruby>require 'open-uri'
anagram = Hash.new {|hash, key| hash[key] = []} # map sorted chars to anagrams
open('http://www.puzzlers.org/pub/wordlists/unixdict.txt') do |f|
words = f.read.split for word in words anagram[word.split().sort] << word end
end
count = anagram.values.map {|ana| ana.length}.max anagram.each_value do |ana|
if ana.length >= count p ana end
end</lang> Output:
["evil", "levi", "live", "veil", "vile"] ["abel", "able", "bale", "bela", "elba"] ["elan", "lane", "lean", "lena", "neal"] ["alger", "glare", "lager", "large", "regal"] ["angel", "angle", "galen", "glean", "lange"] ["caret", "carte", "cater", "crate", "trace"]
<lang ruby>require 'open-uri'
anagram = nil
open('http://www.puzzlers.org/pub/wordlists/unixdict.txt') do |f|
anagram = f.read.split.group_by {|s| s.each_char.sort}
end
count = anagram.each_value.map {|ana| ana.length}.max anagram.each_value do |ana|
if ana.length >= count p ana end
end</lang>
Scala
<lang scala>val lines = scala.io.Source.fromURL("http://www.puzzlers.org/pub/wordlists/unixdict.txt").getLines.map(s=>s.trim())

// map from a word's sorted letters to the set of words made of those letters
var map = Map[String, Set[String]]() withDefaultValue Set()
// the key of a word: its characters in sorted order
def hash(s:String) = new String(s.toList.sort(_<_).toArray)
map = lines.foldLeft(map)((map,s) => map(hash(s)) += s)

val max = map.values.foldLeft(0)((a,b) => a.max(b.size))

map.values.filter(l => l.size == max).foreach{l=>l.foreach(s=>print(s+" ")); println()}</lang> Output:
glean angel angle galen lange elan neal lean lane lena regal alger large lager glare able abel bela bale elba vile levi live evil veil caret trace cater carte crate
Tcl
<lang tcl>package require Tcl 8.5
package require http

set url http://www.puzzlers.org/pub/wordlists/unixdict.txt
set response [http::geturl $url]
set data [http::data $response]
http::cleanup $response

set max 0
array set anagrams {}

# anagrams(sorted letters) holds the list of words made of those letters;
# max tracks the length of the longest such list
foreach line [split $data \n] {
    foreach word [split $line] {
        set anagram [join [lsort [split $word ""]] ""]
        lappend anagrams($anagram) $word
        set max [::tcl::mathfunc::max $max [llength $anagrams($anagram)]]
    }
}

foreach key [array names anagrams] {
    if {[llength $anagrams($key)] == $max} {
        puts $anagrams($key)
    }
}</lang> Outputs:
evil levi live veil vile caret carte cater crate trace abel able bale bela elba elan lane lean lena neal angel angle galen glean lange alger glare lager large regal
Ursala
Supplying the input file on the command line during compilation makes its contents accessible as a pre-declared identifier. The algorithm is to group the words together that are made from the same unordered lists of letters, then collect the groups together that have the same number of words in them, and then show the collection associated with the highest number. <lang Ursala>#import std
#show+
anagrams = mat` * leql$^&h eql|=@rK2tFlSS ^(~&,-<&)* unixdict_dot_txt</lang> output:
evil levi live veil vile caret carte cater crate trace alger glare lager large regal elan lane lean lena neal angel angle galen glean lange abel able bale bela elba
Vedit macro language
This implementation first sorts characters of each word using Insertion sort in subroutine SORT_LETTERS.
Then the word list is sorted using built-in Sort function.
Finally, groups of words are analyzed and largest groups are recorded.
The word list is expected to be in the same directory as the script.
<lang vedit>File_Open("|(PATH_ONLY)\unixdict.txt")

Repeat(ALL) {
    Reg_Copy_Block(10, CP, EOL_Pos)     // original word
    Call("SORT_LETTERS")                // sort letters of the word
    EOL
    IC(' ')
    Reg_Ins(10)                         // add the original word at eol
    Line(1, ERRBREAK)
}

Sort(0, File_Size)                      // sort list according to anagrams

BOF
Search("|F") Search(' ')                // first word in the list
Reg_Copy_Block(10, BOL_Pos, CP+1)       // reg 10 = sorted anagram word
Reg_Copy_Block(11, CP, EOL_Pos)         // reg 11 = list of words in current group
Reg_Empty(12)                           // reg 12 = list of words in largest groups
Reg_Set(13, "
")                                      // reg 13 = newline
#1 = 1                                  // words in this group
#2 = 2                                  // words in largest group found
Repeat(ALL) {
    Line(1, ERRBREAK)
    if (Match(@10, ADVANCE) == 0) {     // same group as previous word?
        Reg_Copy_Block(11, CP-1, EOL_Pos, APPEND)  // add word to this group
        #1++
    } else {                            // different anagram group
        Search(" ", ERRBREAK)
        if (#1 == #2) {                 // same size as the largest?
            Reg_Set(12, @13, APPEND)    // append newline
            Reg_Set(12, @11, APPEND)    // append word list
        }
        if (#1 > #2) {                  // new larger size of group
            Reg_Set(12, @11)            // replace word list
            #2 = #1
        }
        Reg_Copy_Block(10, BOL_Pos, CP+1)
        Reg_Copy_Block(11, CP, EOL_Pos) // first word of new group
        #1 = 1
    }
}

Buf_Quit(OK)                            // close word list file
Buf_Switch(Buf_Free)                    // output results in a new edit buffer
Reg_Ins(12)                             // display all groups of longest anagram words
Return

////////////////////////////////////////////////////////////////////
//
// Sort characters in current line using Insertion sort
//
:SORT_LETTERS:
GP(EOL_pos) #9 = Cur_Col-1
for (#1 = 2; #1 <= #9; #1++) {
    Goto_Col(#1) #8 = Cur_Char
    #2 = #1
    while (#2 > 1) {
        #7 = Cur_Char(-1)
        if (#7 <= #8) { break }
        Ins_Char(#7, OVERWRITE)
        #2--
        Goto_Col(#2)
    }
    Ins_Char(#8, OVERWRITE)
}
return</lang>
Output:
abel able bale bela elba caret carte cater crate trace angel angle galen glean lange alger glare lager large regal elan lane lean lena neal evil levi live veil vile