Anagram generator
You are encouraged to solve this task according to the task description, using any language you may know.
There are already other tasks relating to finding existing anagrams. This one is about creating them.
Write a (set of) routine(s) that, when given a word list to work from, and word or phrase as a seed, generates anagrams of that word or phrase. Feel free to ignore letter case, white-space, punctuation and symbols. Probably best to avoid numerics too, but feel free to include them if that floats your boat.
It is not necessary to (only) generate anagrams that make sense. That is a hard problem, much more difficult than can realistically be done in a small program; though again, if you feel the need, you are invited to amaze your peers.
In general, try to form phrases made up of longer words. Feel free to manually reorder output words or add punctuation and/or case changes to get a better meaning.
- Task
Write an anagram generator program.
Use a publicly and freely available word file as its word list.
- unixdict.txt from http://wiki.puzzlers.org is a popular, though somewhat limited choice.
- A much larger word list: words_alpha.txt file from https://github.com/dwyl/english-words. May be better as far as coverage but may return unreasonably large results.
Use your program to generate anagrams of some words / phrases / names of your choice. No need to show all the output. It is likely to be very large. Just pick out one or two of the best results and show the seed word/phrase and anagram.
For example, show the seed and one or two of the best anagrams:
- Purefox -> Fur expo
- Petelomax -> Metal expo
.oO(hmmm. Seem to be detecting something of a trend here...)
Ada
This approach uniquely hashes combinations of letters and then searches for matches on the hash. A little long-winded, but very fast.
pragma Ada_2022;
with Ada.Characters.Handling; use Ada.Characters.Handling;
with Ada.Containers.Vectors;
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Strings.Unbounded; use Ada.Strings.Unbounded;
with Ada.Text_IO.Unbounded_IO; use Ada.Text_IO.Unbounded_IO;
with Interfaces; use Interfaces;
procedure Anagram_Generator is
MIN_WORD_LEN : constant Positive := 2;
MAX_WORD_LEN : constant Positive := 14;
DICT_FILENAME : constant String := "unixdict.txt";
Invalid_Word : exception;
subtype LC_Chars is Character range 'a' .. 'z'; -- used to filter dictionary etc.
type Dict_Length_Range is range MIN_WORD_LEN .. (MAX_WORD_LEN - MIN_WORD_LEN);
type Word_Entry_Rec is record
Word : Unbounded_String;
Hash : Unsigned_64;
end record;
package Word_Vectors is new Ada.Containers.Vectors (Positive, Word_Entry_Rec);
type Hashed_Dict_Arr is array (Dict_Length_Range) of Word_Vectors.Vector;
function Hash (Str : String) return Unsigned_64 is
PRIMES : constant array (LC_Chars) of Unsigned_64 := [2, 3, 5, 7, 11, 13,
17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101];
Res : Unsigned_64 := 1;
begin
for C of Str loop Res := Res * PRIMES (C); end loop;
return Res;
end Hash;
function Load_Candidate_Words (Dict_Filename : String; Min_Len, Max_Len : Positive)
return Hashed_Dict_Arr is
Dict_File : File_Type;
Read_Word : Unbounded_String;
Cands : Hashed_Dict_Arr;
Valid : Boolean;
C : Character;
Word_Len : Positive;
Word_Entry : Word_Entry_Rec;
begin
Open (File => Dict_File, Mode => In_File, Name => Dict_Filename);
while not End_Of_File (Dict_File) loop
Read_Word := Get_Line (Dict_File);
Word_Len := Length (Read_Word);
if Word_Len in Min_Len .. Max_Len then
Valid := True;
for Ix in 1 .. Word_Len loop
C := Element (Read_Word, Ix);
Valid := C in LC_Chars;
exit when not Valid;
end loop;
if Valid then
Word_Entry.Word := Read_Word;
Word_Entry.Hash := Hash (To_String (Read_Word));
Cands (Dict_Length_Range (Word_Len)).Append (Word_Entry);
end if;
end if;
end loop;
Close (Dict_File);
return Cands;
end Load_Candidate_Words;
procedure Find_Words (Len_1, Len_2 : Integer; Hash_1, Hash_2 : Unsigned_64;
Dict : in out Hashed_Dict_Arr) is
begin
for WE_1 of Dict (Dict_Length_Range (Len_1)) loop
if WE_1.Hash = Hash_1 then
for WE_2 of Dict (Dict_Length_Range (Len_2)) loop
if WE_2.Hash = Hash_2 then
Put_Line (" " & WE_1.Word & " " & WE_2.Word);
end if;
end loop;
WE_1.Hash := 0; -- Don't reuse this word
end if;
end loop;
end Find_Words;
function Pop_Count (N : Unsigned_32) return Natural is
V : Unsigned_32 := N;
begin
V := V - (Shift_Right (V, 1) and 16#55555555#);
V := (V and 16#33333333#) + (Shift_Right (V, 2) and 16#33333333#);
V := Shift_Right (((V + Shift_Right (V, 4) and 16#f0f0f0f#) * 16#1010101#), 24);
return Natural (V);
end Pop_Count;
function Next (N : Unsigned_32) return Unsigned_32 is
Res : Unsigned_32;
Ones : constant Natural := Pop_Count (N);
begin
case Ones is
when 0 => return 0;
when 1 => return Shift_Left (N, 1);
when others => -- Simple implementation
Res := N + 1;
while Pop_Count (Res) /= Ones loop
Res := Res + 1;
end loop;
return Res;
end case;
end Next;
function Masked_Chars (Src : String; Mask : Unsigned_32) return String is
Res : String (1 .. Pop_Count (Mask));
Shifted : Unsigned_32 := Mask;
Str_Ix : Positive := 1;
begin
for I in 1 .. Src'Length loop
if (Shifted and 1) = 1 then
Res (Str_Ix) := Src (I);
Str_Ix := @ + 1;
end if;
Shifted := Shift_Right (Shifted, 1);
exit when Shifted = 0; -- short-circuit an empty mask
end loop;
return Res;
end Masked_Chars;
procedure Anagram (Word : String) is
Word_LC : constant String := To_Lower (Word);
Max_Search_Len : constant Positive := Word'Length - MIN_WORD_LEN;
Hashed_Dict : Hashed_Dict_Arr;
Part_1_Len, Part_2_Len : Integer;
Part_1 : Unsigned_32;
Part_Len_Mask : constant Unsigned_32 := (2 ** Word'Length) - 1;
begin
if Word'Length not in MIN_WORD_LEN .. MAX_WORD_LEN then
raise Invalid_Word with "Word length out of allowed range.";
end if;
Hashed_Dict := Load_Candidate_Words (DICT_FILENAME, MIN_WORD_LEN, Max_Search_Len);
Put_Line ("Two-word anagrams of " & Word & "...");
Part_1_Len := Word'Length - MIN_WORD_LEN;
Part_2_Len := Word'Length - Part_1_Len;
while Part_1_Len >= Part_2_Len loop
Part_1 := Unsigned_32 (2 ** (Part_1_Len) - 1);
while Part_1 < 2 ** Word'Length loop
Find_Words (Part_1_Len, Part_2_Len, Hash (Masked_Chars(Word_LC, Part_1)),
Hash (Masked_Chars(Word_LC, (not Part_1) and Part_Len_Mask)), Hashed_Dict);
Part_1 := Next (Part_1);
end loop;
Part_1_Len := @ - 1;
Part_2_Len := @ + 1;
end loop;
end Anagram;
begin
Anagram ("Purefox");
Anagram ("Petelomax");
Anagram ("rosettacode");
end Anagram_Generator;
- Output:
Two-word anagrams of Purefox... peru fox pure fox Two-word anagrams of Petelomax... example to omelet pax exempt lao tempo alex tempo axle motel apex expel atom expel moat exalt poem latex poem Two-word anagrams of rosettacode... doctorate se ostracod tee creosote tad detector sao rosetta code rosetta coed rosette coda oersted coat doctor tease cetera stood create stood derate scoot
C++
#include <algorithm>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <numeric>
#include <map>
#include <set>
#include <string>
#include <vector>
std::vector<std::string> string_to_vector(const std::string& word) {
std::vector<std::string> letters{ };
for ( const char& ch : word ) {
letters.emplace_back(std::string(1, ch));
}
return letters;
}
std::string vector_to_string(const std::vector<std::string>& letters) {
std::string result = "";
for ( const std::string& letter : letters ) {
result += letter;
}
return result;
}
std::vector<std::vector<std::string>> create_combinations(
const std::vector<std::string>& words, const uint32_t& k) {
std::vector<std::vector<std::string>> combinations{ };
std::vector<bool> visited(words.size());
std::fill(visited.end() - k, visited.end(), true);
while ( std::next_permutation(visited.begin(), visited.end()) ) {
std::vector<std::string> combination{ };
for ( uint32_t i = 0; i < words.size(); ++i ) {
if ( visited[i] ) {
combination.emplace_back(words[i]);
}
}
combinations.emplace_back(combination);
}
return combinations;
}
void anagram_generator(const std::string& word, std::map<std::string, std::vector<std::string>>& word_map) {
std::string text = "";
for ( char ch : word ) {
ch = std::tolower(ch);
if ( ch >= 'a' && ch <= 'z' ) {
text += std::string(1, ch);
}
}
std::vector<std::string> text_vector = string_to_vector(text);
std::sort(text_vector.begin(), text_vector.end());
std::set<std::string> previous_letters{ };
for ( uint32_t n = text.size() / 2; n >= 1; --n ) {
for ( std::vector<std::string> letters_one_vector : create_combinations(text_vector, n) ) {
std::sort(letters_one_vector.begin(), letters_one_vector.end());
std::string letters_one = vector_to_string(letters_one_vector);
if ( previous_letters.contains(letters_one) ) {
continue;
}
previous_letters.insert(letters_one);
if ( word_map.contains(letters_one) ) {
std::vector<std::string> anagrams_one = word_map[letters_one];
std::vector<std::string> letters_two_vector{ };
std::set_difference(text_vector.begin(), text_vector.end(),
letters_one_vector.begin(), letters_one_vector.end(), back_inserter(letters_two_vector));
std::string letters_two = vector_to_string(letters_two_vector);
if ( text.size() == 2 * n ) {
if ( previous_letters.contains(letters_two) ) {
continue;
}
previous_letters.insert(letters_two);
}
if ( word_map.contains(letters_two) ) {
std::vector<std::string> anagrams_two = word_map[letters_two];
for ( const std::string& word_one : anagrams_one ) {
for ( const std::string& word_two : anagrams_two ) {
std::cout << " " << word_one << " " << word_two << std::endl;
}
}
}
}
}
}
}
int main() {
std::map<std::string, std::vector<std::string>> word_map{ };
std::fstream file_stream;
file_stream.open("../unixdict.txt");
std::string word;
while ( file_stream >> word ) {
std::vector<std::string> letters = string_to_vector(word);
std::sort(letters.begin(), letters.end());
std::string sorted_word = std::accumulate(letters.begin(), letters.end(), std::string{ });
if ( word_map.contains(sorted_word) ) {
word_map[sorted_word].emplace_back(word);
} else {
std::vector<std::string> entry{ word };
word_map[sorted_word] = entry;
}
}
file_stream.close();
std::vector<std::string> words{ "Rosetta code", "Joe Biden", "Clint Eastwood" };
for ( const std::string& word : words ) {
std::cout << "Two word anagrams of " << word << ":" << std::endl;
anagram_generator(word, word_map);
std::cout << std::endl;
}
}
- Output:
Two word anagrams of Rosetta code: stood cetera stood create scoot derate tease doctor code rosetta coed rosetta coat oersted coda rosette tee ostracod sao detector tad creosote se doctorate Two word anagrams of Joe B'iden: done jibe node jibe jed niobe Two word anagrams of Clint Eastw3ood: edison walcott atwood stencil eliot downcast clint eastwood clio downstate coil downstate loci downstate
J
Implementation:
anagen=: {{
seed=. (tolower y)([-.-.)a.{~97+i.26
letters=. ~.seed
list=. <;._2 tolower fread x
ok1=. */@e.&letters every list
ref=. #/.~seed
counts=. <: #/.~@(letters,])every ok1#list
ok2=. counts */ .<:ref
c=. ok2#counts
maybe=. i.1,~#c
while. #maybe do.
done=. (+/"2 maybe{c)*/ .=ref
if. 1 e. done do.
r=. ;:inv ((done#maybe) { ok2#I.ok1){L:0 1 <;._2 fread x
if. #r=. r #~ -. r -:"1&tolower y do. r return. end.
end.
maybe=. ; c {{
<(#~ n */ .<:"1~ [: +/"2 {&m) y,"1 0 ({:y)}.i.#m
}} ref"1(-.done)#maybe
end.
EMPTY
}}
Conceptually, we're working with a graph here -- given a seed word, we have sequence of letters and a count of each of those letters which we must supply. Given a list of words whose letters do not exceed that count, we extend that list with an additional word, discarding word sequences which go over the letter count. We would repeat as necessary until we satisfy the needed letter counts. Also, we inspect the "final result" and remove our seed word or phrase from it (if it was present -- and if that would have been our only result we would extend our under budget word sequences until they satisfy our letter counts). It seems like two words is usually sufficient here.
To limit our resource consumption, we turn this conceptual graph into a directed graph by requiring that the words in our anagram sequence appear in alphabetic order (we could go back and permute each word sequence if we wanted to pad our results).
We could further improve performance (and typically generate fewer example anagrams) if we required our initial word choice to include "rarely used letters" and then alternated between increasing alphabetic order and decreasing alphabetic order when adding words to the potential anagrams. This approach would reduce the size of intermediate results which would [usually] significantly increase search speed. But it might require backtracking if our initial choices were unfortunate.
Examples:
'unixdict.txt' anagen 'Rosettacode'
cetera stood
coat oersted
coda rosette
code rosetta
coed rosetta
create stood
creosote tad
derate scoot
detector sao
doctor tease
doctorate se
ostracod tee
'unixdict.txt' anagen 'Thundergnat'
dragnet hunt
gannett hurd
ghent tundra
gnat thunder
hurd tangent
tang thunder
'unixdict.txt' anagen 'Clint Eastwood'
atwood stencil
clio downstate
coil downstate
downcast eliot
downstate loci
edison walcott
'unixdict.txt' anagen 'Pizza party time'
aim pizza pretty
airy pizza tempt
amaze ritz tippy
ami pizza pretty
amity pert pizza
amp pizzeria tty
armpit pizza yet
army petit pizza
emit party pizza
imp pizza treaty
impart pizza yet
irma petty pizza
item party pizza
m patty pizzeria
map pizzeria tty
marietta z zippy
mart piety pizza
mary petit pizza
mat pizza pyrite
mater pity pizza
mayer pitt pizza
maze trait zippy
meaty pizza trip
mi piazza pretty
mira petty pizza
mire patty pizza
mite party pizza
mitt piazza prey
mitt piazza pyre
mizar tate zippy
mizar teat zippy
mizar tippy zeta
mt piazza pyrite
myra petit pizza
pam pizzeria tty
party pizza time
patty pizza rime
petty piazza rim
piazza pity term
piazza prime tty
piazza trim type
piety pizza tram
pizza pyrite tam
Java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
public final class AnagramGenerator {
public static void main(String[] args) throws IOException {
Map<String, List<String>> wordMap = Files.lines(Path.of("unixdict.txt"))
.collect(Collectors.groupingBy( w -> w.chars().sorted().mapToObj( i -> String.valueOf((char) i) )
.collect(Collectors.joining())));
List<String> words = List.of( "Rosetta code", "Joe Biden", "Clint Eastwood" );
for ( String word : words ) {
System.out.println("Two word anagrams of " + word + ":");
anagramGenerator(word, wordMap);
System.out.println();
}
}
private static void anagramGenerator(String word, Map<String, List<String>> wordMap) {
String text = word.chars().mapToObj( i -> (char) i ).filter(Character::isLetter)
.map(String::valueOf).map(String::toLowerCase).collect(Collectors.joining());
List<String> textList = text.chars().mapToObj( i -> String.valueOf((char) i) ).toList();
Set<String> previousLetters = new HashSet<String>();
for ( int n = text.length() / 2; n >= 1; n-- ) {
for ( List<String> lettersOneList : createCombinations(textList, n) ) {
String lettersOne = lettersOneList.stream().sorted().collect(Collectors.joining());
if ( previousLetters.contains(lettersOne) ) {
continue;
}
previousLetters.add(lettersOne);
if ( wordMap.containsKey(lettersOne) ) {
List<String> anagramsOne = wordMap.get(lettersOne);
List<String> lettersTwoList = listDifference(textList, lettersOneList);
String lettersTwo = lettersTwoList.stream().sorted().collect(Collectors.joining());
if ( text.length() == 2 * n ) {
if ( previousLetters.contains(lettersTwo) ) {
continue;
}
previousLetters.add(lettersTwo);
}
if ( wordMap.containsKey(lettersTwo) ) {
List<String> anagramsTwo = wordMap.get(lettersTwo);
for ( String wordOne : anagramsOne ) {
for ( String wordTwo : anagramsTwo ) {
System.out.println(" " + wordOne + " " + wordTwo);
}
}
}
}
}
}
}
private static <T> List<T> listDifference(List<T> removeFrom, List<T> toRemove) {
List<T> result = new ArrayList<T>(removeFrom);
toRemove.forEach(result::remove);
return result;
}
private static <T> List<List<T>> createCombinations(List<T> elements, int k) {
List<List<T>> combinations = new ArrayList<List<T>>();
createCombinations(elements, k, new ArrayList<T>(), combinations, 0);
return combinations;
}
private static <T> void createCombinations(
List<T> elements, int k, List<T> accumulator, List<List<T>> combinations, int index) {
if ( accumulator.size() == k ) {
combinations.addFirst( new ArrayList<T>(accumulator) );
} else if ( k - accumulator.size() <= elements.size() - index ) {
createCombinations(elements, k, accumulator, combinations, index + 1);
accumulator.add(elements.get(index));
createCombinations(elements, k, accumulator, combinations, index + 1);
accumulator.removeLast();
}
}
}
- Output:
Two word anagrams of Rosetta code: scoot derate stood cetera stood create tease doctor code rosetta coed rosetta coat oersted coda rosette sao detector tee ostracod tad creosote se doctorate Two word anagrams of Joe Biden: jibe done jibe node jed niobe Two word anagrams of Clint Eastwood: edison walcott atwood stencil clint eastwood eliot downcast clio downstate coil downstate loci downstate
jq
This entry provides a number of anagram-related functions, all based on the profiling of words by the frequency counts of their constituent letters. The emphasis is on a reasonable level efficiency within the context of general-purpose utility functions.
In general, a streaming approach is taken, in part for speed and to minimize memory requirements, and in part to highlight jq's support for streaming.
Phrasal anagrams are not limited by a predetermined length, and are computed as a stream consisting of a sorted list of words, e.g. for "firefox", one phrasal anagram would be presented as:
["off","re","xi"]
In order to motivate the helper functions, consider first the task of generating anagrams of a word, $word. In this case, we have only to construct a listing of relevant words, which can be done by calling our "dictionary" construction function:
dict(inputs; $word; $word|length)
Here, the last argument ensures that only words having the same length as the target word are selected.
Next, consider the task of generating anagrammatic phrases wherein each word has a minimum length of 2. This would be accomplished by:
dict(inputs; $word; 2) | anagram_phrases($letters)
Use of gojq
The program presented here has been tested using both jq, the C-based implementation, and gojq, the Go implementation of jq, but in the latter case with the additional definition:
def keys_unsorted: keys;
Generic Functions
def count(stream): reduce stream as $x (0; .+1);
# remove adjacent duplicates
def once(stream):
foreach stream as $x ({prev: null};
if $x == .prev then .emit = null else .emit = $x | .prev = $x end;
select(.emit).emit);
# one-for-one subtraction
# if . and $y are arrays, then emit the array difference, respecting multiplicity;
# similarly if both are strings, emit a string representing the difference.
def minus($y):
if type == "array"
then reduce $y[] as $x (.;
index($x) as $ix
| if $ix then .[:$ix] + .[$ix+1:] else . end)
else explode | minus($y|explode) | implode
end;
# bag of words
def bow(stream):
reduce stream as $word ({}; .[($word|tostring)] += 1);
Anagrams
# Input: a string
def profile: bow( explode[] | [.] | implode);
# Comparison of profiles
# . and $p2 should be profiles
def le($p2):
. as $p1
| all( keys_unsorted[]; $p1[.] <= $p2[.]);
def eq($p2):
. as $p1
| all( keys_unsorted + ($p2|keys_unsorted) | unique[]; $p1[.] == $p2[.]);
# Construct a list of relevant words using the given stream.
# $min is the a-priori minimum word length and
# so if $min is the length of $word, we are looking for exact anagrams:
def dict(stream; $word; $min):
($word|length) as $length
| ($word|profile) as $profile
| if $length == $min
then [stream | select(profile|eq($profile))]
else [stream
| select(length >= $min and
length <= $length and
(profile|le($profile)))]
end ;
# Input: an array of admissible words.
# Output: a stream of anagrams of $word
def anagram($word):
($word|profile) as $profile
| .[]
| select(profile|eq($profile));
# Input: an array of admissible words.
# Output: a stream of subanagrams of $word
# regarding each occurrence of a letter as distinct from all others
def subanagrams($word):
($word|profile) as $profile
| .[]
| select(profile|le($profile));
# input: an array to be extended with an additional dictionary word.
# output: a stream of arrays with additional words
# selected from the characters in the string $letters.
# The input array should be in alphabetical order; if it is,
# so will the output array.
def extend($letters; $dict):
if $letters == "" then .
else . as $in
| ($dict|subanagrams($letters)) as $w
| select(if ($in|length) > 0 then $in[-1] <= $w else true end)
| ($letters | minus($w)) as $remaining
| ($in + [$w]) | extend($remaining; $dict)
end;
def anagram_phrases($letters):
. as $dict
| once([] | extend($letters; $dict));
# Global: $anagram $word $min
def main:
if $anagram
then dict(inputs; $word; $word|length)[]
else dict(inputs; $word; $min)
| anagram_phrases($word)
end ;
main
Invocation template
< unixdict.txt jq -ncrR --arg word WORD --argjson anagram BOOLEAN --argjson min MIN -f anagram-generator.jq
where:
- WORD is a string (normally, a word)
- BOOLEAN determines whether anagrams or anagrammatic phrases are sought
- MIN is the minimum length of admissible words to be used if $anagram is false
Examples
Anagram
# Invocation: < unixdict.txt jq -nrR --arg word read --argjson anagram true --argjson min 3 -f anagram-generator.jq dare dear erda read
Anagrammatic phrases
# Invocation: < unixdict.txt jq -ncR --arg word firefox --argjson anagram false --argjson min 2 -f anagram-generator.jq ["ex","fir","of"] ["ex","for","if"] ["ex","fro","if"] ["ex","ir","off"] ["ex","off","ri"] ["fe","fir","ox"] ["fe","fix","or"] ["fe","for","ix"] ["fe","for","xi"] ["fe","fox","ir"] ["fe","fox","ri"] ["fe","fro","ix"] ["fe","fro","xi"] ["fifo","rex"] ["fire","fox"] ["fix","fore"] ["fix","of","re"] ["fox","if","re"] ["if","of","rex"] ["ix","off","re"] ["ix","offer"] ["off","re","xi"] ["offer","xi"]
Julia
const unixwords = split(read("unixdict.txt", String) |> lowercase, r"\s+")
function findphrases(anastring::AbstractString, choices, sizelong = 4, n_shortpermitted = 1)
anadict = Dict{Char, Int}()
for c in lowercase(anastring)
if 'a' <= c <= 'z'
anadict[c] = get(anadict, c, 0) + 1
end
end
phrases = String[]
function addword(remaining, phrase, numshort)
for w in unixwords
len = length(w)
numshort < 1 && len < sizelong && continue
any(c -> get(remaining, c, 0) < count(==(c), w), w) && @goto nextword
cdict = copy(remaining)
for c in w
cdict[c] -= 1
end
if all(==(0), values(cdict))
return strip(phrase * " " * w)
elseif (newphrase = addword(cdict, phrase * " " * w, numshort - (len < sizelong))) != nothing
push!(phrases, newphrase)
print(length(phrases), "\b\b\b\b\b\b\b\b\b")
end
@label nextword
end
return nothing
end
addword(anadict, "", n_shortpermitted)
return phrases
end
for s in ["Rosetta code", "Joe Biden", "wherrera"]
println("\nFrom '$s':")
foreach(println, findphrases(s, unixwords, 4, 0) |> unique |> sort!)
end
- Output:
From 'Rosetta code': cetera stood coat oersted coda rosette code rosetta coed rosetta create stood derate scoot doctor tease oersted coat rosetta code rosette coda scoot derate stood cetera tease doctor From 'Joe Biden': done jibe jibe done node jibe From 'wherrera': herr ware rare wehr rear wehr ware herr wear herr wehr rare
Nim
import std/[algorithm, sequtils, strutils, tables]
proc readWords(filename: string): seq[string] {.compileTime.} =
result = filename.staticRead().splitLines().map(toLowerAscii)
const UnixWords = readWords("unixdict.txt")
func findPhrases(anaString: string; choices: seq[string];
sizeLong = 4; nShortPermitted = 1): seq[string] =
var anaDict: CountTable[char]
for c in anaString.toLowerAscii:
if c in 'a'..'z':
anadict.inc(c)
var phrases: seq[string]
func addWord(remaining: CountTable[char]; phrase: string; numShort: int): string =
for word in UnixWords:
block Search:
if numShort < 1 and word.len < sizeLong:
break Search
if anyIt(word, remaining.getOrDefault(it) < word.count(it)):
break Search
var cdict = remaining
for c in word: cdict.inc(c, -1)
if allIt(cdict.values.toSeq, it == 0):
return strip(phrase & ' ' & word)
let newPhrase = addWord(cdict, phrase & ' ' & word, numshort - ord(word.len < sizeLong))
if newPhrase.len > 0:
phrases.add newPhrase
discard addWord(anaDict, "", nShortPermitted)
result = move(phrases)
for s in ["Rosetta code", "Joe Biden", "wherrera"]:
echo "From '$#':" % s
for phrase in findPhrases(s, UnixWords, 4, 0).sorted.deduplicate(true):
echo phrase
echo()
- Output:
From 'Rosetta code': cetera stood coat oersted coda rosette code rosetta coed rosetta create stood derate scoot doctor tease oersted coat rosetta code rosette coda scoot derate stood cetera tease doctor From 'Joe Biden': done jibe jibe done node jibe From 'wherrera': herr ware rare wehr rear wehr ware herr wear herr wehr rare
Phix
Couldn't really think of a better way than just building a dirty great filter list to get rid of the less interesting answers....
with javascript_semantics constant bo_ring = {"al","alex","am","an","and","anent","ann","ant","ar","ares","art","at","ax","axle", "dan","dar","darn","dart","de","den","dent","dna","drag","du","dun","dunn", "ed","edt","eh","el","em","en","end","eng","erg","eros","est","et","eta","ex", "ga","gad","gar","garth","ge","ghent","gnat","gnu","grad","gu","ha","had","han", "hand","hart","hat","he","hut","la","lam","lao","lax","lee","leo","lo","lot", "ma","max","mao","mo","moe","mel","met","mt","nat","nd","ne","ned","nh","nne","nu", "opel","opt","ott","ox","pa","pax","pee","pl","pm","po","poe","rag","ran","rand", "rant","rat","rd","re","red","ret","rna","ruth","sa","sat","se","sort","st", "ta","tad","tag","tam","tamp","tao","taos","tan","tang","tangent","tanh", "tar","tat","tater","tau","tax","ted","tel","ten","tenant","tent","tern", "than","that","the","then","tn","tnt","to","top","tor","tort","tot","trag", "tsar","tun","tuna","tung","tx","un","ut","wa"} function az(string word) return min(word)>='a' and max(word)<='z' and not find(word,bo_ring) end function sequence words = filter(unix_dict(),az), wdsaz = sort(columnize({apply(words,sort),tagset(length(words))})) sequence seen = {} procedure test(string w, sequence found={}) if found={} then seen = {} printf(1,"%s:\n",{w}) w = sort(lower(w)) end if for i=abs(binary_search({w[1..1],0},wdsaz)) to length(wdsaz) do {string ax, integer wdx} = wdsaz[i] if ax[1]!=w[1] then exit end if sequence e = tagset(length(w)) e[1] = 0 integer j = 2 for k=2 to length(ax) do while j<length(w) and ax[k]>w[j] do j += 1 end while if j>length(w) or ax[k]!=w[j] then exit end if e[j] = 0 j += 1 if k=length(ax) then string aw = words[wdx] e = filter(e,"!=",0) if length(e)=0 then if length(found) then sequence f = append(deep_copy(found),aw), sf = sort(deep_copy(f)) if not find(sf,seen) then seen = append(seen,sf) printf(1," %s\n",{join(f,", ")}) end if end if else test(extract(w,e),append(deep_copy(found),aw)) end if end if end for end for end procedure papply({"Rosetta", "PureFox","PeteLomax","Wherrera","Thundergnat"},test)
- Output:
Rosetta: treat, so sea, trot east, rot seat, rot state, or taste, or oar, test oat, rest star, toe as, otter PureFox: peru, fox pure, fox rex, of, up PeteLomax: exalt, poem latex, poem apex, motel axe, elm, pot axe, let, mop axe, me, plot atom, expel moat, expel Wherrera: rare, wehr rear, wehr ware, herr wear, herr Thundergnat: ad, tenth, rung dragnet, hunt dang, net, hurt hard, gent, nut gannett, hurd agent, dr, hunt hang, tend, rut nag, tend, hurt nag, thud, rent rang, thud, net ah, tend, grunt ah, dr, gent, nut haunt, dr, gent tart, dung, hen
Raku
Using the unixdict.txt word file by default.
unit sub MAIN ($in is copy = '', :$dict = 'unixdict.txt');
say 'Enter a word or phrase to be anagramed. (Loading dictionary)' unless $in.chars;
# Load the words into a word / Bag hash
my %words = $dict.IO.slurp.lc.words.race.map: { .comb(/\w/).join => .comb(/\w/).Bag };
# Declare some globals
my ($phrase, $count, $bag);
loop {
($phrase, $count, $bag) = get-phrase;
find-anagram Hash.new: %words.grep: { .value ⊆ $bag };
}
sub get-phrase {
my $prompt = $in.chars ?? $in !! prompt "\nword or phrase? (press Enter to quit) ";
$in = '';
exit unless $prompt;
$prompt,
+$prompt.comb(/\w/),
$prompt.lc.comb(/\w/).Bag;
}
sub find-anagram (%subset, $phrase is copy = '', $last = Inf) {
my $remain = $bag ∖ $phrase.comb(/\w/).Bag; # Find the remaining letters
my %filtered = %subset.grep: { .value ⊆ $remain }; # Find words using the remaining letters
my $sofar = +$phrase.comb(/\w/); # Get the count of the letters used so far
for %filtered.sort: { -.key.chars, ~.key } { # Sort by length then alphabetically then iterate
my $maybe = +.key.comb(/\w/); # Get the letter count of the maybe addition
next if $maybe > $last; # Next if it is longer than last - only consider descending length words
next if $maybe == 1 and $last == 1; # Only allow one one character word
next if $count - $sofar - $maybe > $maybe; # Try to balance word lengths
if $sofar + $maybe == $count { # It's an anagram
say $phrase ~ ' ' ~ .key and next; # Display it and move on
} else { # Not yet a full anagram, recurse
find-anagram %filtered, $phrase ~ ' ' ~ .key, $maybe;
}
}
}
- Truncated to only show the best few as subjectively determined by me:
Punctuation, capitalization and (in some cases) word order manually massaged.
Enter a word or phrase to be anagramed. (Loading dictionary) word or phrase? (press Enter to quit) Rosettacode doctor tease word or phrase? (press Enter to quit) thundergnat dragnet hunt Gent? Nah, turd. word or phrase? (press Enter to quit) Clint Eastwood downcast eliot I contest waldo nose to wildcat
Wren
To avoid any subjectivity, this just produces all two word anagrams of a word or phrase.
Alternatives formed by simply changing the order of the two words have been suppressed.
import "io" for File
import "./str" for Str, Char
import "./perm" for Comb
import "./seq" for Lst
import "./sort" for Sort
var wordList = "unixdict.txt" // local copy
var words = File.read(wordList).split("\n").map { |w| w.trim() }
var wordMap = {}
for (word in words) {
var letters = word.toList
Sort.insertion(letters)
var sortedWord = letters.join()
if (wordMap.containsKey(sortedWord)) {
wordMap[sortedWord].add(word)
} else {
wordMap[sortedWord] = [word]
}
}
var anagramGenerator = Fn.new { |text|
var letters = Str.lower(text).toList
// remove any non-letters
for (i in letters.count-1..0) {
if (!Char.isLetter(letters[i])) letters.removeAt(i)
}
var lc = letters.count
if (lc < 2) return
var h = (lc/2).floor
var tried = {}
for (n in h..1) {
var sameLength = (lc == 2 * n)
for (letters1 in Comb.list(letters, n)) {
Sort.insertion(letters1)
letters1 = letters1.join()
if (tried[letters1]) continue
tried[letters1] = true
var anagrams = wordMap[letters1]
if (anagrams) {
var letters2 = Lst.except(letters, letters1.toList)
Sort.insertion(letters2)
letters2 = letters2.join()
if (sameLength) {
if (tried[letters2]) continue
tried[letters2] = true
}
var anagrams2 = wordMap[letters2]
if (anagrams2) {
for (word1 in anagrams) {
for (word2 in anagrams2) {
System.print(" " + word1 + " " + word2)
}
}
}
}
}
}
}
var tests = ["Rosettacode", "PureFox", "Petelomax", "Wherrera", "Thundergnat", "ClintEastwood"]
for (i in 0...tests.count) {
System.print("\n%(tests[i]):")
anagramGenerator.call(tests[i])
}
- Output:
Rosettacode: scoot derate stood cetera stood create tease doctor code rosetta coed rosetta coat oersted coda rosette sao detector tee ostracod tad creosote se doctorate PureFox: fox peru fox pure Petelomax: poem exalt poem latex apex motel alex tempo axle tempo atom expel moat expel pax omelet lao exempt to example Wherrera: wehr rare wehr rear ware herr wear herr Thundergnat: ghent tundra hunt dragnet gnat thunder tang thunder hurd gannett hurd tangent ClintEastwood: edison walcott atwood stencil clint eastwood eliot downcast clio downstate coil downstate loci downstate