Filter words

From Rosetta Code
Revision as of 19:42, 13 February 2021 by Petelomax (talk | contribs) (Merge dictionary tasks)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Filter words is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Using unixdict.txt, filter words in as many boring, unimaginative, and useless ways as you can think of. (tee hee)

Wherever possible use generic parameterised and potentially useful routines rather than being specific to one inane petty little task.

Extra credit

Replace as many entries as you can in the following tasks with "see [[Filter_words#Lang]]"

Alternade_words
Prime_words
ABC_words
Odd_words
Changeable_words
Words_containing_"the"_substring
Find_words_with_alternating_vowels_and_consonants
Words_from_neighbour_ones
Find_words_which_contains_more_than_3_e_vowels
Find_words_which_first_and_last_three_letters_are_equals
Change_e_letters_to_i_in_words
Find_words_which_contains_all_the_vowels
Find_words_which_contains_most_consonants

Phix

Draft. All of the very task-specific routines below have simply been copied; they should/will be merged where possible. <lang Phix>sequence words = get_text("demo/unixdict.txt",GT_LF_STRIPPED) -- dictionary shared by every filter below; used as a sequence of strings (presumably one word per line of the file)

-- Run one filter routine and print a one-line summary of its results.
--  desc: label inserted into the printed line ("<count> <desc> words: ...")
--  rid:  routine invoked via call_func() to produce the result list
--  len:  abs(len) is handed to shorten(); a negative len prints the shortened
--        list via join(), otherwise it is printed with sprintf("%v")
--  args: argument list passed to rid through call_func()
procedure filter_words(string desc, integer rid, len=3, sequence args={})
    sequence found = call_func(rid,args),
             sample = shorten(found,"",abs(len)),
             shown
    if len<0 then
        shown = join(sample)
    else
        shown = sprintf("%v",{sample})
    end if
    printf(1,"%d %s words: %s\n",{length(found),desc,shown})
end procedure

-- Keep the entries of words for which the routine rid returns true.
function plain(integer rid)
    return filter(words,rid)
end function

-- Alternade words: for each word of 6 or more letters, split its letters into
-- the odd-position and even-position halves and keep the word when
-- binary_search() locates both halves in words.
function alternade()
    sequence found = {}
    for i=1 to length(words) do
        string word = words[i]
        if length(word)>=6 then
            sequence halves = repeat("",2)
            for j=1 to length(word) do
                -- odd j feeds halves[1], even j feeds halves[2]
                integer k = 2-remainder(j,2)
                halves[k] &= word[j]
            end for
            sequence hits = apply(true,binary_search,{halves,{words}})
            if sum(sq_gt(hits,0))=2 then
                found = append(found,sprintf("%s (%s,%s)",{word,halves[1],halves[2]}))
            end if
        end if
    end for
    return found
end function

-- True when is_prime() holds for every character of word.
function sap(string word)
    return sum(apply(word,is_prime))==length(word)
end function

-- True when 'a', 'b' and 'c' all occur in word and their first positions
-- are in ascending order.
function abc(string word)
    sequence firstpos = apply(true,find,{"abc",{word}})
    return find(0,firstpos)==0 and firstpos==sort(firstpos)
end function

-- filter() callback: keeps elements at odd indexes (the element itself is ignored).
function oddx(integer /*ch*/, idx)
    return remainder(idx,2)=1
end function

-- The characters of word found at odd positions.
function oddch(string word)
    return filter(word,oddx)
end function

-- True for words longer than 4 characters.
function over4(string word)
    return length(word)>4
end function

-- Odd words: the odd-position letters of each dictionary word, kept when the
-- result is longer than 4 characters and is itself present in words.
function oddwords()
    sequence oddletters = apply(words,oddch),
             longenough = filter(oddletters,over4)
    return filter(longenough,"in",words)
end function

-- True for words longer than 11 characters.
function over11(string word)
    return length(word)>11
end function

-- Changeable words: pairs of words longer than 11 characters that have the
-- same length and differ in exactly one position.
function changeable()
    sequence longwords = filter(words,over11),
             pairs = {}
    for i=1 to length(longwords) do
        string wi = longwords[i]
        for j=i+1 to length(longwords) do
            string wj = longwords[j]
            if length(wi)=length(wj) then
                if sum(sq_ne(wi,wj))=1 then
                    pairs = append(pairs,wi&" <=> "&wj)
                end if
            end if
        end for
    end for
    return pairs
end function

-- True for words longer than 11 characters that contain "the".
function the(string word)
    return length(word)>11 and match("the",word)
end function

-- True for odd idx.
function odd(integer idx)
    return remainder(idx,2)=1
end function

-- True when ch is one of "aeiou".
function vowel(integer ch)
    return find(ch,"aeiou")!=0
end function

-- Alternating vowels and consonants: for words longer than 9 characters,
-- non-zero when the per-letter vowel flags match either the odd-position
-- pattern or its inverse; false (0) otherwise.
function oddeven(string word)
    if length(word)<=9 then
        return false
    end if
    sequence vflags = apply(word,vowel),
             oddpos = apply(tagset(length(word)),odd)
    return find(vflags,{oddpos,sq_not(oddpos)})
end function

-- True for words of 9 or more characters (note: >=, despite the name).
function over9(string word)
    return length(word)>=9
end function

-- Words of 9 or more characters; the candidate pool for neighwords().
sequence dictionary = filter(words,over9)

-- Column n of dictionary (via vslice), trimmed to elements n..-10+n.
function slicen(integer n)
    sequence column = vslice(dictionary,n)
    return column[n..-10+n]
end function

-- Words from neighbour ones: columnize the nine trimmed letter columns and
-- keep the unique results that are present in dictionary.
function neighwords()
    sequence columns = apply(tagset(9),slicen),
             candidates = columnize(columns)
    return unique(filter(candidates,"in",dictionary))
end function

-- True when word contains none of "aiou" and more than three 'e's.
function note(string word)
    return find_any("aiou",word)=0 and length(find_all('e',word))>3
end function

-- True for words longer than 5 characters whose first three letters equal
-- their last three.
function flaste(string word)
    return length(word)>5 and word[1..3]=word[-3..-1]
end function

-- word with every "e" replaced by "i".
function chei(string word)
    return substitute(word,"e","i")
end function

-- True for words longer than 5 characters that contain an 'e' and whose
-- e-to-i substitution is also in words.
function cheti(string word)
    return length(word)>5 and find('e',word) and find(chei(word),words)
end function

-- {original,substituted} pairs for every word accepted by cheti().
function chetei()
    sequence originals = filter(words,cheti)
    return columnize({originals,apply(originals,chei)})
end function

-- True when vowel occurs exactly once in word.
function onev(string word, integer vowel)
    return length(find_all(vowel,word))=1
end function

-- True for words longer than 10 characters in which each of "aeiou" occurs
-- exactly once.
function allv(string word)
    return length(word)>10 and sum(apply(true,onev,{{word},"aeiou"}))=5
end function

-- True when ch is not one of "aeiou".
function consonant(integer ch)
    return find(ch,"aeiou")=0
end function

-- True for words longer than 10 characters.
function over10(string word)
    return length(word)>10
end function

-- Most consonants: {consonant count, word} for every word longer than 10
-- characters whose non-vowel characters are all distinct, ordered by
-- sort_columns(res,{-1,2}).
function mostc()
    sequence longwords = filter(words,over10),
             scored = {}
    for i=1 to length(longwords) do
        string cons = filter(longwords[i],consonant)
        if length(unique(cons))=length(cons) then
            scored = append(scored,{length(cons),longwords[i]})
        end if
    end for
    return sort_columns(scored,{-1,2})
end function

-- One entry per filter: {desc, rid, len[, args]}, in the parameter order of
-- filter_words(). Entries that use plain() carry the word predicate in args.
constant tests = {
    {"alternade",         alternade,   2},
    {"prime",             plain,       7, {sap}},
    {"abc",               plain,       4, {abc}},
    {"odd",               oddwords,    5},
    {"changeable",        changeable,  1},
    {"the",               plain,       3, {the}},
    {"alternating vowel", plain,       3, {oddeven}},
    {"neighbour",         neighwords, -4},
    {"> 3 e",             plain,       3, {note}},
    {"first3=last3",      plain,       3, {flaste}},
    {"e to i",            chetei,      2},
    {"all vowels",        plain,       3, {allv}},
    {"most consonant",    mostc,       2}}

-- Run filter_words over the table (presumably one call per entry, with the
-- entry's elements as the arguments).
papply(false,filter_words,tests)</lang>

Output:
58 alternade words: {"accost (acs,cot)","accuse (acs,cue)","...","truant (tun,rat)","twirly (til,wry)"}
36 prime words: {"a","aaa","age","agee","ak","am","ama","...","magma","make","mamma","me","meek","meg","q"}
55 abc words: {"aback","abacus","abc","abdicate","...","strabismic","syllabic","tabernacle","tablecloth"}
14 odd words: {"brain","cider","cried","grata","hades","...","sight","saute","spree","spree","trial"}
26 changeable words: {"aristotelean <=> aristotelian","...","upperclassman <=> upperclassmen"}
32 the words: {"authenticate","chemotherapy","chrysanthemum","...","weatherproof","weatherstrip","weatherstripping"}
67 alternating vowel words: {"aboriginal","apologetic","bimolecular","...","unimodular","uninominal","verisimilitude"}
24 neighbour words: applicate architect astronomy christine ... telephone transcend transport transpose
16 > 3 e words: {"belvedere","dereference","elsewhere","...","seventeenth","telemeter","tennessee"}
8 first3=last3 words: {"antiperspirant","calendrical","einstein","hotshot","murmur","oshkosh","tartar","testes"}
26 e to i words: {{"analyses","analysis"},{"atlantes","atlantis"},"...",{"vector","victor"},{"welles","willis"}}
25 all vowels words: {"ambidextrous","bimolecular","cauliflower","...","praseodymium","stupefaction","sulfonamide"}
347 most consonant words: {{9,"comprehensible"},{8,"administrable"},"...",{4,"bourgeoisie"},{4,"onomatopoeia"}}