Textonyms: Difference between revisions

17,509 bytes added ,  2 years ago
m
→‎{{header|Phix}}: added syntax colouring, use of unix_dict(), multiple max_idx, and longest words
m (→‎{{header|Phix}}: added syntax colouring, use of unix_dict(), multiple max_idx, and longest words)
Line 2,238:
 
=={{header|Phix}}==
<!--<lang Phix>sequence digit = repeat(-1,255phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
digit['a'..'c'] = '2'
<span style="color: #004080;">sequence</span> <span style="color: #000000;">digit</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">255</span><span style="color: #0000FF;">)</span>
digit['d'..'f'] = '3'
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'a'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'c'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'2'</span>
digit['g'..'i'] = '4'
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'d'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'f'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'3'</span>
digit['j'..'l'] = '5'
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'g'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'i'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'4'</span>
digit['m'..'o'] = '6'
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'j'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'l'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'5'</span>
digit['p'..'s'] = '7'
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'m'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'o'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'6'</span>
digit['t'..'v'] = '8'
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'p'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'s'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'7'</span>
digit['w'..'z'] = '9'
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'t'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'v'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'8'</span>
 
<span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #008000;">'w'</span><span style="color: #0000FF;">..</span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'9'</span>
function digits(string word)
for i=1 to length(word) do
<span style="color: #008080;">function</span> <span style="color: #000000;">digits</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">word</span><span style="color: #0000FF;">)</span>
integer ch = word[i]
<span style="color: #004080;">string</span> <span style="color: #000000;">keycode</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">))</span>
if ch<'a' or ch>'z' then return "" end if
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
word[i] = digit[ch]
<span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">word</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
end for
<span style="color: #7060A8;">assert</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">)</span>
return word
<span style="color: #000000;">keycode</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">digit</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">]</span>
end function
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">return</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">keycode</span><span style="color: #0000FF;">,</span><span style="color: #000000;">word</span><span style="color: #0000FF;">}</span>
sequence words = {}, last=""
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
object word, keycode
integer keycode_count = 0, textonyms = 0,
<span style="color: #008080;">function</span> <span style="color: #000000;">az</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">word</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">return</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)>=</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">and</span> <span style="color: #7060A8;">max</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)<=</span><span style="color: #008000;">'z'</span> <span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
this_count = 0, max_count = 0, max_idx
 
<span style="color: #004080;">sequence</span> <span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">apply</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">filter</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">unix_dict</span><span style="color: #0000FF;">(),</span><span style="color: #000000;">az</span><span style="color: #0000FF;">),</span><span style="color: #000000;">digits</span><span style="color: #0000FF;">),</span> <span style="color: #000000;">max_idx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">long_idx</span>
integer fn = open("demo\\unixdict.txt","r")
<span style="color: #004080;">string</span> <span style="color: #000000;">word</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">keycode</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">last</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
while 1 do
<span style="color: #004080;">integer</span> <span style="color: #000000;">keycode_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">textonyms</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
word = trim(gets(fn))
<span style="color: #000000;">this_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">max_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">longest</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
if atom(word) then exit end if
keycode = digits(word)
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"There are %d words in unixdict.txt which can be represented by the digit key mapping.\n"</span><span style="color: #0000FF;">,{</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)})</span>
if length(keycode) then
words = append(words, {keycode, word})
<span style="color: #000080;font-style:italic;">-- Sort by keycode: while words are ordered we get
end if
-- eg {"a","ab","b","ba"} -&gt; {"2","22","2","22"}</span>
end while
<span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sort</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">deep_copy</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">))</span>
close(fn)
printf(1,"There are %d words in unixdict.txt which can be represented by the digit key mapping.\n",{length(words)})
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
 
<span style="color: #0000FF;">{</span><span style="color: #000000;">keycode</span><span style="color: #0000FF;">,</span><span style="color: #000000;">word</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
words = sort(words)
<span style="color: #008080;">if</span> <span style="color: #000000;">keycode</span><span style="color: #0000FF;">=</span><span style="color: #000000;">last</span> <span style="color: #008080;">then</span>
for i=1 to length(words) do
<span style="color: #000000;">textonyms</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">this_count</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span>
{keycode,word} = words[i]
<span style="color: #000000;">this_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
if keycode=last then
<span style="color: #008080;">if</span> <span style="color: #000000;">this_count</span><span style="color: #0000FF;">>=</span><span style="color: #000000;">max_count</span> <span style="color: #008080;">then</span>
textonyms += this_count=1
<span style="color: #008080;">if</span> <span style="color: #000000;">this_count</span><span style="color: #0000FF;">></span><span style="color: #000000;">max_count</span> <span style="color: #008080;">then</span>
this_count += 1
<span style="color: #000000;">max_idx</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">}</span>
if this_count>max_count then
max_count <span style="color: this_count#008080;">else</span>
<span style="color: #000000;">max_idx</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">i</span>
max_idx = i
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #000000;">max_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">this_count</span>
else
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
keycode_count += 1
<span style="color: #008080;">else</span>
last = keycode
<span style="color: #000000;">keycode_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
this_count = 1
<span style="color: #000000;">last</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">keycode</span>
end if
<span style="color: #000000;">this_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
 
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)>=</span><span style="color: #000000;">longest</span> <span style="color: #008080;">then</span>
printf(1,"They require %d digit combinations to represent them.\n",{keycode_count})
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)></span><span style="color: #000000;">longest</span> <span style="color: #008080;">then</span>
printf(1,"%d digit combinations represent Textonyms.\n",{textonyms})
<span style="color: #000000;">long_idx</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">}</span>
 
<span style="color: #008080;">else</span>
sequence dups = {}
<span style="color: #000000;">long_idx</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">i</span>
for i=max_idx-max_count+1 to max_idx do
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
dups = append(dups,words[i][2])
<span style="color: #000000;">longest</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
printf(1,"The maximum number of textonyms for a particular digit key mapping is %d:\n",{max_count})
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"They require %d digit combinations to represent them.\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">keycode_count</span><span style="color: #0000FF;">})</span>
printf(1," %s encodes %s\n",{words[max_idx][1],join(dups,"/")})</lang>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%d digit combinations represent Textonyms.\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">textonyms</span><span style="color: #0000FF;">})</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"The maximum number of textonyms for a particular digit key mapping is %d:\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">max_count</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">max_idx</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">max_idx</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">-</span><span style="color: #000000;">max_count</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">dups</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">vslice</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">l</span><span style="color: #0000FF;">..</span><span style="color: #000000;">k</span><span style="color: #0000FF;">],</span><span style="color: #000000;">2</span><span style="color: #0000FF;">),</span><span style="color: #008000;">"/"</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" %s encodes %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span><span style="color: #000000;">dups</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"The longest words are %d characters long\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">longest</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Encodings with this length are:\n"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">long_idx</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" %s encodes %s\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">long_idx</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]])</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</lang>-->
{{Out}}
<small>(my unixdict.txt seems to have grown by 4 entries sometime in the past couple of years...)</small>
<pre>
There are 2497924981 words in unixdict.txt which can be represented by the digit key mapping.
They require 2290422906 digit combinations to represent them.
1473 digit combinations represent Textonyms.
The maximum number of textonyms for a particular digit key mapping is 9:
269 encodes amy/any/bmw/bow/box/boy/cow/cox/coy
729 encodes paw/pax/pay/paz/raw/ray/saw/sax/say
The longest words are 22 characters long
Encodings with this length are:
3532876362374256472749 encodes electroencephalography
</pre>
 
7,806

edits