Random sentence from book: Difference between revisions

m
→‎{{header|Phix}}: added syntax colouring the hard way
m (→‎{{header|Phix}}: added syntax colouring the hard way)
Line 614:
=={{header|Phix}}==
{{libheader|Phix/libcurl}}
<!--<lang Phix>-- demo/rosetta/RandomSentence.exw>
<span style="color: #000080;font-style:italic;">-- demo/rosetta/RandomSentence.exw</span>
include builtins\libcurl.e
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">\</span><span style="color: #000000;">libcurl</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
constant url = "http://www.gutenberg.org/files/36/36-0.txt",
<span style="color: #008080;">constant</span> <span style="color: #000000;">url</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"http://www.gutenberg.org/files/36/36-0.txt"</span><span style="color: #0000FF;">,</span>
filename = "war_of_the_worlds.txt",
<span style="color: #000000;">filename</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"war_of_the_worlds.txt"</span><span style="color: #0000FF;">,</span>
fsent = "No one would have believed",
<span style="color: #000000;">fsent</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"No one would have believed"</span><span style="color: #0000FF;">,</span>
lasts = "End of the Project Gutenberg EBook",
<span style="color: #000000;">lasts</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"End of the Project Gutenberg EBook"</span><span style="color: #0000FF;">,</span>
unicodes = {utf32_to_utf8({#2019}), -- rsquo
<span style="color: #000000;">unicodes</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #7060A8;">utf32_to_utf8</span><span style="color: #0000FF;">({</span><span style="color: #000000;">#2019</span><span style="color: #0000FF;">}),</span> <span style="color: #000080;font-style:italic;">-- rsquo</span>
utf32_to_utf8({#2014})}, -- hyphen
<span style="color: #7060A8;">utf32_to_utf8</span><span style="color: #0000FF;">({</span><span style="color: #000000;">#2014</span><span style="color: #0000FF;">})},</span> <span style="color: #000080;font-style:italic;">-- hyphen</span>
asciis = {"'","-"},
<span style="color: #000000;">asciis</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"'"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"-"</span><span style="color: #0000FF;">},</span>
aleph = tagset('Z','A')&tagset('z','a')&tagset('9','0')&",'.?! ",
<span style="color: #000000;">aleph</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'Z'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'A'</span><span style="color: #0000FF;">)&</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'z'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'a'</span><span style="color: #0000FF;">)&</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'9'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'0'</span><span style="color: #0000FF;">)&</span><span style="color: #008000;">",'.?! "</span><span style="color: #0000FF;">,</span>
follow = new_dict(), -- {word} -> {words,counts}
<span style="color: #000000;">follow</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">new_dict</span><span style="color: #0000FF;">(),</span> <span style="color: #000080;font-style:italic;">-- {word} -&gt; {words,counts}</span>
follow2 = new_dict() -- {word,word} -> {words,counts}
<span style="color: #000000;">follow2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">new_dict</span><span style="color: #0000FF;">()</span> <span style="color: #000080;font-style:italic;">-- {word,word} -&gt; {words,counts}</span>
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">file_exists</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Downloading %s...\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">})</span>
<span style="color: #004080;">CURLcode</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">curl_easy_get_file</span><span style="color: #0000FF;">(</span><span style="color: #000000;">url</span><span style="color: #0000FF;">,</span><span style="color: #008000;">""</span><span style="color: #0000FF;">,</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">res</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">CURLE_OK</span> <span style="color: #008080;">then</span> <span style="color: #7060A8;">crash</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"cannot download"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">get_text</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fsent</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text</span><span style="color: #0000FF;">)..</span><span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">lasts</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">,</span><span style="color: #000000;">unicodes</span><span style="color: #0000FF;">,</span><span style="color: #000000;">asciis</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">,</span><span style="color: #008000;">".?!-\n"</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">" ."</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" ? "</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" ! "</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" "</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" "</span><span style="color: #0000FF;">})</span>
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">filter</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"in"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">aleph</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">account</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">words</span><span style="color: #0000FF;">)</span>
if not file_exists(filename) then
<span style="color: #004080;">string</span> <span style="color: #000000;">next</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">words</span><span style="color: #0000FF;">[$]</span>
printf(1,"Downloading %s...\n",{filename})
<span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..$-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
CURLcode res = curl_easy_get_file(url,"",filename)
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">to</span> <span style="color: #000000;">1</span> <span style="color: #008080;">by</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
if res!=CURLE_OK then crash("cannot download") end if
<span style="color: #004080;">integer</span> <span style="color: #000000;">d</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">follow</span><span style="color: #0000FF;">,</span><span style="color: #000000;">follow2</span><span style="color: #0000FF;">}[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
end if
<span style="color: #004080;">sequence</span> <span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">getdd</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">,{{},{}},</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
string text = get_text(filename)
<span style="color: #004080;">integer</span> <span style="color: #000000;">tk</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">next</span><span style="color: #0000FF;">,</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span>
text = text[match(fsent,text)..match(lasts,text)-1]
<span style="color: #008080;">if</span> <span style="color: #000000;">tk</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
text = substitute_all(text,unicodes,asciis)
<span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span><span style="color: #000000;">next</span><span style="color: #0000FF;">)</span>
text = substitute_all(text,".?!-\n",{" ."," ? "," ! "," "," "})
<span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
text = filter(text,"in",aleph)
<span style="color: #008080;">else</span>
sequence words = split(text)
<span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">tk</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
procedure account(sequence words)
<span style="color: #7060A8;">setd</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">,</span><span style="color: #000000;">t</span><span style="color: #0000FF;">,</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
string next = words[$]
<span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">..$]</span>
words = words[1..$-1]
<span style="color: #008080;">if</span> <span style="color: #000000;">words</span><span style="color: #0000FF;">!={</span><span style="color: #008000;">"."</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> <span style="color: #000080;font-style:italic;">-- (may as well quit)</span>
for i=length(words) to 1 by -1 do
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
integer d = {follow,follow2}[i]
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
sequence t = getdd(words,{{},{}},d)
integer tk = find(next,t[1])
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">2</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
if tk=0 then
<span style="color: #008080;">if</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],{</span><span style="color: #008000;">"."</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"?"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"!"</span><span style="color: #0000FF;">})</span>
t[1] = append(t[1],next)
<span style="color: #008080;">and</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
t[2] = append(t[2],1)
<span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">lower</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span>
else
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
t[2][tk] += 1
<span style="color: #000000;">account</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #7060A8;">max</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)..</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
setd(words,t,d)
words = words[2..$]
<span style="color: #008080;">function</span> <span style="color: #000000;">weighted_random_pick</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">words</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">dict</span><span style="color: #0000FF;">)</span>
if words!={"."} then exit end if -- (may as well quit)
<span style="color: #004080;">sequence</span> <span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">getd</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dict</span><span style="color: #0000FF;">)</span>
end for
<span style="color: #004080;">integer</span> <span style="color: #000000;">total</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sum</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">]),</span>
end procedure
<span style="color: #000000;">r</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">rand</span><span style="color: #0000FF;">(</span><span style="color: #000000;">total</span><span style="color: #0000FF;">)</span>
 
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">])</span> <span style="color: #008080;">do</span>
for i=2 to length(words) do
<span style="color: #000000;">r</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
if find(words[i],{".","?","!"})
<span style="color: #008080;">if</span> <span style="color: #000000;">r</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
and i<length(words) then
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
words[i+1] = lower(words[i+1])
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
account(words[max(1,i-2)..i])
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
end for
 
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">5</span> <span style="color: #008080;">do</span>
function weighted_random_pick(sequence words, integer dict)
<span style="color: #004080;">sequence</span> <span style="color: #000000;">sentence</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"."</span><span style="color: #0000FF;">,</span><span style="color: #000000;">weighted_random_pick</span><span style="color: #0000FF;">({</span><span style="color: #008000;">"."</span><span style="color: #0000FF;">},</span><span style="color: #000000;">follow</span><span style="color: #0000FF;">)}</span>
sequence t = getd(words,dict)
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
integer total = sum(t[2]),
<span style="color: #004080;">string</span> <span style="color: #000000;">next</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">weighted_random_pick</span><span style="color: #0000FF;">(</span><span style="color: #000000;">sentence</span><span style="color: #0000FF;">[-</span><span style="color: #000000;">2</span><span style="color: #0000FF;">..-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span><span style="color: #000000;">follow2</span><span style="color: #0000FF;">)</span>
r = rand(total)
<span style="color: #000000;">sentence</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">sentence</span><span style="color: #0000FF;">,</span><span style="color: #000000;">next</span><span style="color: #0000FF;">)</span>
for i=1 to length(t[2]) do
<span style="color: #008080;">if</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">next</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">"."</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"?"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"!"</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
r -= t[2][i]
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
if r<=0 then
<span style="color: #000000;">sentence</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">upper</span><span style="color: #0000FF;">(</span><span style="color: #000000;">sentence</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span>
return t[1][i]
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">sentence</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">..$-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])&</span><span style="color: #000000;">sentence</span><span style="color: #0000FF;">[$]})</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
end for
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span>
end function
<!--</lang>-->
 
for i=1 to 5 do
sequence sentence = {".",weighted_random_pick({"."},follow)}
while true do
string next = weighted_random_pick(sentence[-2..-1],follow2)
sentence = append(sentence,next)
if find(next,{".","?","!"}) then exit end if
end while
sentence[2][1] = upper(sentence[2][1])
printf(1,"%s\n",{join(sentence[2..$-1])&sentence[$]})
end for
{} = wait_key()</lang>
{{out}}
<pre>
7,806

edits