Yahoo! search interface: Difference between revisions

m
→‎{{header|Phix}}: syntax coloured, fixed to work with current output.
(Added Wren)
m (→‎{{header|Phix}}: syntax coloured, fixed to work with current output.)
Line 2,110:
=={{header|Phix}}==
{{libheader|Phix/libcurl}}
As noted elsewhere, Yahoo and other search sites regularly change their output format, so do not be too surprised if this is broken once again. Last fixed 22/4/2022 (with the previous versions left in as comments).
<lang Phix>-- demo\rosetta\Yahoo_search_interface.exw
The glyph constants do not display properly on Rosetta Code, so here they are in plain text (and even that had to be edited by hand):
include builtins\libcurl.e
<pre>
 
constant glyphs = {{"\xC2\xB7 ","*"}, -- bullet point
{"&amp;#39;",`'`}, -- single quote
{"&amp;quot;",`"`}, -- double quote
{"&amp;amp;","&"}, -- ampersand
{"\xE2\x94\xAC\xC2\xAB","[R]"}, -- registered
{"\xC2\xAE","[R]"}}, -- registered
</pre>
{gutf8,gascii} = columnize(glyphs),
<!--<lang Phix>(notonline)-->
tags = {{`<a `,`</a>`},
<span style="color: #000080;font-style:italic;">--
{`<b>`,`</b>`},
-- demo\rosetta\Yahoo_search_interface.exw
{`<span class=" fc-2nd">`,`</span>`}}
-- =======================================
 
--</span>
function grab(string txt, opener, closer, integer tdx)
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- (libcurl)</span>
integer openidx = match(opener,txt,tdx)
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">\</span><span style="color: #000000;">libcurl</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
if openidx=0 then return {0,""} end if
integer closeidx = match(closer,txt,openidx)
<span style="color: #008080;">constant</span> <span style="color: #000000;">glyphs</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{{</span><span style="color: #008000;">"\xC2\xB7 "</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"*"</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- bullet point</span>
txt = txt[openidx+length(opener)..closeidx-1]
<span style="color: #0000FF;">{</span><span style="color: #008000;">"&#39;"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`'`</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- single quote</span>
tdx = 1
<span style="color: #0000FF;">{</span><span style="color: #008000;">"&quot;"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`"`</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- double quote</span>
while tdx<=length(tags) do
<span style="color: #0000FF;">{</span><span style="color: #008000;">"&amp;"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&"</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- ampersand</span>
{opener,closer} = tags[tdx]
<span style="color: #0000FF;">{</span><span style="color: #008000;">"\xE2\x94\xAC\xC2\xAB"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"[R]"</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- registered</span>
integer i = match(opener,txt)
<span style="color: #0000FF;">{</span><span style="color: #008000;">"\xC2\xAE"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"[R]"</span><span style="color: #0000FF;">}},</span> <span style="color: #000080;font-style:italic;">-- registered</span>
if i=0 then
<span style="color: #0000FF;">{</span><span style="color: #000000;">gutf8</span><span style="color: #0000FF;">,</span><span style="color: #000000;">gascii</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">columnize</span><span style="color: #0000FF;">(</span><span style="color: #000000;">glyphs</span><span style="color: #0000FF;">),</span>
tdx += 1
<span style="color: #000000;">tags</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{{</span><span style="color: #008000;">`&lt;a `</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`&lt;/a&gt;`</span><span style="color: #0000FF;">},</span>
else
<span style="color: #0000FF;">{</span><span style="color: #008000;">`&lt;b&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`&lt;/b&gt;`</span><span style="color: #0000FF;">},</span>
if opener[$]='>' then
<span style="color: #0000FF;">{</span><span style="color: #008000;">`&lt;span class=" fc-2nd"&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`&lt;/span&gt;`</span><span style="color: #0000FF;">}}</span>
txt[i..i+length(opener)-1] = ""
else
<span style="color: #008080;">function</span> <span style="color: #000000;">grab</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">opener</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">closer</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">tdx</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">bool</span> <span style="color: #000000;">crop</span><span style="color: #0000FF;">)</span>
txt[i..find('>',txt,i)] = ""
<span style="color: #004080;">integer</span> <span style="color: #000000;">openidx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">opener</span><span style="color: #0000FF;">,</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">tdx</span><span style="color: #0000FF;">)</span>
end if
<span style="color: #008080;">if</span> <span style="color: #000000;">openidx</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #008000;">""</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
i = match(closer,txt,i)
<span style="color: #004080;">integer</span> <span style="color: #000000;">closeidx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">closer</span><span style="color: #0000FF;">,</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">openidx</span><span style="color: #0000FF;">)</span>
txt[i..i+length(closer)-1] = ""
<span style="color: #000000;">txt</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">openidx</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">opener</span><span style="color: #0000FF;">)..</span><span style="color: #000000;">closeidx</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
end if
<span style="color: #000000;">tdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
end while
<span style="color: #008080;">while</span> <span style="color: #000000;">tdx</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tags</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
txt = substitute_all(txt,gutf8,gascii)
<span style="color: #0000FF;">{</span><span style="color: #000000;">opener</span><span style="color: #0000FF;">,</span><span style="color: #000000;">closer</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tags</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tdx</span><span style="color: #0000FF;">]</span>
if length(txt)>80 then txt[78..$] = ".." end if
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">opener</span><span style="color: #0000FF;">,</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">)</span>
return {closeidx+length(closer),txt}
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
end function
<span style="color: #000000;">tdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
 
<span style="color: #008080;">else</span>
procedure YahooSearch(string query, integer page=1)
<span style="color: #008080;">if</span> <span style="color: #000000;">opener</span><span style="color: #0000FF;">[$]=</span><span style="color: #008000;">'&gt;'</span> <span style="color: #008080;">then</span>
printf(1,"Page %d:\n=======\n",page)
<span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">opener</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
string url = sprintf("https://search.yahoo.com/search?p=%s&b=%d", {query, (page-1)*10+1})
<span style="color: #008080;">else</span>
object res = curl_easy_perform_ex(url)
<span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'&gt;'</span><span style="color: #0000FF;">,</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
if not string(res) then
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
?{"some error",res,curl_easy_strerror(res)}
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">closer</span><span style="color: #0000FF;">,</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
return
<span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">closer</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
integer rdx = 1
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
string title, link, desc
<span style="color: #000000;">txt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">gutf8</span><span style="color: #0000FF;">,</span><span style="color: #000000;">gascii</span><span style="color: #0000FF;">)</span>
while true do
<span style="color: #008080;">if</span> <span style="color: #000000;">crop</span> <span style="color: #008080;">and</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">)></span><span style="color: #000000;">80</span> <span style="color: #008080;">then</span> <span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">78</span><span style="color: #0000FF;">..$]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">".."</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
{rdx,title} = grab(res,`<h3 class="title ov-h">`,`</h3>`,rdx)
<span style="color: #008080;">return</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">closeidx</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">closer</span><span style="color: #0000FF;">),</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">}</span>
if rdx=0 then exit end if
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
{rdx,link} = grab(res,`<span class=" fz-ms fw-m fc-12th wr-bw lh-17">`,`</span>`,rdx)
{rdx,desc} = grab(res,`<p class="fz-ms lh-1_43x">`,`</p>`,rdx)
<span style="color: #008080;">procedure</span> <span style="color: #000000;">YahooSearch</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">query</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">page</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
printf(1,"title:%s\nlink:%s\ndesc:%s\n\n",{title,link,desc})
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Page %d:\n=======\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">page</span><span style="color: #0000FF;">)</span>
end while
<span style="color: #004080;">string</span> <span style="color: #000000;">url</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"https://search.yahoo.com/search?p=%s&b=%d"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">query</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">page</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)*</span><span style="color: #000000;">10</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})</span>
end procedure
<span style="color: #000080;font-style:italic;">--?url</span>
 
<span style="color: #004080;">object</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">curl_easy_perform_ex</span><span style="color: #0000FF;">(</span><span style="color: #000000;">url</span><span style="color: #0000FF;">)</span>
YahooSearch("rosettacode")
<span style="color: #000080;font-style:italic;">--?res</span>
YahooSearch("rosettacode",2)</lang>
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #004080;">string</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #0000FF;">?{</span><span style="color: #008000;">"some error"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">curl_easy_strerror</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)}</span>
<span style="color: #008080;">return</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">rdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">title</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">link</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">desc</span>
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
<span style="color: #000080;font-style:italic;">-- {rdx,title} = grab(res,`&lt;h3 class="title ov-h"&gt;`,`&lt;/h3&gt;`,rdx)
-- {rdx,title} = grab(res,`&lt;span class=" d-ib p-abs t-0 l-0 fz-14 lh-20 fc-obsidian wr-bw ls-n pb-4"&gt;`,`&lt;/span&gt;`,rdx)</span>
<span style="color: #0000FF;">{</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">title</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grab</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`&lt;h3 style="display:block;margin-top:24px;margin-bottom:2px;" class="title"&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`&lt;/h3&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #004600;">false</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">rdx</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000080;font-style:italic;">-- {rdx,title} = grab(res,`&lt;/span&gt;`,`&lt;/a&gt;`,rdx)
-- title = title[rmatch(`&lt;/span&gt;`,title)+7..rmatch(`&lt;\a&gt;`,title)]</span>
<span style="color: #000000;">title</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">title</span><span style="color: #0000FF;">[</span><span style="color: #7060A8;">rmatch</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`&lt;/span&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">title</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">7</span><span style="color: #0000FF;">..$]</span>
<span style="color: #000080;font-style:italic;">-- {rdx,link} = grab(res,`&lt;span class=" fz-ms fw-m fc-12th wr-bw lh-17"&gt;`,`&lt;/span&gt;`,rdx)</span>
<span style="color: #0000FF;">{</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">link</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grab</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`&lt;span&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`&lt;/span&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #004600;">true</span><span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">-- {rdx,desc} = grab(res,`&lt;p class="fz-ms lh-1_43x"&gt;`,`&lt;/p&gt;`,rdx)</span>
<span style="color: #0000FF;">{</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">desc</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grab</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`&lt;span class=" fc-falcon"&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`&lt;/span&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #004600;">true</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"title:%s\nlink:%s\ndesc:%s\n\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">title</span><span style="color: #0000FF;">,</span><span style="color: #000000;">link</span><span style="color: #0000FF;">,</span><span style="color: #000000;">desc</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #000000;">YahooSearch</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rosettacode"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">YahooSearch</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rosettacode"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)</span>
<span style="color: #0000FF;">?</span><span style="color: #008000;">"done"</span>
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span>
<!--</lang>-->
{{out}}
<pre>
7,794

edits