Yahoo! search interface: Difference between revisions
m
→{{header|Phix}}: syntax coloured, fixed to work with current output.
(Added Wren) |
m (→{{header|Phix}}: syntax coloured, fixed to work with current output.) |
||
Line 2,110:
=={{header|Phix}}==
{{libheader|Phix/libcurl}}
As noted elsewhere, Yahoo and other search sites will regularly change the output format, so don't be too shocked if this is once again broken. Last fixed 22/4/2022 (with previous left in as comments).
The glyphs constants do not show up properly on rosettacode, so here they are in plain text, and I even had to edit that by hand:
<pre>
constant glyphs = {{"\xC2\xB7 ","*"}, -- bullet point
{"&#39;",`'`}, -- single quote
{"&quot;",`"`}, -- double quote
{"&amp;","&"}, -- ampersand
{"\xE2\x94\xAC\xC2\xAB","[R]"}, -- registered
{"\xC2\xAE","[R]"}}, -- registered
</pre>
<!--<lang Phix>(notonline)-->
<span style="color: #000080;font-style:italic;">--
-- demo\rosetta\Yahoo_search_interface.exw
-- =======================================
--</span>
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- (libcurl)</span>
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">\</span><span style="color: #000000;">libcurl</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">glyphs</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{{</span><span style="color: #008000;">"\xC2\xB7 "</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"*"</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- bullet point</span>
<span style="color: #0000FF;">{</span><span style="color: #008000;">"'"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`'`</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- single quote</span>
<span style="color: #0000FF;">{</span><span style="color: #008000;">"""</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`"`</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- double quote</span>
<span style="color: #0000FF;">{</span><span style="color: #008000;">"&"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&"</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- ampersand</span>
<span style="color: #0000FF;">{</span><span style="color: #008000;">"\xE2\x94\xAC\xC2\xAB"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"[R]"</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- registered</span>
<span style="color: #0000FF;">{</span><span style="color: #008000;">"\xC2\xAE"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"[R]"</span><span style="color: #0000FF;">}},</span> <span style="color: #000080;font-style:italic;">-- registered</span>
<span style="color: #0000FF;">{</span><span style="color: #000000;">gutf8</span><span style="color: #0000FF;">,</span><span style="color: #000000;">gascii</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">columnize</span><span style="color: #0000FF;">(</span><span style="color: #000000;">glyphs</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">tags</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{{</span><span style="color: #008000;">`<a `</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`</a>`</span><span style="color: #0000FF;">},</span>
<span style="color: #0000FF;">{</span><span style="color: #008000;">`<b>`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`</b>`</span><span style="color: #0000FF;">},</span>
<span style="color: #0000FF;">{</span><span style="color: #008000;">`<span class=" fc-2nd">`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`</span>`</span><span style="color: #0000FF;">}}</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">grab</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">opener</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">closer</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">tdx</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">bool</span> <span style="color: #000000;">crop</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">openidx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">opener</span><span style="color: #0000FF;">,</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">tdx</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">openidx</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #008000;">""</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">closeidx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">closer</span><span style="color: #0000FF;">,</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">openidx</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">txt</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">openidx</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">opener</span><span style="color: #0000FF;">)..</span><span style="color: #000000;">closeidx</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
<span style="color: #000000;">tdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">tdx</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tags</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #0000FF;">{</span><span style="color: #000000;">opener</span><span style="color: #0000FF;">,</span><span style="color: #000000;">closer</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tags</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tdx</span><span style="color: #0000FF;">]</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">opener</span><span style="color: #0000FF;">,</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">tdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">else</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">opener</span><span style="color: #0000FF;">[$]=</span><span style="color: #008000;">'>'</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">opener</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
<span style="color: #008080;">else</span>
<span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'>'</span><span style="color: #0000FF;">,</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">closer</span><span style="color: #0000FF;">,</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">closer</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #000000;">txt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">gutf8</span><span style="color: #0000FF;">,</span><span style="color: #000000;">gascii</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">crop</span> <span style="color: #008080;">and</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">)></span><span style="color: #000000;">80</span> <span style="color: #008080;">then</span> <span style="color: #000000;">txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">78</span><span style="color: #0000FF;">..$]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">".."</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">return</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">closeidx</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">closer</span><span style="color: #0000FF;">),</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">}</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">YahooSearch</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">query</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">page</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Page %d:\n=======\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">page</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">url</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"https://search.yahoo.com/search?p=%s&b=%d"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">query</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">page</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)*</span><span style="color: #000000;">10</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})</span>
<span style="color: #000080;font-style:italic;">--?url</span>
<span style="color: #004080;">object</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">curl_easy_perform_ex</span><span style="color: #0000FF;">(</span><span style="color: #000000;">url</span><span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">--?res</span>
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #004080;">string</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #0000FF;">?{</span><span style="color: #008000;">"some error"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">curl_easy_strerror</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)}</span>
<span style="color: #008080;">return</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">rdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">title</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">link</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">desc</span>
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
<span style="color: #000080;font-style:italic;">-- {rdx,title} = grab(res,`<h3 class="title ov-h">`,`</h3>`,rdx)
-- {rdx,title} = grab(res,`<span class=" d-ib p-abs t-0 l-0 fz-14 lh-20 fc-obsidian wr-bw ls-n pb-4">`,`</span>`,rdx)</span>
<span style="color: #0000FF;">{</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">title</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grab</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`<h3 style="display:block;margin-top:24px;margin-bottom:2px;" class="title">`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`</h3>`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #004600;">false</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">rdx</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000080;font-style:italic;">-- {rdx,title} = grab(res,`</span>`,`</a>`,rdx)
-- title = title[rmatch(`</span>`,title)+7..rmatch(`<\a>`,title)]</span>
<span style="color: #000000;">title</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">title</span><span style="color: #0000FF;">[</span><span style="color: #7060A8;">rmatch</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`</span>`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">title</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">7</span><span style="color: #0000FF;">..$]</span>
<span style="color: #000080;font-style:italic;">-- {rdx,link} = grab(res,`<span class=" fz-ms fw-m fc-12th wr-bw lh-17">`,`</span>`,rdx)</span>
<span style="color: #0000FF;">{</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">link</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grab</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`<span>`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`</span>`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #004600;">true</span><span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">-- {rdx,desc} = grab(res,`<p class="fz-ms lh-1_43x">`,`</p>`,rdx)</span>
<span style="color: #0000FF;">{</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">desc</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grab</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`<span class=" fc-falcon">`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`</span>`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rdx</span><span style="color: #0000FF;">,</span><span style="color: #004600;">true</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"title:%s\nlink:%s\ndesc:%s\n\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">title</span><span style="color: #0000FF;">,</span><span style="color: #000000;">link</span><span style="color: #0000FF;">,</span><span style="color: #000000;">desc</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #000000;">YahooSearch</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rosettacode"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">YahooSearch</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rosettacode"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)</span>
<span style="color: #0000FF;">?</span><span style="color: #008000;">"done"</span>
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span>
<!--</lang>-->
{{out}}
<pre>
|