Web scraping: Difference between revisions
m
→{{header|Phix}}: syntax coloured, marked p2js incompatible, updated output
m (→{{header|Phix}}: syntax coloured, marked p2js incompatible, updated output) |
|||
Line 1,543:
=={{header|Phix}}==
{{libheader|Phix/libcurl}}
<!--<lang Phix>(notonline)--
<span style="color: #000080;font-style:italic;">--
-- demo\rosetta\web_scrape.exw
-- ===========================
--</span>
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- (libcurl)</span>
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">\</span><span style="color: #000000;">libcurl</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">\</span><span style="color: #004080;">timedate</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
<span style="color: #004080;">object</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">curl_easy_perform_ex</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"https://rosettacode.org/wiki/Talk:Web_scraping"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #004080;">string</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\n'</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"UTC"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">line</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">res</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #000080;font-style:italic;">-- (debug aid)</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">line</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">k</span><span style="color: #0000FF;">-</span><span style="color: #000000;">3</span><span style="color: #0000FF;">]</span>
<span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">rmatch</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"</a>"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">+</span><span style="color: #000000;">5</span><span style="color: #0000FF;">..$])</span>
<span style="color: #008080;">exit</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #0000FF;">?</span><span style="color: #000000;">res</span>
<span style="color: #008080;">if</span> <span style="color: #004080;">string</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">timedate</span> <span style="color: #000000;">td</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">parse_date_string</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"hh:mm, d Mmmm yyyy"</span><span style="color: #0000FF;">})</span>
<span style="color: #0000FF;">?</span><span style="color: #7060A8;">format_timedate</span><span style="color: #0000FF;">(</span><span style="color: #000000;">td</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Dddd Mmmm ddth yyyy h:mpm"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">else</span>
<span style="color: #0000FF;">?{</span><span style="color: #008000;">"some error"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">curl_easy_strerror</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)}</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
{{out}}
(
<pre>
"20:59, 30 May 2020"
"Saturday May 30th 2020 8:59pm"
</pre>
|