Web scraping: Difference between revisions

Content added Content deleted

Inline

@@ Line 1,467: / Line 1,467: @@
 Note that the string between '<' and '>' refers to regex tokens, so to match a literal '&lt;BR&gt;' you need to quote it, while <ws> refers to the built-in token whitespace.
 Also, whitespace is ignored by default in Perl&nbsp;6 regexes.
+=={{header|Phix}}==
+<lang Phix>-- demo\rosetta\web_scrape.exw
+-- Download the USNO time page, pick out the line that mentions UTC, and show
+-- the timestamp both as scraped and re-formatted through timedate.
+include builtins\libcurl.e
+include builtins\timedate.e
+curl_global_init()
+atom curl = curl_easy_init()
+curl_easy_setopt(curl, CURLOPT_URL, "https://tycho.usno.navy.mil/cgi-bin/timer.pl")
+-- a string result is treated as the page body; anything else as a failure
+object res = curl_easy_perform_ex(curl)
+if string(res) then
+    sequence page_lines = split(res,'\n')
+    object stamp = 0                -- remains 0 when no usable UTC line exists
+    for i=1 to length(page_lines) do
+        integer k = match("UTC",page_lines[i])
+        -- k>6 keeps the 5..k-2 slice below non-empty and within bounds
+        if k>6 then
+            -- skip the first four characters (presumably a leading <BR> tag,
+            -- confirm against the page) and stop before the " UTC" suffix
+            stamp = page_lines[i][5..k-2]
+            exit
+        end if
+    end for
+    if string(stamp) then
+        ?stamp
+        timedate td = parse_date_string(stamp, {"Mmm. d, hh:mm:ss"})
+        ?format_timedate(td,"h:mpm Mmmm ddth")
+    else
+        -- report the miss explicitly rather than dumping the whole page
+        ?"UTC time not found"
+    end if
+else
+    ?"some error"
+    ?res                            -- show the non-string value that was returned
+end if
+-- release the handle and the library whichever branch ran
+curl_easy_cleanup(curl)
+curl_global_cleanup()</lang>
+{{out}}
+<pre>
+"Apr. 26, 12:24:11"
+"12:24pm April 26th"
+</pre>
+Note that since that webpage has no year, td[DT_YEAR] will be 0 (you could of course just set it from date()[DT_YEAR]), and hence the weekday would also be wrong.
 =={{header|PHP}}==