Find URI in text: Difference between revisions
Content added Content deleted
Thundergnat (talk | contribs) m (syntax highlighting fixup automation) |
(→{{header|Phix}}: added a proper version) |
||
Line 621: | Line 621: | ||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
The following is based on scanForUrls() in demo\edita\src\easynclr.e which is used/tested on a daily basis and may get additional bugfixes, though it is quite strongly coupled in with syntax colouring and other editor gubbins. As the output shows it does not (yet) handle dangling ")" correctly. |
|||
<!--<syntaxhighlight lang="phix">(phixonline)--> |
|||
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
|||
<span style="color: #008080;">constant</span> <span style="color: #000000;">schemes</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">`ftp`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`gopher`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`http`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`https`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`mailto`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`news`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`nntp`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`telnet`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`wais`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`file`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`prospero`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`edit`</span><span style="color: #0000FF;">}</span> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">scan_for_urls</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000080;font-style:italic;">-- such as http::\\wikipedia.org</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">chidx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">lt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">),</span> <span style="color: #000000;">ch2</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">lt</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">ch2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'z'</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">></span><span style="color: #000000;">chidx</span> <span style="color: #008080;">or</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx</span><span style="color: #0000FF;">]<=</span><span style="color: #008000;">' '</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">chidx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">chidx2</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">lt</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">ch2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;">></span><span style="color: #008000;">'z'</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">oneword</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx</span><span style="color: #0000FF;">..</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;">></span><span style="color: #000000;">lt</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">ch2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch2</span><span style="color: #0000FF;">=</span><span style="color: #008000;">':'</span> <span style="color: #008080;">and</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">oneword</span><span style="color: #0000FF;">,</span><span style="color: #000000;">schemes</span><span style="color: #0000FF;">))</span> |
|||
<span style="color: #008080;">or</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch2</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'.'</span> <span style="color: #008080;">and</span> <span style="color: #7060A8;">equal</span><span style="color: #0000FF;">(</span><span style="color: #000000;">oneword</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"www"</span><span style="color: #0000FF;">))</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">chidx0</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">chidx2</span> |
|||
<span style="color: #004080;">bool</span> <span style="color: #000000;">isUrl</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">lt</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">ch2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'\"'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;">=</span><span style="color: #000000;">chidx0</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;"><</span><span style="color: #000000;">lt</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #000000;">ch2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'\"'</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #000000;">isUrl</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span> |
|||
<span style="color: #008080;">exit</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #008080;">exit</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;">></span><span style="color: #000000;">255</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">' '</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">isUrl</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span> |
|||
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">isUrl</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx</span><span style="color: #0000FF;">..</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">chidx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">chidx2</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;">></span><span style="color: #000000;">lt</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">else</span> |
|||
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #008080;">constant</span> <span style="color: #000000;">txt</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""" |
|||
this URI contains an illegal character, parentheses and a misplaced full stop: |
|||
http://en.wikipedia.org/wiki/Erich_Kästner_(camera_designer). (which is handled by http://mediawiki.org/). |
|||
and another one just to confuse the parser: http://en.wikipedia.org/wiki/-) |
|||
")" is handled the wrong way by the mediawiki parser. |
|||
ftp://domain.name/path(balanced_brackets)/foo.html |
|||
ftp://domain.name/path(balanced_brackets)/ending.in.dot. |
|||
ftp://domain.name/path(unbalanced_brackets/ending.in.dot. |
|||
leading junk ftp://domain.name/path/embedded?punct/uation. |
|||
leading junk ftp://domain.name/dangling_close_paren) |
|||
if you have other interesting URIs for testing, please add them here: |
|||
http://www.example.org/foo.html#includes_fragment |
|||
http://www.example.org/foo.html#enthält_Unicode-Fragment |
|||
http://192.168.0.1/admin/?hackme=%%%%%%%%%true |
|||
blah (foo://domain.hld/)))) |
|||
https://haxor.ur:4592/~mama/####&?foo |
|||
"""</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">scan_for_urls</span><span style="color: #0000FF;">(</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">),</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">)})</span> |
|||
<!--</syntaxhighlight>--> |
|||
{{out}} |
|||
<pre> |
|||
http://en.wikipedia.org/wiki/Erich_Kästner_(camera_designer). |
|||
http://mediawiki.org/). |
|||
http://en.wikipedia.org/wiki/-) |
|||
ftp://domain.name/path(balanced_brackets)/foo.html |
|||
ftp://domain.name/path(balanced_brackets)/ending.in.dot. |
|||
ftp://domain.name/path(unbalanced_brackets/ending.in.dot. |
|||
ftp://domain.name/path/embedded?punct/uation. |
|||
ftp://domain.name/dangling_close_paren) |
|||
http://www.example.org/foo.html#includes_fragment |
|||
http://www.example.org/foo.html#enthält_Unicode-Fragment |
|||
http://192.168.0.1/admin/?hackme=%%%%%%%%%true |
|||
https://haxor.ur:4592/~mama/####&?foo |
|||
</pre> |
|||
=={{header|PHP}}== |
=={{header|PHP}}== |