Find URI in text: Difference between revisions
→{{header|Phix}}: added a proper version
Thundergnat (talk | contribs) m (syntax highlighting fixup automation) |
(→{{header|Phix}}: added a proper version) |
||
Line 621:
=={{header|Phix}}==
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">schemes</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">`ftp`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`gopher`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`http`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`https`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`mailto`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`news`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`nntp`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`telnet`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`wais`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`file`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`prospero`</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`edit`</span><span style="color: #0000FF;">}</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">scan_for_urls</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">-- such as http::\\wikipedia.org</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">chidx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">lt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">),</span> <span style="color: #000000;">ch2</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">lt</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">ch2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'z'</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">></span><span style="color: #000000;">chidx</span> <span style="color: #008080;">or</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx</span><span style="color: #0000FF;">]<=</span><span style="color: #008000;">' '</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">chidx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">chidx2</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">lt</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">ch2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;"><</span><span style="color: #008000;">'a'</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;">></span><span style="color: #008000;">'z'</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">oneword</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx</span><span style="color: #0000FF;">..</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;">></span><span style="color: #000000;">lt</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">ch2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch2</span><span style="color: #0000FF;">=</span><span style="color: #008000;">':'</span> <span style="color: #008080;">and</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">oneword</span><span style="color: #0000FF;">,</span><span style="color: #000000;">schemes</span><span style="color: #0000FF;">))</span>
<span style="color: #008080;">or</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch2</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'.'</span> <span style="color: #008080;">and</span> <span style="color: #7060A8;">equal</span><span style="color: #0000FF;">(</span><span style="color: #000000;">oneword</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"www"</span><span style="color: #0000FF;">))</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">chidx0</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">chidx2</span>
<span style="color: #004080;">bool</span> <span style="color: #000000;">isUrl</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">lt</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">ch2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'\"'</span> <span style="color: #008080;">and</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;">=</span><span style="color: #000000;">chidx0</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;"><</span><span style="color: #000000;">lt</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">ch2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'\"'</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">isUrl</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #008080;">exit</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">exit</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;">></span><span style="color: #000000;">255</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch2</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">' '</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">isUrl</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">isUrl</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">chidx</span><span style="color: #0000FF;">..</span><span style="color: #000000;">chidx2</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">chidx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">chidx2</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">chidx2</span><span style="color: #0000FF;">></span><span style="color: #000000;">lt</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">else</span>
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">chidx2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">txt</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"""
this URI contains an illegal character, parentheses and a misplaced full stop:
http://en.wikipedia.org/wiki/Erich_Kästner_(camera_designer). (which is handled by http://mediawiki.org/).
and another one just to confuse the parser: http://en.wikipedia.org/wiki/-)
")" is handled the wrong way by the mediawiki parser.
ftp://domain.name/path(balanced_brackets)/foo.html
ftp://domain.name/path(balanced_brackets)/ending.in.dot.
ftp://domain.name/path(unbalanced_brackets/ending.in.dot.
leading junk ftp://domain.name/path/embedded?punct/uation.
leading junk ftp://domain.name/dangling_close_paren)
if you have other interesting URIs for testing, please add them here:
http://www.example.org/foo.html#includes_fragment
http://www.example.org/foo.html#enthält_Unicode-Fragment
http://192.168.0.1/admin/?hackme=%%%%%%%%%true
blah (foo://domain.hld/))))
https://haxor.ur:4592/~mama/####&?foo
"""</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">scan_for_urls</span><span style="color: #0000FF;">(</span><span style="color: #000000;">txt</span><span style="color: #0000FF;">),</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">)})</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
http://en.wikipedia.org/wiki/Erich_Kästner_(camera_designer).
http://mediawiki.org/).
http://en.wikipedia.org/wiki/-)
ftp://domain.name/path(balanced_brackets)/foo.html
ftp://domain.name/path(balanced_brackets)/ending.in.dot.
ftp://domain.name/path(unbalanced_brackets/ending.in.dot.
ftp://domain.name/path/embedded?punct/uation.
ftp://domain.name/dangling_close_paren)
http://www.example.org/foo.html#includes_fragment
http://www.example.org/foo.html#enthält_Unicode-Fragment
http://192.168.0.1/admin/?hackme=%%%%%%%%%true
https://haxor.ur:4592/~mama/####&?foo
</pre>
=={{header|PHP}}==
|