Find URI in text: Difference between revisions

julia example
m (→‎{{header|REXX}}: spelled out the word "hierarchical".)
(julia example)
Line 313:
ftp://domain.name/path/embedded?punct/uation.
ftp://domain.name/dangling_close_paren)</lang>
 
 
=={{header|Julia}}==
The Julia URI parser treats "stop:", "parser:" and "here:" as schemes with an empty path. According to the RFC this is
technically correct, except that the schemes "stop:", "parser:" and "here:" do not exist, whereas "http:" and "ftp:" do.
<lang julia>using URIParser, HTTP
 
"""
    findvalidURI(txt)

Return a `Vector{String}` with the whitespace-delimited tokens of `txt` for which
`isvalid(parse(HTTP.URI, token))` holds.

Validation runs on a normalised copy of each token (hex character references of the
form `&#xHH;` are rewritten to `%HH`), but the token is returned in its original
form. Tokens for which parsing or validation throws are skipped.
"""
function findvalidURI(txt)
    results = String[]
    # whitespace not allowed in URI, so split on whitespace
    for str in split(txt, r"\s+")
        # convert escaped chars to %dd format. The replacement has to be s"%\1":
        # in a substitution string a backslash may only precede a backslash, a digit
        # or g<...>, so the former s"\%\1" made `replace` throw on the first match.
        # NOTE(review): every "?" is also turned into "x" in the copy that gets
        # validated; presumably to keep the parser away from it -- confirm intent.
        s = replace(replace(str, r"\&\#x([\d\w]{2})\;" => s"%\1"), "?" => "x")
        try
            if isvalid(parse(HTTP.URI, s))
                push!(results, str)
            end
        catch
            # best effort: a token the parser rejects is simply not reported
            continue
        end
    end
    return results
end
 
# Sample input: one candidate sentence per line, several with tricky punctuation.
testtext = """
this URI contains an illegal character, parentheses and a misplaced full stop:
http://en.wikipedia.org/wiki/Erich_Kästner_(camera_designer). (which is handled by http://mediawiki.org/).
and another one just to confuse the parser: http://en.wikipedia.org/wiki/-)
")" is handled the wrong way by the mediawiki parser.
ftp://domain.name/path(balanced_brackets)/foo.html
ftp://domain.name/path(balanced_brackets)/ending.in.dot.
ftp://domain.name/path(unbalanced_brackets/ending.in.dot.
leading junk ftp://domain.name/path/embedded?punct/uation.
leading junk ftp://domain.name/dangling_close_paren)
if you have other interesting URIs for testing, please add them here:
"""

# Scan the sample line by line and print every token accepted as a URI.
for rawline in split(testtext, "\n")
    trimmed = strip(rawline)
    foreach(println, findvalidURI(trimmed))
end
</lang>{{out}}
<pre>
stop:
http://en.wikipedia.org/wiki/Erich_Kästner_(camera_designer).
http://mediawiki.org/).
parser:
http://en.wikipedia.org/wiki/-)
ftp://domain.name/path(balanced_brackets)/foo.html
ftp://domain.name/path(balanced_brackets)/ending.in.dot.
ftp://domain.name/path(unbalanced_brackets/ending.in.dot.
ftp://domain.name/path/embedded?punct/uation.
ftp://domain.name/dangling_close_paren)
here:
</pre>
 
 
 
=={{header|Kotlin}}==
4,108

edits