Find URI in text: Difference between revisions

m
→‎{{header|REXX}}: used a template for the output section, added/changed whitespace and comments,
m (added whitespace to the task's preamble, added a ;Task:, added whitespace before the TOC.)
m (→‎{{header|REXX}}: used a template for the output section, added/changed whitespace and comments,)
Line 637:
 
=={{header|REXX}}==
<lang rexx>/*REXX program scans a text (contained within the REXX program) to extract URIs.       */
/*                                                                                      */
/* Method:  repeatedly find the next colon (:) in the working text;  the characters     */
/*          immediately to its left that belong to the  @scheme  set form the scheme,   */
/*          the characters to its right that belong to the  @reserved  set form the     */
/*          hier─part.   Each   scheme:hier─part   found is stored in the  !.  array    */
/*          and all of them are displayed (one per line) at the end.                    */
/*                                                                                      */
/* Note:    characters outside the  @reserved  set  (a blank, or a non─ASCII letter     */
/*          such as the  ä  in the sample text)  terminate the hier─part.               */

$$= 'this URI contains an illegal character, parentheses and a misplaced full stop:',
    'http://en.wikipedia.org/wiki/Erich_Kästner_(camera_designer). (which is handled by http://mediawiki.org/).',
    'and another one just to confuse the parser: http://en.wikipedia.org/wiki/-)',
    '")" is handled the wrong way by the mediawiki parser.',
    'ftp://domain.name/path(balanced_brackets)/foo.html',
    'ftp://domain.name/path(balanced_brackets)/ending.in.dot.',
    'ftp://domain.name/path(unbalanced_brackets/ending.in.dot.',
    'leading junk ftp://domain.name/path/embedded?punct/uation.',
    'leading junk ftp://domain.name/dangling_close_paren)',
    'if you have other interesting URIs for testing, please add them here:'

@abc= 'abcdefghijklmnopqrstuvwxyz'               /*construct lowercase (Latin) alphabet.*/
@abcU= @abc;    upper @abcU;   @abcs= @abc || @abcU   /*  "  lower & uppercase  "       */
@scheme=     @abcs || '0123456789' || '+-.'      /*add decimal digits & some punctuation*/
@unreserved= @abcs || '0123456789' || '-._~'     /* "     "       "   "   "       "     */
@reserved=   @unreserved"/?#[]@!$&)(*+,;=\'"     /*add other punctuation & special chars*/
$= space($$)' '                                  /*variable  $  is a working copy of  $$*/
                                                 /* [↑]  the trailing blank guarantees  */
                                                 /*      that VERIFY (below) always     */
                                                 /*      finds a terminating character. */
#= 0                                             /*the count of URI's found  (so far).  */
                                                 /*▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄*/
  do  while $\='';     y= pos(':', $)            /*locate a colon  (:)  in the text:  $ */
  if y==0  then leave                            /*Was a colon found?  Nope, we're done.*/
  if y==1  then do;  parse var  $  .  $          /*handle a bare colon by itself.       */
                     iterate                     /*go and keep scanning for a colon.    */
                end                              /* [↑]  (a rare special case.)         */
  sr= reverse( left($, y - 1) )                  /*extract the scheme and reverse it.   */
  se= verify(sr, @scheme)                        /*locate the end of the scheme.        */
  $= substr($, y + 1)                            /*assign an adjusted new text.         */
  if se\==0  then sr= left(sr, se - 1)           /*possibly  "crop"  the scheme name.   */
  s= reverse(sr)                                 /*reverse it again to rectify the name.*/
  he= verify($, @reserved)                       /*locate the end of the hier─part.     */
  s= s':'left($, he - 1)                         /*extract and append the hier─part.    */
  $= substr($, he)                               /*assign an adjusted new text.         */
  #= # + 1                                       /*bump the URI counter.                */
  !.#= s                                         /*assign the URI to an array  (!.)     */
  end   /*while*/                                /* [↑]   scan the text for URI's.      */
                                                 /*▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀*/
  do k=1  for #;     say !.k;     end            /*stick a fork in it,  we're all done. */</lang>
{{out|output|text=&nbsp; when using the internal default inputs:}}
do k=1 for #; say !.k; end /*stick a fork in it, we're done.*/</lang>
'''output'''
<pre>
stop: