Text processing/2: Difference between revisions

Content added Content deleted
m (→‎{{header|REXX}}: added/changed whitespace and comments, simplified the COMMAS subroutine.)
Line 2,241: Line 2,241:


=={{header|REXX}}==
=={{header|REXX}}==
This REXX program processes the file mentioned in "text processing 1" and performs further validation of the dates, flags, and data.
This REXX program processes the file mentioned in "text processing 1" and performs further validation of the dates, flags, and data.
<br><br>
<br><br>
Some of the checks performed are:
Some of the checks performed are:
* checks for duplicated date records.
::* &nbsp; checks for duplicated date records.
* checks for a bad date (YYYY-MM-DD) format, among:
::* &nbsp; checks for a bad date (YYYY-MM-DD) format, among:
* wrong length
::* &nbsp; wrong length
* year > current year
::* &nbsp; year > current year
* year < 1970 (to allow for posthumous data)
::* &nbsp; year < 1970 (to allow for posthumous data)
* mm < 1 or mm > 12
::* &nbsp; mm < 1 or mm > 12
* dd < 1 or dd > days for the month
::* &nbsp; dd < 1 or dd > days for the month
* yyyy, dd, mm isn't numeric
::* &nbsp; yyyy, dd, mm isn't numeric
* missing data (or flags)
::* &nbsp; missing data (or flags)
* flag isn't an integer
::* &nbsp; flag isn't an integer
* flag contains a decimal point
::* &nbsp; flag contains a decimal point
* data isn't numeric
::* &nbsp; data isn't numeric
In addition, all of the presented numbers (may) have commas inserted.
In addition, all of the presented numbers may have commas inserted.
<br><br>
<br><br>
The program has (negated) code to write the report to a file in addition to the console.
The program writes the report to a file in addition to the console.
<lang rexx>/*REXX program to process instrument data from a data file. */
<lang rexx>/*REXX program to process instrument data from a data file. */
numeric digits 20 /*allow for bigger numbers. */
numeric digits 20 /*allow for bigger numbers. */
ifid='READINGS.TXT' /*the input file. */
ifid='READINGS.TXT' /*name of the input file. */
ofid='READINGS.OUT' /*the output file. */
ofid='READINGS.OUT' /* " " " output " */
grandSum=0 /*grand sum of whole file. */
grandSum=0 /*grand sum of the whole file. */
grandflg=0 /*grand num of flagged data. */
grandFlg=0 /*grand number of flagged data. */
grandOKs=0
grandOKs=0
longFlag=0 /*longest period of flagged data.*/
Lflag=0 /*longest period of flagged data. */
contFlag=0 /*longest continuous flagged data*/
Cflag=0 /*longest continuous flagged data. */
oldDate =0 /*placeholder of penultimate date*/
oldDate =0 /*placeholder of penultimate date. */
w =16 /*width of fields when displayed.*/
w =16 /*width of fields when displayed. */
dupDates=0 /*count of duplicated timestamps.*/
dupDates=0 /*count of duplicated timestamps. */
badflags=0 /*count of bad flags (¬ integer).*/
badFlags=0 /*count of bad flags (not integer). */
badDates=0 /*count of bad dates (bad format)*/
badDates=0 /*count of bad dates (bad format). */
badData =0 /*count of bad datas (¬ numeric).*/
badData =0 /*count of bad data (not numeric). */
ignoredR=0 /*count of ignored records (bad).*/
ignoredR=0 /*count of ignored records, bad records*/
maxInstruments=24 /*maximum number of instruments. */
maxInstruments=24 /*maximum number of instruments. */
yyyyCurr=right(date(),4) /*get the current year (today). */
yyyyCurr=right(date(),4) /*get the current year (today). */
monDD. =31 /*number of days in every month. */
monDD. =31 /*number of days in every month. */
/*February is figured on the fly.*/
/*# days in Feb. is figured on the fly.*/
monDD.4 =30
monDD.4 =30
monDD.6 =30
monDD.6 =30
Line 2,284: Line 2,284:
monDD.11=30
monDD.11=30


do records=1 while lines(ifid)\==0 /*read until finished. */
do records=1 while lines(ifid)\==0 /*read until finished. */
rec=linein(ifid) /*read the next record (line). */
rec=linein(ifid) /*read the next record (line). */
parse var rec datestamp Idata /*pick off the dateStamp & data. */
parse var rec datestamp Idata /*pick off the dateStamp and data. */
if datestamp==oldDate then do /*found a duplicate timestamp. */
if datestamp==oldDate then do /*found a duplicate timestamp. */
dupDates=dupDates+1 /*bump the counter.*/
dupDates=dupDates+1 /*bump the dupDate counter*/
call sy datestamp copies('~',30),
call sy datestamp copies('~',30),
'is a duplicate of the',
'is a duplicate of the',
"previous datestamp."
"previous datestamp."
ignoredR=ignoredR+1 /*bump ignoredRecs.*/
ignoredR=ignoredR+1 /*bump # of ignoredRecs.*/
iterate /*ignore this duplicate record. */
iterate /*ignore this duplicate record. */
end
end


parse var datestamp yyyy '-' mm '-' dd /*obtain YYYY, MM, and DD. */
parse var datestamp yyyy '-' mm '-' dd /*obtain YYYY, MM, and the DD. */
monDD.2=28+leapyear(yyyy) /*how long is February in YYYY ? */
monDD.2=28+leapyear(yyyy) /*how long is February in year YYYY ? */
/*check for various bad formats. */
/*check for various bad formats. */
if verify(yyyy||mm||dd,1234567890)\==0 |,
if verify(yyyy||mm||dd,1234567890)\==0 |,
length(datestamp)\==10 |,
length(datestamp)\==10 |,
Line 2,306: Line 2,306:
yyyy<1970 |,
yyyy<1970 |,
yyyy>yyyyCurr |,
yyyy>yyyyCurr |,
mm=0 | dd=0 |,
mm=0 | dd=0 |,
mm>12 | dd>monDD.mm then do
mm>12 | dd>monDD.mm then do
badDates=badDates+1
badDates=badDates+1
call sy datestamp copies('~'),
call sy datestamp copies('~'),
'has an illegal format.'
'has an illegal format.'
ignoredR=ignoredR+1 /*bump ignoredRecs.*/
ignoredR=ignoredR+1 /*bump number ignoredRecs.*/
iterate /*ignore this bad date record. */
iterate /*ignore this bad record. */
end
end
oldDate=datestamp /*save datestamp for next read. */
oldDate=datestamp /*save datestamp for the next read. */
sum=0
sum=0
flg=0
flg=0
OKs=0
OKs=0


do j=1 until Idata='' /*process the instrument data. */
do j=1 until Idata='' /*process the instrument data. */
parse var Idata data.j flag.j Idata
parse var Idata data.j flag.j Idata


if pos('.',flag.j)\==0 |, /*flag have a decimal point -or-*/
if pos('.',flag.j)\==0 |, /*does flag have a decimal point -or- */
\datatype(flag.j,'W') then do /*is the flag not a whole number?*/
\datatype(flag.j,'W') then do /* ··· is the flag not a whole number? */
badflags=badflags+1 /*bump counter.*/
badFlags=badFlags+1 /*bump badFlags counter*/
call sy datestamp copies('~'),
call sy datestamp copies('~'),
'instrument' j "has a bad flag:",
'instrument' j "has a bad flag:",
flag.j
flag.j
iterate /*ignore it & its data. */
iterate /*ignore it and its data. */
end
end


if \datatype(data.j,'N') then do /*is the data not numeric? */
if \datatype(data.j,'N') then do /*is the data not numeric? */
badData=badData+1 /*bump counter.*/
badData=badData+1 /*bump counter.*/
call sy datestamp copies('~'),
call sy datestamp copies('~'),
'instrument' j "has bad data:",
'instrument' j "has bad data:",
data.j
data.j
iterate /*ignore it & its flag. */
iterate /*ignore it & its flag. */
end
end


if flag.j>0 then do /*if good data, ... */
if flag.j>0 then do /*if good data, ~~~ */
OKs=OKs+1
OKs=OKs+1
sum=sum+data.j
sum=sum+data.j
if contFlag>longFlag then do
if Cflag>Lflag then do
longdate=datestamp
Ldate=datestamp
longFlag=contFlag
Lflag=Cflag
end
end
contFlag=0
Cflag=0
end
end
else do /*flagged data ... */
else do /*flagged data ~~~ */
flg=flg+1
flg=flg+1
contFlag=contFlag+1
Cflag=Cflag+1
end
end
end /*j*/
end /*j*/


if j>maxInstruments then do
if j>maxInstruments then do
badData=badData+1 /*bump counter.*/
badData=badData+1 /*bump the badData counter.*/
call sy datestamp copies('~'),
call sy datestamp copies('~'),
'too many instrument datum'
'too many instrument datum'
end
end


if OKs\==0 then avg=format(sum/OKs,,3)
if OKs\==0 then avg=format(sum/OKs,,3)
else avg='[n/a]'
else avg='[n/a]'
grandOKs=grandOKs+OKs
grandOKs=grandOKs+OKs
_=right(comma(avg),w)
_=right(commas(avg),w)
grandSum=grandSum+sum
grandSum=grandSum+sum
grandFlg=grandFlg+flg
grandFlg=grandFlg+flg
Line 2,370: Line 2,370:
end /*records*/
end /*records*/


records=records-1 /*adjust for reading end-of-file.*/
records=records-1 /*adjust for reading the end─of─file. */
if grandOKs\==0 then grandAvg=format(grandsum/grandOKs,,3)
if grandOKs\==0 then grandAvg=format(grandsum/grandOKs,,3)
else grandAvg='[n/a]'
else grandAvg='[n/a]'
call sy
call sy
call sy copies('=',60)
call sy copies('=',60)
call sy ' records read:' right(comma(records ),w)
call sy ' records read:' right(commas(records ),w)
call sy ' records ignored:' right(comma(ignoredR),w)
call sy ' records ignored:' right(commas(ignoredR),w)
call sy ' grand sum:' right(comma(grandSum),w+4)
call sy ' grand sum:' right(commas(grandSum),w+4)
call sy ' grand average:' right(comma(grandAvg),w+4)
call sy ' grand average:' right(commas(grandAvg),w+4)
call sy ' grand OK data:' right(comma(grandOKs),w)
call sy ' grand OK data:' right(commas(grandOKs),w)
call sy ' grand flagged:' right(comma(grandFlg),w)
call sy ' grand flagged:' right(commas(grandFlg),w)
call sy ' duplicate dates:' right(comma(dupDates),w)
call sy ' duplicate dates:' right(commas(dupDates),w)
call sy ' bad dates:' right(comma(badDates),w)
call sy ' bad dates:' right(commas(badDates),w)
call sy ' bad data:' right(comma(badData ),w)
call sy ' bad data:' right(commas(badData ),w)
call sy ' bad flags:' right(comma(badflags),w)
call sy ' bad flags:' right(commas(badFlags),w)
if Lflag\==0 then call sy ' longest flagged:' right(commas(LFlag),w) " ending at " Ldate
if longFlag\==0 then
call sy ' longest flagged:' right(comma(longFlag),w) " ending at " longdate
call sy copies('=',60)
call sy copies('=',60)
exit /*stick a fork in it, we're all done.*/
call sy
/*────────────────────────────────────────────────────────────────────────────*/
exit /*stick a fork in it, we're done.*/
commas: procedure; parse arg _; n=_'.9'; #=123456789; b=verify(n,#,"M")
/*──────────────────────────────────LEAPYEAR subroutine─────────────────*/
e=verify(n,#'0',,verify(n,#"0.",'M'))-4
leapyear: procedure; arg y /*year could be: Y, YY, YYY, YYYY*/
do j=e to b by -3; _=insert(',',_,j); end /*j*/; return _
if length(y)==2 then y=left(right(date(),4),2)y /*adjust for YY year.*/
/*────────────────────────────────────────────────────────────────────────────*/
if y//4\==0 then return 0 /* not by 4? Not a leapyear.*/
return y//100\==0 | y//400==0 /*apply 100 and 400 year rule. */
leapyear: procedure; arg y /*year could be: Y, YY, YYY, or YYYY*/
if length(y)==2 then y=left(right(date(),4),2)y /*adjust for YY year.*/
/*──────────────────────────────────SY subroutine───────────────────────*/
if y//4\==0 then return 0 /* not divisible by 4? Not a leapyear*/
sy: procedure; parse arg stuff; say stuff
return y//100\==0 | y//400==0 /*apply the 100 and the 400 year rule.*/
if 1==0 then call lineout ofid,stuff
/*────────────────────────────────────────────────────────────────────────────*/
return
sy: say arg(1); call lineout ofid,arg(1); return</lang>
/*──────────────────────────────────COMMA subroutine────────────────────*/
'''output''' &nbsp; when using the default input file:
comma: procedure; parse arg _,c,p,t;arg ,cu;c=word(c ",",1)
if cu=='BLANK' then c=' ';o=word(p 3,1);p=abs(o);t=word(t 999999999,1)
if \datatype(p,'W')|\datatype(t,'W')|p==0|arg()>4 then return _;n=_'.9'
#=123456789;k=0;if o<0 then do;b=verify(_,' ');if b==0 then return _
e=length(_)-verify(reverse(_),' ')+1;end;else do;b=verify(n,#,"M")
e=verify(n,#'0',,verify(n,#"0.",'M'))-p-1;end
do j=e to b by -p while k<t;_=insert(c,_,j);k=k+1;end;return _</lang>
'''output'''
<pre style="height:35ex">
<pre style="height:35ex">