Function frequency: Difference between revisions

add REXX version 3
(add REXX version 2)
(add REXX version 3)
Line 1,044:
1 arg
1 translate</pre>
 
===version 3===
This program counts statically.
Contents of comments and literal strings are not analyzed.
Neithet are function invocations via CALL.
<lang>/* REXX ****************************************** Version 11.12.2015 **
* Rexx Tokenizer to find function invocations
*-----------------------------------------------------------------------
* Tokenization remembers the following for each token
* t.i text of token
* t.i.0t type of token: Cx/V/K/N/O/S/L
* comment/variable/keyword/constant/operator/string/label
* t.i.0il line of token in the input
* t.i.0ic col of token in the input
* t.i.0prev index of token starting previous instruction
* t.i.0ol line of token in the output
* t.i.0oc col of token in the output
*---------------------------------------------------------------------*/
Call time 'R'
Parse Upper Arg fid '(' options
If fid='?' Then Do
Say 'Tokenike a REXX proram and list the function invocations found'
Say ' which are of the form symbol(... or ''string''(...'
Say ' (the left parenthesis must immediately follow the symbol'
Say ' or literal string.)'
Say 'Syntax:'
Say ' TKZ pgm < ( <Debug> <Tokens> >'
Exit
End
g.=0
Call init /* Initialize constants etc. */
g.0cont='01'x
g.0breakc='02'x
cnt.=0
Call readin /* Read input file into l.* */
Call tokenize /* Tokenize the input */
tk=''
Call process_tokens
g.0fun_list=wordsort(g.0fun_list)
Do While g.0fun_list>''
Parse Var g.0fun_list fun g.0fun_list
Say right(cnt.fun,3) fun
End
Say time('E') 'seconds elapsed for' t.0 'tokens in' g.0lines 'lines.'
Exit
 
init:
/***********************************************************************
* Initialize constants etc.
***********************************************************************/
g.=''
g.0debug=0 /* set debug off by default */
 
fid=strip(fid)
If fid='' Then /* no file specified */
Exit exit(12 'no input file specified')
Parse Var fid fn '.'
 
os=options /* options specified on command */
g.0debug=0 /* turn off debug output */
g.0tokens=0 /* No token file */
Do While os<>'' /* process them individually */
Parse Upper Var os o os /* pick one */
Select
When abbrev('DEBUG',o,1) Then /* Debug specified */
g.0debug=1 /* turn on debug output */
When abbrev('TOKENS',o,1) Then /* Write a file with tokens */
g.0tokens=1
Otherwise /* anything else */
Say 'Unknown option:' o /* tell the user and ignore it */
End
End
 
If g.0debug Then Do
g.0dbg=fn'.dbg'; '@erase' g.0dbg
End
If g.0tokens Then Do
g.0tkf=fn'.tok'; '@erase' g.0tkf
End
 
/***********************************************************************
* Language specifics
***********************************************************************/
g.0special='+-*/%''";:<>^\=|,()& '/* special characters */
/* chars that may start a var */
g.0a='abcdefghijklmnopqrstuvwxyz'||,
'ABCDEFGHIJKLMNOPQRSTUVWXYZ@#$!?_'
g.0n='1234567890' /* numeric characters */
g.0vc=g.0a||g.0n||'.' /* var-character */
/* multi-character operators */
g.0opx='&& ** // << <<= <= <> == >< >= >> >>=',
'^< ^<< ^= ^== ^> ^>> \< \<< \= \== \> \>> ||'
 
t.='' /* token list */
Return
 
readin:
/***********************************************************************
* Read the file to be formatted
***********************************************************************/
lc=''
i=0
g.0lines=0
Do While lines(fid)<>0
li=linein(fid)
g.0lines=g.0lines+1
If i>0 Then
lc=strip(l.i,'T')
If right(lc,1)=',' Then Do
l.i=left(lc,length(lc)-1) li
End
Else Do
i=i+1
l.i=li
End
End
l.0=i
Call lineout fid
t=l.0+1
l.t=g.0eof /* add a stopper at program end */
l.0=t /* adjust number of lines */
g.0il=t /* remember end of program */
Return
 
tokenize:
/***********************************************************************
* First perform tokenization
* Input: l.* Program text
* Output: t.* Token list
* t.0t.i token type CA CB CC C comment begin/middle/end
* S string
* O operator (special character)
* V variable symbol
* N constant
* X end of text
* Note: special characters are treated as separate tokens
***********************************************************************/
li=0 /* line index */
ti=0 /* token index */
Do While li<l.0 /* as long as there is more input */
li=li+1 /* index of next line */
l=l.li /* next line to be processed */
g.0newline=1
g.0cc=0 /* current column */
Call dsp l.li /* debug output */
If l='' Then /* empty line */
Call addtoken '/*--*/','C' /* preserve with special token */
Do While l<>'' /* work through the line */
nbc=verify(l,' ') /* first non-blank column */
g.0cc=g.0cc+nbc /* advance to this */
If g.0newline='' Then Do
If t.ti.0ic='' Then
t.ti.0ic=0
If g.0cc=t.ti.0ic+length(t.ti) Then Do
tj=ti+1
t.tj.0ad=1
End
End
l=substr(l,nbc) /* and continue with rest of line */
Parse Var l c +1 l 1 c2 +2 /* get character(s) */
g.0tb=g.0cc /* remember where token starts */
Select /* take a decision */
When c2='/*' Then /* comment starts here */
Call comment /* process comment */
When pos(c,'''"')>0 Then /* literal string starts here */
Call string c /* process literal string */
Otherwise /* neither comment nor literal */
Call token /* get other token */
End /* cmt, string, or token done */
End /* end of loop over line */
End /* end of loop over program */
t.0=ti /* store number of tokens */
Call dsp ti 'tokens' l.0 'lines'
Return
comment:
/***********************************************************************
* Parse a comment
* Nested comments are supported
***********************************************************************/
cbeg=t.ti.0il
l=substr(l,2) /* continue after slash-asterisk */
g.0cc=g.0cc+1 /* update current char position */
t='/*' /* token so far */
incmt=1 /* indicate "within a comment" */
Do Until incmt=0 /* loop until done */
bc=pos('/*',l) /* next begin comment, if any */
ec=pos('*/',l) /* next end comment, if any */
Select /* decide */
When bc>0 &, /* begin-comment found */
(ec=0 | bc<ec) Then Do /* and no end-comment or later */
t=t||left(l,bc+1) /* add this all to token */
incmt=incmt+1 /* increment comment nest-depth */
l=substr(l,bc+2) /* continue after slash-asterisk */
g.0cc=g.0cc+bc+1 /* update current char position */
End
When ec>0 Then Do /* end-comment found */
t=t||left(l,ec+1) /* add all to token */
incmt=incmt-1 /* decrement nesting */
l=substr(l,ec+2) /* continue after asterisk-slash */
g.0cc=g.0cc+ec+1 /* update current char position */
End
Otherwise Do /* no further comment bracket */
Call addtoken t||l,ct() /* rest of line to token */
li=li+1 /* proceed to next line */
l=l.li /* contents of next line */
g.0newline=1
If l=g.0eof Then Do
Say 'Comment started in line' cbeg 'is not closed before EOF'
Exit err(58)
End
g.0cc=0 /* current char (none) */
g.0tb=1 /* token (comment) starts here */
End
End
End
Call addtoken t,ct() /* last (or only) comment token */
If pos('*debug*',t)>0 Then g.0debug=1
Return
 
ct:
/***********************************************************************
* Comment type
***********************************************************************/
If incmt>0 Then Do /* within a comment */
If t.ti.0t='CA' |, /* prev. token was start or cont */
t.ti.0t='CB' Then Return 'CB' /* this is continuation */
Else Return 'CA' /* this is start */
End
Else Do /* comment is over */
If t.ti.0t='CA' |, /* prev. token was start or cont */
t.ti.0t='CB' Then Return 'CC' /* this is final part */
Else Return 'C' /* this is just a comment */
End
string:
/***********************************************************************
* Parse a string
* take care of '111'B and '123'X
***********************************************************************/
Parse Arg delim /* string delimiter found */
t=delim /* star building the token */
instr=1 /* note we are within a string */
g.0ss=li
Do Until instr=0 /* continue until it is over */
se=pos(delim,l) /* ending delimiter */
If se>0 Then Do /* found */
If substr(l,se+1,1)=delim Then Do /* but it is doubled */
t=t||left(l,se+1) /* so add all so far to token */
l=substr(l,se+2) /* and take rest of line */
g.0cc=g.0cc+se+1 /* and set current character pos */
End
Else Do /* not another one */
instr=0 /* string is done */
t=t||left(l,se) /* add the string data to token */
l=substr(l,se+1) /* take the rest of the line */
g.0cc=g.0cc+se /* and set current character pos */
If pos(translate(left(l,1)),'BX')>0 Then
If pos(substr(l,2,1),g.0vc)=0 Then Do
t=t||left(l,1) /* add the char to the token */
l=substr(l,2) /* take the rest of the line */
g.0cc=g.0cc+1 /* and set current character pos */
End
End
End
Else Do /* not found */
Call addtoken t||l,'S' /* store the token */
g.0lasttoken='' /* reset this switch */
li=li+1 /* go on to the next line */
If li>l.0 Then /* there is no next line */
Exit err(60,'string starting in line' g.0ss,
'does not end before end of file')
Else
Say 'string starting at line' g.0ss 'extended over line boundary'
l=l.li /* take contents of the next line */
g.0cc=1 /* current char position */
g.0tb=1 /* ?? */
End
End
Call addtoken t,'S' /* store the token */
Return
token:
/***********************************************************************
* Parse a token
***********************************************************************/
IF c=g.0comma & l='' Then Do
t=g.0cont
type='O' /* O (for operator - not quite...)*/
End
Else Do
If pos(c,g.0special)>0 Then Do /* a special character */
t=c /* take it as is */
type='O' /* O (for operator - not quite...)*/
End
Else Do /* some other character */
nsp=verify(l,g.0special,'M') /* find delimiting character */
If nsp>0 Then Do /* some character found */
t=c||left(l,nsp-1) /* take all up to this character */
l=substr(l,nsp) /* and continue from there */
End
Else Do /* none found */
t=c||l /* add rest of line to token */
l='' /* and all is used up */
End
g.0cc=g.0cc+length(t)-1 /* adjust current char position */
If pos(right(t,1),'eE')>0 &, /* consider nxxxE+nn case */
pos(left(l,1),'+-')>0 Then Do
If pos(left(t,1),'.1234567890')>0 Then /* start . or digit */
If pos(substr(l,2,1),'1234567890')>0 Then Do /* dig after+- */
nsp=verify(substr(l,2),g.0special,'M')+1 /* find end */
If nsp>1 Then /* delimiting character found */
exp=substr(l,2,nsp-2) /* exponent (if numeric) */
Else
exp=substr(l,2)
If verify(exp,'0123456789')=0 Then Do
t=t||left(l,1)||exp
l=substr(l,length(exp)+2)
g.0cc=g.0cc+length(exp)+2
End
End
End
Select
When isvar(t) Then /* token qualifies as variable */
type='V'
When isconst(t) Then /* token is a constant symbol */
type='N'
When t=g.0eof Then /* token is end of file indication*/
type='X'
Otherwise Do /* anything else is an error */
Say 'li='li
Say l
Say 'token error'
Trace ?R
Exit err(62,'token' t 'is neither variable nor constant')
End
End
If left(l,1)='(' Then
type=type||'F'
End
End
Call addtoken t,type /* store the token */
Return
addtoken:
/***********************************************************************
* Add a token to the token list
***********************************************************************/
Parse Arg t,type /* token and its type */
If type='O' Then Do /* operator (special character) */
If pos(t,'><=&|/*')>0 Then Do /* char for composite operator */
If wordpos(t.ti||t,g.0opx)>0 Then Do /* composite operator */
t.ti=t.ti||t /* use concatenation */
/* does not handle =/**/= */
t='' /* we are done */
Return
End
End
End
 
If type='CC' & t='*/' Then Do /* The special case for SPA */
Return
End
 
ti=ti+1 /* increment index */
t.ti=t /* store token's value */
t.ti.0t=left(type,1) /* and its type */
t.ti.0nl=g.0newline /* token starts a new line */
g.0newline='' /* reset new line switch */
If t.ti.0t='C' Then Do
t.ti.0t=type
If left(t.ti,3)='/* ' &,
right(t.ti,3)=' */' Then
t.ti='/*' strip(substr(t.ti,4,length(t.ti)-6)) '*/'
End
t.ti.0f=substr(type,2,1) /* 'F' if possibly a function */
Call setpos ti li g.0tb /* and its position */
If left(type,1)='C' Then /* ??? */
If left(t.ti,2)<>'/*' Then Do
ts=strip(t.ti,'L')
t.ti.0oc=t.ti.0oc+length(t.ti)-length(ts)
t.ti=ts
End
If t.ti.0ol='' Then t.ti.0ol=li
If t.ti.0oc='' Then t.ti.0oc=0
t.ti.0il=t.ti.0ol /* and its position */
t.ti.0ic=t.ti.0oc /* and its position */
Call dsp ti t.ti t.ti.0il'/'t.ti.0ic '->' t.ti.0ol'/'t.ti.0oc
t='' /* reset token variable */
Return
 
lookback:
/***********************************************************************
* Look back if...
***********************************************************************/
Do i_=ti To 1 By -1
Select
When left(t.i_.0t,1)='C' Then Nop
When t.i_.0used<>1 &,
(t.i_=g.0comma |,
t.i_=g.0cont) Then Do
t.i_.0used=1
t.i_=g.0cont
Return '0'
End
Otherwise
Return '1'
End
End
Return '1'
 
isvar:
/***********************************************************************
* Determine if a string qualifies as variable name
***********************************************************************/
Parse Arg a_ +1 b_
res=(pos(a_,g.0a)>0) &,
(verify(b_,g.0a||g.0n||'.')=0)
Return res
 
isconst:
/***********************************************************************
* Determine if a string qualifies as constant
***********************************************************************/
Parse Arg a_
res=(verify(a_,g.0a||g.0n||'.+-')=0) /* ??? */
Return res
 
setpos:
Parse Arg seti sol soc
setz='setpos:' t.seti t.seti.0ol'/'t.seti.0oc '-->',
sol'/'soc '('sigl')'
Call dsp setz
t.seti.0ol=sol
t.seti.0oc=soc
Return
 
process_tokens:
/***********************************************************************
* Process the token list
***********************************************************************/
Do i=1 To t.0
If g.0tokens Then
Call lineout g.0tkf,right(i,4) right(t.i.0il,3)'.'left(t.i.0ic,3),
right(t.i.0ol,3)'.'left(t.i.0oc,3),
left(t.i.0t,2) left(t.i,25)
If t.i='(' Then Do
j=i-1
If t.j.0ol=t.i.0il & ,
t.j.0oc+length(t.j)=t.i.0ic &,
pos(t.j.0t,'VS')>0 Then
Call store_f t.j
End
End
If g.0tokens Then
Call lineout g.0tkf
Return
 
store_f:
Parse Arg funct
If wordpos(funct,g.0fun_list)=0 then
g.0fun_list=g.0fun_list funct
cnt.funct=cnt.funct+1
Return
 
dsp:
/***********************************************************************
* Record (and display) a debug line
***********************************************************************/
Parse Arg ol_.1
If g.0debug>0 Then
Call lineout g.0dbg,ol_.1
If g.0debug>1 Then
Say ol_.1
Return
 
wordsort: Procedure
/**********************************************************************
* Sort the list of words supplied as argument. Return the sorted list
**********************************************************************/
Parse Arg wl
wa.=''
wa.0=0
Do While wl<>''
Parse Var wl w wl
Do i=1 To wa.0
If wa.i>w Then Leave
End
If i<=wa.0 Then Do
Do j=wa.0 To i By -1
ii=j+1
wa.ii=wa.j
End
End
wa.i=w
wa.0=wa.0+1
End
swl=''
Do i=1 To wa.0
swl=swl wa.i
End
Return strip(swl)
 
err:
/***********************************************************************
* Diagnostic error exit
***********************************************************************/
Parse Arg errnum, errtxt
Say 'err:' errnum errtxt
If t.ti.0il>g.0il Then
Say 'Error' arg(1) 'at end of file'
Else Do
Say 'Error' arg(1) 'around line' t.ti.0il', column' t.ti.0ic
_=t.ti.0il
Say l._
Say copies(' ',t.ti.0ic-1)'|'
End
If errtxt<>'' Then Say ' 'errtxt
Exit 12</lang>
{{out}}
Result for the above program.
<pre> 2 abbrev
2 arg
1 copies
2 ct
3 err
1 exit
1 isconst
1 isvar
21 left
9 length
1 linein
1 lines
15 pos
7 right
5 strip
17 substr
1 time
1 translate
6 verify
2 wordpos
1 wordsort
0.093000 seconds elapsed for 2200 tokens in 510 lines.</pre>
 
 
=={{header|Tcl}}==
2,295

edits