Idiomatically determine all the characters that can be used for symbols: Difference between revisions

Content added Content deleted
(Rename Perl 6 -> Raku, alphabetize, minor clean-up)
(→‎{{header|AWK}}: Clean up code to not include special rules for whitespace; also, AWK is not uppercase letter only)
Line 17: Line 17:


=={{header|AWK}}==
=={{header|AWK}}==
<lang AWK># usage: gawk -f Idiomatically_determine_all_the_characters_that_can_be_used_for_symbols.awk
<lang AWK>

# syntax: GAWK -f IDIOMATICALLY_DETERMINE_ALL_THE_CHARACTERS_THAT_CAN_BE_USED_FOR_SYMBOLS.AWK
# is_valid_identifier(id) - report whether gawk accepts id as an identifier.
#
# Writes a throw-away one-function program that uses id both as a parameter
# name and as the target of an assignment, runs it in a separate gawk
# process, and treats a zero exit status as "id is a valid identifier".
#
#   id            candidate identifier text
#   rc, fn, cmd   locals (AWK declares locals as extra parameters)
#
# Returns 1 when the generated program exits with status 0, otherwise 0.
# Side effects: (re)writes "is_valid_identifier.awk" and "errors" in the
# current directory.
function is_valid_identifier(id, rc,    fn, cmd) {
    fn = "is_valid_identifier.awk"

    # id must also appear in the function body: with the parameter list
    # alone, an id made only of whitespace yields "function unused( ) {}",
    # a valid parameterless function, and the whitespace character would be
    # wrongly reported as an identifier character.
    printf("function unused(%s) { %s = 0 }\n", id, id) >fn
    printf("BEGIN { exit(0) }\n") >>fn
    close(fn)   # flush to disk before the child gawk reads the file

    # Diagnostics from the child process are diverted to the file "errors".
    cmd = "gawk -f " fn " 2>errors"
    rc = system(cmd)
    return rc == 0
}

# NOTE(review): this span appears to interleave the previous and the current
# revision of the BEGIN rule line by line (two "BEGIN {" headers, two loop
# headers, two exit(0) calls), so it is not a runnable program as shown.
# Both variants walk the byte values 0..255, turn each into a one-character
# string, and sort that character into "accepted" / "rejected" buckets for
# the first position and for later positions of a symbol name.
BEGIN {
BEGIN {
# File name that run() writes its generated one-line program to.
fn = "TEMP.AWK"
for (i = 0; i <= 255; i++) {
# Command that run() passes to system(); stderr is sent to NUL
# (presumably the Windows null device - confirm the target platform).
cmd = sprintf("GAWK -f %s 2>NUL",fn)
c = sprintf("%c", i)

for (i=0; i<=255; i++) {
c = sprintf("%c",i)
# Variant using is_valid_identifier(): c on its own is the first-character test.
if (is_valid_identifier(c))
good1 = good1 c;
# Variant using run(): whitespace is counted as rejected without being tested.
if (c ~ /\x09|\x0D|\x0A|\x20/) { ng++; continue } # tab,CR,LF,space
else
# run() returning 0 puts c in the accepted string, anything else in the rejected one.
(run(c) == 0) ? (ok1 = ok1 c) : (ng1 = ng1 c) # 1st character
(run("_" c) == 0) ? (ok2 = ok2 c) : (ng2 = ng2 c) # 2nd..nth character
bad1 = bad1 c

# c placed between two underscores is the later-character test.
if (is_valid_identifier("_" c "_"))
good2 = good2 c;
else
bad2 = bad2 c;
}
}

# Report for the run() variant: rejected count includes the skipped whitespace (ng).
printf("1st character: %d NG, %d OK %s\n",length(ng1)+ng,length(ok1),ok1)
printf("2nd..nth char: %d NG, %d OK %s\n",length(ng2)+ng,length(ok2),ok2)
# Report for the is_valid_identifier() variant.
printf("1st character: %d bad, %d ok: %s\n",
length(bad1), length(good1), good1)
printf("2nd..nth char: %d bad, %d ok: %s\n",
length(bad2), length(good2), good2)
exit(0)
exit(0)
}
# run(c) - check whether the text c can be used as a variable name.
#
# Emits a one-rule program that applies "+=0" to c into the file named by
# the global fn, then executes the global command string cmd and hands its
# exit status back to the caller.
#
#   c    text to place where a variable name is expected
#   rc   local slot declared in the signature (AWK convention)
function run(c, rc) {
    printf("BEGIN{%s+=0}\n",c) >fn
    close(fn)   # make sure the program is on disk before it is executed
    return system(cmd)
}
}
</lang>
</lang>
<p>output:</p>
<p>output:</p>
<pre>
<pre>
1st character: 203 NG, 53 OK ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
1st character: 203 bad, 53 ok: ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
2nd..nth char: 193 NG, 63 OK 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
2nd..nth char: 193 bad, 63 ok: 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
</pre>
</pre>