Jump to content

Textonyms: Difference between revisions

AWK (gawk) solution added
imported>Meerkut
(AWK (gawk) solution added)
Line 309:
 
7325 -> [peak peal peck real reck seal]</pre>
 
=={{header|AWK}}==
{{works with|gawk|5.1.0 }}
<syntaxhighlight lang="AWK">
#!/usr/bin/env -S gawk -E
 
BEGIN { # user's configuration area

    # Keypad layout: alternating "digit letters" pairs, blank-separated.
    # FNAME is the word list that gets mapped onto that keypad.
    KEYMAP = "2 abc 3 def 4 ghi 5 jkl 6 mno 7 pqrs 8 tuv 9 wxyz"
    FNAME  = "/usr/share/dict/american-english"   # 0.5 MB; 102775 words

    # Alternative setup for a Greek word list (uncomment both lines to use it):
    #KEYMAP="2 αβγά 3 δεζέ 4 ηθιήίϊΐ 5 κλμ 6 νξοό 7 πρσς 8 τυφύϋΰ 9 χψωώ"
    #FNAME="/usr/share/dict/greek"                # 19.5 MB; 828808 words

    # Files that receive the generated data.
    # Comment out any line whose output you do not need.
    EXPORT_TXN = "/tmp/textonyms"
    EXPORT_ALL = "/tmp/phonewords"
    # If you disable EXPORT_BAD, you may also comment out the
    # 'BUFF_ERRW = BUFF_...' line in processFile.
    EXPORT_BAD = "/tmp/invalidwords"
}
BEGIN { # main
    # Command-line arguments are deliberately ignored.
    delete ARGV
    ARGC = 1

    # Using a name with `delete` reserves it for use as an array (hash table).
    delete XEK
    delete TXN

    # Second-level subscripts used as TXN[keys][TOTAL] and TXN[keys][COUNT].
    TOTAL = 1
    COUNT = 2

    # Global state, filled in by processFile and generateReport.
    AZ        = ""   # generated alphabet
    BUFF_ERRW = ""   # buffer of invalid words
    KK        = 0    # valid word counter
    EE        = 0    # invalid word counter
    TT        = 0    # textonym groups in the table TXN

    STDERR = "/dev/stderr"

    # Remember the separators in effect at start-up; processFile changes
    # RS and FS, and userQuery assigns these saved values back.
    OLD_FS = FS
    OLD_RS = RS

    processFile()
    generateReport()
    userQuery()
}
# processFile -- build the keypad tables from KEYMAP, then read the whole
# word list FNAME and sort every word into TXN by its key sequence.
#
# Reads globals:  KEYMAP, FNAME, TOTAL, COUNT, STDERR, EXPORT_BAD
# Writes globals: XEK        letter -> key
#                 AZ         alphabet as " a b c ... " (used for index() lookups)
#                 TXN        TXN[keys][TOTAL] = " word word ..." (distinct words),
#                            TXN[keys][COUNT] = number of distinct words
#                 KK, EE     valid / invalid word counters
#                 BUFF_ERRW  newline-terminated list of invalid words
#                 RS, FS     left set to whole-file / whitespace splitting
# Prints the word total to stdout; on a read failure prints a message to
# STDERR and exits with status 1.
# Everything in the parameter list is an awk-style local; call with no arguments.
function processFile(    ii, jj, nn, errW, ss, aKey, aGroup, qqq, nmb, lchr, known) {
    # --- keypad: "2 abc 3 def ..." -> XEK[letter] = key, AZ = " a b c ... "
    $0 = KEYMAP
    AZ = " "
    for (ii = 1; ii <= NF; ii += 2) {
        aKey = $ii
        aGroup = $(ii + 1)
        nn = split(aGroup, qqq, "")          # "" separator: one element per character
        for (jj = 1; jj <= nn; jj++) {
            ss = qqq[jj]
            XEK[ss] = aKey
            AZ = AZ ss " "
        }
    }
    AZ = AZ " "

    # --- read the whole file as one record, one word per field
    RS = "^$"
    FS = "[\n\t ]+"
    if ((getline < FNAME) <= 0) {
        printf "unexpected EOF or error: ‘%s’ %s\n", FNAME, ERRNO > STDERR
        exit 1
    }
    close(FNAME)

    # A regex FS does not discard leading/trailing separators, so the final
    # newline of the file would otherwise yield an empty extra field that is
    # counted as a word and stored under an empty key. Trim both edges;
    # changing $0 re-splits the record and corrects NF.
    sub(/^[\n\t ]+/, "")
    sub(/[\n\t ]+$/, "")
    printf "total words in the file ‘%s’: %s\n", FNAME, NF

    # --- translate each word into its key sequence
    for (ii = 1; ii <= NF; ii++) {
        ss = tolower($ii)
        nn = split(ss, qqq, "")
        nmb = ""
        errW = 0
        for (jj = 1; jj <= nn; jj++) {
            lchr = qqq[jj]
            if (index(AZ, " " lchr " ") == 0) { errW = 1; break }
            nmb = nmb XEK[lchr]
        }
        if (errW) {
            # a character outside the alphabet: the word is invalid
            EE++
            if (EXPORT_BAD != "") BUFF_ERRW = BUFF_ERRW $ii "\n"
            continue
        }
        # Every valid word is counted in KK, but a word is listed only once
        # per key sequence (e.g. when it occurs in several letter cases).
        known = TXN[nmb][TOTAL]
        if (index(known " ", " " ss " ") == 0) {
            TXN[nmb][TOTAL] = known " " ss
            TXN[nmb][COUNT]++
        }
        KK++
    }
}
# generateReport -- print the summary statistics to stdout, then write the
# export files via exportData() and close them.
# Adds to the global TT one for every TXN entry holding more than one word.
# `keys` is an awk-style local; call with no arguments.
function generateReport(    keys) {
    for (keys in TXN)
        if (TXN[keys][COUNT] > 1)
            TT++

    printf "valid words: %9s\n", KK
    printf "invalid words: %9s\n", EE
    printf "table indices for valid words: %9s\n", length(TXN)
    printf "textonym groups in the table: %9s\n", TT

    exportData()

    close(EXPORT_BAD)
    close(EXPORT_TXN)
    close(EXPORT_ALL)
}
# exportData -- write the collected data to the configured export files.
#
#   EXPORT_BAD  the invalid-word buffer BUFF_ERRW, written as is
#   EXPORT_ALL  one tab-separated row per key sequence in TXN
#   EXPORT_TXN  the same rows, but only where more than one word shares the keys
#
# A target whose variable is empty or unset is skipped. With both table
# targets disabled nothing is written at all, because redirecting output to
# an empty file name is a fatal error in gawk.
# Row columns: word count, key-sequence length, key sequence, word list.
# The files are not closed here.
# Everything in the parameter list is an awk-style local; call with no arguments.
function exportData(    elm, header, row, toAll, toTxn) {
    # BUFF_ERRW already ends each word with "\n"; printf avoids the extra
    # blank line that print would append.
    if (EXPORT_BAD != "") printf "%s", BUFF_ERRW > EXPORT_BAD

    toAll = (EXPORT_ALL != "")
    toTxn = (EXPORT_TXN != "")
    if (!toAll && !toTxn) return

    header = "number-of-textonyms\tword's-length\tkeys\tlist-of-textonyms"
    if (toAll) printf "%s\n", header > EXPORT_ALL
    if (toTxn) printf "%s\n", header > EXPORT_TXN

    for (elm in TXN) {
        row = sprintf("%s\t%s\t%s\t%s", TXN[elm][COUNT], length(elm), elm, TXN[elm][TOTAL])
        if (toAll) printf "%s\n", row > EXPORT_ALL
        if (toTxn && TXN[elm][COUNT] > 1) printf "%s\n", row > EXPORT_TXN
    }
}
# userQuery -- interactive prompt; reads one request per input line and
# looks at the first field only:
#   -e | --ex | --exit   leave the program
#   digits               list the words stored in TXN under that key sequence
#   anything else        treat it as a word, show its key sequence and whether
#                        that sequence is present in TXN
# RS and FS are set back to the values saved in OLD_RS / OLD_FS before reading.
# Everything in the parameter list is an awk-style local; call with no arguments.
function userQuery(    userasks, ss, nn, key, words) {
    RS = OLD_RS
    FS = OLD_FS
    printf "txn>> "
    while ((getline) > 0) {
        if (NF > 0) {
            userasks = $1
            # The alternatives are grouped so that ^ and $ apply to each of
            # them; ungrouped, any input starting with "-e" or containing
            # "--ex" would end the session.
            if (userasks ~ /^(-e|--ex|--exit)$/) exit

            if (userasks ~ /^[0-9]+$/) {
                # Test membership first: merely reading TXN[x][COUNT] for an
                # unknown x would create TXN[x] and make later `in` tests
                # report that key sequence as present.
                if (userasks in TXN) {
                    nn = TXN[userasks][COUNT] + 0
                    words = TXN[userasks][TOTAL]
                } else {
                    nn = 0
                }
                if (nn == 0) { printf "%s -> %s\n", userasks, "no matching words" }
                else { printf "%s -> (%s) %s\n", userasks, nn, words }
            } else {
                ss = tolower(userasks)
                if ((key = keySeq_orElse_zero(ss)) > 0) {
                    printf "%s -> %s;%s in the table\n", ss, key,
                        ((key in TXN) ? "" : " not")
                } else {
                    printf "%s -> not a valid word for the alphabet:\n%s\n", userasks, AZ
                }
            }
        }
        printf "txn>> "
    }
    printf "\n"
}
# keySeq_orElse_zero -- translate aWord into its key sequence.
# Returns the concatenated XEK[] values of its characters, or 0 as soon as a
# character is not listed in the alphabet string AZ.
# The names after aWord are awk-style locals; callers pass aWord only.
function keySeq_orElse_zero(aWord,    letters, ch, count, pos, digits) {
    count = split(aWord, letters, //)
    for (pos = 1; pos <= count; pos++) {
        ch = letters[pos]
        if (index(AZ, " " ch " ") == 0)
            return 0
        digits = digits XEK[ch]
    }
    return digits
}
</syntaxhighlight>
 
{{out}}
<pre>
# Run, assuming the script is saved as txn.awk and is executable
$ LANG=en_US.UTF-8 ./txn.awk
total words in the file ‘/usr/share/dict/american-english’: 102775
valid words: 73318
invalid words: 29457
table indices for valid words: 65817
textonym groups in the table: 4670
txn>> cafe
cafe -> 2233; in the table
txn>> café
café -> not a valid word for the alphabet:
a b c d e f g h i j k l m n o p q r s t u v w x y z
txn>> 2233
2233 -> (3) abed aced bade
txn>> --exit
$
$
$ egrep 'café' "/tmp/invalidwords"
café
café's
cafés
$
$ sort -n -b -k 1 "/tmp/textonyms" | tail -n 7
8 6 782537 quaker pucker quakes rubles stakes staler stales sucker
9 3 269 amy bmw cox coy any bow box boy cow
9 4 2273 case acre bard bare barf base cape card care
9 4 7277 parr sars paps pars pass raps rasp saps sass
9 4 7867 pump puns rump rums runs stop sump sums suns
9 5 46637 homer goner goods goofs homes hones hoods hoofs inner
12 5 22737 acres bards barer bares barfs baser bases caper capes cards cares cases
$
$
$ sort -n -b -k 2 "/tmp/phonewords" | tail -n 5
1 20 86242722837478422559 uncharacteristically
1 21 353287636237425647267 electroencephalograms
1 21 353287636237425647274 electroencephalograph
1 22 2686837738658846627437 counterrevolutionaries
1 22 3532876362374256472747 electroencephalographs
$
</pre>
 
=={{header|C}}==
Anonymous user
Cookies help us deliver our services. By using our services, you agree to our use of cookies.