User:Eriksiers/BBC BASIC detokenizer

From Rosetta Code
Revision as of 02:43, 3 March 2012 by Eriksiers (talk | contribs) (created)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

This is to convert tokenized BBC BASIC files to a human-readable (plain text) format.

There are actually two parts to this. The first is VBA/Excel, and the second is QBasic.

(If you're only interested in the actual detokenizing code, and not how it's generated, skip down to the QBasic section.)

VBA/Excel

This code uses data contained in an Excel spreadsheet to generate the QBasic code below. (I'm not including the actual data, but it can be easily reconstructed from the QBasic code.)

<lang vb>Sub buildBas()

   Open ActiveWorkbook.Path & "\detoken.bas" For Output As 1
       For L0 = 2 To Cells.SpecialCells(xlCellTypeLastCell).Row
           If Len(Cells(L0, 2).Value) Then
               tmp = tmp + 1
               If (tmp Mod 10) = 1 Then Print #1, "DATA ";
               Print #1, LTrim$(Val("&h" & Cells(L0, 1).Value)); ","""; Cells(L0, 2).Value; """";
               If (tmp Mod 10) = 0 Then
                   Print #1,
               Else
                   Print #1, ",";
               End If
           End If
       Next
       Print #1, "0": Print #1,
       Print #1, "DIM linenum AS INTEGER, indentLevel AS INTEGER, indentPending AS INTEGER"
       Print #1, "DIM encoded AS STRING * 1"
       Print #1, "DIM keywords(255) AS STRING"
       Print #1, "DO"
       Print #1, "    READ t%"
       Print #1, "    IF 0 = t% THEN EXIT DO"
       Print #1, "    READ keywords(t%)"
       Print #1, "LOOP"
       Print #1, "nm$ = ENVIRON$(""DETOKEN"")"
       Print #1, "OPEN nm$ + "".bbc"" FOR BINARY AS 2"
       Print #1, "OPEN nm$ + "".bas"" FOR OUTPUT AS 3"
       Print #1, "'skip first char"
       Print #1, "GET #2, , encoded"
       Print #1, "GET #2, , linenum"
       Print #1, "IF -1 = linenum THEN"
       Print #1, "    CLOSE: SYSTEM"
       Print #1, "ELSEIF linenum <> 0 THEN"
       Print #1, "    PRINT #3, LTRIM$(STR$(linenum)); "" "";"
       Print #1, "END IF"
       'still haven't figured out the first char...
       'Print #1, "PRINT #3, ASC(encoded); CHR$(9);"
       Print #1, "DO UNTIL EOF(2)"
       Print #1, "    GET #2, , encoded"
       Print #1, "    i% = ASC(encoded)"
       Print #1, "    SELECT CASE i%"
       Print #1, "        CASE 0"
       Print #1, "            'do nothing"
       Print #1, "        CASE 13"
       Print #1, "            PRINT #3,"
       Print #1, "            'skip first char of next line"
       Print #1, "            IF NOT EOF(2) THEN"
       Print #1, "                GET #2, , encoded"
       Print #1, "                GET #2, , linenum"
       Print #1, "                IF -1 = linenum THEN"
       Print #1, "                    CLOSE: SYSTEM"
       Print #1, "                ELSEIF linenum <> 0 THEN"
       Print #1, "                    PRINT #3, LTRIM$(STR$(linenum)); "" "";"
       Print #1, "                END IF"
       Print #1, "            END IF"
       Print #1, "        CASE ELSE"
       Print #1, "            IF keywords(i%) <> """" THEN"
       Print #1, "                PRINT #3, keywords(i%);"
       Print #1, "            ELSE"
       Print #1, "                PRINT #3, encoded;"
       Print #1, "            END IF"
       Print #1, "    END SELECT"
       Print #1, "LOOP"
       Print #1, "CLOSE"
       Print #1, "SYSTEM"
   Close

End Sub</lang>

QBasic

This is the code that actually does the work. I chose QBasic for a number of reasons, the most important of which are:

  • interpreter (no compile time)
  • simple
  • relatively portable
  • I've used it for around 20 years, so why change now?

Note that since I expect the code to run under QBasic, you don't specify the file(s) to be detokenized on the command line, but instead do it one file at a time, via the DETOKEN environment variable. (This can easily be changed, but I don't care to.)

<lang qbasic>DATA 1,"CIRCLE",2,"ELLIPSE",3,"FILLED",4,"MOUSE",5,"ORIGIN",6,"QUIT",7,"RECTANGLE",8,"SWAP",9,"SYS",10,"TINT" DATA 11,"WAIT",12,"INSTALL",14,"PRIVATE",15,"BY",16,"EXIT",128,"AND",129,"DIV",130,"EOR",131,"MOD",132,"OR" DATA 133,"ERROR",134,"LINE",135,"OFF",136,"STEP",137,"SPC",138,"TAB(",139,"ELSE",140,"THEN",142,"OPENIN",143,"PTR" DATA 144,"PAGE",145,"TIME",146,"LOMEM",147,"HIMEM",148,"ABS",149,"ACS",150,"ADVAL",151,"ASC",152,"ASN",153,"ATN" DATA 154,"BGET",155,"COS",156,"COUNT",157,"DEG",158,"ERL",159,"ERR",160,"EVAL",161,"EXP",162,"EXT",163,"FALSE" DATA 164,"FN",165,"GET",166,"INKEY",167,"INSTR(",168,"INT",169,"LEN",170,"LN",171,"LOG",172,"NOT",173,"OPENUP" DATA 174,"OPENOUT",175,"PI",176,"POINT",177,"POS",178,"RAD",179,"RND",180,"SGN",181,"SIN",182,"SQR",183,"TAN" DATA 184,"TO",185,"TRUE",186,"USR",187,"VAL",188,"RND",189,"CHR$",190,"GET$",191,"INKEY$",192,"LEFT$",193,"MID$(" DATA 194,"RIGHT$",195,"STR$",196,"STRING",197,"EOF",198,"SUM",199,"WHILE",200,"CASE",201,"WHEN",202,"OF",203,"ENDCASE" DATA 204,"OTHERWISE",205,"ENDIF",206,"ENDWHILE",207,"PTR",208,"PAGE",209,"TIME",210,"LOMEM",211,"HIMEM",212,"SOUND",213,"BPUT" DATA 214,"CALL",215,"CHAIN",216,"CLEAR",217,"CLOSE",218,"CLG",219,"CLS",220,"DATA",221,"DEF",222,"DIM",223,"DRAW" DATA 224,"END",225,"ENDPROC",226,"ENVELOPE",227,"FOR",228,"GOSUB",229,"GOTO",230,"GCOL",231,"IF",232,"INPUT",233,"LET" DATA 234,"LOCAL",235,"MODE",236,"MOVE",237,"NEXT",238,"ON",239,"VDU",240,"PLOT",241,"PRINT",242,"PROC",243,"READ" DATA 244,"REM",245,"REPEAT",246,"REPORT",247,"RESTORE",248,"RETURN",249,"RUN",250,"STOP",251,"COLOUR",252,"TRACE",253,"UNTIL" DATA 254,"WIDTH",255,"OSCLI",0

DIM linenum AS INTEGER, encoded AS STRING * 1 DIM keywords(255) AS STRING DO

   READ t%
   IF 0 = t% THEN EXIT DO
   READ keywords(t%)

LOOP nm$ = ENVIRON$("DETOKEN") OPEN nm$ + ".bbc" FOR BINARY AS 2 OPEN nm$ + ".bas" FOR OUTPUT AS 3 'skip first char GET #2, , encoded GET #2, , linenum IF -1 = linenum THEN

   CLOSE: SYSTEM

ELSEIF linenum <> 0 THEN

   PRINT #3, LTRIM$(STR$(linenum)); " ";

END IF DO UNTIL EOF(2)

   GET #2, , encoded
   i% = ASC(encoded)
   SELECT CASE i%
       CASE 0
           'do nothing
       CASE 13
           PRINT #3,
           'skip first char of next line
           IF NOT EOF(2) THEN
               GET #2, , encoded
               GET #2, , linenum
               IF -1 = linenum THEN
                   CLOSE: SYSTEM
               ELSEIF linenum <> 0 THEN
                   PRINT #3, LTRIM$(STR$(linenum)); " ";
               END IF
           END IF
       CASE ELSE
           IF keywords(i%) <> "" THEN
               PRINT #3, keywords(i%);
           ELSE
               PRINT #3, encoded;
           END IF
   END SELECT

LOOP CLOSE SYSTEM</lang>