GSTrans string conversion: Difference between revisions

From Rosetta Code
Content added Content deleted
imported>Jgh
(Initial page with initial BBC BASIC example.)
 
m (julia example)
Line 82: Line 82:
</syntaxhighlight>
</syntaxhighlight>
No checks for string lengths is done. On decoding, invalid encodings are ignored and skipped, for instance '''|4''' is decoded as '''4'''.
No checks for string lengths is done. On decoding, invalid encodings are ignored and skipped, for instance '''|4''' is decoded as '''4'''.

=={{header|Julia}}==
<syntaxhighlight lang="julia">"""
ASCII code Symbols used
0 |@
1 - 26 |letter eg |A (or |a) = ASCII 1, |M (or |m) = ASCII 13
27 |[ or |{
28 |\
29 |] or |}
30 |^ or |~
31 |_ or |' (grave accent)
32 - 126 keyboard character, except for:
" |"
| ||
< |<
127 |?
128 - 255 |!coded symbol eg ASCII 128 = |!|@ ASCII 129 = |!|A

See also www.riscos.com/support/developers/prm/conversions.html
"""

"""
GSTrans_encode(arr::Vector{Char})

To avoid Unicode multibyte glitches, handle as vector of Chars, but we throw
an assertion error if any are multibyte (so, 0 <= integer value of char <= 255).
"""
function GSTrans_encode(arr::Vector{Char})
function GSTChar_encode(c::Char)
i = Int(c)
@assert 0 <= i <= 255 "Char value of $c, $i, is out of range"
resultchars = Char[]
if 0 <= i <= 31
push!(resultchars, '|', Char(64 + i))
elseif c == '"'
push!(resultchars, '|', '"')
elseif c == '|'
push!(resultchars, '|', '|')
elseif i == 127
push!(resultchars, '|', '?')
elseif 128 <= i <= 255 # |! then recurse after subtracting 128
push!(resultchars, '|', '!', GSTChar_encode(Char(i - 128))...)
else
push!(resultchars, c)
end
return resultchars
end
return String(mapreduce(GSTChar_encode, vcat, arr, init = Char[]))
end

"""
GSTrans_encode(str::AbstractString)

Encode after converting a potentially Unicode string to codeunit bytes and
then to a vector of ascii Chars, then pass this to encoding routine for the vector
"""
GSTrans_encode(str::AbstractString) = GSTrans_encode(Char.(transcode(UInt8, str)))

function GSTrans_decode(str::AbstractString)
result = UInt8[]
gotbar, gotbang, bangadd = false, false, 0
for c in str
if gotbang
if c == '|'
bangadd = 128
gotbar = true
else
push!(result, Char(Int(c) + 128))
end
gotbang = false
elseif gotbar
if c == '?'
push!(result, Char(127 + bangadd))
elseif c == '!'
gotbang = true
elseif c == '|' || c == '"' || c == '<'
push!(result, Char(Int(c) + bangadd))
elseif c == '[' || c == '{'
push!(result, Char(27 + bangadd))
elseif c == '\\'
push!(result, Char(28 + bangadd))
elseif c == ']' || c == '}'
push!(result, Char(29 + bangadd))
elseif c == '^' || c == '~'
push!(result, Char(30 + bangadd))
elseif c == '_' || c == '`'
push!(result, Char(31 + bangadd))
else
push!(result, Char(Int(uppercase(c)) - 64 + bangadd))
end
gotbar, bangadd = false, 0
elseif c == '|'
gotbar = true
else
push!(result, Char(c))
end
end
return String(result)
end

const TESTS = ["ALERT|G", "wert↑"]
const RAND_TESTS = [String(Char.(rand(0:255, 10))) for _ in 1:8]
const DECODE_TESTS = ["|LHello|G|J|M", "|m|j|@|e|!t|m|!|?"]

for t in [TESTS; RAND_TESTS]
encoded = GSTrans_encode(t)
decoded = GSTrans_decode(encoded)
println("String $t encoded is: $encoded, decoded is: $decoded.")
@assert t == decoded
end

for enc in DECODE_TESTS
print("Encoded string $enc decoded is: ")
display(GSTrans_decode(enc))
end
</syntaxhighlight>{{out}}
<pre>
String ALERT|G encoded is: ALERT||G, decoded is: ALERT|G.
String wert↑ encoded is: wert|!b|!|F|!|Q, decoded is: wert↑.
String @♂aN°$ª7Î encoded is: @|KaN|!B|!0$|!B|!*7|!B|!|R|!C|!|N, decoded is: @♂aN°$ª7Î.
String ÙC▼æÔt6¤☻Ì encoded is: |!C|!|YC|_|!C|!&|!C|!|Tt6|!B|!$|B|!C|!|L, decoded is: ÙC▼æÔt6¤☻Ì.
String "@)Ð♠qhýÌÿ encoded is: |"@)|!C|!|P|Fqh|!C|!=|!C|!|L|!C|!?, decoded is: "@)Ð♠qhýÌÿ.
String +☻#o9$u♠©A encoded is: +|B#o9$u|F|!B|!)A, decoded is: +☻#o9$u♠©A.
String ♣àlæi6Ú.é encoded is: |E|!C|! l|!B|!|K|!C|!&i6|!C|!|Z.|!C|!), decoded is: ♣àlæi6Ú.é.
String ÏÔ♀È♥@ë encoded is: |!C|!|O|!C|!|T|!B|!|[|Lj|!C|!|H|C@|!B|!|I|!C|!+, decoded is: ÏÔ♀È♥@ë.
String Rç÷%◄MZûhZ encoded is: R|!C|!'|!C|!7%|QMZ|!C|!;hZ, decoded is: Rç÷%◄MZûhZ.
String ç>¾AôVâ♫↓P encoded is: |!C|!'>|!B|!>A|!C|!4V|!C|!|"|N|YP, decoded is: ç>¾AôVâ♫↓P.
Encoded string |LHello|G|J|M decoded is: "\fHello\a\n\r"
Encoded string |m|j|@|e|!t|m|!|? decoded is: "\r\n\0\x05\xf4\r\xff"
</pre>

Revision as of 09:53, 24 October 2023

Task
GSTrans string conversion
You are encouraged to solve this task according to the task description, using any language you may know.

GSTrans string encoding is a method of encoding all 8-bit character values 0-255 with only printable characters. It originates on Acorn computers to allow command line commands to process non-printable characters.

 Character Encoding
 0-31      |letter eg |@, |A, |i |[ etc.
 32-126    character, except for:
 "         |"
 |         ||
 127       |?
 128-255   |! followed by encoding, eg |!|@ = 128

A string can be surrounded in quotes, eg "ALERT|G".

See http://www.riscos.com/support/developers/prm/conversions.html

Examples:

 |LHello|G|J|M       encodes  CHR$12;"Hello";CHR$7;CHR$10;CHR$13
 "|m|j|@|e|!t|m|!|?" encodes  13,10,0,5,244,13,255
Task
  • Write two functions, one to encode a string of characters into a GSTrans string, and one to decode a GSTrans string. Indicate if any error checking is done, and how it is indicated.

BBC BASIC

   10 REM > GSTrans.bbc
   20 REM GSTrans in BASIC
   30 REM J.G.Harston
   40 :
   50 REPEAT
   60   INPUT LINE "GSstring: "A$
   70   A$=FNGS_Decode(A$,0)
   80   A$=FNGS_Encode(A$)
   90   PRINT A$
  100 UNTIL FALSE
  110 END
  120 :
  130 :
  140 :
  150 REM Decode a GSTrans string
  160 REM On entry: inp$=GSTransed string
  170 REM           flg%=0 - parse whole string, *KEY style
  180 REM               =1 - parse until space, filename style (not implemented)
  190 REM Returns:  decoded string
  200 DEFFNGS_Decode(inp$,flg%)
  210 LOCAL out$,byte%,set%
  220 IF LEFT$(inp$,1)=" ":REPEAT:inp$=MID$(inp$,2):UNTIL LEFT$(inp$,1)<>" "
  230 IF LEFT$(inp$,1)="""":IF RIGHT$(inp$,1)="""":inp$=MID$(inp$,2,LENinp$-2)
  240 IF inp$="":=""
  250 REPEAT
  260   byte%=-1:set%=0
  270   IF LEFT$(inp$,2)="|!":set%=128:inp$=MID$(inp$,3)
  280   IF LEFT$(inp$,1)="|":byte%=ASCMID$(inp$,2,1)AND31
  290   IF LEFT$(inp$,2)="||":byte%=ASC"|"
  300   IF LEFT$(inp$,2)="|?":byte%=127
  310   IF LEFT$(inp$,2)="|""":byte%=34
  320   IF LEFT$(inp$,2)="""""":byte%=34
  330   IF byte%<0:byte%=ASC(inp$):inp$=MID$(inp$,2) ELSE inp$=MID$(inp$,3)
  340   out$=out$+CHR$(set%+byte%)
  350 UNTIL inp$=""
  360 =out$
  370 :
  380 REM Encode into a GSTrans string
  390 REM On entry: inp$=raw string
  400 REM Returns:  GSTrans string
  410 DEFFNGS_Encode(inp$)
  420 LOCAL out$,byte%
  430 IF inp$="":=""""""
  440 REPEAT
  450   byte%=ASC(inp$):inp$=MID$(inp$,2)
  460   IF byte%>127:out$=out$+"|!":byte%=byte% AND 127
  470   IF byte%>31 AND byte%<>ASC"""" AND byte%<>ASC"|" AND byte%<>127:out$=out$+CHR$(byte%)
  480   IF byte%<32:out$=out$+"|"+CHR$(byte%+64)
  490   IF byte%=ASC"""":out$=out$+""""""
  500   IF byte%=ASC"|":out$=out$+"||"
  510   IF byte%=127:out$=out$+"|?"
  520 UNTIL inp$=""
  530 =""""+out$+""""
  540 :

No checks for string lengths is done. On decoding, invalid encodings are ignored and skipped, for instance |4 is decoded as 4.

Julia

""" 
ASCII code	Symbols used
0	        |@
1 - 26	    |letter eg |A (or |a) = ASCII 1, |M (or |m) = ASCII 13
27	        |[ or |{
28	        |\
29	        |] or |}
30	        |^ or |~
31	        |_ or |' (grave accent)
32 - 126	keyboard character, except for:
"	        |"
|	        ||
<	        |<
127	        |?
128 - 255	|!coded symbol eg ASCII 128 = |!|@ ASCII 129 = |!|A

See also www.riscos.com/support/developers/prm/conversions.html
"""

""" 
    GSTrans_encode(arr::Vector{Char})

    To avoid Unicode multibyte glitches, handle as vector of Chars, but we throw 
    an assertion error if any are multibyte (so, 0 <= integer value of char <= 255).
"""
function GSTrans_encode(arr::Vector{Char})
    function GSTChar_encode(c::Char)
        i = Int(c)
        @assert 0 <= i <= 255 "Char value of $c, $i, is out of range"
        resultchars = Char[]
        if 0 <= i <= 31
            push!(resultchars, '|', Char(64 + i))
        elseif c == '"'
            push!(resultchars, '|', '"')
        elseif c == '|'
            push!(resultchars, '|', '|')
        elseif i == 127
            push!(resultchars, '|', '?')
        elseif 128 <= i <= 255 # |! then recurse after subtracting 128
            push!(resultchars, '|', '!', GSTChar_encode(Char(i - 128))...)
        else
            push!(resultchars, c)
        end
        return resultchars
    end
    return String(mapreduce(GSTChar_encode, vcat, arr, init = Char[]))
end

""" 
    GSTrans_encode(str::AbstractString)

    Encode after converting a potentially Unicode string to codeunit bytes and 
    then to a vector of ascii Chars, then pass this to encoding routine for the vector
"""
GSTrans_encode(str::AbstractString) = GSTrans_encode(Char.(transcode(UInt8, str)))

function GSTrans_decode(str::AbstractString)
    result = UInt8[]
    gotbar, gotbang, bangadd = false, false, 0
    for c in str
        if gotbang
            if c == '|'
                bangadd = 128
                gotbar = true
            else
                push!(result, Char(Int(c) + 128))
            end
            gotbang = false
        elseif gotbar       
            if c == '?'
                push!(result, Char(127 + bangadd))
            elseif c == '!'
                gotbang = true
            elseif c == '|' || c == '"' || c == '<'
                push!(result, Char(Int(c) + bangadd))
            elseif c == '[' || c == '{'
                push!(result, Char(27 + bangadd))
            elseif c == '\\'
                push!(result, Char(28 + bangadd))
            elseif c == ']' || c == '}'
                push!(result, Char(29 + bangadd))
            elseif c == '^' || c == '~'
                push!(result, Char(30 + bangadd))
            elseif c == '_' || c == '`'
                push!(result, Char(31 + bangadd))
            else
                push!(result, Char(Int(uppercase(c)) - 64 + bangadd))
            end
            gotbar, bangadd = false, 0
        elseif c == '|'
                gotbar = true
        else
            push!(result, Char(c))
        end
    end
    return String(result)
end

const TESTS = ["ALERT|G", "wert↑"]
const RAND_TESTS = [String(Char.(rand(0:255, 10))) for _ in 1:8]
const DECODE_TESTS = ["|LHello|G|J|M", "|m|j|@|e|!t|m|!|?"]

for t in [TESTS; RAND_TESTS]
    encoded = GSTrans_encode(t)
    decoded = GSTrans_decode(encoded)
    println("String $t encoded is: $encoded, decoded is: $decoded.")
    @assert t == decoded
end

for enc in DECODE_TESTS
    print("Encoded string $enc decoded is: ")
    display(GSTrans_decode(enc))
end
Output:
String ALERT|G encoded is: ALERT||G, decoded is: ALERT|G.
String wert↑ encoded is: wert|!b|!|F|!|Q, decoded is: wert↑.
String @♂aN°$ª7Î encoded is: @|KaN|!B|!0$|!B|!*7|!B|!|R|!C|!|N, decoded is: @♂aN°$ª7Î.
String ÙC▼æÔt6¤☻Ì encoded is: |!C|!|YC|_|!C|!&|!C|!|Tt6|!B|!$|B|!C|!|L, decoded is: ÙC▼æÔt6¤☻Ì.
String "@)Ð♠qhýÌÿ encoded is: |"@)|!C|!|P|Fqh|!C|!=|!C|!|L|!C|!?, decoded is: "@)Ð♠qhýÌÿ.
String +☻#o9$u♠©A encoded is: +|B#o9$u|F|!B|!)A, decoded is: +☻#o9$u♠©A.
String ♣àlæi6Ú.é encoded is: |E|!C|! l|!B|!|K|!C|!&i6|!C|!|Z.|!C|!), decoded is: ♣àlæi6Ú.é.
String ÏÔ♀È♥@ë encoded is: |!C|!|O|!C|!|T|!B|!|[|Lj|!C|!|H|C@|!B|!|I|!C|!+, decoded is: ÏÔ♀È♥@ë.
String Rç÷%◄MZûhZ encoded is: R|!C|!'|!C|!7%|QMZ|!C|!;hZ, decoded is: Rç÷%◄MZûhZ.
String ç>¾AôVâ♫↓P encoded is: |!C|!'>|!B|!>A|!C|!4V|!C|!|"|N|YP, decoded is: ç>¾AôVâ♫↓P.
Encoded string |LHello|G|J|M decoded is: "\fHello\a\n\r"
Encoded string |m|j|@|e|!t|m|!|? decoded is: "\r\n\0\x05\xf4\r\xff"