GSTrans string conversion: Difference between revisions
Content added Content deleted
imported>Jgh (Initial page with initial BBC BASIC example.) |
m (julia example) |
||
Line 82: | Line 82: | ||
</syntaxhighlight> |
</syntaxhighlight> |
||
No checks for string lengths is done. On decoding, invalid encodings are ignored and skipped, for instance '''|4''' is decoded as '''4'''. |
No checks for string lengths is done. On decoding, invalid encodings are ignored and skipped, for instance '''|4''' is decoded as '''4'''. |
||
=={{header|Julia}}== |
|||
<syntaxhighlight lang="julia">""" |
|||
ASCII code Symbols used |
|||
0 |@ |
|||
1 - 26 |letter eg |A (or |a) = ASCII 1, |M (or |m) = ASCII 13 |
|||
27 |[ or |{ |
|||
28 |\ |
|||
29 |] or |} |
|||
30 |^ or |~ |
|||
31 |_ or |' (grave accent) |
|||
32 - 126 keyboard character, except for: |
|||
" |" |
|||
| || |
|||
< |< |
|||
127 |? |
|||
128 - 255 |!coded symbol eg ASCII 128 = |!|@ ASCII 129 = |!|A |
|||
See also www.riscos.com/support/developers/prm/conversions.html |
|||
""" |
|||
""" |
|||
GSTrans_encode(arr::Vector{Char}) |
|||
To avoid Unicode multibyte glitches, handle as vector of Chars, but we throw |
|||
an assertion error if any are multibyte (so, 0 <= integer value of char <= 255). |
|||
""" |
|||
function GSTrans_encode(arr::Vector{Char}) |
|||
function GSTChar_encode(c::Char) |
|||
i = Int(c) |
|||
@assert 0 <= i <= 255 "Char value of $c, $i, is out of range" |
|||
resultchars = Char[] |
|||
if 0 <= i <= 31 |
|||
push!(resultchars, '|', Char(64 + i)) |
|||
elseif c == '"' |
|||
push!(resultchars, '|', '"') |
|||
elseif c == '|' |
|||
push!(resultchars, '|', '|') |
|||
elseif i == 127 |
|||
push!(resultchars, '|', '?') |
|||
elseif 128 <= i <= 255 # |! then recurse after subtracting 128 |
|||
push!(resultchars, '|', '!', GSTChar_encode(Char(i - 128))...) |
|||
else |
|||
push!(resultchars, c) |
|||
end |
|||
return resultchars |
|||
end |
|||
return String(mapreduce(GSTChar_encode, vcat, arr, init = Char[])) |
|||
end |
|||
""" |
|||
GSTrans_encode(str::AbstractString) |
|||
Encode after converting a potentially Unicode string to codeunit bytes and |
|||
then to a vector of ascii Chars, then pass this to encoding routine for the vector |
|||
""" |
|||
GSTrans_encode(str::AbstractString) = GSTrans_encode(Char.(transcode(UInt8, str))) |
|||
function GSTrans_decode(str::AbstractString) |
|||
result = UInt8[] |
|||
gotbar, gotbang, bangadd = false, false, 0 |
|||
for c in str |
|||
if gotbang |
|||
if c == '|' |
|||
bangadd = 128 |
|||
gotbar = true |
|||
else |
|||
push!(result, Char(Int(c) + 128)) |
|||
end |
|||
gotbang = false |
|||
elseif gotbar |
|||
if c == '?' |
|||
push!(result, Char(127 + bangadd)) |
|||
elseif c == '!' |
|||
gotbang = true |
|||
elseif c == '|' || c == '"' || c == '<' |
|||
push!(result, Char(Int(c) + bangadd)) |
|||
elseif c == '[' || c == '{' |
|||
push!(result, Char(27 + bangadd)) |
|||
elseif c == '\\' |
|||
push!(result, Char(28 + bangadd)) |
|||
elseif c == ']' || c == '}' |
|||
push!(result, Char(29 + bangadd)) |
|||
elseif c == '^' || c == '~' |
|||
push!(result, Char(30 + bangadd)) |
|||
elseif c == '_' || c == '`' |
|||
push!(result, Char(31 + bangadd)) |
|||
else |
|||
push!(result, Char(Int(uppercase(c)) - 64 + bangadd)) |
|||
end |
|||
gotbar, bangadd = false, 0 |
|||
elseif c == '|' |
|||
gotbar = true |
|||
else |
|||
push!(result, Char(c)) |
|||
end |
|||
end |
|||
return String(result) |
|||
end |
|||
const TESTS = ["ALERT|G", "wert↑"] |
|||
const RAND_TESTS = [String(Char.(rand(0:255, 10))) for _ in 1:8] |
|||
const DECODE_TESTS = ["|LHello|G|J|M", "|m|j|@|e|!t|m|!|?"] |
|||
for t in [TESTS; RAND_TESTS] |
|||
encoded = GSTrans_encode(t) |
|||
decoded = GSTrans_decode(encoded) |
|||
println("String $t encoded is: $encoded, decoded is: $decoded.") |
|||
@assert t == decoded |
|||
end |
|||
for enc in DECODE_TESTS |
|||
print("Encoded string $enc decoded is: ") |
|||
display(GSTrans_decode(enc)) |
|||
end |
|||
</syntaxhighlight>{{out}} |
|||
<pre> |
|||
String ALERT|G encoded is: ALERT||G, decoded is: ALERT|G. |
|||
String wert↑ encoded is: wert|!b|!|F|!|Q, decoded is: wert↑. |
|||
String @♂aN°$ª7Î encoded is: @|KaN|!B|!0$|!B|!*7|!B|!|R|!C|!|N, decoded is: @♂aN°$ª7Î. |
|||
String ÙC▼æÔt6¤☻Ì encoded is: |!C|!|YC|_|!C|!&|!C|!|Tt6|!B|!$|B|!C|!|L, decoded is: ÙC▼æÔt6¤☻Ì. |
|||
String "@)Ð♠qhýÌÿ encoded is: |"@)|!C|!|P|Fqh|!C|!=|!C|!|L|!C|!?, decoded is: "@)Ð♠qhýÌÿ. |
|||
String +☻#o9$u♠©A encoded is: +|B#o9$u|F|!B|!)A, decoded is: +☻#o9$u♠©A. |
|||
String ♣àlæi6Ú.é encoded is: |E|!C|! l|!B|!|K|!C|!&i6|!C|!|Z.|!C|!), decoded is: ♣àlæi6Ú.é. |
|||
String ÏÔ♀È♥@ë encoded is: |!C|!|O|!C|!|T|!B|!|[|Lj|!C|!|H|C@|!B|!|I|!C|!+, decoded is: ÏÔ♀È♥@ë. |
|||
String Rç÷%◄MZûhZ encoded is: R|!C|!'|!C|!7%|QMZ|!C|!;hZ, decoded is: Rç÷%◄MZûhZ. |
|||
String ç>¾AôVâ♫↓P encoded is: |!C|!'>|!B|!>A|!C|!4V|!C|!|"|N|YP, decoded is: ç>¾AôVâ♫↓P. |
|||
Encoded string |LHello|G|J|M decoded is: "\fHello\a\n\r" |
|||
Encoded string |m|j|@|e|!t|m|!|? decoded is: "\r\n\0\x05\xf4\r\xff" |
|||
</pre> |
Revision as of 09:53, 24 October 2023
GSTrans string conversion
You are encouraged to solve this task according to the task description, using any language you may know.
You are encouraged to solve this task according to the task description, using any language you may know.
GSTrans string encoding is a method of encoding all 8-bit character values 0-255 with only printable characters. It originates on Acorn computers to allow command line commands to process non-printable characters.
Character   Encoding
0-31        |letter, eg |@, |A, |i, |[ etc.
32-126      the character itself, except for:
  "           |"
  |           ||
127         |?
128-255     |! followed by encoding, eg |!|@ = 128
A string can be surrounded in quotes, eg "ALERT|G".
See http://www.riscos.com/support/developers/prm/conversions.html
Examples:
|LHello|G|J|M encodes CHR$12;"Hello";CHR$7;CHR$10;CHR$13
"|m|j|@|e|!t|m|!|?" encodes 13,10,0,5,244,13,255
- Task
- Write two functions, one to encode a string of characters into a GSTrans string, and one to decode a GSTrans string. Indicate if any error checking is done, and how it is indicated.
BBC BASIC
10 REM > GSTrans.bbc
20 REM GSTrans in BASIC
30 REM J.G.Harston
40 :
45 REM Test loop: read a line, decode it as a GSTrans string, re-encode the
46 REM decoded result and print it. The loop runs until interrupted
47 REM (UNTIL FALSE), so the END at line 110 is never reached.
50 REPEAT
60 INPUT LINE "GSstring: "A$
70 A$=FNGS_Decode(A$,0)
80 A$=FNGS_Encode(A$)
90 PRINT A$
100 UNTIL FALSE
110 END
120 :
130 :
140 :
150 REM Decode a GSTrans string
160 REM On entry: inp$=GSTransed string
170 REM flg%=0 - parse whole string, *KEY style
180 REM =1 - parse until space, filename style (not implemented)
190 REM Returns: decoded string
195 REM Note: flg% is accepted but not referenced in the body below
200 DEFFNGS_Decode(inp$,flg%)
210 LOCAL out$,byte%,set%
215 REM Strip leading spaces
220 IF LEFT$(inp$,1)=" ":REPEAT:inp$=MID$(inp$,2):UNTIL LEFT$(inp$,1)<>" "
225 REM Strip one pair of surrounding double quotes when both are present
230 IF LEFT$(inp$,1)="""":IF RIGHT$(inp$,1)="""":inp$=MID$(inp$,2,LENinp$-2)
235 REM Nothing left to decode: return the empty string
240 IF inp$="":=""
245 REM Each pass decodes one output byte from the front of inp$
250 REPEAT
255 REM byte%=-1 means no escape matched yet; set% is added to the byte
260 byte%=-1:set%=0
265 REM |! prefix: remove it and add 128 to the byte that follows
270 IF LEFT$(inp$,2)="|!":set%=128:inp$=MID$(inp$,3)
275 REM |<char>: default value is the character code AND 31; lines 290-320
276 REM then override that for the pairs ||, |?, |" and ""
280 IF LEFT$(inp$,1)="|":byte%=ASCMID$(inp$,2,1)AND31
290 IF LEFT$(inp$,2)="||":byte%=ASC"|"
300 IF LEFT$(inp$,2)="|?":byte%=127
310 IF LEFT$(inp$,2)="|""":byte%=34
320 IF LEFT$(inp$,2)="""""":byte%=34
325 REM No pair matched: take one raw character; otherwise skip the pair
330 IF byte%<0:byte%=ASC(inp$):inp$=MID$(inp$,2) ELSE inp$=MID$(inp$,3)
340 out$=out$+CHR$(set%+byte%)
350 UNTIL inp$=""
360 =out$
370 :
380 REM Encode into a GSTrans string
390 REM On entry: inp$=raw string
400 REM Returns: GSTrans string
405 REM The result is always wrapped in double quotes (see line 530)
410 DEFFNGS_Encode(inp$)
420 LOCAL out$,byte%
425 REM Empty input encodes as a pair of double quotes
430 IF inp$="":=""""""
435 REM Each pass encodes the first character of inp$ and removes it
440 REPEAT
450 byte%=ASC(inp$):inp$=MID$(inp$,2)
455 REM Codes above 127: emit |! and continue with the low seven bits
460 IF byte%>127:out$=out$+"|!":byte%=byte% AND 127
465 REM Codes 32-126 other than double quote and | are copied unchanged
470 IF byte%>31 AND byte%<>ASC"""" AND byte%<>ASC"|" AND byte%<>127:out$=out$+CHR$(byte%)
475 REM Codes 0-31 become | followed by the character with code byte%+64
480 IF byte%<32:out$=out$+"|"+CHR$(byte%+64)
485 REM A double quote is written as two double quotes, | as || and 127 as |?
490 IF byte%=ASC"""":out$=out$+""""""
500 IF byte%=ASC"|":out$=out$+"||"
510 IF byte%=127:out$=out$+"|?"
520 UNTIL inp$=""
530 =""""+out$+""""
540 :
No checks on string lengths are done. On decoding, invalid encodings are ignored and skipped, for instance |4 is decoded as 4.
Julia
"""
ASCII code Symbols used
0 |@
1 - 26 |letter eg |A (or |a) = ASCII 1, |M (or |m) = ASCII 13
27 |[ or |{
28 |\
29 |] or |}
30 |^ or |~
31 |_ or |` (grave accent)
32 - 126 keyboard character, except for:
" |"
| ||
< |<
127 |?
128 - 255 |!coded symbol eg ASCII 128 = |!|@ ASCII 129 = |!|A
See also www.riscos.com/support/developers/prm/conversions.html
"""
"""
GSTrans_encode(arr::Vector{Char})
To avoid Unicode multibyte glitches, handle as vector of Chars, but we throw
an assertion error if any are multibyte (so, 0 <= integer value of char <= 255).
"""
function GSTrans_encode(arr::Vector{Char})
    # Encoded characters are accumulated in one buffer, in input order.
    out = Char[]
    for ch in arr
        code = Int(ch)
        @assert 0 <= code <= 255 "Char value of $ch, $code, is out of range"
        # Values 128-255 are written as "|!" followed by the encoding of the
        # value with 128 subtracted.
        if code >= 128
            push!(out, '|', '!')
            code -= 128
        end
        if code < 32
            # Control codes map onto '@', 'A'..'Z', '[', '\\', ']', '^', '_'.
            push!(out, '|', Char(code + 64))
        elseif code == Int('"')
            push!(out, '|', '"')
        elseif code == Int('|')
            push!(out, '|', '|')
        elseif code == 127
            push!(out, '|', '?')
        else
            # Remaining values (32-126) stand for themselves.
            push!(out, Char(code))
        end
    end
    return String(out)
end
"""
GSTrans_encode(str::AbstractString)
Encode after converting a potentially Unicode string to codeunit bytes and
then to a vector of ascii Chars, then pass this to encoding routine for the vector
"""
function GSTrans_encode(str::AbstractString)
    # Encode byte by byte: each UInt8 code unit of the string becomes one Char
    # in the range 0-255, which the Vector{Char} method then encodes.
    bytes = transcode(UInt8, str)
    return GSTrans_encode(Char[Char(b) for b in bytes])
end
"""
    GSTrans_decode(str::AbstractString)

Decode the GSTrans-encoded string `str` and return a `String` built from the
decoded bytes. The bytes are not required to form valid UTF-8.

A bar introduces an escape:
- bar + `?` gives 127;
- bar + `@` or a letter (either case) gives the control codes 0 to 26;
- bar + `[` or `{` gives 27, bar + backslash 28, bar + `]` or `}` 29,
  bar + `^` or `~` 30, bar + `_` or a grave accent 31;
- bar + `!` adds 128 to the value of the character or bar escape that follows;
- bar + any other character (for example a second bar, a double quote, `<`,
  a digit or a space) gives that character itself.

An unfinished escape at the end of `str` produces no output. A value that does
not fit in one byte (for example a literal character above code point 255)
raises an error when it is stored.
"""
function GSTrans_decode(str::AbstractString)
    result = UInt8[]
    # gotbar:  the previous character was an unconsumed '|'
    # gotbang: the previous two characters were "|!"
    # bangadd: 128 while decoding the bar escape that follows "|!", else 0
    gotbar, gotbang, bangadd = false, false, 0
    for c in str
        if gotbang
            if c == '|'
                # "|!|x": decode the following bar escape with the top bit set.
                bangadd = 128
                gotbar = true
            else
                # "|!x": plain character with the top bit set.
                push!(result, UInt8(Int(c) + 128))
            end
            gotbang = false
        elseif gotbar
            if c == '!'
                gotbang = true
            else
                code = if c == '?'
                    127
                elseif c == '[' || c == '{'
                    27
                elseif c == '\\'
                    28
                elseif c == ']' || c == '}'
                    29
                elseif c == '^' || c == '~'
                    30
                elseif c == '_' || c == '`'
                    31
                else
                    ctrl = Int(uppercase(c)) - 64
                    # '@' and letters give control codes; anything else is not
                    # a control escape and is passed through literally rather
                    # than producing a negative (invalid) value.
                    0 <= ctrl <= 26 ? ctrl : Int(c)
                end
                push!(result, UInt8(code + bangadd))
            end
            gotbar, bangadd = false, 0
        elseif c == '|'
            gotbar = true
        else
            push!(result, UInt8(c))
        end
    end
    return String(result)
end
# Fixed samples for the encode/decode round trip.
const TESTS = ["ALERT|G", "wert↑"]
# Eight random strings of ten characters with code points drawn from 0:255.
const RAND_TESTS = [String(map(Char, rand(0:255, 10))) for _ in 1:8]
# Already-encoded samples that are only decoded.
const DECODE_TESTS = ["|LHello|G|J|M", "|m|j|@|e|!t|m|!|?"]

# Round-trip every sample; an assertion error stops the run on a mismatch.
for t in vcat(TESTS, RAND_TESTS)
    gs = GSTrans_encode(t)
    decoded = GSTrans_decode(gs)
    println("String ", t, " encoded is: ", gs, ", decoded is: ", decoded, ".")
    @assert t == decoded
end

# Show the decoded form of each pre-encoded sample via `display`.
for enc in DECODE_TESTS
    print("Encoded string ", enc, " decoded is: ")
    display(GSTrans_decode(enc))
end
- Output:
String ALERT|G encoded is: ALERT||G, decoded is: ALERT|G. String wert↑ encoded is: wert|!b|!|F|!|Q, decoded is: wert↑. String @♂aN°$ª7Î encoded is: @|KaN|!B|!0$|!B|!*7|!B|!|R|!C|!|N, decoded is: @♂aN°$ª7Î. String ÙC▼æÔt6¤☻Ì encoded is: |!C|!|YC|_|!C|!&|!C|!|Tt6|!B|!$|B|!C|!|L, decoded is: ÙC▼æÔt6¤☻Ì. String "@)Ð♠qhýÌÿ encoded is: |"@)|!C|!|P|Fqh|!C|!=|!C|!|L|!C|!?, decoded is: "@)Ð♠qhýÌÿ. String +☻#o9$u♠©A encoded is: +|B#o9$u|F|!B|!)A, decoded is: +☻#o9$u♠©A. String ♣àlæi6Ú.é encoded is: |E|!C|! l|!B|!|K|!C|!&i6|!C|!|Z.|!C|!), decoded is: ♣àlæi6Ú.é. String ÏÔ♀È♥@ë encoded is: |!C|!|O|!C|!|T|!B|!|[|Lj|!C|!|H|C@|!B|!|I|!C|!+, decoded is: ÏÔ♀È♥@ë. String Rç÷%◄MZûhZ encoded is: R|!C|!'|!C|!7%|QMZ|!C|!;hZ, decoded is: Rç÷%◄MZûhZ. String ç>¾AôVâ♫↓P encoded is: |!C|!'>|!B|!>A|!C|!4V|!C|!|"|N|YP, decoded is: ç>¾AôVâ♫↓P. Encoded string |LHello|G|J|M decoded is: "\fHello\a\n\r" Encoded string |m|j|@|e|!t|m|!|? decoded is: "\r\n\0\x05\xf4\r\xff"