GSTrans string conversion: Difference between revisions

m (→‎{{header|Julia}}: add more error handling)
 
(27 intermediate revisions by 8 users not shown)
Line 25:
 
=={{header|ALGOL 68}}==
As with the Wren sample, this includes the Julia test cases, but Algol 68 implementations don't generally handle UTF-8: STRINGs are arrays of CHARs and a CHAR is usually a single byte, so the Unicode characters are encoded as separate bytes.<br>
This does a little error checking - as in the Wren, etc. samples, if an invalid byte value ( < 0 or > 255 ) is the result of a decoding, the original character is retained. Additionally, if the string ends with "|" or "|!", the final "|" or "|!" is ignored.<br>
Quoted strings retain their quotes when encoded or decoded.<br>
In the output of the following, control characters are shown as their decimal values, enclosed in "[" and "]".
<syntaxhighlight lang="algol68">
BEGIN # GSTrans string conversion #
Line 72:
REPR 127
ELIF s[ i ] /= "!" THEN
REPRINT (ch = ABS s[ i ] - 64 );
IF ch < 0 THEN s[ i ] ELSE REPR ch FI
ELSE
i +:= 1;
IF i > UPB s THEN
""
ELIF s[ i ] /= "|" THEN
REPR (INT ch = ABS s[ i ] + 128 );
IF ch > 255 THEN s[ i ] ELSE REPR ch FI
ELIF ( i +:= 1 ) > UPB s THEN
""
ELSE
STRING c = DECODE STRING( "|" + s[ i ] );
REPRINT (ch = ABS c[ LWB c ] + 128 );
IF ch > 255 THEN s[ i ] ELSE REPR ch FI
FI
FI;
Line 119 ⟶ 122:
)
)
OD;
STRING invalid = "|=|1|!";
print( ( "Decoding: ", invalid, " -> ", DECODE invalid, newline ) )
END
</syntaxhighlight>
Line 137 ⟶ 142:
[13][10][0][5]ô[13]ÿ -> |M|J|@|E|!t|M|!|? -> [13][10][0][5]ô[13]ÿ
"quoted|text" -> "quoted||text" -> "quoted|text"
Decoding: |=|1|! -> =1
</pre>
 
Line 197 ⟶ 203:
</syntaxhighlight>
No checks for string lengths are done. On decoding, invalid encodings are ignored and skipped, for instance '''|4''' is decoded as '''4'''.
 
=={{header|Emacs Lisp}}==
<syntaxhighlight lang="lisp">
 
 
"
ASCII code Symbols used
0 |@
1 - 26 |letter eg |A (or |a) = ASCII 1, |M (or |m) = ASCII 13
27 |[ or |{
28 |\
29 |] or |}
30 |^ or |~
31 |_ or |` (grave accent)
32 - 126 keyboard character, except for:
\" |\"
| ||
< |<
127 |?
128 - 255 |!coded symbol eg ASCII 128 = |!|@ ASCII 129 = |!|A
"
 
(defun gst--load-char (encoded)
  "Return the character under ENCODED's cursor and advance the cursor.
ENCODED is a two-element list (TEXT INDEX).  Signal an error when the
cursor is already at or beyond the end of TEXT."
  (when (gst--is-end encoded)
    (error "Unexpected end."))
  (let* ((text (car encoded))
         (index (cadr encoded))
         (char (aref text index)))
    ;; Replace the tail with a fresh one-element list holding the next index.
    (setcdr encoded (list (1+ index)))
    char))
 
(defun gst--is-end (lst)
  "Return non-nil when the cursor of LST has reached the end of its text.
LST is a two-element list (TEXT INDEX)."
  (let ((text (car lst))
        (index (cadr lst)))
    (not (< index (length text)))))
 
(defun gst--translate-special (c)
  "Return the byte value denoted by C, the character that followed a \"|\".
The mapping is:
  @              -> 0
  A-Z or a-z     -> 1..26
  [ or {         -> 27
  \\             -> 28
  ] or }         -> 29
  ^ or ~         -> 30
  _ or `         -> 31
  \" | <          -> the character itself
  ?              -> 127
Return nil when C is not a recognised escape character."
  (cond
   ((eq c ?@) 0)
   ((memq c '(?\[ ?\{)) 27)
   ((eq c ?\\) 28)
   ((memq c '(?\] ?\})) 29)
   ((memq c '(?^ ?~)) 30)
   ;; The alternative spelling of 31 is the grave accent, not the apostrophe.
   ((memq c '(?_ ?\`)) 31)
   ((memq c '(?\" ?| ?<)) c)
   ((eq c ??) 127)
   ((and (>= c ?A) (<= c ?Z)) (1+ (- c ?A)))
   ((and (>= c ?a) (<= c ?z)) (1+ (- c ?a)))
   (t nil)))
 
(defun gst--load-highpos-token (encoded)
  "Read the token that follows a \"|!\" prefix from ENCODED and return its value.
ENCODED is a two-element list (TEXT INDEX) whose cursor is advanced past
the characters consumed.  The result is 128 plus either the value of a
\"|\" escape or the code of a printable character.  Signal an error on
an unknown escape, on a non-printable character, or when the text ends
early."
  (let ((c (gst--load-char encoded)))
    (cond
     ((eq c ?|)
      (let* ((sp (gst--load-char encoded))
             (value (gst--translate-special sp)))
        ;; `gst--translate-special' answers nil for an unknown escape;
        ;; report that clearly instead of failing inside `+'.
        (unless value
          (error "Invalid escape sequence: |!|%c" sp))
        (+ 128 value)))
     ((and (> c 31) (< c 127))
      (+ 128 c))
     (t (error "Not a printable character.")))))
 
(defun gst--load-token (encoded)
  "Read one GSTrans token from ENCODED and return the byte value it denotes.
ENCODED is a two-element list (TEXT INDEX) whose cursor is advanced past
the characters consumed.  A printable character stands for itself, \"|x\"
is translated by `gst--translate-special', and \"|!\" hands over to
`gst--load-highpos-token'.  Signal an error on an unknown escape, on a
non-printable character, or when the text ends early."
  (let ((c (gst--load-char encoded)))
    (cond
     ((eq c ?|)
      (let ((sp (gst--load-char encoded)))
        (if (eq sp ?!)
            (gst--load-highpos-token encoded)
          ;; An unknown escape yields nil; never let that leak into the
          ;; caller's list of byte values.
          (or (gst--translate-special sp)
              (error "Invalid escape sequence: |%c" sp)))))
     ((and (> c 31) (< c 127)) c)
     (t (error "Not a printable character.")))))
 
(defun gst-parse (text)
  "Decode the GSTrans-encoded string TEXT and return the list of byte values.
Values appear in the order of their tokens in TEXT, repeated values
included."
  (let ((encoded (list text 0))
        (decoded '()))
    (while (not (gst--is-end encoded))
      ;; Collect with `push': `add-to-list' skips values that are already
      ;; present, which would drop e.g. the second \"l\" of \"Hello\".
      (push (gst--load-token encoded) decoded))
    (nreverse decoded)))
 
;; Demonstration: decode one sample string and show it next to the result.
(let ((text "|LHello|G|J|M"))
  (message "%s => %s" text (gst-parse text)))
</syntaxhighlight>
{{out}}
<pre>
|LHello|G|J|M => (12 72 101 108 111 7 10 13)
</pre>
 
=={{header|Java}}==
This example encodes the UTF-8 bytes of the string being encoded, so every value it processes lies within the range 0..255 (inclusive); characters outside the ASCII range are therefore encoded as sequences of several bytes.
 
Strings being decoded which contain unprintable characters have each such character, c, replaced by the string
CHR$(c). Invalid strings such as |5 are decoded as 5.
<syntaxhighlight lang="java">
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
 
public final class GSTransStringConversion {

    /**
     * Encodes a set of sample strings, decodes them again and prints both forms,
     * then decodes a few hand-written GSTrans strings.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        List<String> tests = List.of( "ALERT|G", "wert↑", "@♂aN°$ª7Î", "ÙC▼æÔt6¤☻Ì", "\"@)Ð♠qhýÌÿ",
            "+☻#o9$u♠©A", "♣àlæi6Ú.é", "ÏÔ♀È♥@ë", "Rç÷%◄MZûhZ", "ç>¾AôVâ♫↓P" );
        for ( String test : tests ) {
            String encoded = encode(test);
            System.out.println(test + " --> " + encoded + " --> " + decode(encoded));
        }
        System.out.println();
        for ( String encoded : List.of ( "|LHello|G|J|M", "|m|j|@|e|!t|m|!|?", "abc|1de|5f" ) ) {
            System.out.println("The encoded string " + encoded + " is decoded as " + decode(encoded));
        }
    }

    /**
     * Encodes the UTF-8 bytes of {@code text} in GSTrans form.
     *
     * <p>Bytes 128..255 are written as {@code |!} followed by the encoding of the byte
     * minus 128; bytes 0..31 as {@code |} followed by the character with code 64 + byte;
     * the double quote as {@code |"}, the vertical bar as {@code ||} and 127 as {@code |?}.
     * Every other byte is written as itself.
     *
     * @param text the string to encode
     * @return the GSTrans encoding, consisting of ASCII characters only
     */
    public static String encode(String text) {
        StringBuilder result = new StringBuilder();
        for ( byte b : text.getBytes(StandardCharsets.UTF_8) ) {
            // Masking yields 0..255, so no further range check is required.
            int charValue = b & 0xff;
            if ( charValue >= 128 ) {
                result.append("|!");
                charValue -= 128;
            }

            if ( charValue <= 31 ) {
                result.append('|').append((char) ( 64 + charValue ));
            } else if ( charValue == 34 ) {
                result.append("|\"");
            } else if ( charValue == 124 ) {
                result.append("||");
            } else if ( charValue == 127 ) {
                result.append("|?");
            } else {
                result.append((char) charValue);
            }
        }
        return result.toString();
    }

    /**
     * Decodes a GSTrans encoded string.
     *
     * <p>An escape that denotes a control value 0..31 is rendered as the text
     * {@code CHR$(n)}. An invalid escape such as {@code |5} is decoded as the character
     * after the bar. Runs of two or more bytes with the high bit set are interpreted as
     * UTF-8; a single such byte is mapped to the character with the same code.
     *
     * @param text the GSTrans encoded string
     * @return the decoded string
     */
    public static String decode(String text) {
        List<Byte> bytes = new ArrayList<>();
        boolean previousVerticalBar = false;
        boolean previousExclamationMark = false;
        int addend = 0;
        for ( char ch : text.toCharArray() ) {
            if ( previousExclamationMark ) {
                if ( ch == '|' ) {
                    // "|!|x": the escape after the second bar gets 128 added.
                    addend = 128;
                    previousVerticalBar = true;
                } else {
                    bytes.add((byte) ( 128 + ch ));
                }
                previousExclamationMark = false;
            } else if ( previousVerticalBar ) {
                if ( ch == '!' ) {
                    previousExclamationMark = true;
                } else {
                    final int value = escapeValue(ch) + addend;
                    if ( value < 0 ) {
                        // Invalid escape such as |5: keep the character itself.
                        bytes.add((byte) ch);
                    } else if ( value < 32 ) {
                        // Control values, including 0 from |@, are shown as CHR$(n).
                        for ( byte bb : ( "CHR$(" + value + ")" ).getBytes(StandardCharsets.US_ASCII) ) {
                            bytes.add(bb);
                        }
                    } else {
                        bytes.add((byte) value);
                    }
                }
                previousVerticalBar = false;
                addend = 0;
            } else if ( ch == '|' ) {
                previousVerticalBar = true;
            } else {
                bytes.add((byte) ch);
            }
        }

        StringBuilder decoded = new StringBuilder();
        List<Byte> highValueBytes = new ArrayList<>();
        // Iterate the list directly: a byte-typed index would overflow after 127 elements.
        for ( byte b : bytes ) {
            if ( b >= 0 ) {
                decoded.append(decodeHighValueBytes(highValueBytes));
                decoded.append((char) b);
            } else {
                highValueBytes.add(b);
            }
        }
        decoded.append(decodeHighValueBytes(highValueBytes));
        return decoded.toString();
    }

    /**
     * Returns the value denoted by the character following a vertical bar, before any
     * high-bit addend is applied. The result is negative for characters below {@code @},
     * which do not form a valid escape.
     */
    private static int escapeValue(char ch) {
        switch ( ch ) {
            case '?':
                return 127;
            case '|':
            case '"':
            case '<':
                return ch;
            case '[':
            case '{':
                return 27;
            case '\\':
                return 28;
            case ']':
            case '}':
                return 29;
            case '^':
            case '~':
                return 30;
            case '_':
            case '`':
                return 31;
            default:
                return Character.toUpperCase(ch) - 64;
        }
    }

    /**
     * Converts the pending high-bit bytes to text and empties the list.
     *
     * @param highValueBytes the pending bytes; cleared by this call
     * @return the empty string for no bytes, the character with the same code for a
     *         single byte, otherwise the bytes decoded as UTF-8
     */
    private static String decodeHighValueBytes(List<Byte> highValueBytes) {
        if ( highValueBytes.isEmpty() ) {
            return "";
        }
        final String result;
        if ( highValueBytes.size() == 1 ) {
            result = Character.toString(highValueBytes.get(0) & 0xff);
        } else {
            byte[] newBytes = new byte[highValueBytes.size()];
            for ( int j = 0; j < newBytes.length; j++ ) {
                newBytes[j] = highValueBytes.get(j);
            }
            result = new String(newBytes, StandardCharsets.UTF_8);
        }
        highValueBytes.clear();
        return result;
    }

}
</syntaxhighlight>
{{ out }}
<pre>
ALERT|G --> ALERT||G --> ALERT|G
wert↑ --> wert|!b|!|F|!|Q --> wert↑
@♂aN°$ª7Î --> @|!b|!|Y|!|BaN|!B|!0$|!B|!*7|!C|!|N --> @♂aN°$ª7Î
ÙC▼æÔt6¤☻Ì --> |!C|!|YC|!b|!|V|!<|!C|!&|!C|!|Tt6|!B|!$|!b|!|X|!;|!C|!|L --> ÙC▼æÔt6¤☻Ì
"@)Ð♠qhýÌÿ --> |"@)|!C|!|P|!b|!|Y|! qh|!C|!=|!C|!|L|!C|!? --> "@)Ð♠qhýÌÿ
+☻#o9$u♠©A --> +|!b|!|X|!;#o9$u|!b|!|Y|! |!B|!)A --> +☻#o9$u♠©A
♣àlæi6Ú.é --> |!b|!|Y|!#|!C|! l|!C|!&i6|!C|!|Z.|!C|!) --> ♣àlæi6Ú.é
ÏÔ♀È♥@ë --> |!C|!|O|!C|!|T|!b|!|Y|!|@|!C|!|H|!b|!|Y|!%@|!C|!+ --> ÏÔ♀È♥@ë
Rç÷%◄MZûhZ --> R|!C|!'|!C|!7%|!b|!|W|!|DMZ|!C|!;hZ --> Rç÷%◄MZûhZ
ç>¾AôVâ♫↓P --> |!C|!'>|!B|!>A|!C|!4V|!C|!|"|!b|!|Y|!+|!b|!|F|!|SP --> ç>¾AôVâ♫↓P
 
The encoded string |LHello|G|J|M is decoded as CHR$(12)HelloCHR$(7)CHR$(10)CHR$(13)
The encoded string |m|j|@|e|!t|m|!|? is decoded as CHR$(13)CHR$(10)@CHR$(5)ôCHR$(13)ÿ
The encoded string abc|1de|5f is decoded as abc1de5f
</pre>
 
=={{header|jq}}==
'''Adapted from [[#Wren|Wren]]'''
{{works with|jq}}
 
'''Works with gojq, the Go implementation of jq'''
 
Strings in jq are just JSON strings, and therefore the constituent
codepoints are not restricted to 8-bit bytes. The `encode` and `decode` filters
presented here, however, only check that their inputs are non-empty
JSON strings.
 
<syntaxhighlight lang="jq">
# Encode the input string in GSTrans form, one codepoint at a time.
# $upper: when true, codepoints 1..26 are written as "|A".."|Z", otherwise as "|a".."|z".
# Raises an error unless the input is a non-empty string.
def encode($upper):
# helper function to encode bytes < 128
# (it is also applied to codepoint - 128 for codepoints >= 128; values that
# match none of the special cases fall through to the final else branch)
def f:
if (. >= 1 and . <= 26)
then "|" + (if $upper then [. + 64]|implode else [. + 96]|implode end)
elif . < 32
then "|" + ([. + 64] | implode)
elif . == 34 # quotation mark
then "|\""
elif . == 60 # less than
then "|<"
elif . == 124 # vertical bar
then "||"
elif . == 127 # DEL
then "|?"
else [.]|implode
end ;
. as $s
| if ($s | (type != "string") or (length == 0)) then "Argument of encode must be a non-empty string." | error
else # remove any outer quotation marks
($s | if (length > 1 and .[:1] == "\"" and .[-1:] == "\"") then .[1:-1] else . end) as $s
# iterate through the string's codepoints
| reduce ($s|explode)[] as $b ( {enc: ""};
if $b < 128 then .enc += ($b|f)
# codepoints >= 128 get the "|!" prefix followed by the encoding of (codepoint - 128)
else .enc += "|!" + (($b - 128)|f)
end)
| .enc
end;
 
# Decode a GSTrans-encoded input string back to a plain string.
# Raises an error unless the input is a non-empty string.
def decode:
# helper function for decoding bytes after "|"
# (a codepoint matching none of the cases is returned unchanged, so e.g. "|1" gives "1")
def f:
if . == 34 # quotation mark
then 34
elif . == 60 # less than
then 60
elif . == 63 # question mark
then 127
elif . >= 64 and . < 96 # @ + upper case letter + [\]^_
then . - 64
elif . == 96 # grave accent
then 31
elif . == 124 # vertical bar
then 124
elif . >= 97 and . < 127 # lower case letter + {}~
then . - 96
else .
end;
. as $s
| if ($s | (type != "string") or (length == 0)) then "Argument of decode must be a non-empty string." | error
else
# remove any outer quotation marks
($s | if (length > 1 and .[:1] == "\"" and .[-1:] == "\"") then $s[1:-1] else . end) as $s
| ($s|explode) as $bytes
| ($bytes|length) as $bc
# state: .i is the index of the next codepoint to read, .dec the text decoded so far
| {i: 0, dec: "" }
# iterate through the string's bytes decoding as we go
| until(.i >= $bc;
# 124 is "|": any other codepoint is copied through unchanged
if $bytes[.i] != 124
then .dec += ([$bytes[.i]] | implode)
| .i += 1
else
# "|x" where x is not "!" (33): a two-codepoint escape translated by f
if (.i < $bc - 1) and ($bytes[.i+1] != 33)
then .dec += ([$bytes[.i+1] | f ] | implode)
| .i += 2
else
# third codepoint is not "|": add 128 to it and consume three codepoints
if (.i < $bc - 2) and ($bytes[.i+2] != 124)
then .dec += ([128 + $bytes[.i+2]] | implode)
| .i += 3
else
# third codepoint is "|": add 128 to the translated fourth codepoint
if (.i < $bc - 3) and ($bytes[.i+2] == 124)
then .dec += ([128 + ($bytes[.i+3] | f)] | implode)
| .i += 4
# too few codepoints remain for a complete escape: skip one and continue
else .i += 1
end
end
end
end)
| .dec
end;
 
# Sample strings for task1: control characters plus codepoints 0xf4 and 0xff.
def strings: [
"\fHello\u0007\n\r",
"\r\n\u0000\u0005\u00f4\r\u00ff"
];

# The two letter cases passed to encode as $upper.
def uppers: [true, false];

# For every sample string and both letter cases: encode, decode again and
# emit four report lines (original, encoded, decoded, round-trip check).
def task1:
range(0; strings|length) as $i
| strings[$i]
| uppers[] as $u
| encode($u) as $enc
| ($enc|decode) as $dec
| "string: \(tojson)",
"encoded (\(if $u then "upper" else "lower" end)) : \($enc|tojson)",
"decoded : \($dec|tojson)",
"string == decoded ? \($dec == .)\n"
;

# Sample strings for task2 (headed "Julia strings" in the report).
def jstrings:[
"ALERT|G",
"wert↑",
"@♂aN°$ª7Î",
"ÙC▼æÔt6¤☻Ì",
"\"@)Ð♠qhýÌÿ",
"+☻#o9$u♠©A",
"♣àlæi6Ú.é",
"ÏÔ♀È♥@ë",
"Rç÷\\%◄MZûhZ",
"ç>¾AôVâ♫↓P"
];

# Emit a heading, then one line per jstrings entry showing the string, its
# upper-case encoding, the decoded result and whether the round trip matched.
def task2:
"Julia strings: string -> encoded (upper) <- decoded (same or different)\n",
( jstrings[]
| encode(true) as $enc
| ($enc|decode) as $dec
| " \(tojson) -> \($enc|tojson) <- \($dec|tojson) (\( if . == $dec then "same" else "different" end))"
);

# Program output: all of task1's lines followed by all of task2's.
task1, task2
</syntaxhighlight>
'''Invocation''': jq -nr -f gstrans.jq
{{output}}
<pre>
string: "\fHello\u0007\n\r"
encoded (upper) : "|LHello|G|J|M"
decoded : "\fHello\u0007\n\r"
string == decoded ? true
 
string: "\fHello\u0007\n\r"
encoded (lower) : "|lHello|g|j|m"
decoded : "\fHello\u0007\n\r"
string == decoded ? true
 
string: "\r\n\u0000\u0005ô\rÿ"
encoded (upper) : "|M|J|@|E|!t|M|!|?"
decoded : "\r\n\u0000\u0005ô\rÿ"
string == decoded ? true
 
string: "\r\n\u0000\u0005ô\rÿ"
encoded (lower) : "|m|j|@|e|!t|m|!|?"
decoded : "\r\n\u0000\u0005ô\rÿ"
string == decoded ? true
 
Julia strings: string -> encoded (upper) <- decoded (same or different)
 
"ALERT|G" -> "ALERT||G" <- "ALERT|G" (same)
"wert↑" -> "wert|!ℑ" <- "wert↑" (same)
"@♂aN°$ª7Î" -> "@|!◂aN|!0$|!*7|!N" <- "@♂aN°$ª7Î" (same)
"ÙC▼æÔt6¤☻Ì" -> "|!YC|!┼|!f|!Tt6|!$|!▻|!L" <- "ÙC▼æÔt6¤☻Ì" (same)
"\"@)Ð♠qhýÌÿ" -> "|\"@)|!P|!◠qh|!}|!L|!|?" <- "\"@)Ð♠qhýÌÿ" (same)
"+☻#o9$u♠©A" -> "+|!▻#o9$u|!◠|!)A" <- "+☻#o9$u♠©A" (same)
"♣àlæi6Ú.é" -> "|!◣|!`l|!fi6|!Z.|!i" <- "♣àlæi6Ú.é" (same)
"ÏÔ♀È♥@ë" -> "|!O|!T|!◀|!H|!◥@|!k" <- "ÏÔ♀È♥@ë" (same)
"Rç÷\\%◄MZûhZ" -> "R|!g|!w\\%|!╄MZ|!{hZ" <- "Rç÷\\%◄MZûhZ" (same)
"ç>¾AôVâ♫↓P" -> "|!g>|!>A|!tV|!b|!◫|!ℓP" <- "ç>¾AôVâ♫↓P" (same)
</pre>
 
=={{header|Julia}}==
Line 259 ⟶ 687:
Decode a GSTrans coded string back to original format. If decoding results
in a negative value for the result due to encoding errors such as "|1" will
substitute the char without the subtraction of 64 from the | bar, as in the
Wren and Phix examples, so that "|1" becomes '1'.
"""
Line 341 ⟶ 769:
and in fact penned a hexstr() rather similar to the two routines actually asked for, just to improve the console display a little bit.<br>
Also, the following always encodes to uppercase, but the decode part will properly cope with (eg) "|m|j|@|e|!t|m|!|?".<br>
As per Wren, strings in Phix are just sequences of bytes: UTF-8 or similar is completely irrelevant here, and ''won't'' mess up byte subscripting.<br>
Since strings are a sequence of (unsigned) bytes, there can be no encoding errors for anything that passes the typecheck of the "string s" parameter.<br>
For decoding, explicit assertion failures occur for unprintable characters, multiple high bits such as "|!|!", or generating negative bytes such as from "|1".
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
Line 353 ⟶ 783:
<span style="color: #008080;">if</span> <span style="color: #000000;">b</span><span style="color: #0000FF;"><</span><span style="color: #008000;">' '</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">"|"</span><span style="color: #0000FF;">&(</span><span style="color: #008000;">'@'</span><span style="color: #0000FF;">+</span><span style="color: #000000;">b</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">elsif</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`"|&lt;`</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">"|"</span><span style="color: #0000FF;">&</span><span style="color: #000000;">b</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">=</span>'\<span style="color: #000000;">x7F</span>' <span style="color: #008080;">then</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">"|?"</span>
<span style="color: #008080;">else</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">kres</span> <span style="color: #0000FF;">&=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`"|&lt;`</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">"|"</span><span style="color: #0000FF;">&</span><span style="color: #000000;">b</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">=</span>'\<span style="color: #000000;">x7F</span>' <span style="color: #008080;">then</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">"|?"</span>
<span style="color: #008080;">else</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">b</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
Line 372 ⟶ 799:
<span style="color: #004080;">integer</span> <span style="color: #000000;">hb</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">#00</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">b</span> <span style="color: #008080;">in</span> <span style="color: #000000;">s</span> <span style="color: #008080;">do</span>
<span style="color: #7060A8;">assert</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">' '</span> <span style="color: #008080;">and</span> <span style="color: #000000;">b</span><span style="color: #0000FF;"><=</span><span style="color: #008000;">'~'</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"non-printable character"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">bar</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">=</span><span style="color: #008000;">'!'</span> <span style="color: #008080;">then</span>
<span style="color: #7060A8;">assert</span><span style="color: #0000FF;">(</span><span style="color: #000000;">hb</span><span style="color: #0000FF;">==</span><span style="color: #000000;">#00</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"high bit already set"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">hb</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">#80</span>
<span style="color: #008080;">else</span>
Line 381 ⟶ 809:
<span style="color: #008080;">elsif</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`"|&lt;`</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">b</span> <span style="color: #0000FF;">-=</span> <span style="color: #7060A8;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">>=</span><span style="color: #008000;">'a'</span><span style="color: #0000FF;">?</span><span style="color: #000000;">#60</span><span style="color: #0000FF;">:</span><span style="color: #000000;">#40</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">assert</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">>=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"negative byte generated"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">+</span><span style="color: #000000;">hb</span>
Line 422 ⟶ 851:
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s &lt;-&gt; %s (decoded same:%t)\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">ht</span><span style="color: #0000FF;">,</span><span style="color: #000000;">he</span><span style="color: #0000FF;">,</span><span style="color: #000000;">d</span><span style="color: #0000FF;">=</span><span style="color: #000000;">t</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #000080;font-style:italic;">--assertion failures:
--?hexstr(GSTrans_decode("|!|!"))
--?hexstr(GSTrans_decode("|!|1"))
--?hexstr(GSTrans_decode("|1"))
--?hexstr(GSTrans_decode("\xF4"))</span>
<!--</syntaxhighlight>-->
{{out}}
Line 427 ⟶ 861:
\x0CHello\x07\n\r <-> |LHello|G|J|M (decoded same:true)
\r\n\0\x05\xF4\r\xFF <-> |M|J|@|E|!t|M|!|? (decoded same:true)
</pre>
 
=={{header|Python}}==
 
By default, errors during decoding will raise a <code>KeyError</code>. If the optional <code>default</code> argument is given to <code>gs_trans_decode</code>, it will replace any erroneous symbols with the default byte instead of raising a <code>KeyError</code>.
 
<syntaxhighlight lang="python">
"""GS byte string translation using an exhaustive map and regex reverse lookup.
 
Requires Python >= 3.9.
"""
import re
from typing import Optional
 
# Byte value -> GSTrans encoding. Later keys in the literal override the
# identity mapping produced for 32..126 (double quote, "<" and "|").
TABLE: dict[int, bytes] = {
    0: b"|@",
    **{byte: f"|{chr(byte+64)}".encode() for byte in range(1, 27)},
    27: b"|[",
    28: b"|\\",
    29: b"|]",
    30: b"|^",
    31: b"|_",
    **{byte: chr(byte).encode() for byte in range(32, 127)},
    34: b'|"',
    60: b"|<",
    124: b"||",
    127: b"|?",
}

# 128 - 255: "|!" followed by the encoding of (byte - 128)
TABLE.update({byte: b"|!" + TABLE[byte - 128] for byte in range(128, 256)})

# GSTrans encoding -> byte value, including the alternative spellings that
# are accepted on decoding but never produced on encoding.
REVERSE_LOOKUP: dict[bytes, int] = {
    **{v: k for k, v in TABLE.items()},
    b"|{": 27,
    b"|}": 29,
    b"|~": 30,
    b"|`": 31,  # aka backtick
    **{f"|{chr(byte+96)}".encode(): byte for byte in range(1, 27)},
}

# Matches one known sequence, or failing that any single byte. DOTALL lets
# the fallback match a raw newline too, so invalid input is never skipped
# silently by findall().
RE = re.compile(
    b"|".join(re.escape(s) for s in REVERSE_LOOKUP) + b"|.", re.DOTALL
)


def gs_trans_encode(s: bytes) -> bytes:
    """Return the GSTrans encoding of the byte string `s`."""
    return b"".join(TABLE[byte] for byte in s)


def gs_trans_decode(s: bytes, default: Optional[int] = None) -> bytes:
    """Decode the GSTrans-encoded byte string `s`.

    Args:
        s: The encoded byte string.
        default: Byte value substituted for every sequence that is not valid
            GSTrans. When None, such a sequence raises instead.

    Returns:
        The decoded byte string.

    Raises:
        KeyError: If `default` is None and `s` contains an invalid sequence.
    """
    if default is None:
        return bytes(REVERSE_LOOKUP[seq] for seq in RE.findall(s))
    return bytes(REVERSE_LOOKUP.get(seq, default) for seq in RE.findall(s))
 
 
# Sample byte strings used by the demonstration below.
examples: list[bytes] = [
    b"\x0CHello\x07\n\r",
    b"\r\n\0\x05\xF4\r\xFF",
]

if __name__ == "__main__":
    # Print each sample next to its encoding and check the round trip.
    for original in examples:
        translated = gs_trans_encode(original)
        print(f"{original!r} -> {translated!r}")
        assert gs_trans_decode(translated) == original
</syntaxhighlight>
 
{{out}}
<pre>
b'\x0cHello\x07\n\r' -> b'|LHello|G|J|M'
b'\r\n\x00\x05\xf4\r\xff' -> b'|M|J|@|E|!t|M|!|?'
</pre>
 
=={{header|Raku}}==
{{trans|Julia}}
<syntaxhighlight lang="raku" line># 20231105 Raku programming solution
 
# Encode $str in GSTrans form and return the result as a single string.
# The string is first encoded as UTF-8; each resulting byte is turned into a
# one-character string and translated on its own.
sub GSTrans-encode(Str $str) {
return [~] $str.encode('utf8').list.chrs.comb.map: -> $c {
my $i = $c.ord;
die "Char value of $c, $i, is out of range" unless 0 <= $i <= 255;
given ($i,$c) {
# control codes 0..31 become '|' followed by the character 64 higher
when 0 <= $i <= 31 { '|' ~ chr(64 + $i) }
when $c eq '"' { '|"' }
when $c eq '|' { '||' }
when $i == 127 { '|?' }
# high values: '|!' followed by the encoding of the value minus 128
when 128 <= $i <= 255 { '|!' ~ GSTrans-encode(chr($i - 128)) }
default { $c }
}
}
}
 
# Decode the GSTrans-encoded string $str.
# The characters are scanned once, collecting byte values that are finally
# decoded with the 'utf8c8' encoding.
sub GSTrans-decode(Str $str) {
# $gotbar:  the previous character was an unconsumed '|'
# $gotbang: the previous two characters were '|!'
# $bangadd: amount (0 or 128) added to the value of the next '|' escape
my ($gotbar, $gotbang, $bangadd) = False, False, 0;

my @result = gather for $str.comb -> $c {
if $gotbang {
if $c eq '|' {
# '|!|x': the escape that follows gets 128 added
$bangadd = 128;
$gotbar = True;
} else {
take $c.ord + 128;
}
$gotbang = False;
} elsif $gotbar {
given $c {
when $c eq '?' { take 127 + $bangadd }
when $c eq '!' { $gotbang = True }
when $c eq '|' || $c eq '"' || $c eq '<' { take $c.ord + $bangadd }
when $c eq '[' || $c eq '{' { take 27 + $bangadd }
when $c eq '\\' { take 28 + $bangadd }
when $c eq ']' || $c eq '}' { take 29 + $bangadd }
when $c eq '^' || $c eq '~' { take 30 + $bangadd }
when $c eq '_' || $c eq '`' { take 31 + $bangadd }
# any other character: upper-case it and subtract 64; when that gives a
# negative value (e.g. '|1') the character itself is kept
default { my $i = $c.uc.ord - 64 + $bangadd;
take $i >= 0 ?? $i !! $c.ord }
}
$gotbar = False;
$bangadd = 0;
} elsif $c eq '|' {
$gotbar = True
} else {
take $c.ord
}
}
return Blob.new(@result).decode('utf8c8')
}
 
# Fixed round-trip samples (word-quoted, so they are split on whitespace).
my @TESTS = <ALERT|G 'wert↑>;
# Eight random samples, each built from ten values rolled from 0..255.
my @RAND_TESTS = ^256 .roll(10).chrs.join xx 8;
# Already-encoded samples that are only decoded.
my @DECODE_TESTS = < |LHello|G|J|M |m|j|@|e|!t|m|!|? abc|1de|5f >;

# Round trip: encode, decode, report, and die if the result does not match.
for |@TESTS, |@RAND_TESTS -> $t {
my $encoded = GSTrans-encode($t);
my $decoded = GSTrans-decode($encoded);
say "String $t encoded is: $encoded, decoded is: $decoded.";
die unless $t ~~ $decoded;
}
# Decode-only samples.
for @DECODE_TESTS -> $enc {
say "Encoded string $enc decoded is: ", GSTrans-decode($enc);
}</syntaxhighlight>
You may [https://ato.pxeger.com/run?1=jVZLb9pAEL5W_IoBIYEVsICUlIZXXjRVlbZS4FAprxq8gBNjt2s7D7Hm2HvPveTQ_qke-0s6612bNSERe7DXs983r52d9a_f1LgJHh__BP643Pj76osXDOG4P6CG45WJM3JNUuz7FPKeTzWYZwCAEj-gDpwtLiKpLlEF1NAoaLpteb4-mlJPH7mzoT4zvu1CuQP5EcyB03HMHiBvQRtlukvNppSaFoHc4dSgcGvYAQF3jIASIktgeeAGPpegXxOSg8CxiedBBVptrgqftXo9VjSxbokDRSTmR9rSKo67KS4opO0ql86hwAqwAHS6uPMatnBRg3CVhgGQ71DIFWIpp-FXuB7HUji2BocZaEO19kbFdZ_iqrVGKsoIl-X-rmwTdx9RZc7QNFWPScZGYPuQHnPuaoyK3mEmzGTUAjDJugLA7SvmJ64_NChuTzRxJjjjL8M0NdzZd4btkVL8qjQzkrdHicc9acPE8KeEwtgVqqNiietE-mSNE-2JTIqTHCtyHLELwBPbaKbXhMe4NKABUdZCIOjliibfuCGyPrEg0sqUzCb-yZBjVKQz8Z-qykV1KmGqNSHiwiqYCw94eWwtwwqfpWQ5RfGGx_gCHDPHmFLSy49WYjuJfgPzZ6qKeaJixXt4ln9-vuQ0NuRcqDbDJf_thvxLlb9I-NuVzUK-Uulfl_Tq8_T4HM7VDhiINJdBtB7JTNfucoidsaDTxj7W7fJpNhtvlSi9FwqVrtYppM5M5Un5rjtn6YOkMlKHSKmhdI-B5Ao5sN2h7pC7omwLmi4bTnSXjPA24Q2Jt41Brz_oo8XW_knvdMCOoXBHqP_vx89OM1o_3f90dBWDLmv1HdCpa9vFakUTd9G1azlwfw8NgT_qHX4-6iWMFrCT98S2XXbMPrCPwGbsmu0xwrI-TrOsC8ZwxKomYfUxdJqQyfC-xYRbJZwo9nkP85NGmRfNmWd3pV3nfa0Zg0TYKkgmIqYLqGc8QA6bsYVHHG3Eqi1vN7FTglhXJJUfei7i8xtW3pxIXyyS9SbmmUeUzguPBNXKTY2M96RJTzrBl1WDudK6CND7UPxgyP-M-H_jPw Attempt This Online!]
 
=={{header|Rust}}==
On error, changes |1 to 1 as in the Wren example.
<syntaxhighlight lang="rust">/* GSTrans encoding and decoding */
 
use std::collections::HashMap;
use std::iter::FromIterator;
 
/* Encoding lookup table: entry `b` is the GSTrans encoding of byte value `b`.
 * Entries 128..=255 are "|!" followed by the encoding of `b - 128`. */
const ENCODE_TABLE: &[&str] = &[
    "|@", "|A", "|B", "|C", "|D", "|E", "|F", "|G",
    "|H", "|I", "|J", "|K", "|L", "|M", "|N", "|O",
    "|P", "|Q", "|R", "|S", "|T", "|U", "|V", "|W",
    "|X", "|Y", "|Z", "|[", "|\\", "|]", "|^", "|_",
    " ", "!", "|\"", "#", "$", "%", "&", "\'",
    "(", ")", "*", "+", ",", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", ";", "|<", "=", ">", "?",
    "@", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "[", "\\", "]", "^", "_",
    "`", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "{", "||", "}", "~", "|?",
    // Byte 128 must be "|!|@": a plain "|@" would collide with byte 0.
    "|!|@", "|!|A","|!|B","|!|C","|!|D","|!|E","|!|F","|!|G",
    "|!|H","|!|I","|!|J","|!|K","|!|L","|!|M","|!|N","|!|O",
    "|!|P","|!|Q","|!|R","|!|S","|!|T","|!|U","|!|V","|!|W",
    "|!|X","|!|Y","|!|Z","|!|[","|!|\\","|!|]","|!|^","|!|_",
    "|! ","|!!","|!|\"", "|!#", "|!$", "|!%", "|!&", "|!\'",
    "|!(","|!)","|!*", "|!+", "|!,", "|!-", "|!.", "|!/",
    "|!0", "|!1", "|!2", "|!3", "|!4", "|!5", "|!6", "|!7",
    "|!8", "|!9", "|!:", "|!;", "|!|<", "|!=", "|!>", "|!?",
    "|!@", "|!A", "|!B", "|!C", "|!D", "|!E", "|!F", "|!G",
    "|!H", "|!I", "|!J", "|!K", "|!L", "|!M", "|!N", "|!O",
    "|!P", "|!Q", "|!R", "|!S", "|!T", "|!U", "|!V", "|!W",
    "|!X", "|!Y", "|!Z", "|![", "|!\\","|!]", "|!^", "|!_",
    "|!`", "|!a", "|!b", "|!c", "|!d", "|!e", "|!f", "|!g",
    "|!h", "|!i", "|!j", "|!k", "|!l", "|!m", "|!n", "|!o",
    "|!p", "|!q", "|!r", "|!s", "|!t", "|!u", "|!v", "|!w",
    "|!x", "|!y", "|!z", "|!{", "|!||","|!}", "|!~", "|!|?",
];
 
// Encode a string into GSTrans form: every UTF-8 byte of `txt` is replaced
// by the ENCODE_TABLE entry at that byte's index and the pieces are joined.
fn gs_trans_encode(txt: &str) -> String {
    let mut encoded = String::new();
    for byte in txt.bytes() {
        encoded.push_str(ENCODE_TABLE[byte as usize]);
    }
    encoded
}
 
// Decode GSTrans coded text using the lookup table `table`, which maps an
// encoded sequence (1 to 4 bytes long) to its byte value. At each position
// the shortest matching sequence is taken; sequences of length 2 and 4 are
// upper-cased first so that "|a" is found as "|A". If no sequence matches,
// a warning is written to stderr and one byte of input is skipped, so "|1"
// becomes "1". Input that is not ASCII never matches and is skipped the
// same way instead of causing a slicing panic.
fn gs_trans_decode(txt: &str, table: &HashMap<&&str, usize>) -> String {
    let mut result = Vec::<u8>::new();
    let mut i = 0;
    while i < txt.len() {
        let mut found: Option<(usize, usize)> = None;
        for j in 1..=4 {
            // `get` returns None past the end of the text or when the range
            // does not fall on UTF-8 character boundaries.
            let substr = match txt.get(i..i + j) {
                Some(s) => s,
                None => continue,
            };
            let uppersubstr;
            let key: &str = if j == 2 || j == 4 {
                // match |a as |A in the table
                uppersubstr = substr.to_uppercase();
                uppersubstr.as_str()
            } else {
                substr
            };
            if let Some(&decoded) = table.get(&key) {
                found = Some((decoded, j));
                break;
            }
        }
        match found {
            Some((decoded, len)) => {
                result.push(decoded as u8);
                i += len;
            }
            None => {
                // error found: skip one byte of the bad encoding
                eprintln!("Warning: Bad encoding at position {}, skipped a char", i);
                i += 1;
            }
        }
    }
    // back to utf8 from bytes
    String::from_utf8_lossy(&result).to_string()
}
 
// Builds the decoding table, round-trips a set of sample strings through
// gs_trans_encode / gs_trans_decode, then decodes three pre-encoded strings.
fn main() {
// decoding lookup table
// (each ENCODE_TABLE entry is mapped back to its index)
let mut decode_table =
HashMap::from_iter(ENCODE_TABLE.iter().enumerate().map(|(i, v)| (v, i)));
// alternative spellings that are accepted on decoding only
for (v, k) in
[(27, &"|{"), (29, &"|}"), (30, &"|~"), (31, &"|`",),
(155, &"|!|{"), (157, &"|!|}"), (158, &"|!|~"), (159, &"|!|`",),] {
decode_table.insert(k, v);
}
// round trip: the decoded text must equal the original
for test in ["ALERT|G", "wert↑", "@♂aN°$ª7Î", "ÙC▼æÔt6¤☻Ì", "\"@)Ð♠qhýÌÿ",
"+☻#o9$u♠©A", "♣àlæi6Ú.é", "ÏÔ♀È♥@ë", "Rç÷\\%◄MZûhZ", "ç>¾AôVâ♫↓P"] {
let encoded = gs_trans_encode(test);
let decoded = gs_trans_decode(&encoded, &decode_table);
println!("Test string {}, encoded: {}, then decoded: {}", test, encoded, decoded);
assert!(test == decoded);
}
// decode-only samples; the last one contains the invalid escapes |1 and |5
for test in [&"|LHello|G|J|M", &"|m|j|@|e|!t|m|!|?", &"abc|1de|5f"] {
let decoded = gs_trans_decode(test, &decode_table);
println!("Test string {} decoded is: {}", test, decoded);
}
}
</syntaxhighlight>{{out}}
<pre>
Test string ALERT|G, encoded: ALERT||G, then decoded: ALERT|G
Test string wert↑, encoded: wert|!b|!|F|!|Q, then decoded: wert↑
Test string @♂aN°$ª7Î, encoded: @|!b|!|Y|!|BaN|!B|!0$|!B|!*7|!C|!|N, then decoded: @♂aN°$ª7Î
Test string ÙC▼æÔt6¤☻Ì, encoded: |!C|!|YC|!b|!|V|!|<|!C|!&|!C|!|Tt6|!B|!$|!b|!|X|!;|!C|!|L, then decoded: ÙC▼æÔt6¤☻Ì
Test string "@)Ð♠qhýÌÿ, encoded: |"@)|!C|!|P|!b|!|Y|! qh|!C|!=|!C|!|L|!C|!?, then decoded: "@)Ð♠qhýÌÿ
Test string +☻#o9$u♠©A, encoded: +|!b|!|X|!;#o9$u|!b|!|Y|! |!B|!)A, then decoded: +☻#o9$u♠©A
Test string ♣àlæi6Ú.é, encoded: |!b|!|Y|!#|!C|! l|!C|!&i6|!C|!|Z.|!C|!), then decoded: ♣àlæi6Ú.é
Test string ÏÔ♀È♥@ë, encoded: |!C|!|O|!C|!|T|!b|!|Y|@|!C|!|H|!b|!|Y|!%@|!C|!+, then decoded: ÏÔ♀È♥@ë
Test string Rç÷\%◄MZûhZ, encoded: R|!C|!'|!C|!7\%|!b|!|W|!|DMZ|!C|!;hZ, then decoded: Rç÷\%◄MZûhZ
Test string ç>¾AôVâ♫↓P, encoded: |!C|!'>|!B|!>A|!C|!4V|!C|!|"|!b|!|Y|!+|!b|!|F|!|SP, then decoded: ç>¾AôVâ♫↓P
Test string |LHello|G|J|M decoded is: �Hello�
 
Test string |m|j|@|e|!t|m|!|? decoded is:
���
Test string abc|1de|5f decoded is: abc1de5f
Warning: Bad encoding at position 3, skipped a char
Warning: Bad encoding at position 7, skipped a char
</pre>
 
=== Without table lookup ===
{{trans|Julia}}
<syntaxhighlight lang="rust">fn gs_char_encode(i: u8) -> String {
    // Returns the GSTrans encoding of the single byte `i`.
    match i {
        // control codes: '|' followed by the character 64 higher
        0..=31 => format!("|{}", (64 + i) as char),
        b'"' => String::from("|\""),
        b'|' => String::from("||"),
        127 => String::from("|?"),
        // high half: "|!" followed by the encoding of the byte minus 128
        128..=255 => format!("|!{}", gs_char_encode(i - 128)),
        // every other byte is a printable ASCII character that stands for itself
        _ => (i as char).to_string(),
    }
}
 
/// Encodes a whole string in GSTrans form by encoding each of its UTF-8
/// bytes with `gs_char_encode` and concatenating the results in order.
fn gs_trans_encode(s: &str) -> String {
    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        encoded.push_str(&gs_char_encode(byte));
    }
    encoded
}
 
/// Decodes a GSTrans-encoded string.
///
/// The input is processed byte by byte (not char by char), so literal
/// non-ASCII text passes through untouched instead of being truncated:
///
/// * `|?` gives byte 127; `||`, `|"` and `|<` give the literal character;
/// * `|@`, `|A`..`|Z` (either case) and `|[ |\ |] |^ |_` (or their
///   lowercase forms `|{ |} |~` and `` |` ``) give control codes 0..=31;
/// * `|!` sets the top bit of the byte produced by whatever follows, which
///   may be a plain byte or another `|` escape;
/// * `|` followed by any other byte is an invalid escape: the bar is dropped
///   and the byte is kept as it is;
/// * a trailing `|` or `|!` with nothing after it is ignored.
///
/// The decoded bytes are converted with `String::from_utf8_lossy`, so byte
/// sequences that are not valid UTF-8 appear as U+FFFD in the result.
fn gs_trans_decode(s: &str) -> String {
    let mut result = Vec::<u8>::with_capacity(s.len());
    let mut gotbar = false; // previous byte was an unconsumed `|`
    let mut gotbang = false; // previous two bytes were `|!`
    let mut high: u8 = 0; // 0x80 while a `|!` prefix applies to the pending `|` escape
    for b in s.bytes() {
        if gotbang {
            gotbang = false;
            if b == b'|' {
                // `|!|x`: remember the top bit and decode `|x` on the next byte.
                high = 0x80;
                gotbar = true;
            } else {
                // OR instead of `+ 128`: identical for ASCII, cannot overflow otherwise.
                result.push(b | 0x80);
            }
        } else if gotbar {
            gotbar = false;
            if b == b'!' {
                gotbang = true;
                high = 0;
                continue;
            }
            let low = match b {
                b'?' => 127,
                b'|' | b'"' | b'<' => b,
                b'[' | b'{' => 27,
                b'\\' => 28,
                b']' | b'}' => 29,
                b'^' | b'~' => 30,
                b'_' | b'`' => 31,
                _ => {
                    // Fold lowercase letters onto uppercase, then map `@`, `A`..`Z`
                    // to 0..=26. The validity test is made on the byte itself,
                    // before the top bit is applied, so an invalid escape keeps
                    // its own value even under a `|!` prefix.
                    let upper = b.to_ascii_uppercase();
                    if (b'@'..=b'Z').contains(&upper) {
                        upper - 64
                    } else {
                        b
                    }
                }
            };
            result.push(low | high);
            high = 0;
        } else if b == b'|' {
            gotbar = true;
        } else {
            result.push(b);
        }
    }
    String::from_utf8_lossy(&result).into_owned()
}
 
/// Demonstration driver: round-trips a set of sample strings through
/// `gs_trans_encode` / `gs_trans_decode`, asserting that each one survives,
/// then decodes a few hand-written GSTrans strings and prints the results.
fn main() {
    let round_trip_inputs = [
        "ALERT|G",
        "wert↑",
        "@♂aN°$ª7Î",
        "ÙC▼æÔt6¤☻Ì",
        "\"@)Ð♠qhýÌÿ",
        "+☻#o9$u♠©A",
        "♣àlæi6Ú.é",
        "ÏÔ♀È♥@ë",
        "Rç÷\\%◄MZûhZ",
        "ç>¾AôVâ♫↓P",
    ];
    for original in round_trip_inputs {
        let encoded = gs_trans_encode(original);
        let decoded = gs_trans_decode(&encoded);
        println!(
            "Test string {} encoded is {}, decoded is: {}",
            original,
            encoded,
            decoded.escape_debug()
        );
        assert!(original == decoded);
    }

    // Already-encoded inputs, including one with invalid escapes (`|1`, `|5`).
    let decode_only_inputs = ["abc|1de|5f", "|LHello|G|J|M", "|m|j|@|e|!t|m|!|?"];
    for encoded in decode_only_inputs {
        let decoded = gs_trans_decode(encoded);
        println!("Test string {} decoded is {}", encoded, decoded.escape_debug());
    }
}
</syntaxhighlight>{{out}}
<pre>
Test string ALERT|G encoded is ALERT||G, decoded is: ALERT|G
Test string wert↑ encoded is wert|!b|!|F|!|Q, decoded is: wert↑
Test string @♂aN°$ª7Î encoded is @|!b|!|Y|!|BaN|!B|!0$|!B|!*7|!C|!|N, decoded is: @♂aN°$ª7Î
Test string ÙC▼æÔt6¤☻Ì encoded is |!C|!|YC|!b|!|V|!<|!C|!&|!C|!|Tt6|!B|!$|!b|!|X|!;|!C|!|L, decoded is: ÙC▼æÔt6¤☻Ì
Test string "@)Ð♠qhýÌÿ encoded is |"@)|!C|!|P|!b|!|Y|! qh|!C|!=|!C|!|L|!C|!?, decoded is: \"@)Ð♠qhýÌÿ
Test string +☻#o9$u♠©A encoded is +|!b|!|X|!;#o9$u|!b|!|Y|! |!B|!)A, decoded is: +☻#o9$u♠©A
Test string ♣àlæi6Ú.é encoded is |!b|!|Y|!#|!C|! l|!C|!&i6|!C|!|Z.|!C|!), decoded is: ♣àlæi6Ú.é
Test string ÏÔ♀È♥@ë encoded is |!C|!|O|!C|!|T|!b|!|Y|!|@|!C|!|H|!b|!|Y|!%@|!C|!+, decoded is: ÏÔ♀È♥@ë
Test string Rç÷\%◄MZûhZ encoded is R|!C|!'|!C|!7\%|!b|!|W|!|DMZ|!C|!;hZ, decoded is: Rç÷\\%◄MZûhZ
Test string ç>¾AôVâ♫↓P encoded is |!C|!'>|!B|!>A|!C|!4V|!C|!|"|!b|!|Y|!+|!b|!|F|!|SP, decoded is: ç>¾AôVâ♫↓P
Test string abc|1de|5f decoded is abc1de5f
Test string |LHello|G|J|M decoded is \u{c}Hello\u{7}\n\r
Test string |m|j|@|e|!t|m|!|? decoded is \r\n\0\u{5}�\r�
 
</pre>
 
=={{header|Wren}}==
{{libheader|Wren-fmt}}
Strings in Wren are just immutable arrays of bytes. They are usually interpreted as UTF-8 but don't have to be. Unicode characters in the example Julia strings are therefore encoded using their constituent UTF-8 bytes, which decodes fine but may not give the same encoding as Julia itself.
 
If an invalid byte (following the "|" flag) is encountered whilst decoding, it is decoded as if the flag were not present.
<syntaxhighlight lang="wren">import "./fmt" for Fmt
 
class GSTrans {
Where strings contain control characters, their decoded version is printed to the terminal as a byte list.
<syntaxhighlight lang="ecmascript">class GSTrans {
static encode(s, upper) {
if (!(s is String && s.count > 0)) Fiber.abort("Argument must be a non-empty string.")
Line 536 ⟶ 1,338:
"\fHello\a\n\r",
"\r\n\0\x05\xf4\r\xff"
]
 
var texts = [
"""\fHello\a\n\r""",
"""\r\n\0\x05\xf4\r\xff"""
]
 
Line 547 ⟶ 1,344:
// Encode and decode each test string, printing every stage.
// NOTE(review): the `t`, `d` and "decoded" lines were reconstructed from a
// revision diff whose old and new sides had been fused together - confirm
// against the current page revision.
for (i in 0...strings.count) {
    var s = strings[i]
    // Presumably renders the string quoted with control characters escaped
    // (the sample output shows e.g. "\fHello\a\n\r") - verify against Wren-fmt.
    var t = Fmt.swrite("$q", Fmt.B(0, s))
    var u = uppers[i]
    var enc = GSTrans.encode(s, u)
    var dec = GSTrans.decode(enc)
    var d = Fmt.swrite("$q", Fmt.B(0, dec))
    System.print("string: %(t)")
    System.print("encoded (%(u ? "upper" : "lower")) : %(enc)")
    System.print("decoded : %(d)")
    System.print("string == decoded ? %(dec == s)\n")
}
Line 582 ⟶ 1,380:
string: "\fHello\a\n\r"
encoded (upper) : |LHello|G|J|M
decoded : "\fHello\a\n\r"
decoded (as byte list): [12, 72, 101, 108, 108, 111, 7, 10, 13]
string == decoded ? true
 
string: "\r\n\0\x05\xf4\r\xff"
encoded (lower) : |m|j|@|e|!t|m|!|?
decoded : "\r\n\0\x05\xf4\r\xff"
decoded (as byte list): [13, 10, 0, 5, 244, 13, 255]
string == decoded ? true
 
2,442

edits