GSTrans string conversion: Difference between revisions

 
(10 intermediate revisions by 5 users not shown)
Line 203:
</syntaxhighlight>
No checks for string lengths are done. On decoding, invalid encodings are ignored and skipped; for instance, '''|4''' is decoded as '''4'''.
 
=={{header|Emacs Lisp}}==
<syntaxhighlight lang="lisp">
 
 
"
ASCII code Symbols used
0 |@
1 - 26 |letter eg |A (or |a) = ASCII 1, |M (or |m) = ASCII 13
27 |[ or |{
28 |\
29 |] or |}
30 |^ or |~
31 |_ or |` (grave accent)
32 - 126 keyboard character, except for:
\" |\"
| ||
< |<
127 |?
128 - 255 |!coded symbol eg ASCII 128 = |!|@ ASCII 129 = |!|A
"
 
(defun gst--load-char (encoded)
  "Return the next character of ENCODED and advance its cursor.
ENCODED is a two-element list (STRING INDEX).  The index is advanced
by replacing the tail of the list in place, so the caller's list is
updated.  Signal an error when the cursor is already at the end."
  (when (gst--is-end encoded)
    (error "Unexpected end."))
  (let ((pos (cadr encoded)))
    ;; `prog1' returns the character read at POS, then moves the cursor.
    (prog1 (aref (car encoded) pos)
      (setcdr encoded (list (1+ pos))))))
 
(defun gst--is-end (lst)
  "Return non-nil when the cursor of LST has reached the end of its string.
LST is a two-element list (STRING INDEX)."
  (let ((text (car lst))
        (index (cadr lst)))
    (<= (length text) index)))
 
(defun gst--translate-special (c)
  "Return the character code denoted by C, the character following a `|'.
Letters of either case map to codes 1 to 26, `@' maps to 0, `?' maps
to 127, the bracket-like characters map to codes 27 to 31, and the
double quote, vertical bar and less-than sign stand for themselves.
Return nil when C is not a recognised escape character."
  (cond
   ((eq c ?@) 0)
   ((memq c '(?\[ ?\{)) 27)
   ((eq c ?\\) 28)
   ((memq c '(?\] ?\})) 29)
   ((memq c '(?^ ?~)) 30)
   ;; The encoding table names the grave accent as the alternative to `_'.
   ;; The apostrophe is still accepted so existing callers keep working.
   ((memq c '(?_ ?\` ?\')) 31)
   ((memq c '(?\" ?| ?<)) c)
   ((eq c ??) 127)
   ((and (>= c ?A) (<= c ?Z)) (1+ (- c ?A)))
   ((and (>= c ?a) (<= c ?z)) (1+ (- c ?a)))
   (t nil)))
 
(defun gst--load-highpos-token (encoded)
  "Read the token that follows a `|!' prefix in ENCODED and return its code.
The result is 128 plus either the code of a further `|' escape or the
code of a plain printable character.  Signal an error for any other
character."
  (let ((ch (gst--load-char encoded)))
    (if (eq ch ?|)
        ;; `|!|X': translate X as an escape, then move it to the upper half.
        (+ 128 (gst--translate-special (gst--load-char encoded)))
      (if (and (< 31 ch) (> 127 ch))
          (+ 128 ch)
        (error "Not a printable character.")))))
 
(defun gst--load-token (encoded)
  "Read one token from ENCODED and return the character code it denotes.
A token is a printable character, a `|X' escape, or a `|!' sequence
handled by `gst--load-highpos-token'.  Signal an error when the next
character is neither `|' nor printable."
  (let ((ch (gst--load-char encoded)))
    (cond
     ((eq ch ?|)
      (let ((next (gst--load-char encoded)))
        (if (eq next ?!)
            (gst--load-highpos-token encoded)
          (gst--translate-special next))))
     ((and (< 31 ch) (> 127 ch)) ch)
     (t (error "Not a printable character.")))))
 
(defun gst-parse (text)
  "Decode the GSTrans-encoded string TEXT into a list of character codes.
The codes are returned in the order in which their tokens appear in
TEXT, including repeated codes."
  (let ((encoded (list text 0))
        (decoded '()))
    (while (not (gst--is-end encoded))
      ;; `push' keeps every code.  `add-to-list' skips an element that is
      ;; already in the list, which dropped repeats such as the second
      ;; `l' of \"Hello\".
      (push (gst--load-token encoded) decoded))
    (nreverse decoded)))

(progn
  (let ((text "|LHello|G|J|M"))
    (message "%s => %s" text (gst-parse text))))
</syntaxhighlight>
{{out}}
<pre>
|LHello|G|J|M => (12 72 101 108 108 111 7 10 13)
</pre>
 
=={{header|Java}}==
This example checks that the string being encoded only contains characters within the range 0..255 (inclusive), and does not process the string if an invalid character is found.
 
Strings being decoded which contain unprintable characters have each such character, c, replaced by the string
CHR$(c). Invalid strings such as |5 are decoded as 5.
<syntaxhighlight lang="java">
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
 
/**
 * Demonstrates GSTrans encoding and decoding of strings.
 *
 * <p>Encoding works on the UTF-8 bytes of the input. Decoding rebuilds a byte sequence from the
 * GSTrans text and then converts it back to a string; control codes written as {@code |letter}
 * are rendered as the text {@code CHR$(n)} rather than as the raw control character.
 */
public final class GSTransStringConversion {

    public static void main(String[] args) {
        List<String> tests = List.of( "ALERT|G", "wert↑", "@♂aN°$ª7Î", "ÙC▼æÔt6¤☻Ì", "\"@)Ð♠qhýÌÿ",
                                      "+☻#o9$u♠©A", "♣àlæi6Ú.é", "ÏÔ♀È♥@ë", "Rç÷%◄MZûhZ", "ç>¾AôVâ♫↓P" );
        for ( String test : tests ) {
            String encoded = encode(test);
            System.out.println(test + " --> " + encoded + " --> " + decode(encoded));
        }
        System.out.println();
        for ( String encoded : List.of ( "|LHello|G|J|M", "|m|j|@|e|!t|m|!|?", "abc|1de|5f" ) ) {
            System.out.println("The encoded string " + encoded + " is decoded as " + decode(encoded));
        }
    }

    /**
     * Encodes the UTF-8 bytes of {@code text} in GSTrans format.
     *
     * <p>Bytes of 128 and above are written as {@code |!} followed by the encoding of the byte
     * minus 128. Values 0..31, the double quote, the vertical bar and 127 are escaped with a
     * leading {@code |}; every other value is written as the character itself.
     *
     * @param text the string to encode
     * @return the GSTrans encoding of {@code text}
     */
    private static String encode(String text) {
        StringBuilder result = new StringBuilder();
        for ( byte b : text.getBytes(StandardCharsets.UTF_8) ) {
            // Masking yields 0..255 by construction, so no range check is needed.
            int charValue = b & 0xff;
            if ( charValue >= 128 ) {
                result.append("|!");
                charValue -= 128;
            }

            if ( charValue <= 31 ) {
                result.append('|').append((char) ( 64 + charValue ));
            } else if ( charValue == '"' ) {
                result.append("|\"");
            } else if ( charValue == '|' ) {
                result.append("||");
            } else if ( charValue == 127 ) {
                result.append("|?");
            } else {
                result.append((char) charValue);
            }
        }
        return result.toString();
    }

    /**
     * Decodes GSTrans-encoded {@code text}.
     *
     * <p>Runs of non-positive bytes are converted by {@link #decodeHighValueBytes(List)};
     * positive bytes are appended one at a time. An unrecognised character after {@code |} is
     * kept as that character, so {@code |5} decodes to {@code 5}.
     *
     * @param text the GSTrans-encoded string
     * @return the decoded string
     */
    private static String decode(String text) {
        List<Byte> bytes = new ArrayList<>();
        boolean previousVerticalBar = false;
        boolean previousExclamationMark = false;
        int addend = 0; // 128 while inside a "|!|" sequence, otherwise 0
        for ( char ch : text.toCharArray() ) {
            if ( previousExclamationMark ) {
                if ( ch == '|' ) {
                    addend = 128;
                    previousVerticalBar = true;
                } else {
                    bytes.add((byte) ( 128 + ch ));
                }
                previousExclamationMark = false;
            } else if ( previousVerticalBar ) {
                if ( ch == '!' ) {
                    previousExclamationMark = true;
                } else {
                    addEscaped(bytes, ch, addend);
                }
                previousVerticalBar = false;
                addend = 0;
            } else if ( ch == '|' ) {
                previousVerticalBar = true;
            } else {
                bytes.add((byte) ch);
            }
        }

        StringBuilder decoded = new StringBuilder();
        List<Byte> highValueBytes = new ArrayList<>();
        // Iterate over the elements directly: a byte-typed index overflows after 127
        // and made longer inputs fail with an IndexOutOfBoundsException.
        for ( byte b : bytes ) {
            if ( b > 0 ) {
                decoded.append(decodeHighValueBytes(highValueBytes));
                decoded.append(new String(new byte[] { b }, StandardCharsets.UTF_8));
            } else {
                highValueBytes.add(b);
            }
        }
        decoded.append(decodeHighValueBytes(highValueBytes));
        return decoded.toString();
    }

    /**
     * Appends to {@code bytes} the value denoted by {@code ch}, the character following a
     * vertical bar, shifted by {@code addend}.
     */
    private static void addEscaped(List<Byte> bytes, char ch, int addend) {
        switch ( ch ) {
            case '?':
                bytes.add((byte) ( 127 + addend ));
                return;
            case '|': case '"': case '<':
                bytes.add((byte) ( ch + addend ));
                return;
            case '[': case '{':
                bytes.add((byte) ( 27 + addend ));
                return;
            case '\\':
                bytes.add((byte) ( 28 + addend ));
                return;
            case ']': case '}':
                bytes.add((byte) ( 29 + addend ));
                return;
            case '^': case '~':
                bytes.add((byte) ( 30 + addend ));
                return;
            case '_': case '`':
                bytes.add((byte) ( 31 + addend ));
                return;
            default:
                break;
        }

        final int value = Character.toUpperCase(ch) - 64 + addend;
        if ( 0 < value && value < 32 ) {
            // Control codes are shown as the text CHR$(n) instead of the raw character.
            for ( byte bb : ( "CHR$(" + value + ")" ).getBytes(StandardCharsets.UTF_8) ) {
                bytes.add(bb);
            }
        } else if ( value > 0 ) {
            bytes.add((byte) value);
        } else {
            // Not a valid escape: keep the character as if the bar were absent.
            bytes.add((byte) ch);
        }
    }

    /**
     * Converts the collected bytes to a string and empties the list.
     *
     * <p>A single byte is taken as the character with that unsigned value; two or more bytes are
     * decoded together as UTF-8.
     *
     * @param highValueBytes the pending bytes; cleared before returning when it is not empty
     * @return the decoded text, or the empty string when the list is empty
     */
    private static String decodeHighValueBytes(List<Byte> highValueBytes) {
        if ( highValueBytes.isEmpty() ) {
            return "";
        }

        final String result;
        if ( highValueBytes.size() == 1 ) {
            result = Character.toString(highValueBytes.get(0) & 0xff);
        } else {
            byte[] newBytes = new byte[highValueBytes.size()];
            for ( int j = 0; j < newBytes.length; j++ ) {
                newBytes[j] = highValueBytes.get(j);
            }
            result = new String(newBytes, StandardCharsets.UTF_8);
        }
        highValueBytes.clear();
        return result;
    }

}
</syntaxhighlight>
{{ out }}
<pre>
ALERT|G --> ALERT||G --> ALERT|G
wert↑ --> wert|!b|!|F|!|Q --> wert↑
@♂aN°$ª7Î --> @|!b|!|Y|!|BaN|!B|!0$|!B|!*7|!C|!|N --> @♂aN°$ª7Î
ÙC▼æÔt6¤☻Ì --> |!C|!|YC|!b|!|V|!<|!C|!&|!C|!|Tt6|!B|!$|!b|!|X|!;|!C|!|L --> ÙC▼æÔt6¤☻Ì
"@)Ð♠qhýÌÿ --> |"@)|!C|!|P|!b|!|Y|! qh|!C|!=|!C|!|L|!C|!? --> "@)Ð♠qhýÌÿ
+☻#o9$u♠©A --> +|!b|!|X|!;#o9$u|!b|!|Y|! |!B|!)A --> +☻#o9$u♠©A
♣àlæi6Ú.é --> |!b|!|Y|!#|!C|! l|!C|!&i6|!C|!|Z.|!C|!) --> ♣àlæi6Ú.é
ÏÔ♀È♥@ë --> |!C|!|O|!C|!|T|!b|!|Y|!|@|!C|!|H|!b|!|Y|!%@|!C|!+ --> ÏÔ♀È♥@ë
Rç÷%◄MZûhZ --> R|!C|!'|!C|!7%|!b|!|W|!|DMZ|!C|!;hZ --> Rç÷%◄MZûhZ
ç>¾AôVâ♫↓P --> |!C|!'>|!B|!>A|!C|!4V|!C|!|"|!b|!|Y|!+|!b|!|F|!|SP --> ç>¾AôVâ♫↓P
 
The encoded string |LHello|G|J|M is decoded as CHR$(12)HelloCHR$(7)CHR$(10)CHR$(13)
The encoded string |m|j|@|e|!t|m|!|? is decoded as CHR$(13)CHR$(10)@CHR$(5)ôCHR$(13)ÿ
The encoded string abc|1de|5f is decoded as abc1de5f
</pre>
 
=={{header|jq}}==
'''Adapted from [[#Wren|Wren]]'''
{{works with|jq}}
 
'''Works with gojq, the Go implementation of jq'''
 
Strings in jq are just JSON strings, and therefore the constituent
codepoints are not restricted to 8-bit bytes. The `encode` and `decode` filters
presented here, however, only check that their inputs are non-empty
JSON strings.
 
<syntaxhighlight lang="jq">
def encode($upper):
  # Turn one codepoint into a one-character string.
  def chr: [.] | implode;
  # Escape a single code; callers pass values already reduced by 128 where needed.
  # Codes 1..26 use an upper- or lower-case letter depending on $upper.
  def f:
    if 1 <= . and . <= 26
    then "|" + ((. + (if $upper then 64 else 96 end)) | chr)
    elif . < 32   then "|" + ((. + 64) | chr)
    elif . == 34  then "|\""     # quotation mark
    elif . == 60  then "|<"      # less than
    elif . == 124 then "||"      # vertical bar
    elif . == 127 then "|?"      # DEL
    else chr
    end;
  . as $in
  | if ($in | type) != "string" or ($in | length) == 0
    then "Argument of encode must be a non-empty string." | error
    else
      # strip one pair of enclosing quotation marks, if present
      ( if ($in | length) > 1 and $in[:1] == "\"" and $in[-1:] == "\""
        then $in[1:-1]
        else $in
        end ) as $body
      # fold the codepoints into the encoded string
      | reduce ($body | explode[]) as $b ("";
          . + (if $b < 128 then ($b | f) else "|!" + (($b - 128) | f) end))
    end;
 
def decode:
  # Turn one codepoint into a one-character string.
  def chr: [.] | implode;
  # Map the code that follows "|" to the code it stands for.
  def f:
    if . == 34 or . == 60 or . == 124 then .   # quotation mark, less than, vertical bar
    elif . == 63 then 127                      # question mark
    elif 64 <= . and . < 96 then . - 64        # @ + upper case letter + [\]^_
    elif . == 96 then 31                       # grave accent
    elif 97 <= . and . < 127 then . - 96       # lower case letter + {}~
    else .
    end;
  . as $in
  | if ($in | type) != "string" or ($in | length) == 0
    then "Argument of decode must be a non-empty string." | error
    else
      # strip one pair of enclosing quotation marks, if present
      ( if ($in | length) > 1 and $in[:1] == "\"" and $in[-1:] == "\""
        then $in[1:-1]
        else $in
        end ) as $body
      | ($body | explode) as $codes
      | ($codes | length) as $n
      | {i: 0, dec: ""}
      # walk the codepoints, consuming 1 to 4 of them per step
      | until(.i >= $n;
          .i as $i
          | if $codes[$i] != 124
            then .dec += ($codes[$i] | chr)                       # plain character
               | .i += 1
            elif $i < $n - 1 and $codes[$i+1] != 33
            then .dec += ($codes[$i+1] | f | chr)                 # "|X"
               | .i += 2
            elif $i < $n - 2 and $codes[$i+2] != 124
            then .dec += ((128 + $codes[$i+2]) | chr)             # "|!X"
               | .i += 3
            elif $i < $n - 3 and $codes[$i+2] == 124
            then .dec += ((128 + ($codes[$i+3] | f)) | chr)       # "|!|X"
               | .i += 4
            else .i += 1                                          # incomplete sequence: skip the bar
            end)
      | .dec
    end;
 
# Sample inputs for task1: control characters plus, in the second string,
# two codepoints above 127 (U+00F4 and U+00FF).
def strings: [
"\fHello\u0007\n\r",
"\r\n\u0000\u0005\u00f4\r\u00ff"
];

# The two values passed as encode's $upper argument by task1.
def uppers: [true, false];
 
# For each sample string and each letter case, emit the string, its encoding,
# the decoding of that encoding, and whether the round trip reproduced the input.
def task1:
  strings[]
  | uppers[] as $u
  | (if $u then "upper" else "lower" end) as $case
  | encode($u) as $enc
  | ($enc | decode) as $dec
  | "string: \(tojson)",
    "encoded (\($case)) : \($enc|tojson)",
    "decoded : \($dec|tojson)",
    "string == decoded ? \($dec == .)\n"
  ;
 
# Sample inputs for task2; most contain codepoints above 255.
def jstrings:[
"ALERT|G",
"wert↑",
"@♂aN°$ª7Î",
"ÙC▼æÔt6¤☻Ì",
"\"@)Ð♠qhýÌÿ",
"+☻#o9$u♠©A",
"♣àlæi6Ú.é",
"ÏÔ♀È♥@ë",
"Rç÷\\%◄MZûhZ",
"ç>¾AôVâ♫↓P"
];
 
# Emit a heading, then one line per jstrings entry showing the string, its
# upper-case encoding, the decoded result and whether the round trip matched.
def task2:
  "Julia strings: string -> encoded (upper) <- decoded (same or different)\n",
  ( jstrings[] as $str
    | ($str | encode(true)) as $enc
    | ($enc | decode) as $dec
    | (if $str == $dec then "same" else "different" end) as $verdict
    | " \($str|tojson) -> \($enc|tojson) <- \($dec|tojson) (\($verdict))"
  );

task1, task2
</syntaxhighlight>
'''Invocation''': jq -nr -f gstrans.jq
{{output}}
<pre>
string: "\fHello\u0007\n\r"
encoded (upper) : "|LHello|G|J|M"
decoded : "\fHello\u0007\n\r"
string == decoded ? true
 
string: "\fHello\u0007\n\r"
encoded (lower) : "|lHello|g|j|m"
decoded : "\fHello\u0007\n\r"
string == decoded ? true
 
string: "\r\n\u0000\u0005ô\rÿ"
encoded (upper) : "|M|J|@|E|!t|M|!|?"
decoded : "\r\n\u0000\u0005ô\rÿ"
string == decoded ? true
 
string: "\r\n\u0000\u0005ô\rÿ"
encoded (lower) : "|m|j|@|e|!t|m|!|?"
decoded : "\r\n\u0000\u0005ô\rÿ"
string == decoded ? true
 
Julia strings: string -> encoded (upper) <- decoded (same or different)
 
"ALERT|G" -> "ALERT||G" <- "ALERT|G" (same)
"wert↑" -> "wert|!ℑ" <- "wert↑" (same)
"@♂aN°$ª7Î" -> "@|!◂aN|!0$|!*7|!N" <- "@♂aN°$ª7Î" (same)
"ÙC▼æÔt6¤☻Ì" -> "|!YC|!┼|!f|!Tt6|!$|!▻|!L" <- "ÙC▼æÔt6¤☻Ì" (same)
"\"@)Ð♠qhýÌÿ" -> "|\"@)|!P|!◠qh|!}|!L|!|?" <- "\"@)Ð♠qhýÌÿ" (same)
"+☻#o9$u♠©A" -> "+|!▻#o9$u|!◠|!)A" <- "+☻#o9$u♠©A" (same)
"♣àlæi6Ú.é" -> "|!◣|!`l|!fi6|!Z.|!i" <- "♣àlæi6Ú.é" (same)
"ÏÔ♀È♥@ë" -> "|!O|!T|!◀|!H|!◥@|!k" <- "ÏÔ♀È♥@ë" (same)
"Rç÷\\%◄MZûhZ" -> "R|!g|!w\\%|!╄MZ|!{hZ" <- "Rç÷\\%◄MZûhZ" (same)
"ç>¾AôVâ♫↓P" -> "|!g>|!>A|!tV|!b|!◫|!ℓP" <- "ç>¾AôVâ♫↓P" (same)
</pre>
 
=={{header|Julia}}==
Line 510 ⟶ 932:
b'\r\n\x00\x05\xf4\r\xff' -> b'|M|J|@|E|!t|M|!|?'
</pre>
 
=={{header|Raku}}==
{{trans|Julia}}
<syntaxhighlight lang="raku" line># 20231105 Raku programming solution
 
# Encode $str in GSTrans format, working on its UTF-8 bytes.
# Bytes 0..31, the double quote, the vertical bar and 127 are escaped with a
# leading '|'; bytes 128..255 are written as '|!' followed by the encoding of
# the byte minus 128; every other byte is written as the character itself.
sub GSTrans-encode(Str $str) {
    # Map over the byte values themselves. Turning the bytes into a string and
    # splitting it with .comb works on graphemes, and CR followed by LF forms a
    # single grapheme, so the LF of a "\r\n" pair was silently dropped.
    return [~] $str.encode('utf8').list.map: -> $i {
        my $c = chr($i);
        die "Char value of $c, $i, is out of range" unless 0 <= $i <= 255;
        given ($i,$c) {
            when 0 <= $i <= 31 { '|' ~ chr(64 + $i) }
            when $c eq '"' { '|"' }
            when $c eq '|' { '||' }
            when $i == 127 { '|?' }
            when 128 <= $i <= 255 { '|!' ~ GSTrans-encode(chr($i - 128)) }
            default { $c }
        }
    }
}
 
# Decode a GSTrans-encoded string.
# The characters of $str are scanned once; each decoded value is emitted with
# `take`, and the collected values are finally decoded as 'utf8c8'.
sub GSTrans-decode(Str $str) {
# $gotbar:  the previous character was an unconsumed '|'
# $gotbang: the previous two characters were '|!'
# $bangadd: 128 while inside a '|!|' sequence, otherwise 0
my ($gotbar, $gotbang, $bangadd) = False, False, 0;

my @result = gather for $str.comb -> $c {
if $gotbang {
# after '|!': either another '|' (escaped high value) or a plain character
if $c eq '|' {
$bangadd = 128;
$gotbar = True;
} else {
take $c.ord + 128;
}
$gotbang = False;
} elsif $gotbar {
# after '|': translate the escape character, shifted by $bangadd
given $c {
when $c eq '?' { take 127 + $bangadd }
when $c eq '!' { $gotbang = True }
when $c eq '|' || $c eq '"' || $c eq '<' { take $c.ord + $bangadd }
when $c eq '[' || $c eq '{' { take 27 + $bangadd }
when $c eq '\\' { take 28 + $bangadd }
when $c eq ']' || $c eq '}' { take 29 + $bangadd }
when $c eq '^' || $c eq '~' { take 30 + $bangadd }
when $c eq '_' || $c eq '`' { take 31 + $bangadd }
# anything else: upper-cased code minus 64; if that is negative the
# character itself is kept, as if the '|' had not been there
default { my $i = $c.uc.ord - 64 + $bangadd;
take $i >= 0 ?? $i !! $c.ord }
}
$gotbar = False;
$bangadd = 0;
} elsif $c eq '|' {
$gotbar = True
} else {
# ordinary character: emitted unchanged
take $c.ord
}
}
return Blob.new(@result).decode('utf8c8')
}
 
# Fixed round-trip inputs.
# NOTE(review): the second word starts with an apostrophe, which is part of the
# test string inside the angle-bracket word list; confirm whether it is intended.
my @TESTS = <ALERT|G 'wert↑>;
# Eight strings, each built from ten code points rolled from 0..255.
my @RAND_TESTS = ^256 .roll(10).chrs.join xx 8;
# Already-encoded inputs that are only decoded.
my @DECODE_TESTS = < |LHello|G|J|M |m|j|@|e|!t|m|!|? abc|1de|5f >;

# Round trip: encode, decode, print both, and die if the result does not match.
for |@TESTS, |@RAND_TESTS -> $t {
my $encoded = GSTrans-encode($t);
my $decoded = GSTrans-decode($encoded);
say "String $t encoded is: $encoded, decoded is: $decoded.";
die unless $t ~~ $decoded;
}
# Decode-only samples.
for @DECODE_TESTS -> $enc {
say "Encoded string $enc decoded is: ", GSTrans-decode($enc);
}</syntaxhighlight>
You may [https://ato.pxeger.com/run?1=jVZLb9pAEL5W_IoBIYEVsICUlIZXXjRVlbZS4FAprxq8gBNjt2s7D7Hm2HvPveTQ_qke-0s6612bNSERe7DXs983r52d9a_f1LgJHh__BP643Pj76osXDOG4P6CG45WJM3JNUuz7FPKeTzWYZwCAEj-gDpwtLiKpLlEF1NAoaLpteb4-mlJPH7mzoT4zvu1CuQP5EcyB03HMHiBvQRtlukvNppSaFoHc4dSgcGvYAQF3jIASIktgeeAGPpegXxOSg8CxiedBBVptrgqftXo9VjSxbokDRSTmR9rSKo67KS4opO0ql86hwAqwAHS6uPMatnBRg3CVhgGQ71DIFWIpp-FXuB7HUji2BocZaEO19kbFdZ_iqrVGKsoIl-X-rmwTdx9RZc7QNFWPScZGYPuQHnPuaoyK3mEmzGTUAjDJugLA7SvmJ64_NChuTzRxJjjjL8M0NdzZd4btkVL8qjQzkrdHicc9acPE8KeEwtgVqqNiietE-mSNE-2JTIqTHCtyHLELwBPbaKbXhMe4NKABUdZCIOjliibfuCGyPrEg0sqUzCb-yZBjVKQz8Z-qykV1KmGqNSHiwiqYCw94eWwtwwqfpWQ5RfGGx_gCHDPHmFLSy49WYjuJfgPzZ6qKeaJixXt4ln9-vuQ0NuRcqDbDJf_thvxLlb9I-NuVzUK-Uulfl_Tq8_T4HM7VDhiINJdBtB7JTNfucoidsaDTxj7W7fJpNhtvlSi9FwqVrtYppM5M5Un5rjtn6YOkMlKHSKmhdI-B5Ao5sN2h7pC7omwLmi4bTnSXjPA24Q2Jt41Brz_oo8XW_knvdMCOoXBHqP_vx89OM1o_3f90dBWDLmv1HdCpa9vFakUTd9G1azlwfw8NgT_qHX4-6iWMFrCT98S2XXbMPrCPwGbsmu0xwrI-TrOsC8ZwxKomYfUxdJqQyfC-xYRbJZwo9nkP85NGmRfNmWd3pV3nfa0Zg0TYKkgmIqYLqGc8QA6bsYVHHG3Eqi1vN7FTglhXJJUfei7i8xtW3pxIXyyS9SbmmUeUzguPBNXKTY2M96RJTzrBl1WDudK6CND7UPxgyP-M-H_jPw Attempt This Online!]
 
=={{header|Rust}}==
Line 695 ⟶ 1,187:
result.push(if j >= 64 {j - 64} else {i});
}
} gotbar = false;
gotbar = false;
bangadd = 0;
} else if c == '|' {
Line 742 ⟶ 1,235:
 
If an invalid byte (following the "|" flag) is encountered whilst decoding, it is decoded as if the flag were not present.
<syntaxhighlight lang="ecmascriptwren">classimport GSTrans"./fmt" for {Fmt
 
class GSTrans {
static encode(s, upper) {
if (!(s is String && s.count > 0)) Fiber.abort("Argument must be a non-empty string.")
2,442

edits