GSTrans string conversion: Difference between revisions

← Older edit

GSTrans string conversion (view source)

Revision as of 00:05, 26 January 2024

16,997 bytes added, 3 months ago

→‎{{header|jq}}

Peak

2,442

edits

Revision as of 22:46, 29 October 2023 (view source) Wherrera (talk \| contribs) m (→‎Without table lookup) ← Older edit		Latest revision as of 00:05, 26 January 2024 (view source) Peak (talk \| contribs) (→‎{{header\|jq}})
(10 intermediate revisions by 5 users not shown)
Line 203: </syntaxhighlight> No checks for string lengths are done. On decoding, invalid encodings are ignored and skipped, for instance '''\|4''' is decoded as '''4'''. =={{header\|Emacs Lisp}}== <syntaxhighlight lang="lisp"> " ASCII code Symbols used 0 \|@ 1 - 26 \|letter eg \|A (or \|a) = ASCII 1, \|M (or \|m) = ASCII 13 27 \|[ or \|{ 28 \|\ 29 \|] or \|} 30 \|^ or \|~ 31 \|_ or \|' (grave accent) 32 - 126 keyboard character, except for: \" \|\" \| \|\| < \|< 127 \|? 128 - 255 \|!coded symbol eg ASCII 128 = \|!\|@ ASCII 129 = \|!\|A " (defun gst--load-char (encoded) "Return the character at the current index of ENCODED, a list (STRING INDEX), and advance the index. Signal an error when the index is already at the end of the string." (if (gst--is-end encoded) (error "Unexpected end.") (let ((c (aref (car encoded) (cadr encoded)))) (setcdr encoded (list (1+ (cadr encoded)))) c ))) (defun gst--is-end (lst) "Return non-nil when the index stored in LST has reached the length of its string." (>= (cadr lst) (length (car lst)))) (defun gst--translate-special (c) "Return the code selected by C, the character read after a vertical bar, or nil when C is not a recognised escape. Both the grave accent and the apostrophe map to 31; the apostrophe is kept so that input accepted by earlier versions still decodes." (cond ((eq c ?@) 0) ((eq c ?\[) 27) ((eq c ?\{) 27) ((eq c ?\\) 28) ((eq c ?\]) 29) ((eq c ?\}) 29) ((eq c ?^) 30) ((eq c ?~) 30) ((eq c ?_) 31) ((eq c ?\`) 31) ((eq c ?') 31) ((eq c ?\") ?\") ((eq c ?\|) ?\|) ((eq c ?<) ?<) ((eq c ??) 127) ((and (>= c 65) (<= c 90)) (+ (- c 65) 1)) ((and (>= c 97) (<= c 122)) (+ (- c 97) 1)) (t nil))) (defun gst--load-highpos-token (encoded) "Read the token that follows the bar-exclamation prefix from ENCODED and return its code plus 128." (let ((c (gst--load-char encoded)) sp) (cond ((eq c ?\|) (setq sp (gst--load-char encoded)) (+ 128 (gst--translate-special sp))) ((and (> c 31) (< c 127)) (+ 128 c)) (t (error "Not a printable character."))))) (defun gst--load-token (encoded) "Read one token from ENCODED and return its decoded character code." (let ((c (gst--load-char encoded)) sp) (cond ((eq c ?\|) (setq sp (gst--load-char encoded)) (if (eq sp ?!) 
(gst--load-highpos-token encoded) (gst--translate-special sp))) ((and (> c 31) (< c 127)) c) (t (error "Not a printable character."))))) (defun gst-parse (text) "Decode the GSTrans-encoded string TEXT and return the decoded character codes as a list in input order. Codes are collected with push and reversed once at the end, so repeated codes are kept (add-to-list would silently drop duplicates)." (let ((encoded (list text 0)) (decoded '())) (while (not (gst--is-end encoded)) (push (gst--load-token encoded) decoded)) (nreverse decoded))) (progn (let ((text "\|LHello\|G\|J\|M")) (message "%s => %s" text (gst-parse text)))) </syntaxhighlight> {{out}} <pre> \|LHello\|G\|J\|M => (12 72 101 108 108 111 7 10 13) </pre> =={{header\|Java}}== This example checks that the string being encoded only contains characters within the range 0..255 (inclusive), and does not process the string if an invalid character is found. Strings being decoded which contain unprintable characters have each such character, c, replaced by the string CHR$(c). Invalid strings such as \|5 are decoded as 5. <syntaxhighlight lang="java"> import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; public final class GSTransStringConversion { public static void main(String[] args) { List<String> tests = List.of( "ALERT\|G", "wert↑", "@♂aN°$ª7Î", "ÙC▼æÔt6¤☻Ì", "\"@)Ð♠qhýÌÿ", "+☻#o9$u♠©A", "♣àlæi6Ú.é", "ÏÔ♀È♥@ë", "Rç÷%◄MZûhZ", "ç>¾AôVâ♫↓P" ); for ( String test : tests ) { String encoded = encode(test); System.out.println(test + " --> " + encoded + " --> " + decode(encoded)); } System.out.println(); for ( String encoded : List.of ( "\|LHello\|G\|J\|M", "\|m\|j\|@\|e\|!t\|m\|!\|?", "abc\|1de\|5f" ) ) { System.out.println("The encoded string " + encoded + " is decoded as " + decode(encoded)); } } private static String encode(String text) { StringBuilder result = new StringBuilder(); byte[] bytes = text.getBytes(StandardCharsets.UTF_8); for ( int k = 0; k < bytes.length; k++ ) { int charValue = bytes[k] & 0xff; if ( charValue < 0 \|\| charValue > 255 ) { throw new IllegalArgumentException("Character value is out of range: " + charValue); } StringBuilder chars = new StringBuilder(); if ( charValue >= 
128 ) { chars.append('\|'); chars.append('!'); charValue -= 128; } if ( charValue <= 31 ) { chars.append('\|'); chars.append((char) ( 64 + charValue )); } else if ( charValue == 34 ) { chars.append('\|'); chars.append('"'); } else if ( charValue == 124 ) { chars.append('\|'); chars.append('\|'); } else if ( charValue == 127 ) { chars.append('\|'); chars.append('?'); } else { chars.append((char) charValue); } result.append(chars.toString()); } return result.toString(); } /** Decodes a GSTrans-encoded string. A control code in the range 1..31 that is not prefixed by the high-bit marker is rendered as the text CHR$(n); a character after a vertical bar that is not a recognised escape is emitted unchanged. */ private static String decode(String text) { List<Byte> bytes = new ArrayList<Byte>(); boolean previousVerticalBar = false; boolean previousExclamationMark = false; int addend = 0; for ( char ch : text.toCharArray() ) { if ( previousExclamationMark ) { if ( ch == '\|' ) { addend = 128; previousVerticalBar = true; } else { bytes.add((byte) ( 128 + ch )); } previousExclamationMark = false; } else if ( previousVerticalBar ) { if ( ch == '?' ) { bytes.add((byte) ( 127 + addend )); } else if ( ch == '!' ) { previousExclamationMark = true; } else if ( ch == '\|' \|\| ch == '"' \|\| ch == '<' ) { bytes.add((byte) ( ch + addend )); } else if ( ch == '[' \|\| ch == '{' ) { bytes.add((byte) ( 27 + addend )); } else if ( ch == '\\' ) { bytes.add((byte) ( 28 + addend )); } else if ( ch == ']' \|\| ch == '}' ) { bytes.add((byte) ( 29 + addend )); } else if ( ch == '^' \|\| ch == '~' ) { bytes.add((byte) ( 30 + addend )); } else if ( ch == '_' \|\| ch == '`' ) { bytes.add((byte) ( 31 + addend )); } else { final int value = Integer.valueOf(Character.toUpperCase(ch)) - 64 + addend; if ( 0 < value && value < 32 ) { byte[] newBytes = ( "CHR$(" + String.valueOf(value) + ")" ).getBytes(); for ( byte bb : newBytes ) { bytes.add(bb); } } else if ( value > 0 ) { bytes.add((byte) value); } else { bytes.add((byte) ch); } } previousVerticalBar = false; addend = 0; } else if ( ch == '\|' ) { previousVerticalBar = true; } else { bytes.add((byte) ch); } } String decoded = ""; List<Byte> highValueBytes = new ArrayList<Byte>(); 
/* The index must be an int: a byte counter wraps to -128 after 127, so any decoded list longer than 127 entries would make bytes.get() throw IndexOutOfBoundsException. */ for ( int bb = 0; bb < bytes.size(); bb++ ) { if ( bytes.get(bb) > 0 ) { decoded += decodeHighValueBytes(highValueBytes); decoded += new String( new byte[] { bytes.get(bb) }, StandardCharsets.UTF_8 ); } else { highValueBytes.add(bytes.get(bb)); } } decoded += decodeHighValueBytes(highValueBytes); return decoded; } /** Converts the accumulated bytes to text and empties the list: a single byte becomes the character with that unsigned code point, several bytes are decoded together as UTF-8. Returns the empty string when the list is empty. */ private static String decodeHighValueBytes(List<Byte> highValueBytes) { String result = ""; if ( ! highValueBytes.isEmpty() ) { if ( highValueBytes.size() == 1 ) { result += Character.toString(highValueBytes.get(0) & 0xff); } else { byte[] newBytes = new byte[highValueBytes.size()]; for ( int j = 0; j < highValueBytes.size(); j++ ) { newBytes[j] = highValueBytes.get(j); } result += new String(newBytes, StandardCharsets.UTF_8); } highValueBytes.clear(); } return result; } } </syntaxhighlight> {{ out }} <pre> ALERT\|G --> ALERT\|\|G --> ALERT\|G wert↑ --> wert\|!b\|!\|F\|!\|Q --> wert↑ @♂aN°$ª7Î --> @\|!b\|!\|Y\|!\|BaN\|!B\|!0$\|!B\|!7\|!C\|!\|N --> @♂aN°$ª7Î ÙC▼æÔt6¤☻Ì --> \|!C\|!\|YC\|!b\|!\|V\|!<\|!C\|!&\|!C\|!\|Tt6\|!B\|!$\|!b\|!\|X\|!;\|!C\|!\|L --> ÙC▼æÔt6¤☻Ì "@)Ð♠qhýÌÿ --> \|"@)\|!C\|!\|P\|!b\|!\|Y\|! qh\|!C\|!=\|!C\|!\|L\|!C\|!? --> "@)Ð♠qhýÌÿ +☻#o9$u♠©A --> +\|!b\|!\|X\|!;#o9$u\|!b\|!\|Y\|! \|!B\|!)A --> +☻#o9$u♠©A ♣àlæi6Ú.é --> \|!b\|!\|Y\|!#\|!C\|! l\|!C\|!&i6\|!C\|!\|Z.\|!C\|!) --> ♣àlæi6Ú.é ÏÔ♀È♥@ë --> \|!C\|!\|O\|!C\|!\|T\|!b\|!\|Y\|!\|@\|!C\|!\|H\|!b\|!\|Y\|!%@\|!C\|!+ --> ÏÔ♀È♥@ë Rç÷%◄MZûhZ --> R\|!C\|!'\|!C\|!7%\|!b\|!\|W\|!\|DMZ\|!C\|!;hZ --> Rç÷%◄MZûhZ ç>¾AôVâ♫↓P --> \|!C\|!'>\|!B\|!>A\|!C\|!4V\|!C\|!\|"\|!b\|!\|Y\|!+\|!b\|!\|F\|!\|SP --> ç>¾AôVâ♫↓P The encoded string \|LHello\|G\|J\|M is decoded as CHR$(12)HelloCHR$(7)CHR$(10)CHR$(13) The encoded string \|m\|j\|@\|e\|!t\|m\|!\|? 
is decoded as CHR$(13)CHR$(10)@CHR$(5)ôCHR$(13)ÿ The encoded string abc\|1de\|5f is decoded as abc1de5f </pre> =={{header\|jq}}== '''Adapted from [[#Wren\|Wren]]''' {{works with\|jq}} '''Works with gojq, the Go implementation of jq''' Strings in jq are just JSON strings, and therefore the constituent codepoints are not restricted to 8-bit bytes. The `encode` and `decode` filters presented here, however, only check that their inputs are non-empty JSON strings. <syntaxhighlight lang="jq"> def encode($upper): # helper function to encode bytes < 128 def f: if (. >= 1 and . <= 26) then "\|" + (if $upper then [. + 64]\|implode else [. + 96]\|implode end) elif . < 32 then "\|" + ([. + 64] \| implode) elif . == 34 # quotation mark then "\|\"" elif . == 60 # less than then "\|<" elif . == 124 # vertical bar then "\|\|" elif . == 127 # DEL then "\|?" else [.]\|implode end ; . as $s \| if ($s \| (type != "string") or (length == 0)) then "Argument of encode must be a non-empty string." \| error else # remove any outer quotation marks ($s \| if (length > 1 and .[:1] == "\"" and .[-1:] == "\"") then .[1:-1] else . end) as $s # iterate through the string's codepoints \| reduce ($s\|explode)[] as $b ( {enc: ""}; if $b < 128 then .enc += ($b\|f) else .enc += "\|!" + (($b - 128)\|f) end) \| .enc end; def decode: # helper function for decoding bytes after "\|" def f: if . == 34 # quotation mark then 34 elif . == 60 # less than then 60 elif . == 63 # question mark then 127 elif . >= 64 and . < 96 # @ + upper case letter + [\]^_ then . - 64 elif . == 96 # grave accent then 31 elif . == 124 # vertical bar then 124 elif . >= 97 and . < 127 # lower case letter + {}~ then . - 96 else . end; . as $s \| if ($s \| (type != "string") or (length == 0)) then "Argument of decode must be a non-empty string." \| error else # remove any outer quotation marks ($s \| if (length > 1 and .[:1] == "\"" and .[-1:] == "\"") then $s[1:-1] else . 
end) as $s \| ($s\|explode) as $bytes \| ($bytes\|length) as $bc \| {i: 0, dec: "" } # iterate through the string's bytes decoding as we go \| until(.i >= $bc; if $bytes[.i] != 124 then .dec += ([$bytes[.i]] \| implode) \| .i += 1 else if (.i < $bc - 1) and ($bytes[.i+1] != 33) then .dec += ([$bytes[.i+1] \| f ] \| implode) \| .i += 2 else if (.i < $bc - 2) and ($bytes[.i+2] != 124) then .dec += ([128 + $bytes[.i+2]] \| implode) \| .i += 3 else if (.i < $bc - 3) and ($bytes[.i+2] == 124) then .dec += ([128 + ($bytes[.i+3] \| f)] \| implode) \| .i += 4 else .i += 1 end end end end) \| .dec end; def strings: [ "\fHello\u0007\n\r", "\r\n\u0000\u0005\u00f4\r\u00ff" ]; def uppers: [true, false]; def task1: range(0; strings\|length) as $i \| strings[$i] \| uppers[] as $u \| encode($u) as $enc \| ($enc\|decode) as $dec \| "string: \(tojson)", "encoded (\(if $u then "upper" else "lower" end)) : \($enc\|tojson)", "decoded : \($dec\|tojson)", "string == decoded ? \($dec == .)\n" ; def jstrings:[ "ALERT\|G", "wert↑", "@♂aN°$ª7Î", "ÙC▼æÔt6¤☻Ì", "\"@)Ð♠qhýÌÿ", "+☻#o9$u♠©A", "♣àlæi6Ú.é", "ÏÔ♀È♥@ë", "Rç÷\\%◄MZûhZ", "ç>¾AôVâ♫↓P" ]; def task2: "Julia strings: string -> encoded (upper) <- decoded (same or different)\n", ( jstrings[] \| encode(true) as $enc \| ($enc\|decode) as $dec \| " \(tojson) -> \($enc\|tojson) <- \($dec\|tojson) (\( if . == $dec then "same" else "different" end))" ); task1, task2 </syntaxhighlight> '''Invocation''': jq -nr -f gstrans.jq {{output}} <pre> string: "\fHello\u0007\n\r" encoded (upper) : "\|LHello\|G\|J\|M" decoded : "\fHello\u0007\n\r" string == decoded ? true string: "\fHello\u0007\n\r" encoded (lower) : "\|lHello\|g\|j\|m" decoded : "\fHello\u0007\n\r" string == decoded ? true string: "\r\n\u0000\u0005ô\rÿ" encoded (upper) : "\|M\|J\|@\|E\|!t\|M\|!\|?" decoded : "\r\n\u0000\u0005ô\rÿ" string == decoded ? true string: "\r\n\u0000\u0005ô\rÿ" encoded (lower) : "\|m\|j\|@\|e\|!t\|m\|!\|?" decoded : "\r\n\u0000\u0005ô\rÿ" string == decoded ? 
true Julia strings: string -> encoded (upper) <- decoded (same or different) "ALERT\|G" -> "ALERT\|\|G" <- "ALERT\|G" (same) "wert↑" -> "wert\|!ℑ" <- "wert↑" (same) "@♂aN°$ª7Î" -> "@\|!◂aN\|!0$\|!7\|!N" <- "@♂aN°$ª7Î" (same) "ÙC▼æÔt6¤☻Ì" -> "\|!YC\|!┼\|!f\|!Tt6\|!$\|!▻\|!L" <- "ÙC▼æÔt6¤☻Ì" (same) "\"@)Ð♠qhýÌÿ" -> "\|\"@)\|!P\|!◠qh\|!}\|!L\|!\|?" <- "\"@)Ð♠qhýÌÿ" (same) "+☻#o9$u♠©A" -> "+\|!▻#o9$u\|!◠\|!)A" <- "+☻#o9$u♠©A" (same) "♣àlæi6Ú.é" -> "\|!◣\|!`l\|!fi6\|!Z.\|!i" <- "♣àlæi6Ú.é" (same) "ÏÔ♀È♥@ë" -> "\|!O\|!T\|!◀\|!H\|!◥@\|!k" <- "ÏÔ♀È♥@ë" (same) "Rç÷\\%◄MZûhZ" -> "R\|!g\|!w\\%\|!╄MZ\|!{hZ" <- "Rç÷\\%◄MZûhZ" (same) "ç>¾AôVâ♫↓P" -> "\|!g>\|!>A\|!tV\|!b\|!◫\|!ℓP" <- "ç>¾AôVâ♫↓P" (same) </pre> =={{header\|Julia}}== Line 510 ⟶ 932: b'\r\n\x00\x05\xf4\r\xff' -> b'\|M\|J\|@\|E\|!t\|M\|!\|?' </pre> =={{header\|Raku}}== {{trans\|Julia}} <syntaxhighlight lang="raku" line># 20231105 Raku programming solution sub GSTrans-encode(Str $str) { return [~] $str.encode('utf8').list.chrs.comb.map: -> $c { my $i = $c.ord; die "Char value of $c, $i, is out of range" unless 0 <= $i <= 255; given ($i,$c) { when 0 <= $i <= 31 { '\|' ~ chr(64 + $i) } when $c eq '"' { '\|"' } when $c eq '\|' { '\|\|' } when $i == 127 { '\|?' } when 128 <= $i <= 255 { '\|!' ~ GSTrans-encode(chr($i - 128)) } default { $c } } } } sub GSTrans-decode(Str $str) { my ($gotbar, $gotbang, $bangadd) = False, False, 0; my @result = gather for $str.comb -> $c { if $gotbang { if $c eq '\|' { $bangadd = 128; $gotbar = True; } else { take $c.ord + 128; } $gotbang = False; } elsif $gotbar { given $c { when $c eq '?' { take 127 + $bangadd } when $c eq '!' 
{ $gotbang = True } when $c eq '\|' \|\| $c eq '"' \|\| $c eq '<' { take $c.ord + $bangadd } when $c eq '[' \|\| $c eq '{' { take 27 + $bangadd } when $c eq '\\' { take 28 + $bangadd } when $c eq ']' \|\| $c eq '}' { take 29 + $bangadd } when $c eq '^' \|\| $c eq '~' { take 30 + $bangadd } when $c eq '_' \|\| $c eq '`' { take 31 + $bangadd } default { my $i = $c.uc.ord - 64 + $bangadd; take $i >= 0 ?? $i !! $c.ord } } $gotbar = False; $bangadd = 0; } elsif $c eq '\|' { $gotbar = True } else { take $c.ord } } return Blob.new(@result).decode('utf8c8') } my @TESTS = <ALERT\|G 'wert↑>; my @RAND_TESTS = ^256 .roll(10).chrs.join xx 8; my @DECODE_TESTS = < \|LHello\|G\|J\|M \|m\|j\|@\|e\|!t\|m\|!\|? abc\|1de\|5f >; for \|@TESTS, \|@RAND_TESTS -> $t { my $encoded = GSTrans-encode($t); my $decoded = GSTrans-decode($encoded); say "String $t encoded is: $encoded, decoded is: $decoded."; die unless $t ~~ $decoded; } for @DECODE_TESTS -> $enc { say "Encoded string $enc decoded is: ", GSTrans-decode($enc); }</syntaxhighlight> You may 
[https://ato.pxeger.com/run?1=jVZLb9pAEL5W_IoBIYEVsICUlIZXXjRVlbZS4FAprxq8gBNjt2s7D7Hm2HvPveTQ_qke-0s6612bNSERe7DXs983r52d9a_f1LgJHh__BP643Pj76osXDOG4P6CG45WJM3JNUuz7FPKeTzWYZwCAEj-gDpwtLiKpLlEF1NAoaLpteb4-mlJPH7mzoT4zvu1CuQP5EcyB03HMHiBvQRtlukvNppSaFoHc4dSgcGvYAQF3jIASIktgeeAGPpegXxOSg8CxiedBBVptrgqftXo9VjSxbokDRSTmR9rSKo67KS4opO0ql86hwAqwAHS6uPMatnBRg3CVhgGQ71DIFWIpp-FXuB7HUji2BocZaEO19kbFdZ_iqrVGKsoIl-X-rmwTdx9RZc7QNFWPScZGYPuQHnPuaoyK3mEmzGTUAjDJugLA7SvmJ64_NChuTzRxJjjjL8M0NdzZd4btkVL8qjQzkrdHicc9acPE8KeEwtgVqqNiietE-mSNE-2JTIqTHCtyHLELwBPbaKbXhMe4NKABUdZCIOjliibfuCGyPrEg0sqUzCb-yZBjVKQz8Z-qykV1KmGqNSHiwiqYCw94eWwtwwqfpWQ5RfGGx_gCHDPHmFLSy49WYjuJfgPzZ6qKeaJixXt4ln9-vuQ0NuRcqDbDJf_thvxLlb9I-NuVzUK-Uulfl_Tq8_T4HM7VDhiINJdBtB7JTNfucoidsaDTxj7W7fJpNhtvlSi9FwqVrtYppM5M5Un5rjtn6YOkMlKHSKmhdI-B5Ao5sN2h7pC7omwLmi4bTnSXjPA24Q2Jt41Brz_oo8XW_knvdMCOoXBHqP_vx89OM1o_3f90dBWDLmv1HdCpa9vFakUTd9G1azlwfw8NgT_qHX4-6iWMFrCT98S2XXbMPrCPwGbsmu0xwrI-TrOsC8ZwxKomYfUxdJqQyfC-xYRbJZwo9nkP85NGmRfNmWd3pV3nfa0Zg0TYKkgmIqYLqGc8QA6bsYVHHG3Eqi1vN7FTglhXJJUfei7i8xtW3pxIXyyS9SbmmUeUzguPBNXKTY2M96RJTzrBl1WDudK6CND7UPxgyP-M-H_jPw Attempt This Online!] =={{header\|Rust}}== Line 695 ⟶ 1,187: result.push(if j >= 64 {j - 64} else {i}); } } ~~gotbar = false;~~ gotbar = false; bangadd = 0; } else if c == '\|' { Line 742 ⟶ 1,235: If an invalid byte (following the "\|" flag) is encountered whilst decoding, it is decoded as if the flag were not present. <syntaxhighlight lang="~~ecmascript~~wren">~~class~~import ~~GSTrans~~"./fmt" for {Fmt class GSTrans { static encode(s, upper) { if (!(s is String && s.count > 0)) Fiber.abort("Argument must be a non-empty string.")