GSTrans string conversion: Difference between revisions

← Older edit

GSTrans string conversion (view source)

Revision as of 00:05, 26 January 2024

7,304 bytes added, 4 months ago

→‎{{header|jq}}

Peak

2,458

edits

Revision as of 17:06, 5 November 2023 (view source) Hkdtam (talk \| contribs) (added Raku programming solution) ← Older edit		Latest revision as of 00:05, 26 January 2024 (view source) Peak (talk \| contribs) (→‎{{header\|jq}})
(4 intermediate revisions by 2 users not shown)
Line 203: </syntaxhighlight> No check of string lengths is done. On decoding, invalid encodings are ignored and skipped, for instance '''\|4''' is decoded as '''4'''. =={{header\|Emacs Lisp}}== <syntaxhighlight lang="lisp"> " ASCII code Symbols used 0 \|@ 1 - 26 \|letter eg \|A (or \|a) = ASCII 1, \|M (or \|m) = ASCII 13 27 \|[ or \|{ 28 \|\ 29 \|] or \|} 30 \|^ or \|~ 31 \|_ or \|' (grave accent) 32 - 126 keyboard character, except for: \" \|\" \| \|\| < \|< 127 \|? 128 - 255 \|!coded symbol eg ASCII 128 = \|!\|@ ASCII 129 = \|!\|A " (defun gst--load-char (encoded) (if (gst--is-end encoded) (error "Unexpected end.") (let ((c (aref (car encoded) (cadr encoded)))) (setcdr encoded (list (1+ (cadr encoded)))) c ))) (defun gst--is-end (lst) (>= (cadr lst) (length (car lst)))) (defun gst--translate-special (c) (cond ((eq c ?@) 0) ((eq c ?\[) 27) ((eq c ?\{) 27) ((eq c ?\\) 28) ((eq c ?\]) 29) ((eq c ?\}) 29) ((eq c ?^) 30) ((eq c ?~) 30) ((eq c ?_) 31) ((eq c ?') 31) ((eq c ?\") ?\") ((eq c ?\|) ?\|) ((eq c ?<) ?<) ((eq c ??) 127) ((and (>= c 65) (<= c 90)) (+ (- c 65) 1)) ((and (>= c 97) (<= c 122)) (+ (- c 97) 1)) (t nil))) (defun gst--load-highpos-token (encoded) (let ((c (gst--load-char encoded)) sp) (cond ((eq c ?\|) (setq sp (gst--load-char encoded)) (+ 128 (gst--translate-special sp))) ((and (> c 31) (< c 127)) (+ 128 c)) (t (error "Not a printable character."))))) (defun gst--load-token (encoded) (let ((c (gst--load-char encoded)) sp) (cond ((eq c ?\|) (setq sp (gst--load-char encoded)) (if (eq sp ?!) 
(gst--load-highpos-token encoded) (gst--translate-special sp))) ((and (> c 31) (< c 127)) c) (t (error "Not a printable character."))))) (defun gst-parse (text) (let ((encoded (list text 0)) (decoded '())) (while (not (gst--is-end encoded)) (add-to-list 'decoded (gst--load-token encoded) 't)) decoded)) (progn (let ((text "\|LHello\|G\|J\|M")) (message "%s => %s" text (gst-parse "\|LHello\|G\|J\|M")))) </syntaxhighlight> {{out}} <pre> \|LHello\|G\|J\|M => (12 72 101 108 111 7 10 13) </pre> =={{header\|Java}}== Line 365 ⟶ 452: The encoded string \|m\|j\|@\|e\|!t\|m\|!\|? is decoded as CHR$(13)CHR$(10)@CHR$(5)ôCHR$(13)ÿ The encoded string abc\|1de\|5f is decoded as abc1de5f </pre> =={{header\|jq}}== '''Adapted from [[#Wren\|Wren]]''' {{works with\|jq}} '''Works with gojq, the Go implementation of jq''' Strings in jq are just JSON strings, and therefore the constituent codepoints are not restricted to 8-bit bytes. The `encode` and `decode` filters presented here, however, only check that their inputs are non-empty JSON strings. <syntaxhighlight lang="jq"> def encode($upper): # helper function to encode bytes < 128 def f: if (. >= 1 and . <= 26) then "\|" + (if $upper then [. + 64]\|implode else [. + 96]\|implode end) elif . < 32 then "\|" + ([. + 64] \| implode) elif . == 34 # quotation mark then "\|\"" elif . == 60 # less than then "\|<" elif . == 124 # vertical bar then "\|\|" elif . == 127 # DEL then "\|?" else [.]\|implode end ; . as $s \| if ($s \| (type != "string") or (length == 0)) then "Argument of encode must be a non-empty string." \| error else # remove any outer quotation marks ($s \| if (length > 1 and .[:1] == "\"" and .[-1:] == "\"") then .[1:-1] else . end) as $s # iterate through the string's codepoints \| reduce ($s\|explode)[] as $b ( {enc: ""}; if $b < 128 then .enc += ($b\|f) else .enc += "\|!" + (($b - 128)\|f) end) \| .enc end; def decode: # helper function for decoding bytes after "\|" def f: if . == 34 # quotation mark then 34 elif . 
== 60 # less than then 60 elif . == 63 # question mark then 127 elif . >= 64 and . < 96 # @ + upper case letter + [\]^_ then . - 64 elif . == 96 # grave accent then 31 elif . == 124 # vertical bar then 124 elif . >= 97 and . < 127 # lower case letter + {}~ then . - 96 else . end; . as $s \| if ($s \| (type != "string") or (length == 0)) then "Argument of decode must be a non-empty string." \| error else # remove any outer quotation marks ($s \| if (length > 1 and .[:1] == "\"" and .[-1:] == "\"") then $s[1:-1] else . end) as $s \| ($s\|explode) as $bytes \| ($bytes\|length) as $bc \| {i: 0, dec: "" } # iterate through the string's bytes decoding as we go \| until(.i >= $bc; if $bytes[.i] != 124 then .dec += ([$bytes[.i]] \| implode) \| .i += 1 else if (.i < $bc - 1) and ($bytes[.i+1] != 33) then .dec += ([$bytes[.i+1] \| f ] \| implode) \| .i += 2 else if (.i < $bc - 2) and ($bytes[.i+2] != 124) then .dec += ([128 + $bytes[.i+2]] \| implode) \| .i += 3 else if (.i < $bc - 3) and ($bytes[.i+2] == 124) then .dec += ([128 + ($bytes[.i+3] \| f)] \| implode) \| .i += 4 else .i += 1 end end end end) \| .dec end; def strings: [ "\fHello\u0007\n\r", "\r\n\u0000\u0005\u00f4\r\u00ff" ]; def uppers: [true, false]; def task1: range(0; strings\|length) as $i \| strings[$i] \| uppers[] as $u \| encode($u) as $enc \| ($enc\|decode) as $dec \| "string: \(tojson)", "encoded (\(if $u then "upper" else "lower" end)) : \($enc\|tojson)", "decoded : \($dec\|tojson)", "string == decoded ? \($dec == .)\n" ; def jstrings:[ "ALERT\|G", "wert↑", "@♂aN°$ª7Î", "ÙC▼æÔt6¤☻Ì", "\"@)Ð♠qhýÌÿ", "+☻#o9$u♠©A", "♣àlæi6Ú.é", "ÏÔ♀È♥@ë", "Rç÷\\%◄MZûhZ", "ç>¾AôVâ♫↓P" ]; def task2: "Julia strings: string -> encoded (upper) <- decoded (same or different)\n", ( jstrings[] \| encode(true) as $enc \| ($enc\|decode) as $dec \| " \(tojson) -> \($enc\|tojson) <- \($dec\|tojson) (\( if . 
== $dec then "same" else "different" end))" ); task1, task2 </syntaxhighlight> '''Invocation''': jq -nr -f gstrans.jq {{output}} <pre> string: "\fHello\u0007\n\r" encoded (upper) : "\|LHello\|G\|J\|M" decoded : "\fHello\u0007\n\r" string == decoded ? true string: "\fHello\u0007\n\r" encoded (lower) : "\|lHello\|g\|j\|m" decoded : "\fHello\u0007\n\r" string == decoded ? true string: "\r\n\u0000\u0005ô\rÿ" encoded (upper) : "\|M\|J\|@\|E\|!t\|M\|!\|?" decoded : "\r\n\u0000\u0005ô\rÿ" string == decoded ? true string: "\r\n\u0000\u0005ô\rÿ" encoded (lower) : "\|m\|j\|@\|e\|!t\|m\|!\|?" decoded : "\r\n\u0000\u0005ô\rÿ" string == decoded ? true Julia strings: string -> encoded (upper) <- decoded (same or different) "ALERT\|G" -> "ALERT\|\|G" <- "ALERT\|G" (same) "wert↑" -> "wert\|!ℑ" <- "wert↑" (same) "@♂aN°$ª7Î" -> "@\|!◂aN\|!0$\|!*7\|!N" <- "@♂aN°$ª7Î" (same) "ÙC▼æÔt6¤☻Ì" -> "\|!YC\|!┼\|!f\|!Tt6\|!$\|!▻\|!L" <- "ÙC▼æÔt6¤☻Ì" (same) "\"@)Ð♠qhýÌÿ" -> "\|\"@)\|!P\|!◠qh\|!}\|!L\|!\|?" <- "\"@)Ð♠qhýÌÿ" (same) "+☻#o9$u♠©A" -> "+\|!▻#o9$u\|!◠\|!)A" <- "+☻#o9$u♠©A" (same) "♣àlæi6Ú.é" -> "\|!◣\|!`l\|!fi6\|!Z.\|!i" <- "♣àlæi6Ú.é" (same) "ÏÔ♀È♥@ë" -> "\|!O\|!T\|!◀\|!H\|!◥@\|!k" <- "ÏÔ♀È♥@ë" (same) "Rç÷\\%◄MZûhZ" -> "R\|!g\|!w\\%\|!╄MZ\|!{hZ" <- "Rç÷\\%◄MZûhZ" (same) "ç>¾AôVâ♫↓P" -> "\|!g>\|!>A\|!tV\|!b\|!◫\|!ℓP" <- "ç>¾AôVâ♫↓P" (same) </pre> Line 976 ⟶ 1,235: If an invalid byte (following the "\|" flag) is encountered whilst decoding, it is decoded as if the flag were not present. <syntaxhighlight lang="~~ecmascript~~wren">~~class~~import ~~GSTrans~~"./fmt" for {Fmt class GSTrans { static encode(s, upper) { if (!(s is String && s.count > 0)) Fiber.abort("Argument must be a non-empty string.")