UTF-8 encode and decode: Difference between revisions

Content added Content deleted
No edit summary
(Added Wren)
Line 1,883: Line 1,883:
€ 20AC E2 82 AC 20AC
€ 20AC E2 82 AC 20AC
? 1D11E F0 9D 84 9E 1D11E
? 1D11E F0 9D 84 9E 1D11E
</pre>

=={{header|Wren}}==
The utf8_decode function was translated from the Go entry.
<lang ecmascript>import "/fmt" for Fmt

var utf8_encode = Fn.new { |cp| String.fromCodePoint(cp).bytes.toList }

var utf8_decode = Fn.new { |b|
var mbMask = 0x3f // non-first bytes start 10 and carry 6 bits of data
var b0 = b[0]
if (b0 < 0x80) {
return b0
} else if (b0 < 0xe0) {
var b2Mask = 0x1f // first byte of a 2-byte encoding starts 110 and carries 5 bits of data
return (b0 & b2Mask) << 6 | (b[1] & mbMask)
} else if (b0 < 0xf0) {
var b3Mask = 0x0f // first byte of a 3-byte encoding starts 1110 and carries 4 bits of data
return (b0 & b3Mask) << 12 | (b[1] & mbMask) << 6 | (b[2] & mbMask)
} else {
var b4Mask = 0x07 // first byte of a 4-byte encoding starts 11110 and carries 3 bits of data
return (b0 & b4Mask) << 18 | (b[1] & mbMask) << 12 | (b[2] & mbMask) << 6 | (b[3] & mbMask)
}
}

var tests = [
["LATIN CAPITAL LETTER A", 0x41],
["LATIN SMALL LETTER O WITH DIAERESIS", 0xf6],
["CYRILLIC CAPITAL LETTER ZHE", 0x416],
["EURO SIGN", 0x20ac],
["MUSICAL SYMBOL G CLEF", 0x1d11e]
]

System.print("Character Name Unicode UTF-8 encoding (hex)")
System.print("---------------------------------------------------------------------------------")

for (test in tests) {
var cp = test[1]
var bytes = utf8_encode.call(cp)
var utf8 = bytes.map { |b| Fmt.Xz(2, b) }.join(" ")
var cp2 = utf8_decode.call(bytes)
var uni = String.fromCodePoint(cp2)
System.print("%(Fmt.s(-11, uni)) %(Fmt.s(-37, test[0])) U+%(Fmt.s(-8, Fmt.Xz(4, cp2))) %(utf8)")
}</lang>

{{out}}
<pre>
Character Name Unicode UTF-8 encoding (hex)
---------------------------------------------------------------------------------
A LATIN CAPITAL LETTER A U+0041 41
ö LATIN SMALL LETTER O WITH DIAERESIS U+00F6 C3 B6
Ж CYRILLIC CAPITAL LETTER ZHE U+0416 D0 96
€ EURO SIGN U+20AC E2 82 AC
𝄞 MUSICAL SYMBOL G CLEF U+1D11E F0 9D 84 9E
</pre>
</pre>