UTF-8 encode and decode: Difference between revisions

no edit summary
(→‎{{header|C}}: rework to_utf8(), remove magic nums, and better main() loop)
No edit summary
Line 855:
€ 8364 e2 82 ac €
𝄞 119070 f0 9d 84 9e 𝄞
</pre>
 
=={{header|Lua}}==
{{works with|Lua|5.3}}
<lang Lua>
-- Encode a single code point as UTF-8.
-- codepoint: integer code point, passed straight to utf8.char (which raises
--            an error for values it cannot encode).
-- Returns the octets of the encoded sequence as separate integer values,
-- in order, one return value per byte.
function encode (codepoint)
  local encoded = utf8.char(codepoint)

  -- string.byte with the range 1..-1 yields every byte of the string as
  -- multiple return values, so no intermediate table or per-byte unpack
  -- is needed.
  return string.byte(encoded, 1, -1)
end
 
-- Rebuild a string from its individual octets.
-- ...: any number of integer octet values, in order.
-- Returns the string obtained by packing each value as one unsigned byte;
-- for the octets of a UTF-8 sequence this is the encoded character.
function decode (...)
  -- One "B" (unsigned byte) format item per argument received.
  local octet_count = select("#", ...)

  return string.pack(("B"):rep(octet_count), ...)
end
 
-- Exercise encode and decode on the sample code points.
-- For each code point, prints its numeric value, the octets returned by
-- encode, and the string decode rebuilds from those octets.
function test_encode_decode ()
  -- U+0041 "A", U+00F6 "ö", U+0416 "Ж", U+20AC "€", U+1D11E "𝄞"
  local codepoints = {0x41, 0xf6, 0x416, 0x20ac, 0x1d11e}

  for idx = 1, #codepoints do
    local cp = codepoints[idx]
    print("Char: ", cp)
    -- encode is the last argument, so all of its return values are printed.
    print("Encoding: ", encode(cp))
    print("Decoding: ", decode(encode(cp)))
  end
end
</lang>
{{out}}
<pre>
Char: 65
Encoding: 65
Decoding: A
Char: 246
Encoding: 195 182
Decoding: ö
Char: 1046
Encoding: 208 150
Decoding: Ж
Char: 8364
Encoding: 226 130 172
Decoding: €
Char: 119070
Encoding: 240 157 132 158
Decoding: 𝄞
</pre>