Read a file character by character/UTF8: Difference between revisions

no edit summary
m (Added eof handling)
No edit summary
Line 325:
}</lang>
 
 
=={{header|Lua}}==
{{works with|Lua|5.3}}
<lang Lua>
-- Return whether the given string is exactly one ASCII byte (0x00-0x7F).
-- @param str  string to test (read_char passes a single byte)
-- @return the byte itself (truthy) when it is ASCII, otherwise nil
function is_ascii (str)
  -- Anchored at both ends so that a longer string which merely contains an
  -- ASCII byte somewhere is not mistaken for a single ASCII character.
  return string.match(str, "^[\0-\x7F]$")
end
 
-- Return whether the given string is exactly one byte that can start a
-- multibyte UTF-8 sequence (0xC2-0xF4).
-- @param str  string to test (read_char passes a single byte)
-- @return the byte itself (truthy) when it is a lead byte, otherwise nil
function is_init (str)
  -- Anchored at both ends so that a whole multibyte character (lead byte
  -- plus continuation bytes) is not reported as being a lead byte.
  return string.match(str, "^[\xC2-\xF4]$")
end
 
-- Return whether the given string is exactly one UTF-8 continuation byte
-- (0x80-0xBF).
-- @param str  string to test (read_char passes a single byte)
-- @return the byte itself (truthy) when it is a continuation byte,
--         otherwise nil
function is_cont (str)
  -- Anchored at both ends so that a whole multibyte character, which always
  -- contains continuation bytes, is not reported as being one.
  return string.match(str, "^[\x80-\xBF]$")
end
 
-- Read the next UTF-8 character from an open file.
--
-- Bytes are read one at a time. An ASCII byte is returned immediately. A
-- lead byte starts a multibyte character, which is extended by each
-- continuation byte that follows; it is complete when the next ASCII or
-- lead byte is seen (that byte is then pushed back) or when the file ends.
--
-- @param file  file handle opened for reading; it must support seeking,
--              because one byte of look-ahead is pushed back with seek
-- @return the next character as a string, or nil at end of file
-- Raises an error on a byte that cannot occur in UTF-8, on a continuation
-- byte that does not follow a lead byte, or when the look-ahead byte
-- cannot be pushed back.
function read_char (file)
  local multibyte -- bytes of the multibyte character collected so far

  for c in file:lines(1) do
    if is_cont(c) then
      if not multibyte then
        -- Without this check the concatenation below fails with an opaque
        -- "attempt to concatenate a nil value" error.
        error(string.format(
          "unexpected UTF-8 continuation byte 0x%02X", c:byte()))
      end
      multibyte = multibyte .. c
    elseif is_ascii(c) or is_init(c) then
      if multibyte then
        -- c belongs to the next character: un-read it and return the
        -- character that has just been completed. A failed seek would
        -- silently lose c, so it is reported instead.
        assert(file:seek("cur", -1))
        return multibyte
      elseif is_ascii(c) then
        return c
      end
      multibyte = c
    else
      error(string.format("invalid UTF-8 byte 0x%02X", c:byte()))
    end
  end

  -- End of file: hand back a multibyte character that was still being
  -- collected (nil when nothing is pending, which signals end of file).
  return multibyte
end
 
-- Test: write a sample UTF-8 line to "tmp.txt", then read it back one
-- character at a time with read_char, printing each character preceded by
-- a space.
function read_all ()
  local path = "tmp.txt"

  -- assert turns a failed open into an error carrying io.open's message,
  -- instead of a later "attempt to index a nil value".
  local testfile = assert(io.open(path, "w"))
  testfile:write("𝄞AöЖ€𝄞Ελληνικάy䮀成长汉\n")
  testfile:close()

  testfile = assert(io.open(path, "r"))
  local c = read_char(testfile)
  while c do
    io.write(" ", c)
    c = read_char(testfile)
  end
  -- Release the handle once the whole file has been consumed.
  testfile:close()
end
</lang>
{{out}}
𝄞 A ö Ж € 𝄞 Ε λ λ η ν ι κ ά y ä ® € 成 长 汉
=={{header|M2000 Interpreter}}==
From revision 27 of version 9.3 of the M2000 Environment, the Chinese character 长 is displayed in the console (as it is displayed in the editor).