Read a file character by character/UTF8: Difference between revisions

Line 326:
 
=={{header|M2000 Interpreter}}==
from revision 27, version 9.3, of M2000 Environment, Chinese 长 letter displayed in console (as displayed in editor)
 
<lang M2000 Interpreter>
Module checkit {
Line 380 ⟶ 382:
}
checkit
</lang>
 
using document as final$
 
<lang M2000 Interpreter>
Module checkit {
\\ prepare a file
\\ Save.Doc and Append.Doc to file, Load.Doc and Merge.Doc from file
document a$
a$={First Line
Second line
Third Line
Ελληνικά Greek Letters
y䮀
成长汉
}
Save.Doc a$, "checkthis.txt", 2 ' 2 for UTF-8
b$="*"
document final$
buffer Clear bytes as byte*16
Buffer One as byte
Buffer Two as byte*2
Buffer Three as byte*3
Locale 1033
open "checkthis.txt" for input as #f
seek#f, 4 ' skip BOM
oldb$=""
While b$<>"" {
GetOneUtf8Char(&b$)
\\ if final$ is document then 10 and 13 if comes alone are new line
\\ so we need to throw 10 after the 13, so we have to use oldb$
if b$=chr$(10) then if oldb$=chr$(13) then oldb$="": continue
oldb$=b$
final$=b$ ' we use = for append to document
}
close #f
Report final$
Sub GetOneUtf8Char(&ch$)
ch$=""
if Eof(#f) then Exit Sub
Get #f, One
Return Bytes, 0:=Eval(one, 0)
local mrk=Eval(one, 0)
Try ok {
If Binary.And(mrk, 0xE0)=0xC0 then {
Get #f,one
Return Bytes, 1:=Eval$(one, 0,1)
ch$=Eval$(Bytes, 0, 2)
} Else.if Binary.And(mrk, 0xF0)=0xE0 then {
Get #f,two
Return Bytes, 1:=Eval$(two,0,2)
ch$=Eval$(Bytes, 0, 3)
} Else.if Binary.And(mrk, 0xF8)=0xF0 then {
Get #f,three
Return Bytes, 1:=Eval$(three, 0, 3)
ch$=Eval$(Bytes, 0, 4)
} Else ch$=Eval$(Bytes, 0, 1)
}
if Error or not ok then ch$="" : exit sub
ch$=left$(string$(ch$ as Utf8dec),1)
End Sub
}
checkit
 
</lang>
 
Anonymous user