UTF-8 encode and decode: Difference between revisions
Content added Content deleted
Alpha bravo (talk | contribs) (Added AutoHotkey) |
|||
Line 133: | Line 133: | ||
€ U+20AC E2 82 AC |
€ U+20AC E2 82 AC |
||
𝄞 U+1D11E F0 9D 84 9E |
𝄞 U+1D11E F0 9D 84 9E |
||
</pre> |
|||
=={{header|AutoHotkey}}== |
|||
<lang AutoHotkey>Encode_UTF(hex){
	; Encode a single Unicode code point as UTF-8.
	;   hex     - the code point, as a number or numeric string (e.g. 0x20AC)
	;   returns - "0x" followed by the UTF-8 bytes in upper-case hex, two digits
	;             per byte, lead byte first (e.g. "0xE282AC")
	; No validation is performed for surrogates or values above 0x10FFFF.

	; U+0000 is a valid code point encoded as the single byte 00. The size
	; table below would give it zero bytes and return a bare "0x".
	if (hex != "" && hex = 0)
		return "0x00"

	; Number of bytes needed for this code point (0 when hex is not a positive number).
	Bytes := hex>=0x10000 ? 4 : hex>=0x0800 ? 3 : hex>=0x0080 ? 2 : hex>=0x0001 ? 1 : 0
	; Marker bits of the lead byte, indexed by byte count:
	; 1 -> none, 2 -> 110xxxxx, 3 -> 1110xxxx, 4 -> 11110xxx
	Prefix := [0, 0xC0, 0xE0, 0xF0]
	UTFCode := ""

	; Work from the last byte to the first, consuming 6 payload bits per step.
	loop % Bytes {
		if (A_Index < Bytes)
			; continuation byte: 10xxxxxx (3F=00111111, 80=10000000)
			UTFCode := Format("{:02X}", (hex&0x3F) + 0x80) . UTFCode
		else
			; lead byte: the remaining high bits plus the length marker
			UTFCode := Format("{:02X}", hex + Prefix[Bytes]) . UTFCode
		hex := hex>>6
	}
	return "0x" UTFCode
}
|||
;---------------------------------------------------------------------------------------- |
|||
Decode_UTF(hex){
	; Decode one UTF-8 encoded character back to its Unicode code point.
	;   hex     - the UTF-8 bytes packed into one hex number, lead byte first
	;             (the format Encode_UTF returns, e.g. 0xE282AC)
	;   returns - "0x" followed by the code point in hex, as formatted by
	;             ConvertBase (e.g. "0x20ac")

	bin := ConvertBase(16, 2, hex)
	; The byte count comes from the encoded value itself: one byte per 8 binary
	; digits, rounded up because leading zero bits are not printed. The
	; code-point thresholds used by the encoder do not apply to encoded values.
	Bytes := (StrLen(bin) + 7) // 8
	Uni := ""

	; Peel bytes off the right-hand end and collect their payload bits.
	loop, % Bytes {
		B := SubStr(bin, -7)                    ; the last 8 binary digits
		if (Bytes > 1)
			; Multi-byte sequence: drop the marker bits, i.e. the leading run
			; of 1s and the single 0 that terminates it.
			B := LTrim(B, "1"), B := StrReplace(B, "0", "", , 1)
		bin := SubStr(bin, 1, StrLen(bin)-8)
		Uni := B . Uni
	}
	return "0x" ConvertBase(2, 16, Uni)
}
|||
;---------------------------------------------------------------------------------------- |
|||
; www.autohotkey.com/boards/viewtopic.php?f=6&t=3607#p18985 |
|||
ConvertBase(InputBase, OutputBase, number){
	; Convert the textual representation of an integer from one radix to another
	; using the C runtime in msvcrt.dll.
	;   InputBase  - radix used to parse `number`
	;   OutputBase - radix of the returned string
	;   number     - the integer as text
	;   returns    - the value rendered in OutputBase, as written by _i64tow/_i64toa

	; Pick the wide-character or ANSI variant to match the AutoHotkey build.
	static u := A_IsUnicode ? "_wcstoui64" : "_strtoui64"
	static v := A_IsUnicode ? "_i64tow"    : "_i64toa"

	; VarSetCapacity counts bytes, not characters. The longest result is 64
	; digits plus a terminator, i.e. 65 characters, each of which takes 2 bytes
	; on Unicode builds.
	VarSetCapacity(s, 65 * (A_IsUnicode ? 2 : 1), 0)
	value := DllCall("msvcrt.dll\" u, "Str", number, "UInt", 0, "UInt", InputBase, "CDECL Int64")
	DllCall("msvcrt.dll\" v, "Int64", value, "Str", s, "UInt", OutputBase, "CDECL")
	return s
}</lang>
|||
Examples:<lang AutoHotkey>data =
(comment
0x0041
0x00F6
0x0416
0x20AC
0x1D11E
)
; Build a tab-separated table: code point, its UTF-8 encoding, and the
; encoding decoded back again.
report := "unicode`t`tUTF`t`tunicode`n"
Loop, Parse, data, `n, `r
{
	encoded := Encode_UTF(A_LoopField)
	report .= A_LoopField . "`t`t" . encoded . "`t`t" . Decode_UTF(encoded) . "`n"
}
MsgBox % report
return</lang>
|||
{{out}} |
|||
<pre> |
|||
Unicode Encode_UTF Decode_UTF |
|||
0x0041 0x41 0x41 |
|||
0x00F6 0xC3B6 0xf6 |
|||
0x0416 0xD096 0x416 |
|||
0x20AC 0xE282AC 0x20ac |
|||
0x1D11E 0xF09D849E 0x1d11e |
|||
</pre> |
</pre> |
||