UTF-8 encode and decode: Difference between revisions
Content added Content deleted
(added java) |
|||
Line 157: | Line 157: | ||
T |
T |
||
</lang> |
</lang> |
||
=={{header|Java}}== |
|||
{{works with|Java|7+}} |
|||
<lang java>import java.io.UnsupportedEncodingException; |
|||
import java.nio.charset.StandardCharsets; |
|||
import java.util.Formatter; |
|||
/**
 * Demonstrates UTF-8 encoding and decoding of individual Unicode code points.
 *
 * <p>For each sample code point the program prints the character itself, its
 * Unicode name, the code point, the UTF-8 bytes in hexadecimal, and the code
 * point recovered by decoding those bytes again.
 */
public class UTF8EncodeDecode {

    /**
     * Prints a header line followed by one table row per sample code point
     * to standard output.
     *
     * @param args command-line arguments; not used
     * @throws UnsupportedEncodingException no longer thrown by this method,
     *         because the charset is referenced as a constant instead of being
     *         looked up by name; the clause is kept so the signature is unchanged
     */
    public static final void main(String[] args) throws UnsupportedEncodingException {
        System.out.printf("%-7s %-43s %7s\t%s\t%7s\n", "Char", "Name", "Unicode", "UTF-8 encoded", "Decoded");

        for (int codepoint : new int[]{0x0041, 0x00F6, 0x0416, 0x20AC, 0x1D11E}) {
            String name = Character.getName(codepoint);

            // Build the string from the code point itself (not a char) so that
            // values above U+FFFF, such as U+1D11E, are represented correctly.
            String string = new String(new int[]{codepoint}, 0, 1);

            byte[] encoded = string.getBytes(StandardCharsets.UTF_8);

            // Formatter is Closeable; try-with-resources releases it once the
            // hex representation has been extracted.
            String encodedHex;
            try (Formatter formatter = new Formatter()) {
                for (byte b : encoded) {
                    formatter.format("%02X ", b);
                }
                encodedHex = formatter.toString();
            }

            // Round trip: decode the bytes and read back the first code point.
            int decoded = new String(encoded, StandardCharsets.UTF_8).codePointAt(0);

            System.out.printf("%-7s %-43s U+%04X\t%-12s\tU+%04X\n", string, name, codepoint, encodedHex, decoded);
        }
    }
}
|||
{{out}} |
|||
<pre> |
|||
Char Name Unicode UTF-8 encoded Decoded |
|||
A LATIN CAPITAL LETTER A U+0041 41 U+0041 |
|||
ö LATIN SMALL LETTER O WITH DIAERESIS U+00F6 C3 B6 U+00F6 |
|||
Ж CYRILLIC CAPITAL LETTER ZHE U+0416 D0 96 U+0416 |
|||
€ EURO SIGN U+20AC E2 82 AC U+20AC |
|||
𝄞 MUSICAL SYMBOL G CLEF U+1D11E F0 9D 84 9E U+1D11E |
|||
</pre> |
|||
=={{header|Perl 6}}== |
=={{header|Perl 6}}== |