UTF-8 encode and decode: Difference between revisions

Content added | Content deleted
(→‎{{header|Python}}: Uses "name" directly.)
(→‎{{header|Java}}: remove duplicate code, remove antique exception)
Line 344: Line 344:
=={{header|Java}}==
=={{header|Java}}==
{{works with|Java|7+}}
{{works with|Java|7+}}
<lang java>import java.util.Formatter;
<lang java>import java.nio.charset.StandardCharsets;
import java.io.UnsupportedEncodingException;
import java.util.Formatter;


public class UTF8EncodeDecode {
public class UTF8EncodeDecode {

public static byte[] utf8encode(int codepoint) throws UnsupportedEncodingException {
return new String(new int[]{codepoint}, 0, 1).getBytes("UTF-8");
public static byte[] utf8encode(int codepoint) {
return new String(new int[]{codepoint}, 0, 1).getBytes(StandardCharsets.UTF_8);
}
}

public static int utf8decode(byte[] bytes) throws UnsupportedEncodingException {
public static int utf8decode(byte[] bytes) {
return new String(bytes, "UTF-8").codePointAt(0);
return new String(bytes, StandardCharsets.UTF_8).codePointAt(0);
}
}

public static final void main(String[] args) throws UnsupportedEncodingException {
public static void main(String[] args) {
System.out.printf("%-7s %-43s %7s\t%s\t%7s\n", "Char", "Name", "Unicode", "UTF-8 encoded", "Decoded");
System.out.printf("%-7s %-43s %7s\t%s\t%7s%n",
for (int codepoint : new int[]{0x0041, 0x00F6, 0x0416, 0x20AC, 0x1D11E}) {
"Char", "Name", "Unicode", "UTF-8 encoded", "Decoded");
byte[] encoded = utf8encode(codepoint);

Formatter formatter = new Formatter();
for (byte b : encoded) {
formatter.format("%02X ", b);
}
String encodedHex = formatter.toString();
int decoded = utf8decode(encoded);
System.out.printf("%-7c %-43s U+%04X\t%-12s\tU+%04X\n", codepoint, Character.getName(codepoint), codepoint, encodedHex, decoded);
}
}
public static final void main(String[] args) throws UnsupportedEncodingException {
System.out.printf("%-7s %-43s %7s\t%s\t%s\n", "Char", "Name", "Unicode", "UTF-8 encoded", "Decoded");
for (int codepoint : new int[]{0x0041, 0x00F6, 0x0416, 0x20AC, 0x1D11E}) {
for (int codepoint : new int[]{0x0041, 0x00F6, 0x0416, 0x20AC, 0x1D11E}) {
byte[] encoded = utf8encode(codepoint);
byte[] encoded = utf8encode(codepoint);
Line 377: Line 369:
String encodedHex = formatter.toString();
String encodedHex = formatter.toString();
int decoded = utf8decode(encoded);
int decoded = utf8decode(encoded);
System.out.printf("%-7c %-43s U+%04X\t%-12s\t%c\n", codepoint, Character.getName(codepoint), codepoint, encodedHex, decoded);
System.out.printf("%-7c %-43s U+%04X\t%-12s\tU+%04X%n",
codepoint, Character.getName(codepoint), codepoint, encodedHex, decoded);
}
}
}
}