UTF-8 encode and decode: Difference between revisions

Content added Content deleted
(added java)
m (→‎{{header|Java}}: refactor out encode and decode functions)
Line 164: Line 164:


public class UTF8EncodeDecode {
public class UTF8EncodeDecode {
/**
 * Encodes a single Unicode code point as UTF-8.
 *
 * <p>The code point is first wrapped in a one-code-point {@link String},
 * which is then converted to bytes using the charset named "UTF-8".
 *
 * @param codepoint the Unicode code point to encode
 * @return the bytes produced by encoding the code point with "UTF-8"
 * @throws UnsupportedEncodingException if the charset name "UTF-8" is not supported
 */
public static byte[] utf8encode(int codepoint) throws UnsupportedEncodingException {
    int[] singleCodePoint = {codepoint};
    String text = new String(singleCodePoint, 0, singleCodePoint.length);
    return text.getBytes("UTF-8");
}
/**
 * Decodes UTF-8 bytes and returns the first code point of the result.
 *
 * <p>The whole array is decoded into a {@link String} using the charset named
 * "UTF-8"; only the code point at index 0 of that string is returned, so any
 * further characters in the input are ignored.
 *
 * @param bytes the UTF-8 encoded bytes; must decode to at least one character
 * @return the code point at the start of the decoded string
 * @throws UnsupportedEncodingException if the charset name "UTF-8" is not supported
 */
public static int utf8decode(byte[] bytes) throws UnsupportedEncodingException {
    String decodedText = new String(bytes, "UTF-8");
    return decodedText.codePointAt(0);
}
public static final void main(String[] args) throws UnsupportedEncodingException {
public static final void main(String[] args) throws UnsupportedEncodingException {
System.out.printf("%-7s %-43s %7s\t%s\t%7s\n", "Char", "Name", "Unicode", "UTF-8 encoded", "Decoded");
System.out.printf("%-7s %-43s %7s\t%s\t%7s\n", "Char", "Name", "Unicode", "UTF-8 encoded", "Decoded");
Line 169: Line 175:
String name = Character.getName(codepoint);
String name = Character.getName(codepoint);
String string = new String(new int[]{codepoint}, 0, 1);
String string = new String(new int[]{codepoint}, 0, 1);
byte[] encoded = string.getBytes("UTF-8");
byte[] encoded = utf8encode(codepoint);
Formatter formatter = new Formatter();
Formatter formatter = new Formatter();
for (byte b : encoded) {
for (byte b : encoded) {
Line 175: Line 181:
}
}
String encodedHex = formatter.toString();
String encodedHex = formatter.toString();
int decoded = new String(encoded, "UTF-8").codePointAt(0);
int decoded = utf8decode(encoded);
System.out.printf("%-7s %-43s U+%04X\t%-12s\tU+%04X\n", string, name, codepoint, encodedHex, decoded);
System.out.printf("%-7s %-43s U+%04X\t%-12s\tU+%04X\n", string, name, codepoint, encodedHex, decoded);
}
}