UTF-8 encode and decode: Difference between revisions

(added langur language example)
Line 1,241:
#1D11E -> {#F0,#9D,#84,#9E} -> {#1D11E}
</pre>
 
<lang java>import java.nio.charset.StandardCharsets;
 
Integer[] code_points = {0x0041, 0x00F6, 0x0416, 0x20AC, 0x1D11E};
 
void setup() {
size(850, 230);
background(255);
fill(0);
textSize(16);
int tel_1 = 80;
int tel_2 = 50;
text("Char Name Unicode UTF-8 (encoding) Decoded", 40, 40);
for (int cp : code_points) {
byte[] encoded = new String(new int[]{cp}, 0, 1).getBytes(StandardCharsets.UTF_8);
for (byte b : encoded) {
text(hex(b), tel_2+530, tel_1);
tel_2 += 30;
}
text(char(cp), 50, tel_1);
text(Character.getName(cp), 100, tel_1);
String unicode = hex(cp);
while (unicode.length() > 4 && unicode.indexOf("0") == 0) unicode = unicode.substring(1);
text("U+"+unicode, 450, tel_1);
Character decoded = char(new String(encoded, StandardCharsets.UTF_8).codePointAt(0));
text(decoded, 750, tel_1);
tel_1 += 30; tel_2 = 50;
}
}</lang>
 
 
 
=={{header|PureBasic}}==
47

edits