UTF-8 encode and decode: Difference between revisions

Content added Content deleted
(Realize in F#)
(+perl)
Line 785: Line 785:
<pre>{65, 195, 182, 208, 150, 226, 130, 172}
<pre>{65, 195, 182, 208, 150, 226, 130, 172}
{65, 246, 1046, 8364}
{65, 246, 1046, 8364}
</pre>

=={{header|Perl}}==
<lang perl>#!/usr/bin/perl
# Print a table of sample characters showing, for each one, the character
# itself, its Unicode name, its code point in hex, and the octets of its
# UTF-8 encoding in hex.
use strict;
use warnings;
use Unicode::UCD 'charinfo';         # look up the Unicode name of a code point
use utf8;                            # this source file contains non-ASCII literals
binmode STDOUT, ":encoding(UTF-8)";  # encode non-ASCII characters when printing

# Code points of the sample characters. Under "use utf8" every qw// item
# below is a single character, so ord yields its code point.
my @code_points  = map { ord } qw/A ö Ж € 𝄞/;
my $print_format = '%5s %-35s';      # shared by the header row and the data rows

printf "$print_format %8s %s\n", 'char', 'name', 'unicode', 'utf-8 encoding';

# The iteration exists only for its printing side effects, so plain for
# loops with named variables are used instead of building throw-away lists.
for my $code_point (@code_points) {
    my $name = charinfo($code_point)->{'name'};   # Unicode character name
    printf "$print_format %06x ", chr($code_point), lc($name), $code_point;

    my $utf8 = chr($code_point);
    utf8::encode($utf8);             # in place: character string -> UTF-8 octets

    # After encoding, each element of the split is one octet.
    for my $octet (split //, $utf8) {
        printf " %x", ord($octet);
    }
    print "\n";
}</lang>

{{out}}
<pre>
 char name                                 unicode utf-8 encoding
    A latin capital letter a              000041  41
    ö latin small letter o with diaeresis 0000f6  c3 b6
    Ж cyrillic capital letter zhe         000416  d0 96
    € euro sign                           0020ac  e2 82 ac
    𝄞 musical symbol g clef               01d11e  f0 9d 84 9e
</pre>
</pre>