String length

=={{header|360 Assembly}}==
Assembler 360 uses EBCDIC coding, so one character is one byte.
The L' attribute can be seen as the length function for Assembler 360.
<langsyntaxhighlight lang="360asm">* String length 06/07/2016
LEN CSECT
USING LEN,15 base register
D DS D double word 8
PG DS CL12 string 12
END LEN</syntaxhighlight>
{{out}}
<pre>
</pre>

=={{header|6502 Assembly}}==
{{trans|Z80 Assembly}}
Most 6502-based computers predate Unicode, so only byte length will be demonstrated for now.
<langsyntaxhighlight lang="6502asm">GetStringLength: ;$00 and $01 make up the pointer to the string's base address.
;(Of course, any two consecutive zero-page memory locations can fulfill this role.)
LDY #0 ;Y is both the index into the string and the length counter.
 
exit:
RTS ;string length is now loaded into Y.</syntaxhighlight>
 
=={{header|68000 Assembly}}==
===Byte Length (ASCII)===
<langsyntaxhighlight lang="68000devpac">GetStringLength:
; INPUT: A3 = BASE ADDRESS OF STRING
; RETURNS LENGTH IN D1 (MEASURED IN BYTES)
 
done:
RTS</syntaxhighlight>
 
=={{header|8086 Assembly}}==
{{trans|68000 Assembly}}
===Byte Length===
<syntaxhighlight lang="asm">;INPUT: DS:SI = BASE ADDR. OF STRING
;TYPICALLY, MS-DOS USES $ TO TERMINATE STRINGS.
GetStringLength:
xor cx,cx ;this takes fewer bytes to encode than "mov cx,0"
cld ;makes string functions post-inc rather than post-dec.
 
loop_GetStringLength:
lodsb ;equivalent of "mov al,[ds:si],inc si" except this doesn't alter the flags.
cmp al,'$'
je done ;if equal, we're finished.
inc cx ;add 1 to length counter. A null string will have a length of zero.
jmp loop_GetStringLength
 
done:
ret</syntaxhighlight>
 
=={{header|4D}}==
===Byte Length===
<langsyntaxhighlight lang="4d">$length:=Length("Hello, world!")</langsyntaxhighlight>
 
=={{header|AArch64 Assembly}}==
{{works with|as|Raspberry Pi 3B version Buster 64 bits}}
<syntaxhighlight lang="aarch64 assembly">
/* ARM assembly AARCH64 Raspberry PI 3B */
/* program stringLength64.s */
/* for this file see task include a file in language AArch64 assembly */
.include "../includeARM64.inc"
</syntaxhighlight>
=={{header|Action!}}==
<syntaxhighlight lang="action!">PROC Test(CHAR ARRAY s)
PrintF("Length of ""%S"" is %B%E",s,s(0))
RETURN
 
PROC Main()
Test("Hello world!")
Test("")
RETURN</syntaxhighlight>
{{out}}
[https://gitlab.com/amarok8bit/action-rosetta-code/-/raw/master/images/String_length.png Screenshot from Atari 8-bit computer]
<pre>
Length of "Hello world!" is 12
Length of "" is 0
</pre>
 
=={{header|ActionScript}}==
===Byte length===
This uses UTF-8 encoding. For other encodings, the ByteArray's <code>writeMultiByte()</code> method can be used.
<syntaxhighlight lang="actionscript">
package {
}
</syntaxhighlight>
 
===Character Length===
<langsyntaxhighlight lang="actionscript">
var s1:String = "The quick brown fox jumps over the lazy dog";
var s2:String = "𝔘𝔫𝔦𝔠𝔬𝔡𝔢";
var s3:String = "José";
trace(s1.length, s2.length, s3.length); // 43, 14, 4
</syntaxhighlight>
 
=={{header|Ada}}==
{{works with|GCC|4.1.2}}
===Byte Length===
<langsyntaxhighlight lang="ada">Str : String := "Hello World";
Length : constant Natural := Str'Size / 8;</langsyntaxhighlight>
The 'Size attribute returns the size of an object in bits. Provided that a "byte" is understood to be an octet of bits, the length in "bytes" is 'Size divided by 8. Note that a byte is not necessarily the machine storage unit. To make the program portable, System.Storage_Unit should be used instead of the "magic number" 8; it yields the number of bits in a storage unit on the current machine. Further, the length of a string object is not the length of what the string contains, in whatever units it is measured. A string object may carry a "dope" holding the array bounds, and the object size can even be 0 if the compiler optimizes the object away. So in most cases "byte length" makes no sense in Ada.
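A minimal sketch of the portable form described above, using System.Storage_Unit instead of the literal 8 (the procedure and names are illustrative only):
<syntaxhighlight lang="ada">with System;

procedure Show_Byte_Length is
   Str             : String := "Hello World";
   -- storage units occupied by the object, portable across machines
   Length_In_Units : constant Natural := Str'Size / System.Storage_Unit;
begin
   null;
end Show_Byte_Length;</syntaxhighlight>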
 
===Character Length===
<langsyntaxhighlight lang="ada">Latin_1_Str : String := "Hello World";
UCS_16_Str : Wide_String := "Hello World";
Unicode_Str : Wide_Wide_String := "Hello World";
Latin_1_Length : constant Natural := Latin_1_Str'Length;
UCS_16_Length : constant Natural := UCS_16_Str'Length;
Unicode_Length : constant Natural := Unicode_Str'Length;</syntaxhighlight>
The attribute 'Length yields the number of elements of an [[array]]. Since strings in Ada are arrays of characters, 'Length is the string length. Ada supports strings of [[Latin-1]], [[UCS-16]] and full [[Unicode]] characters. In the example above character length of all three strings is 11. The length of the objects in bits will differ.
 
=={{header|Aime}}==
===Byte Length===
<langsyntaxhighlight lang="aime">length("Hello, World!")</langsyntaxhighlight>
or
<langsyntaxhighlight lang="aime">~"Hello, World!"</langsyntaxhighlight>
 
=={{header|ALGOL 68}}==
===Bits and Bytes Length===
<langsyntaxhighlight lang="algol68">BITS bits := bits pack((TRUE, TRUE, FALSE, FALSE)); # packed array of BOOL #
BYTES bytes := bytes pack("Hello, world"); # packed array of CHAR #
print((
"bits width:", bits width, ", max bits: ", max bits, ", bits:", bits, new line,
"bytes width: ",bytes width, ", UPB:",UPB STRING(bytes), ", string:", STRING(bytes),"!", new line
))</syntaxhighlight>
Output:
<pre>
</pre>
===Character Length===
<langsyntaxhighlight lang="algol68">STRING str := "hello, world";
INT length := UPB str;
printf(($"Length of """g""" is "g(3)l$,str,length));
printf(($l"STRINGS can start at -1, in which case LWB must be used:"l$));
STRING s := "abcd"[@-1];
print(("s:",s, ", LWB:", LWB s, ", UPB:",UPB s, ", LEN:",UPB s - LWB s + 1))</langsyntaxhighlight>
Output:
<pre>
</pre>
 
=={{header|Apex}}==
<syntaxhighlight lang="apex">
String myString = 'abcd';
System.debug('Size of String', myString.length());
</syntaxhighlight>
 
=={{header|AppleScript}}==
===Byte Length===
<langsyntaxhighlight lang="applescript">count of "Hello World"</langsyntaxhighlight>
Mac OS X 10.5 (Leopard) includes AppleScript 2.0 which uses only Unicode (UTF-16) character strings.
This example has been tested on OSX 10.8.5. Added a combining char for testing.
<langsyntaxhighlight lang="applescript">
set inString to "Hello é̦世界"
set byteCount to 0
return 1
end if
end doit</syntaxhighlight>
 
===Character Length===
<langsyntaxhighlight lang="applescript">count of "Hello World"</langsyntaxhighlight>
Or:
<langsyntaxhighlight lang="applescript">count "Hello World"</langsyntaxhighlight>
 
=={{header|Applesoft BASIC}}==
<syntaxhighlight lang="applesoftbasic">? LEN("HELLO, WORLD!")</syntaxhighlight>
=={{header|ARM Assembly}}==
{{works with|as|Raspberry Pi}}
<syntaxhighlight lang="arm assembly">
/* ARM assembly Raspberry PI */
/* program stringLength.s */
/***************************************************/
.include "../affichage.inc"
</syntaxhighlight>
<pre>
møøse€
</pre>
===Character Length===
 
<langsyntaxhighlight lang="rebol">str: "Hello World"
 
print ["length =" size str]</langsyntaxhighlight>
 
{{out}}
=={{header|AutoHotkey}}==
===Character Length===
<langsyntaxhighlight AutoHotkeylang="autohotkey">Msgbox % StrLen("Hello World")</langsyntaxhighlight>
Or:
<langsyntaxhighlight AutoHotkeylang="autohotkey">String := "Hello World"
StringLen, Length, String
Msgbox % Length</syntaxhighlight>
 
=={{header|Avail}}==
===Character Length===
Avail represents strings as a tuple of characters, with each character representing a single code point.
<langsyntaxhighlight Availlang="avail">|"møøse"|</langsyntaxhighlight>
===Byte Length===
A UTF-8 byte length can be acquired with the standard library's UTF-8 encoder.
<langsyntaxhighlight Availlang="avail">nonBMPString ::= "𝔘𝔫𝔦𝔠𝔬𝔡𝔢";
encoder ::= a UTF8 encoder;
bytes ::= encoder process nonBMPString;
 
// or, as a one-liner
|a UTF8 encoder process "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"|</syntaxhighlight>
 
=={{header|AWK}}==
===Byte Length===
From within any code block:
<langsyntaxhighlight lang="awk">w=length("Hello, world!") # static string example
x=length("Hello," s " world!") # dynamic string example
y=length($1) # input field example
z=length(s) # variable name example</syntaxhighlight>
Ad hoc program from command line:
<pre> echo "Hello, wørld!" | awk '{print length($0)}' # 14</pre>
From executable script: (prints for every line arriving on stdin)
<langsyntaxhighlight lang="awk">#!/usr/bin/awk -f
{print"The length of this line is "length($0)}</langsyntaxhighlight>
 
=={{header|Axe}}==
 
===Byte Length===
<langsyntaxhighlight lang="axe">"HELLO, WORLD"→Str1
Disp length(Str1)▶Dec,i</syntaxhighlight>
 
=={{header|BaCon}}==
BaCon has full native support for UTF-8 encoding.
<langsyntaxhighlight lang="qbasic">PRINT "Bytelen of 'hello': ", LEN("hello")
PRINT "Charlen of 'hello': ", ULEN("hello")
 
Line 529 ⟶ 565:
 
PRINT "Bytelen of '𝔘𝔫𝔦𝔠𝔬𝔡𝔢': ", LEN("𝔘𝔫𝔦𝔠𝔬𝔡𝔢")
PRINT "Charlen of '𝔘𝔫𝔦𝔠𝔬𝔡𝔢': ", ULEN("𝔘𝔫𝔦𝔠𝔬𝔡𝔢")</langsyntaxhighlight>
{{out}}
<pre>
</pre>

=={{header|BASIC}}==
===Character Length===
{{works with|QBasic}}
 
{{works with|Liberty BASIC}}
 
{{works with|PowerBASIC|PB/CC, PB/DOS}}
 
BASIC only supports single-byte characters. The character "ø" is converted to "°" for printing to the console and length functions, but will still output to a file as "ø".
<langsyntaxhighlight lang="qbasic"> INPUT a$
PRINT LEN(a$)</syntaxhighlight>
 
==={{header|ANSI BASIC}}===
The ANSI BASIC needs line numbers.
<syntaxhighlight lang="basic">
10 INPUT A$
20 PRINT LEN(A$)
</syntaxhighlight>
 
==={{header|Applesoft BASIC}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|BASIC256}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|Chipmunk Basic}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|MSX Basic}}===
{{works with|MSX BASIC|any}}
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|Quite BASIC}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|True BASIC}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|Yabasic}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|ZX Spectrum Basic}}===
The ZX Spectrum needs line numbers:
 
<langsyntaxhighlight lang="zxbasic">10 INPUT a$
20 PRINT LEN a$</syntaxhighlight>
 
However, it's not quite as trivial as this.
Stripping out all entries in the string with codes in the lower 32 will get rid of colour control codes. The character length of a token is not a simple thing to determine, so this version strips them out too by eliminating anything above CHR$ 164 (the last UDG). A 91-entry DATA list of token lengths might be the next step.
 
<langsyntaxhighlight lang="zxbasic">10 INPUT a$
20 LET b$=""
30 FOR x=1 TO LEN a$
Line 573 ⟶ 636:
60 LET b$=b$+a$(k)
70 NEXT x
80 PRINT LEN b$</syntaxhighlight>
 
====Grapheme length====
Alternatively, the string might include control codes for backspacing and overwriting;
 
<langsyntaxhighlight lang="zxbasic">10 LET a$=CHR$ 111+CHR$ 8+CHR$ 21+CHR$ 1+CHR$ 34</langsyntaxhighlight>
will produce an "o" character overprinted with a quotation mark, resulting in a "passable" impression of an umlaut. The above code will reduce this to two characters when the actual printed length is one (byte length is of course five). The other possible workaround is to print the string and calculate the character length based on the resultant change in screen position. (This will only work for a string with a character length that actually fits on the screen, so below about 670.)
 
<langsyntaxhighlight lang="zxbasic">10 INPUT a$
20 CLS
30 PRINT a$;
40 LET x=PEEK 23688: LET y=PEEK 23689
50 PRINT CHR$ 13;33-x+32*(24-y)</syntaxhighlight>
 
==={{header|Commodore BASIC}}===
Commodore BASIC needs line numbers too, and can't use mixed case: in mixed-case mode everything must be typed as lower-case letters, while the default mode is UPPERCASE plus graphics characters, so everything appears as upper-case characters.
 
<langsyntaxhighlight lang="basic">10 INPUT A$
20 PRINT LEN(A$)</syntaxhighlight>
 
==={{header|IS-BASIC}}===
<syntaxhighlight lang="is-basic">100 INPUT PROMPT "String: ":TX$
110 PRINT LEN(TX$)</syntaxhighlight>
 
==={{header|QB64}}===
In QB64 a String variable is assumed to be UTF-8 and thus the byte length is the same as character length. That said there are methods to map UTF-16 and UTF-32 to the CP437 (ASCII) table (see, _MAPUNICODE).
<syntaxhighlight lang="qb64">Print Len(s$)</syntaxhighlight>
 
=={{header|Batch File}}==
===Byte Length===
<langsyntaxhighlight lang="dos">@echo off
setlocal enabledelayedexpansion
call :length %1 res
set str=!str:~1!
set /a cnt = cnt + 1
goto loop</syntaxhighlight>
 
=={{header|BBC BASIC}}==
===Character Length===
<langsyntaxhighlight lang="bbcbasic"> INPUT text$
PRINT LEN(text$)</syntaxhighlight>
===Byte Length===
{{works with|BBC BASIC for Windows}}
<langsyntaxhighlight lang="bbcbasic"> CP_ACP = 0
CP_UTF8 = &FDE9
PRINT "Length in bytes (ANSI encoding) = " ; LEN(textA$)
PRINT "Length in bytes (UTF-16 encoding) = " ; 2*(nW%-1)
PRINT "Length in bytes (UTF-8 encoding) = " ; LEN($$!^textU$)</langsyntaxhighlight>
Output:
<pre>Length in bytes (ANSI encoding) = 5
Length in bytes (UTF-16 encoding) = 10
Length in bytes (UTF-8 encoding) = 7</pre>
 
=={{header|BQN}}==
Strings are arrays of characters in BQN.
===Byte Length===
Each character is converted to its codepoint, and compared with the respective UTF boundary.
<syntaxhighlight lang="bqn">BLen ← {(≠𝕩)+´⥊𝕩≥⌜@+128‿2048‿65536}</syntaxhighlight>
 
===Character Length===
Character length is just array length.
<syntaxhighlight lang="bqn">Len ← ≠</syntaxhighlight>
 
'''Output'''
<syntaxhighlight lang="bqn">•Show >(⊢⋈⊸∾Len⋈BLen)¨⟨
"møøse"
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
"J̲o̲s̲é̲"
⟩</syntaxhighlight>
<syntaxhighlight lang="text">┌─
╵ "møøse" 5 7
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢" 7 28
"J̲o̲s̲é̲" 8 13
┘</syntaxhighlight>
 
=={{header|Bracmat}}==
The solutions work with UTF-8 encoded strings.
===Byte Length===
<langsyntaxhighlight lang="bracmat">(ByteLength=
length
. @(!arg:? [?length)
);
 
out$ByteLength$𝔘𝔫𝔦𝔠𝔬𝔡𝔢</syntaxhighlight>
Answer:
<pre>28</pre>
===Character Length===
<langsyntaxhighlight lang="bracmat">(CharacterLength=
length c
. 0:?length
);
 
out$CharacterLength$𝔘𝔫𝔦𝔠𝔬𝔡𝔢</syntaxhighlight>
Answer:
<pre>7</pre>
An improved version scans the input string character wise, not byte wise. Thus many string positions that are deemed not to be possible starting positions of UTF-8 are not even tried. The patterns <code>[!p</code> and <code>[?p</code> implement a ratchet mechanism. <code>[!p</code> indicates the start of a character and <code>[?p</code> remembers the end of the character, which becomes the start position of the next byte.
<langsyntaxhighlight lang="bracmat">(CharacterLength=
length c p
. 0:?length:?p
)
| !length
);</syntaxhighlight>
 
Later versions of Bracmat have the built in function <code>vap</code> that "vaporises" a string into "atoms". If the string is UTF-8 encoded, then each "atom" is one UTF-8 character, so the length of the list of atoms is the character length of the input string. The first argument to the <code>vap</code> function is a function that will be applied to every UTF-8 encoded character in the input string. The outcomes of these function calls are the elements in the resulting list. In the solution below we choose an anonymous function <code>(=.!arg)</code> that just returns the characters themselves.
<langsyntaxhighlight lang="bracmat">(CharacterLength=
length
. vap$((=.!arg).!arg):? [?length&!length
);</syntaxhighlight>
 
=={{header|Brainf***}}==
===Byte Length===
There are several limitations Brainf*** has that influence this solution:
*Brainf*** only supports 8-bit numbers in canonical implementations, so it only supports strings of at most 255 bytes.
*The rule of thumb in Brainf*** when reading a string is to always store exactly one byte, no matter how many bytes a character occupies. That's why this solution is strictly a byte-length one.
*There is no way to pass arguments to Brainf*** other than as input. That's why this program reads a string and outputs the number of bytes in it.
 
[https://esolangs.org/wiki/Brainfuck_algorithms#Print_value_of_cell_x_as_number_for_ANY_sized_cell_.28eg_8bit.2C_100000bit_etc.29 This algorithm] is used to print the number from memory.
 
<syntaxhighlight lang="bf">
,----- ----- [>,----- -----] ; read a text until a newline
<[+++++ +++++<] ; restore the original text
>[[-]<[>+<-]>+>]< ; add one to the accumulator cell for every byte read
;; from esolang dot org
>[-]>[-]+>[-]+< [>[-<-<<[->+>+<<]>[-<+>]>>]++++++++++>[-]+>[-]>[-]> [-]<<<<<[->-[>+>>]>[[-<+>]+>+>>]<<<<<]>>-[-<<+>>]<[-]++++++++ [-<++++++>]>>[-<<+>>]<<] <[.[-]<]
[-]+++++ +++++. ; print newline
</syntaxhighlight>
 
=={{header|C}}==
Line 703 ⟶ 806:
 
{{works with|GCC|3.3.3}}
<langsyntaxhighlight lang="c">#include <string.h>
 
int main(void)
return 0;
}</syntaxhighlight>
or by hand:
 
<langsyntaxhighlight lang="c">int main(void)
{
const char *string = "Hello, world!";
Line 723 ⟶ 826:
return 0;
}</syntaxhighlight>
 
or (for arrays of char only)
 
<langsyntaxhighlight lang="c">#include <stdlib.h>
 
int main(void)
return 0;
}</syntaxhighlight>
 
===Character Length===
For wide character strings (usually Unicode uniform-width encodings such as UCS-2 or UCS-4):
 
<langsyntaxhighlight lang="c">#include <stdio.h>
#include <wchar.h>
 
return 0;
}</syntaxhighlight>
 
===Dealing with raw multibyte string===
Following code is written in UTF-8, and environment locale is assumed to be UTF-8 too. Note that "møøse" is here directly written in the source code for clarity, which is not a good idea in general. <code>mbstowcs()</code>, when passed NULL as the first argument, effectively counts the number of chars in given string under current locale.
<langsyntaxhighlight lang="c">#include <stdio.h>
#include <stdlib.h>
#include <locale.h>
 
return 0;
}</syntaxhighlight>
Output:
<pre>bytes: 7
chars: 5</pre>
 
{{works with|C sharp|C #|1.0+}}
===Character Length===
<langsyntaxhighlight lang="csharp">string s = "Hello, world!";
int characterLength = s.Length;</syntaxhighlight>
 
===Byte Length===
Strings in .NET are stored in Unicode.
<langsyntaxhighlight lang="csharp">using System.Text;
 
string s = "Hello, world!";
int byteLength = Encoding.Unicode.GetByteCount(s);</syntaxhighlight>
To get the number of bytes that the string would require in a different encoding, e.g., UTF8:
<langsyntaxhighlight lang="csharp">int utf8ByteLength = Encoding.UTF8.GetByteCount(s);</langsyntaxhighlight>
 
=={{header|C++}}==
{{works with|ISO C++}}
{{works with|g++|4.0.2}}
<langsyntaxhighlight lang="cpp">#include <string> // (not <string.h>!)
using std::string;
 
// In bytes same as above since sizeof(char) == 1
string::size_type bytes = s.length() * sizeof(string::value_type);
}</syntaxhighlight>
For wide character strings:
 
<langsyntaxhighlight lang="cpp">#include <string>
using std::wstring;
wstring s = L"\u304A\u306F\u3088\u3046";
wstring::size_type length = s.length() * sizeof(wstring::value_type); // in bytes
}</syntaxhighlight>
 
===Character Length===
For wide character strings:
 
<langsyntaxhighlight lang="cpp">#include <string>
using std::wstring;
 
wstring s = L"\u304A\u306F\u3088\u3046";
wstring::size_type length = s.length();
}</syntaxhighlight>
 
For narrow character strings:
{{works with|clang++|3.0}}
 
<langsyntaxhighlight lang="cpp">#include <iostream>
#include <codecvt>
int main()
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv;
std::cout << "Character length: " << conv.from_bytes(utf8).size() << '\n';
}</syntaxhighlight>
 
{{works with|C++98}}
{{works with|g++|4.1.2 20061115 (prerelease) (SUSE Linux)}}
<langsyntaxhighlight lang="cpp">#include <cwchar> // for mbstate_t
#include <locale>
 
// return the result
return length;
}</syntaxhighlight>
 
Example usage (note that the locale names are OS specific):
 
<langsyntaxhighlight lang="cpp">#include <iostream>
 
int main()
// Tür in ISO-8859-1
std::cout << char_length("\x54\xfc\x72", "de_DE") << "\n"; // outputs 3
}</syntaxhighlight>
 
Note that the strings are given as explicit hex sequences, so that the encoding used for the source code won't matter.
Clean Strings are unboxed arrays of characters. Characters are always a single byte. The function size returns the number of elements in an array.
 
<langsyntaxhighlight lang="clean">import StdEnv
 
strlen :: String -> Int
strlen string = size string
 
Start = strlen "Hello, world!"</syntaxhighlight>
 
=={{header|Clojure}}==
===Byte Length===
<langsyntaxhighlight lang="clojure">(def utf-8-octet-length #(-> % (.getBytes "UTF-8") count))
(map utf-8-octet-length ["møøse" "𝔘𝔫𝔦𝔠𝔬𝔡𝔢" "J\u0332o\u0332s\u0332e\u0301\u0332"]) ; (7 28 14)
 
 
(def code-unit-length count)
(map code-unit-length ["møøse" "𝔘𝔫𝔦𝔠𝔬𝔡𝔢" "J\u0332o\u0332s\u0332e\u0301\u0332"]) ; (5 14 9)</syntaxhighlight>
 
===Character length===
<langsyntaxhighlight lang="clojure">(def character-length #(.codePointCount % 0 (count %)))
(map character-length ["møøse" "𝔘𝔫𝔦𝔠𝔬𝔡𝔢" "J\u0332o\u0332s\u0332e\u0301\u0332"]) ; (5 7 9)</langsyntaxhighlight>
 
===Grapheme Length===
<langsyntaxhighlight lang="clojure">(def grapheme-length
#(->> (doto (java.text.BreakIterator/getCharacterInstance)
(.setText %))
(take-while (partial not= java.text.BreakIterator/DONE))
count))
(map grapheme-length ["møøse" "𝔘𝔫𝔦𝔠𝔬𝔡𝔢" "J\u0332o\u0332s\u0332e\u0301\u0332"]) ; (5 7 4)</syntaxhighlight>
 
=={{header|COBOL}}==
===Byte Length===
<syntaxhighlight lang ="cobol">FUNCTION BYTE-LENGTH(str)</langsyntaxhighlight>
 
Alternative, non-standard extensions:
{{works with|GNU Cobol}}
<syntaxhighlight lang ="cobol">LENGTH OF str</langsyntaxhighlight>
 
{{works with|GNU Cobol}}
{{works with|Visual COBOL}}
<syntaxhighlight lang ="cobol">FUNCTION LENGTH-AN(str)</langsyntaxhighlight>
 
===Character Length===
<syntaxhighlight lang ="cobol">FUNCTION LENGTH(str)</langsyntaxhighlight>
 
=={{header|ColdFusion}}==
===Byte Length===
<langsyntaxhighlight lang="cfm">
<cfoutput>
<cfset str = "Hello World">
Line 960 ⟶ 1,063:
<p>#arrayLen(t)#</p>
</cfoutput>
</syntaxhighlight>
 
===Character Length===
<langsyntaxhighlight lang="cfm">#len("Hello World")#</langsyntaxhighlight>
 
=={{header|Common Lisp}}==
 
{{works with|SBCL}}
<langsyntaxhighlight lang="lisp">(length (sb-ext:string-to-octets "Hello Wørld"))</langsyntaxhighlight>
returns 12.
===Character Length===
Common Lisp represents strings as sequences of characters, not bytes, so there is no ambiguity about the encoding. The [http://www.lispworks.com/documentation/HyperSpec/Body/f_length.htm length] function always returns the number of characters in a string.
<langsyntaxhighlight lang="lisp">(length "Hello World")</langsyntaxhighlight>
returns 11, and
<pre>(length "Hello Wørld")</pre>

=={{header|Component Pascal}}==
 
===Character Length===
<langsyntaxhighlight lang="oberon2">
MODULE TestLen;
 
 
END TestLen.
</syntaxhighlight>
 
The ''$'' symbol in ''LEN(s$)'' in Component Pascal copies the sequence of characters up to the terminating null character, so ''LEN(s$)'' returns the actual number of characters rather than the size allocated for the variable.
 
===Byte Length===
<langsyntaxhighlight lang="oberon2">
MODULE TestLen;
 
 
END TestLen.
</syntaxhighlight>
 
Running command ''TestLen.DoByteLength'' gives following output:
Length of characters in bytes: 10
</pre>
 
=={{header|Crystal}}==
UTF8 is the default encoding in Crystal.
===Byte Length===
<syntaxhighlight lang="crystal">"J̲o̲s̲é̲".bytesize</syntaxhighlight>
 
===Character Length===
<syntaxhighlight lang="crystal">"J̲o̲s̲é̲".chars.length</syntaxhighlight>
 
=={{header|D}}==
===Byte Length===
<langsyntaxhighlight lang="d">import std.stdio;
 
void showByteLen(T)(T[] str) {
dstring s3c = "J̲o̲s̲é̲";
showByteLen(s3c);
}</syntaxhighlight>
{{out}}
<pre>Byte length: 7 - 6dc3b8c3b87365
</pre>
 
===Character Length===
<langsyntaxhighlight lang="d">import std.stdio, std.range, std.conv;
 
void showCodePointsLen(T)(T[] str) {
dstring s3c = "J̲o̲s̲é̲";
showCodePointsLen(s3c);
}</syntaxhighlight>
{{out}}
<pre>Character length: 5 - 6d f8 f8 73 65
</pre>
=={{header|DataWeave}}==
===Character Length===
<syntaxhighlight lang="dataweave">sizeOf("foo")</syntaxhighlight>
 
{{out}}
===Byte Length===
Dc's "P" command prints numbers as strings. The number 22405534230753963835153736737 (hint: look at it in hex) represents "Hello world!". Counting the byte length of it is counting how often it iteratively can be divided by 256 with non zero result. The snippet defines the macro which calculates the length, prints the string 1st and then its length.
<langsyntaxhighlight Dclang="dc">[256 / d 0<L 1 + ] sL
22405534230753963835153736737 d P A P
lL x f</syntaxhighlight>
<pre>
Hello world!
</pre>
===Character Length===
The following code outputs 5, which is the length of the string "abcde"
<syntaxhighlight lang="dc">[abcde]Zp</syntaxhighlight>
 
=={{header|Déjà Vu}}==
===Byte Length===
Byte length depends on the encoding, which internally is UTF-8, but users of the language can only get at the raw bytes after encoding a string into a blob.
<langsyntaxhighlight lang="dejavu">!. len !encode!utf-8 "møøse"
!. len !encode!utf-8 "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"</langsyntaxhighlight>
{{out}}
<pre>
</pre>
 
===Character Length===
<langsyntaxhighlight lang="dejavu">!. len "møøse"
!. len "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"</langsyntaxhighlight>
{{out}}
<pre>5
7
</pre>

=={{header|Delphi}}==
See [https://rosettacode.org/wiki/String_length#Pascal Pascal].
=={{header|Dyalect}}==
<langsyntaxhighlight lang="dyalect">"Hello World".lenLength()</langsyntaxhighlight>
 
=={{header|E}}==
===Character Length===
<langsyntaxhighlight lang="e">"Hello World".size()</langsyntaxhighlight>
 
=={{header|EasyLang}}==
===Character Length===
<syntaxhighlight lang="easylang>
# 5
print len "møøse"
# 7
print len "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
# 8
print len "J̲o̲s̲é̲"
# 1
print len "😀"
</syntaxhighlight>
 
=={{header|Ecstasy}}==
<syntaxhighlight lang="ecstasy">
module StrLen {
@Inject Console console;
 
void run(String s = "José") {
console.print($|For the string {s.quoted()}:
| Character length: {s.size}
| UTF-8 byte length: {s.calcUtf8Length()}
);
}
}
</syntaxhighlight>
 
{{out}}
<pre>
For the string "José":
Character length: 4
UTF-8 byte length: 5
</pre>
 
=={{header|Elena}}==
===Character Length===
ELENA 4.x :
<langsyntaxhighlight lang="elena">import extensions;
public program()
var ws_length := ws.Length; // Number of UTF-16 characters
var u_length := ws.toArray().Length; //Number of UTF-32 characters
}</syntaxhighlight>
 
===Byte Length===
ELENA 4.x :
<langsyntaxhighlight lang="elena">import extensions;
public program()
var s_byte_length := s.toByteArray().Length; // Number of bytes
var ws_byte_length := ws.toByteArray().Length; // Number of bytes
}</syntaxhighlight>
 
=={{header|Elixir}}==
===Byte Length===
<langsyntaxhighlight lang="elixir">
name = "J\x{332}o\x{332}s\x{332}e\x{301}\x{332}"
byte_size(name)
# => 14
</syntaxhighlight>
===Character Length===
<langsyntaxhighlight lang="elixir">
name = "J\x{332}o\x{332}s\x{332}e\x{301}\x{332}"
Enum.count(String.codepoints(name))
# => 9
</syntaxhighlight>
===Grapheme Length===
<langsyntaxhighlight lang="elixir">
name = "J\x{332}o\x{332}s\x{332}e\x{301}\x{332}"
String.length(name)
# => 4
</syntaxhighlight>
 
=={{header|Emacs Lisp}}==
===Character Length===
<langsyntaxhighlight lang="lisp">(length "hello")
;; => 5</syntaxhighlight>
===Byte Length===
<langsyntaxhighlight lang="lisp">(string-bytes "\u1D518\u1D52B\u1D526")
;; => 12</syntaxhighlight>
 
<code>string-bytes</code> is the length of Emacs' internal representation. In Emacs 23 up this is utf-8. In earlier versions it was "emacs-mule".
<code>string-width</code> is the displayed width of a string in the current frame and window. This is not the same as grapheme length since various Asian characters may display in 2 columns, depending on the type of tty or GUI.
 
<langsyntaxhighlight lang="lisp">(let ((str (apply 'string
(mapcar (lambda (c) (decode-char 'ucs c))
'(#x1112 #x1161 #x11ab #x1100 #x1173 #x11af)))))
Line 1,240 ⟶ 1,385:
(string-bytes str)
(string-width str)))
;; => (6 18 4) ;; in emacs 23 up</syntaxhighlight>
 
=={{header|EMal}}==
<syntaxhighlight lang="emal">
text moose = "møøse"
text unicode = "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
text jose = "J" + 0U0332 + "o" + 0U0332 + "s" + 0U0332 + "e" + 0U0301 + 0U0332
text emoji = "𠇰😈🎶🔥é-"
</syntaxhighlight>
===Byte Length===
<syntaxhighlight lang="emal">
writeLine((blob!moose).length)
writeLine((blob!unicode).length)
writeLine((blob!jose).length)
writeLine((blob!emoji).length)
</syntaxhighlight>
{{out}}
<pre>
7
28
14
19
</pre>
===Character Length===
<syntaxhighlight lang="emal">
writeLine(moose.codePointsLength)
writeLine(unicode.codePointsLength)
writeLine(jose.codePointsLength)
writeLine(emoji.codePointsLength)
</syntaxhighlight>
{{out}}
<pre>
5
7
9
6
</pre>
===Grapheme Length===
<syntaxhighlight lang="emal">
writeLine(moose.graphemesLength)
writeLine(unicode.graphemesLength)
writeLine(jose.graphemesLength)
writeLine(emoji.graphemesLength)
</syntaxhighlight>
{{out}}
<pre>
5
7
4
6
</pre>
 
=={{header|Erlang}}==
=={{header|Euphoria}}==
===Character Length===
<syntaxhighlight lang="euphoria">print(1,length("Hello World"))</syntaxhighlight>
 
=={{header|F_Sharp|F#}}==
This is delegated to the standard .Net framework string and encoding functions.
===Byte Length===
<langsyntaxhighlight lang="fsharp">open System.Text
let byte_length str = Encoding.UTF8.GetByteCount(str)</syntaxhighlight>
===Character Length===
<langsyntaxhighlight lang="fsharp">"Hello, World".Length</langsyntaxhighlight>
 
=={{header|Factor}}==
===Byte Length===
Here are two words to compute the byte length of strings. The first one doesn't allocate new memory, the second one can easily be adapted to measure the byte length of encodings other than UTF8.
<langsyntaxhighlight lang="factor">: string-byte-length ( string -- n ) [ code-point-length ] map-sum ;
: string-byte-length-2 ( string -- n ) utf8 encode length ;</syntaxhighlight>
===Character Length===
<code>length</code> works on any sequence, of which strings are one. Strings are UTF8 encoded.
<syntaxhighlight lang="factor">length</syntaxhighlight>
 
=={{header|Fantom}}==
A string can be converted into an instance of <code>Buf</code> to treat the string as a sequence of bytes according to a given charset: the default is UTF8, but 16-bit representations can also be used.
 
<langsyntaxhighlight lang="fantom">
fansh> c := "møøse"
møøse
fansh> c.toBuf(Charset.utf16BE).toHex // display as UTF16 big-endian
006d00f800f800730065
</syntaxhighlight>
 
===Character length===
 
<langsyntaxhighlight lang="fantom">
fansh> c := "møøse"
møøse
fansh> c.size
5
</syntaxhighlight>
 
=={{header|Forth}}==
A counted string is a single pointer to a short string in memory. The string's first byte is the count of the number of characters in the string. This is how symbols are stored in a Forth dictionary.
 
<langsyntaxhighlight lang="forth">CREATE s ," Hello world" \ create string "s"
s C@ ( -- length=11 )
s COUNT ( addr len ) \ convert to a stack string, described below</syntaxhighlight>
 
'''Stack string'''
A string on the stack is represented by a pair of cells: the address of the string data and the length of the string data (in characters). The word '''COUNT''' converts a counted string into a stack string. The STRING utility wordset of ANS Forth works on these addr-len pairs. This representation has the advantages of not requiring null-termination, easy representation of substrings, and not being limited to 255 characters.
 
<langsyntaxhighlight lang="forth">S" string" ( addr len)
DUP . \ 6</syntaxhighlight>
 
===Character Length===
The following code will count the number of UTF-8 characters in a null-terminated string. It relies on the fact that all bytes of a UTF-8 character except the first have the binary bit pattern "10xxxxxx".
 
<langsyntaxhighlight lang="forth">2 base !
: utf8+ ( str -- str )
begin
Line 1,338 ⟶ 1,533:
10000000 <>
until ;
decimal</syntaxhighlight>
 
<langsyntaxhighlight lang="forth">: count-utf8 ( zstr -- n )
0
begin
utf8+
swap 1+
repeat drop ;</syntaxhighlight>
 
=={{header|Fortran}}==
 
=={{header|FreeBASIC}}==
<langsyntaxhighlight lang="freebasic">' FB 1.05.0 Win64
 
Dim s As String = "moose" '' variable length ascii string
Line 1,386 ⟶ 1,581:
Print "w : " ; w, "Character Length : "; Len(s), "Byte Length : "; SizeOf(w)
Print
Sleep</syntaxhighlight>
 
{{out}}
===Byte Length===
A string can be converted to an array of bytes in any supported encoding.
<langsyntaxhighlight lang="frink">
b = "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
length[stringToBytes[b, "UTF-8"]]
</syntaxhighlight>
 
===Character Length===
Frink's string operations correctly handle upper-plane Unicode characters as a single codepoint.
<langsyntaxhighlight lang="frink">
b = "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
length[b]
</syntaxhighlight>
 
===Grapheme Length===
<langsyntaxhighlight lang="frink">
b = "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
graphemeLength[b]
</syntaxhighlight>
 
=={{header|GAP}}==
<langsyntaxhighlight lang="gap">Length("abc");
# or same result with
Size("abc");</langsyntaxhighlight>
 
=={{header|Gnuplot}}==
===Byte Length===
<langsyntaxhighlight lang="gnuplot">print strlen("hello")
=> 5</syntaxhighlight>
 
=={{header|Go}}==
===Byte Length===
<langsyntaxhighlight lang="go">package main
 
import "fmt"
j := "J̲o̲s̲é̲"
fmt.Printf("%d %s % x\n", len(m), m, m)
fmt.Printf("%d %s % x\n", len(u), u, u)
fmt.Printf("%d %s % x\n", len(j), j, j)
}</syntaxhighlight>
Output:
<pre>
7 møøse  6d c3 b8 c3 b8 73 65
28 𝔘𝔫𝔦𝔠𝔬𝔡𝔢  f0 9d 94 98 f0 9d 94 ab f0 9d 94 a6 f0 9d 94 a0 f0 9d 94 ac f0 9d 94 a1 f0 9d 94 a2
13 J̲o̲s̲é̲  4a cc b2 6f cc b2 73 cc b2 c3 a9 cc b2
</pre>
 
===Character Length===
<langsyntaxhighlight lang="go">package main
 
import (
fmt.Printf("%d %s %x\n", utf8.RuneCountInString(u), u, []rune(u))
fmt.Printf("%d %s %x\n", utf8.RuneCountInString(j), j, []rune(j))
}</syntaxhighlight>
Output:
<pre>
</pre>
===Grapheme Length===
Go does not have language or library features to recognize graphemes directly. For example, it does not provide functions implementing [http://www.unicode.org/reports/tr29/ Unicode Standard Annex #29, Unicode Text Segmentation]. It does however have convenient functions for recognizing Unicode character categories, and so an expected subset of grapheme possibilites is easy to recognize. Here is a solution recognizing the category "Mn", which includes the combining characters used in the task example.
<langsyntaxhighlight lang="go">package main
 
import (
}
return gr
}</syntaxhighlight>
Output:
<pre>
</pre>

=={{header|Groovy}}==
Calculating "Byte-length" (by which one typically means "in-memory storage size in bytes") is not possible through the facilities of the Groovy language alone. Calculating "Character length" is built into the Groovy extensions to java.lang.String.
===Character Length===
<syntaxhighlight lang="groovy">
println "Hello World!".size()
println "møøse".size()
println "𝔘𝔫𝔦𝔠𝔬𝔡𝔢".size()
println "J̲o̲s̲é̲".size()
</syntaxhighlight>
 
Output:
<pre>
12
5
14
8
</pre>
 
Note: The Java "String.length()" method also works in Groovy, but "size()" is consistent with usage in other sequential or composite types.
GW-BASIC only supports single-byte characters.
 
<langsyntaxhighlight lang="qbasic">10 INPUT A$
20 PRINT LEN(A$)</syntaxhighlight>
 
=={{header|Haskell}}==
There are several (non-standard, so far) Unicode encoding libraries available on [http://hackage.haskell.org/ Hackage]. As an example, we'll use [http://hackage.haskell.org/packages/archive/encoding/0.2/doc/html/Data-Encoding.html encoding-0.2], as ''Data.Encoding'':
 
<langsyntaxhighlight lang="haskell">import Data.Encoding
import Data.ByteString as B
 
 
strlenUTF8 = B.length strUTF8
strlenUTF32 = B.length strUTF32</syntaxhighlight>
===Character Length===
{{works with|GHC|GHCi|6.6}}
The base type ''Char'' defined by the standard is already intended for (plain) Unicode characters.
 
<langsyntaxhighlight lang="haskell">strlen = length "Hello, world!"</langsyntaxhighlight>
 
=={{header|HicEst}}==
<langsyntaxhighlight lang="hicest">LEN("1 character == 1 byte") ! 21</langsyntaxhighlight>
 
=={{header|HolyC}}==
===Byte Length===
<langsyntaxhighlight lang="holyc">U8 *string = "Hello, world!";
Print("%d\n", StrLen(string));
</syntaxhighlight>
</lang>
 
=={{header|Icon}} and {{header|Unicon}}==
==== Character Length ====
<syntaxhighlight lang="icon"> length := *s</syntaxhighlight>
 
Note: Neither Icon nor Unicon currently supports double-byte character sets.
'''Compiler:''' any IDL compiler should do
 
<langsyntaxhighlight lang="idl">length = strlen("Hello, world!")</langsyntaxhighlight>
===Character Length===
{{needs-review|IDL}}
<langsyntaxhighlight lang="idl">length = strlen("Hello, world!")</langsyntaxhighlight>
 
=={{header|Io}}==
===Byte Length===
<langsyntaxhighlight lang="io">"møøse" sizeInBytes</langsyntaxhighlight>
 
===Character Length===
<langsyntaxhighlight lang="io">"møøse" size</langsyntaxhighlight>
 
=={{header|J}}==
===Byte Length===
<langsyntaxhighlight lang="j"> # 'møøse'
7</syntaxhighlight>
Here we use the default encoding for character literals (8 bit wide literals).
===Character Length===
<langsyntaxhighlight lang="j"> #7 u: 'møøse'
5</syntaxhighlight>
Here we have used 16 bit wide character literals. See also the dictionary page for [http://www.jsoftware.com/help/dictionary/duco.htm u:].
 
=={{header|Jakt}}==
===Character Length===
<syntaxhighlight lang="jakt">
fn character_length(string: String) -> i64 {
mut length = 0
for _ in string.code_points() {
length++
}
return length
}
 
fn main() {
for string in [
"Hello world!"
"møøse"
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
"J̲o̲s̲é̲"
] {
println("\"{}\" {}", string, character_length(string))
}
}
</syntaxhighlight>
{{out}}
<pre>
"Hello world!" 12
"møøse" 5
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢" 7
"J̲o̲s̲é̲" 8
</pre>
 
===Byte Length===
<syntaxhighlight lang="jakt">
fn main() {
for string in [
"Hello world!"
"møøse"
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
"J̲o̲s̲é̲"
] {
println("\"{}\" {}", string, string.length())
}
}
</syntaxhighlight>
{{out}}
<pre>
"Hello world!" 12
"møøse" 7
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢" 28
"J̲o̲s̲é̲" 13
</pre>
 
=={{header|Java}}==
Another way to know the byte length of a string is to explicitly specify the charset we desire.
 
<langsyntaxhighlight lang="java5">String s = "Hello, world!";
int byteCountUTF16 = s.getBytes("UTF-16").length; // Incorrect: it yields 28 (that is with the BOM)
int byteCountUTF16LE = s.getBytes("UTF-16LE").length; // Correct: it yields 26
int byteCountUTF8 = s.getBytes("UTF-8").length; // yields 13 </syntaxhighlight>
 
===Character Length===
Line 1,608 ⟶ 1,865:
 
The length method of String objects is not the length of that String in characters. Instead, it only gives the number of 16-bit code units used to encode a string. This is not (always) the number of Unicode characters (code points) in the string.
<langsyntaxhighlight lang="java5">String s = "Hello, world!";
int not_really_the_length = s.length(); // XXX: does not (always) count Unicode characters (code points)! </syntaxhighlight>
 
Since Java 1.5, the actual number of characters (code points) can be determined by calling the codePointCount method.
<langsyntaxhighlight lang="java5">String str = "\uD834\uDD2A"; //U+1D12A
int not_really__the_length = str.length(); // value is 2, which is not the length in characters
int actual_length = str.codePointCount(0, str.length()); // value is 1, which is the length in characters</syntaxhighlight>
===Grapheme Length===
 
<lang java>import java.text.BreakIterator;
Since JDK 20<ref>https://bugs.openjdk.org/browse/JDK-8291660</ref>.
 
<syntaxhighlight lang="java">import java.text.BreakIterator;
 
public class Grapheme {
Line 1,634 ⟶ 1,894:
System.out.println("Grapheme length: " + count+ " " + s);
}
}</syntaxhighlight>
Output:
<pre>
</pre>
 
=={{header|JavaScript}}==
 
===Byte length===
JavaScript encodes strings in UTF-16, which represents each character with one or two 16-bit values. The length property of string objects gives the number of 16-bit values used to encode a string, so the number of bytes can be determined by doubling that number.
 
<syntaxhighlight lang="javascript">
var s = "Hello, world!";
var byteCount = s.length * 2; // 26
</syntaxhighlight>
 
It's easier to use Buffer.byteLength (Node.JS specific, not ECMAScript).
 
<syntaxhighlight lang="javascript">
a = '👩‍❤️‍👩'
Buffer.byteLength(a, 'utf16le'); // 16
Buffer.byteLength(a, 'utf8'); // 20
Buffer.byteLength(s, 'utf16le'); // 26
Buffer.byteLength(s, 'utf8'); // 13
</syntaxhighlight>
 
In pure ECMAScript, TextEncoder() can be used to return the UTF-8 byte size:
 
<syntaxhighlight lang="javascript">
(new TextEncoder().encode(a)).length; // 20
(new TextEncoder().encode(s)).length; // 13
</syntaxhighlight>
 
=== Unicode codepoint length ===
 
JavaScript encodes strings in UTF-16, which represents each character with one or two 16-bit values. The most commonly used characters are represented by one 16-bit value, while rarer ones like some mathematical symbols are represented by two.
 
If the string only contains commonly used characters, the number of characters will be equal to the number of 16-bit values used to represent the characters.
 
<syntaxhighlight lang="javascript">
var str1 = "Hello, world!";
var len1 = str1.length; // 13
 
var str2 = "\uD834\uDD2A"; // U+1D12A represented by a UTF-16 surrogate pair
var len2 = str2.length; // 2
</syntaxhighlight>
 
More generally, the expansion operator in an array can be used to enumerate Unicode code points:
 
<syntaxhighlight lang="javascript">
[...str2].length // 1
</syntaxhighlight>
 
=== Unicode grapheme length ===
 
Counting Unicode codepoints when using combining characters such as joining sequences or diacritics will return the wrong size, so we must count graphemes instead. Intl.Segmenter() default granularity is grapheme.
 
<syntaxhighlight lang="javascript">
[...new Intl.Segmenter().segment(a)].length; // 1
</syntaxhighlight>
 
var str2 = "\uD834\uDD2A"; //U+1D12A represented by a UTF-16 surrogate pair
var len2 = str2.length; //2</lang>
===ES6 destructuring/iterators===
 
ES6 provides several ways to get a string split into an array of code points instead of UTF-16 code units:
<langsyntaxhighlight lang="javascript">let
str='AöЖ€𝄞'
,countofcodeunits=str.length // 6
countofcodepoints=cparr.length // 5
}
</syntaxhighlight>
 
=={{header|Joy}}==
;Byte length
<syntaxhighlight lang="joy">"Café" size.</syntaxhighlight>
{{out}}
<pre>5</pre>
 
=={{header|jq}}==
jq strings are JSON strings and are therefore encoded as UTF-8. When given a JSON string, the <tt>length</tt> filter emits the number of Unicode codepoints that it contains:
<langsyntaxhighlight lang="jq">$ cat String_length.jq
def describe:
"length of \(.) is \(length)";
 
("J̲o̲s̲é̲", "𝔘𝔫𝔦𝔠𝔬𝔡𝔢") | describe</langsyntaxhighlight><syntaxhighlight lang ="sh">
$ jq -n -f String_length.jq
"length of J̲o̲s̲é̲ is 8"
"length of 𝔘𝔫𝔦𝔠𝔬𝔡𝔢 is 7"</langsyntaxhighlight>
 
=={{header|JudoScript}}==
===Byte Length===
{{needs-review|JudoScript}}
<langsyntaxhighlight lang="judoscript">//Store length of hello world in length and print it
. length = "Hello World".length();</langsyntaxhighlight>
===Character Length===
{{needs-review| JudoScript}}
<langsyntaxhighlight lang="judoscript">//Store length of hello world in length and print it
. length = "Hello World".length()</langsyntaxhighlight>
 
=={{header|Julia}}==
 
Julia encodes strings as UTF-8, so the byte length (via <code>sizeof</code>) will be different from the string length (via <code>length</code>) only if the string contains non-ASCII characters.
 
===Byte Length===
 
<syntaxhighlight lang="julia">
sizeof("møøse") # 7
sizeof("𝔘𝔫𝔦𝔠𝔬𝔡𝔢") # 28
sizeof("J̲o̲s̲é̲") # 13
</syntaxhighlight>
 
===Character Length===
 
<syntaxhighlight lang="julia">
length("møøse") # 5
length("𝔘𝔫𝔦𝔠𝔬𝔡𝔢") # 7
length("J̲o̲s̲é̲") # 8
</syntaxhighlight>
 
===Grapheme Length===
 
<syntaxhighlight lang="julia">
import Unicode
length(Unicode.graphemes("møøse")) # 5
length(Unicode.graphemes("𝔘𝔫𝔦𝔠𝔬𝔡𝔢")) # 7
length(Unicode.graphemes("J̲o̲s̲é̲")) # 4
</syntaxhighlight>
 
=={{header|K}}==
===Character Length===
<syntaxhighlight lang="k">
#"Hello, world!"
13
#"Hëllo, world!"
13
</syntaxhighlight>
 
=={{header|Kotlin}}==
 
As each UTF-16 character occupies 2 bytes, it follows that the number of bytes occupied by the string will be twice the length:
<syntaxhighlight lang="kotlin">
fun main(args: Array<String>) {
val s = "José"
println("The char length is ${s.length}")
println("The byte length is ${CharacterChar.BYTESSIZE_BYTES * s.length}")
}</langsyntaxhighlight>
 
{{out}}
The lambdatalk {W.length string} function returns the number of bytes in a string. For Unicode characters made of two bytes things are a little bit more tricky. It's easy to add (inline) a new javascript primitive to the dictionary:
 
<langsyntaxhighlight lang="scheme">
{script
LAMBDATALK.DICT["W.unicodeLength"] = function() {
Line 1,786 ⟶ 2,111:
{W.length 𝔘𝔫𝔦𝔠𝔬𝔡𝔢} -> 14
{W.unicodeLength 𝔘𝔫𝔦𝔠𝔬𝔡𝔢} -> 7
</syntaxhighlight>
 
 
=={{header|Lasso}}==
===Character Length===
<langsyntaxhighlight Lassolang="lasso">'Hello, world!'->size // 13
'møøse'->size // 5
'𝔘𝔫𝔦𝔠𝔬𝔡𝔢'->size // 7</syntaxhighlight>
 
===Byte Length===
<langsyntaxhighlight Lassolang="lasso">'Hello, world!'->asBytes->size // 13
'møøse'->asBytes->size // 7
'𝔘𝔫𝔦𝔠𝔬𝔡𝔢'->asBytes->size // 28</syntaxhighlight>
 
=={{header|LFE}}==
Line 1,804 ⟶ 2,129:
=== Character Length ===
 
<langsyntaxhighlight lang="lisp">
(length "ASCII text")
10
Line 1,813 ⟶ 2,138:
> (length (unicode:characters_to_list encoded 'utf8))
12
</syntaxhighlight>
 
=== Byte Length ===
 
<langsyntaxhighlight lang="lisp">
> (set encoded (binary ("𝔘𝔫𝔦𝔠𝔬𝔡𝔢 𝔗𝔢𝒙𝔱" utf8)))
#B(240 157 148 152 240 157 148 171 240 157 ...)
Line 1,830 ⟶ 2,155:
> (byte_size encoded)
10
</syntaxhighlight>
 
=={{header|Liberty BASIC}}==
Line 1,837 ⟶ 2,162:
=={{header|Lingo}}==
===Character Length===
<langsyntaxhighlight lang="lingo">utf8Str = "Hello world äöü"
put utf8Str.length
-- 15</syntaxhighlight>
===Byte Length===
<langsyntaxhighlight lang="lingo">utf8Str = "Hello world äöü"
put bytearray(utf8Str).length
-- 18</syntaxhighlight>
 
=={{header|LiveCode}}==
Line 1,850 ⟶ 2,175:
 
===Character Length===
<langsyntaxhighlight LiveCodelang="livecode ">put the length of "Hello World" </langsyntaxhighlight>
or
<syntaxhighlight lang="livecode LiveCode ">put the number of characters in "Hello World" -- 'chars' short for characters is also valid</langsyntaxhighlight>
or
<langsyntaxhighlight LiveCodelang="livecode ">put length("Hello World")</langsyntaxhighlight>
 
for Unicode character count use the code units keyword
<syntaxhighlight lang="livecode LiveCode ">put the number of codeunits of "Hello World" -- count of unicode characters </langsyntaxhighlight>
 
===Byte Length===
Use the 'byte' keyword in LiveCode for an accurate unicode char byte count
<langsyntaxhighlight LiveCodelang="livecode">put the number of bytes in "Hello World" </langsyntaxhighlight>
 
=={{header|Logo}}==
Logo is so old that only ASCII encoding is supported. Modern versions of Logo may have enhanced character set support.
<langsyntaxhighlight lang="logo">print count "|Hello World| ; 11
print count "møøse ; 5
print char 248 ; ø - implies ISO-Latin character set</syntaxhighlight>
 
=={{header|LSE64}}==
===Byte Length===
LSE stores strings as arrays of characters in 64-bit cells plus a count.
<langsyntaxhighlight lang="lse64">" Hello world" @ 1 + 8 * , # 96 = (11+1)*(size of a cell) = 12*8</langsyntaxhighlight>
===Character Length===
LSE uses counted strings: arrays of characters, where the first cell contains the number of characters in the string.
<langsyntaxhighlight lang="lse64">" Hello world" @ , # 11</langsyntaxhighlight>
 
=={{header|Lua}}==
Line 1,881 ⟶ 2,206:
 
In Lua, the built-in string functions count bytes: the <code>#</code> operator and <code>string.len</code> return the byte length. To count the characters of a UTF-8 string, the <code>utf8</code> library (Lua 5.3+) is used instead.
 
===Byte Length===
 
Byte length in UTF-8:
 
<syntaxhighlight lang="lua">str = "Hello world"
length = #str</syntaxhighlight>
 
or
 
<langsyntaxhighlight lang="lua">str = "Hello world"
length = string.len(str)</langsyntaxhighlight>
 
===Character Length===
 
Only valid for ASCII:
 
<syntaxhighlight lang="lua">str = "Hello world"
length = #str</syntaxhighlight>
 
or
 
<langsyntaxhighlight lang="lua">str = "Hello world"
length = string.len(str)</langsyntaxhighlight>
 
For Unicode string, use utf8 module:
 
<syntaxhighlight lang="lua">
utf8.len("møøse")
utf8.len("𝔘𝔫𝔦𝔠𝔬𝔡𝔢")
utf8.len("J̲o̲s̲é̲")
</syntaxhighlight>
 
{{out}}
 
<pre>
5
7
8
</pre>
 
=={{header|M2000 Interpreter}}==
<syntaxhighlight lang="m2000 interpreter">
module String_length {
      A$=format$("J\u0332o\u0332s\u0332e\u0301\u0332")
      Print Len(A$) = 9 ' true Utf-16LE
      Print Len.Disp(A$) = 4 \\ display length
      Buffer Clear Mem as Byte*100
      \\ Write at memory at offset 0 or address Mem(0)
      Return Mem, 0:=A$
      Print Eval$(Mem, 0, 18)
      For i=0 to 17 step 2
            \\ print hex value and character
            Hex Eval(Mem, i as integer), ChrCode$(Eval(Mem, i as integer))
      Next i
      Document B$=A$
      \\ encode to utf-8 with BOM (3 bytes 0xEF,0xBB,0xBF)
      Save.Doc B$, "Checklen.doc", 2
      Print Filelen("Checklen.doc")=17
      \\ So length is 14 bytes + 3 the BOM
Mem=Buffer("Checklen.doc")
Print len(Mem)=17 // len works for buffers too - unit byte
// version 12 can handle strings without suffix $
C=eval$(mem, 3, 14) // from 4th byte get 14 bytes in a string
Print len(C)*2=14 ' bytes // len()) for strings return double type of words (can return 0.5)
C=string$(C as utf8dec) ' decode bytes from utf8 to utf16LE
Print len(C)=9, C=A$, Len.Disp(C)=4
Print C
Report 2, C // proportional print on console - for text center justified rendering (2 - center)
}
String_length
</syntaxhighlight>
 
=={{header|Maple}}==
=== Character length ===
<langsyntaxhighlight lang="maple">length("Hello world");</langsyntaxhighlight>
=== Byte count ===
<langsyntaxhighlight lang="maple">nops(convert("Hello world",bytes));</langsyntaxhighlight>
 
=={{header|Mathematica}}/{{header|Wolfram Language}}==
=== Character length ===
<langsyntaxhighlight lang="mathematica">StringLength["Hello world"]</langsyntaxhighlight>
=== Byte length ===
<langsyntaxhighlight lang="mathematica">StringByteCount["Hello world"]</langsyntaxhighlight>
 
=={{header|MATLAB}}==
===Character Length===
<langsyntaxhighlight MATLABlang="matlab">>> length('møøse')
 
ans =
 
5</syntaxhighlight>
===Byte Length===
MATLAB apparently encodes strings using UTF-16.
<langsyntaxhighlight MATLABlang="matlab">>> numel(dec2hex('møøse'))
 
ans =
 
10</syntaxhighlight>
 
=={{header|Maxima}}==
<langsyntaxhighlight lang="maxima">s: "the quick brown fox jumps over the lazy dog";
slength(s);
/* 43 */</syntaxhighlight>
 
=={{header|MAXScript}}==
===Character Length===
<langsyntaxhighlight lang="maxscript">"Hello world".count</langsyntaxhighlight>
 
=={{header|Mercury}}==
 
===Byte Length===
<langsyntaxhighlight lang="mercury">:- module string_byte_length.
:- interface.
 
write_length(String, !IO):-
NumBytes = count_utf8_code_units(String),
io.format("%s: %d bytes\n", [s(String), i(NumBytes)], !IO).</langsyntaxhighlight>
 
Output:
Line 1,993 ⟶ 2,353:
===Character Length===
The function <tt>string.count_codepoints/1</tt> returns the number of code points in a string.
<langsyntaxhighlight lang="mercury">:- module string_character_length.
:- interface.
 
write_length(String, !IO) :-
NumChars = count_codepoints(String),
io.format("%s: %d characters\n", [s(String), i(NumChars)], !IO).</langsyntaxhighlight>
 
Output:
Line 2,025 ⟶ 2,385:
Metafont has no way of properly handling encodings other than ASCII, so it can only count the number of bytes in a string.
 
<langsyntaxhighlight lang="metafont">string s;
s := "Hello Moose";
show length(s); % 11 (ok)
s := "Hello Møøse";
show length(s); % 13 (number of bytes when the string is UTF-8 encoded,
% since ø takes two bytes)</syntaxhighlight>
 
'''Note''': in the lang tag, Møøse is Latin1-reencoded, showing up two bytes (as Latin1) instead of one
Line 2,036 ⟶ 2,396:
=={{header|MIPS Assembly}}==
This only supports ASCII encoding, so it'll return both byte length and char length.
<langsyntaxhighlight lang="mips">
.data
#.asciiz automatically adds the NULL terminator character, \0 for us.
li $v0,10 #set syscall to cleanly exit EXIT_SUCCESS
syscall
</syntaxhighlight>
 
=={{header|mIRC Scripting Language}}==
===Byte Length===
{{needs-review|mIRC Scripting Language}}
<langsyntaxhighlight lang="mirc">alias stringlength { echo -a Your Name is: $len($$?="Whats your name") letters long! }</langsyntaxhighlight>
===Character Length===
{{needs-review|mIRC Scripting Language}}
''$utfdecode()'' converts an UTF-8 string to the locale encoding, with unrepresentable characters as question marks. Since mIRC is not yet fully Unicode aware, entering Unicode text through a dialog box will automatically convert it to ASCII.
<langsyntaxhighlight lang="mirc">alias utf8len { return $len($utfdecode($1)) }
alias stringlength2 {
var %name = Børje
echo -a %name is: $utf8len(%name) characters long!
}</syntaxhighlight>
 
=={{header|Modula-3}}==
===Byte Length===
<langsyntaxhighlight lang="modula3">MODULE ByteLength EXPORTS Main;
 
IMPORT IO, Fmt, Text;
BEGIN
IO.Put("Byte length of s: " & Fmt.Int((Text.Length(s) * BYTESIZE(s))) & "\n");
END ByteLength.</syntaxhighlight>
===Character Length===
<langsyntaxhighlight lang="modula3">MODULE StringLength EXPORTS Main;
 
IMPORT IO, Fmt, Text;
BEGIN
IO.Put("String length of s: " & Fmt.Int(Text.Length(s)) & "\n");
END StringLength.</syntaxhighlight>
 
=={{header|Nemerle}}==
Both examples rely on .Net faculties, so they're almost identical to C#
===Character Length===
<langsyntaxhighlight Nemerlelang="nemerle">def message = "How long am I anyways?";
def charlength = message.Length;</langsyntaxhighlight>
 
===Byte Length===
<syntaxhighlight lang="nemerle">using System.Text;
 
def message = "How long am I anyways?";
def bytelength = Encoding.Unicode.GetByteCount(message);</syntaxhighlight>
 
=={{header|NewLISP}}==
===Character Length===
<syntaxhighlight lang="newlisp">(set 'Str "møøse")
(println Str " is " (length Str) " characters long")</syntaxhighlight>
 
=={{header|Nim}}==
In Nim, <code>len</code> returns the byte length of strings, ignoring the UTF-8 encoding. When dealing with Unicode strings, the module <code>unicode</code> must be used.
===Byte Length===

<syntaxhighlight lang="nim">
echo "møøse".len # 7
echo "𝔘𝔫𝔦𝔠𝔬𝔡𝔢".len # 28
echo "J̲o̲s̲é̲".len # 13
</syntaxhighlight>

===Character Length===

<syntaxhighlight lang="nim">
import unicode
echo "møøse".runeLen # 5
echo "𝔘𝔫𝔦𝔠𝔬𝔡𝔢".runeLen # 7
echo "J̲o̲s̲é̲".runeLen # 8
</syntaxhighlight>
 
===Grapheme Length===
 
[https://nim-lang.org/docs/unicode.html#graphemeLen%2Cstring%2CNatural graphemeLen()] does not do what you expect. It doesn't return the number of graphemes in a string, but the number of bytes at a given character/codepoint index of the string.
 
=={{header|Oberon-2}}==
 
===Byte Length===
<langsyntaxhighlight lang="oberon2">MODULE Size;
 
IMPORT Out;
Out.LongInt(s,0);
Out.Ln;
END Size.</syntaxhighlight>
 
 
===Character Length===
<langsyntaxhighlight lang="oberon2">MODULE Length;
 
IMPORT Out, Strings;
Out.Int(l,0);
Out.Ln;
END Length.</syntaxhighlight>
 
=={{header|Objeck}}==
 
===Character Length===
<langsyntaxhighlight lang="objeck">
"Foo"->Size()->PrintLine();
</syntaxhighlight>
 
===Byte Length===
<langsyntaxhighlight lang="objeck">
"Foo"->Size()->PrintLine();
</syntaxhighlight>
 
=={{header|Objective-C}}==
===Character Length===
 
The length method of NSString objects is not the length of that string in characters. Instead, it only gives the number of 16-bit code units used to encode a string. This is not (always) the number of Unicode characters (code points) in the string.
<langsyntaxhighlight lang="objc">// Return the length in characters
// XXX: does not (always) count Unicode characters (code points)!
unsigned int numberOfCharacters = [@"møøse" length]; // 5</syntaxhighlight>
 
Since Mac OS X 10.6, CFString has methods for converting between supplementary characters and surrogate pairs. However, the easiest way to get the number of characters is probably to encode the string in UTF-32 (which is a fixed-length encoding) and divide by 4:
<langsyntaxhighlight lang="objc">int realCharacterCount = [s lengthOfBytesUsingEncoding: NSUTF32StringEncoding] / 4;</langsyntaxhighlight>
 
===Byte Length===
Objective-C encodes strings in UTF-16, which represents each character with one or two 16-bit values. The length method of NSString objects returns the number of 16-bit values used to encode a string, so the number of bytes can be determined by doubling that number.
 
<langsyntaxhighlight lang="objc">int byteCount = [@"møøse" length] * 2; // 10</langsyntaxhighlight>
 
Another way to know the byte length of a string is to explicitly specify the charset we desire.
 
<langsyntaxhighlight lang="objc">// Return the number of bytes depending on the encoding,
// here explicitly UTF-8
unsigned numberOfBytes =
[@"møøse" lengthOfBytesUsingEncoding: NSUTF8StringEncoding]; // 7</langsyntaxhighlight>
 
=={{header|OCaml}}==
===Byte Length===
 
Standard OCaml strings are classic ASCII ISO 8859-1, so the function String.length returns the byte length which is the character length in this encoding:
<langsyntaxhighlight lang="ocaml">String.length "Hello world" ;;</langsyntaxhighlight>
 
===Character Length===
 
When using the '''UTF8''' module of ''Camomile'', the byte length of a UTF-8 encoded string is still obtained with <tt>String.length</tt>, and the character length is returned by <tt>UTF8.length</tt>:
<langsyntaxhighlight lang="ocaml">open CamomileLibrary
 
let () =
Printf.printf " %d\n" (String.length "møøse");
Printf.printf " %d\n" (UTF8.length "møøse");
;;</syntaxhighlight>
 
Running this code prints:
<pre>
7
5
</pre>
 
Alternatively, you can use the UChar module (available since OCaml 4.03) to do it without additional modules.
<syntaxhighlight lang="OCaml">
let utf8_length (s: String.t) =
let byte_length = String.length s in
let rec count acc n =
if n = byte_length
then acc
else
let n' = n + (String.get_utf_8_uchar s n |> Uchar.utf_decode_length) in
count (succ acc) n'
in
count 0 0
;;
</syntaxhighlight>
 
<pre>
# utf8_length "møøse"
- : int = 5
</pre>
 
=={{header|Octave}}==
<langsyntaxhighlight lang="octave">s = "string";
stringlen = length(s)</langsyntaxhighlight>
 
This gives the number of bytes, not of characters; for example, length("è") is 2 when "è" is encoded as UTF-8.
 
=={{header|Ol}}==
<langsyntaxhighlight lang="scheme">
; Character length
(print (string-length "Hello, wørld!"))
; ==> 13

; Byte length
(print (length (string->bytes "Hello, wørld!")))
; ==> 14
</syntaxhighlight>
 
=={{header|OpenEdge/Progress}}==
 
===Character Length===
<langsyntaxhighlight lang="progress">DEF VAR lcc AS LONGCHAR.
FIX-CODEPAGE( lcc ) = "UTF-8".
lcc = "møøse".
 
MESSAGE LENGTH( lcc ) VIEW-AS ALERT-BOX.</syntaxhighlight>
===Byte Length===
<langsyntaxhighlight lang="progress">DEF VAR lcc AS LONGCHAR.
FIX-CODEPAGE( lcc ) = "UTF-8".
lcc = "møøse".
 
MESSAGE LENGTH( lcc, "RAW" ) VIEW-AS ALERT-BOX.</langsyntaxhighlight>
 
=={{header|Oz}}==
===Byte Length===
<langsyntaxhighlight lang="oz">{Show {Length "Hello World"}}</langsyntaxhighlight>
Oz uses a single-byte encoding by default. So for normal strings, this will also show the correct character length.
 
=={{header|PARI/GP}}==
===Character Length===
Characters = bytes in Pari; the underlying strings are C strings interpreted as US-ASCII.
<langsyntaxhighlight lang="parigp">len(s)=#s; \\ Alternately, len(s)=length(s); or even len=length;</langsyntaxhighlight>
===Byte Length===
This works on objects of any sort, not just strings, and includes overhead.
<langsyntaxhighlight lang="parigp">len(s)=sizebyte(s);</langsyntaxhighlight>
 
=={{header|Pascal}}==
===Byte Length===
<langsyntaxhighlight lang="pascal">
const
s = 'abcdef';
begin
writeln (length(s))
end.
</syntaxhighlight>
Output:
<pre>
6
</pre>

=={{header|Perl}}==
===Byte Length===
Strings in Perl consist of characters. Measuring the byte length therefore requires conversion to some binary representation (called encoding, both noun and verb).
 
<langsyntaxhighlight lang="perl">use utf8; # so we can use literal characters like ☺ in source
use Encode qw(encode);
 
 
print length encode 'UTF-16', "Hello, world! ☺";
# 32. 2 bytes for the BOM, then 15 byte pairs for each character.</syntaxhighlight>
 
===Character Length===
{{works with|Perl|5.X}}
 
<langsyntaxhighlight lang="perl">my $length = length "Hello, world!";</langsyntaxhighlight>
 
===Grapheme Length===
 
{{works with|Perl|5.12}}
<langsyntaxhighlight lang="perl">use v5.12;
my $string = "\x{1112}\x{1161}\x{11ab}\x{1100}\x{1173}\x{11af}"; # 한글
my $len;
$len++ while ($string =~ /\X/g);
printf "Grapheme length: %d\n", $len;</langsyntaxhighlight>
 
{{out}}
<pre>Grapheme length: 2</pre>

=={{header|Phix}}==
{{libheader|Phix/basics}}
The standard length function returns the number of bytes; character length can be obtained by converting to UTF-32 first.
<!--<syntaxhighlight lang="phix">-->
<span style="color: #008080;">constant</span> <span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"𝔘𝔫𝔦𝔠𝔬𝔡𝔢"</span>
<span style="color: #0000FF;">?<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">s<span style="color: #0000FF;">)</span>
<span style="color: #0000FF;">?<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">utf8_to_utf32<span style="color: #0000FF;">(<span style="color: #000000;">s<span style="color: #0000FF;">)<span style="color: #0000FF;">)
<!--</syntaxhighlight>-->
{{out}}
<pre>
28
7
</pre>
=={{header|PHP}}==
Program on a UTF-8 Linux system:
<syntaxhighlight lang="php"><?php
foreach (array('møøse', '𝔘𝔫𝔦𝔠𝔬𝔡𝔢', 'J̲o̲s̲é̲') as $s1) {
printf('String "%s" measured with strlen: %d mb_strlen: %s grapheme_strlen %s%s',
$s1, strlen($s1),mb_strlen($s1), grapheme_strlen($s1), PHP_EOL);
}
</syntaxhighlight>
yields the result:
<pre>
String "møøse" measured with strlen: 7 mb_strlen: 5 grapheme_strlen 5
String "𝔘𝔫𝔦𝔠𝔬𝔡𝔢" measured with strlen: 28 mb_strlen: 7 grapheme_strlen 7
String "J̲o̲s̲é̲" measured with strlen: 13 mb_strlen: 8 grapheme_strlen 4
</pre>
 
=={{header|PicoLisp}}==
<syntaxhighlight lang="picolisp">(let Str "møøse"
(prinl "Character Length of \"" Str "\" is " (length Str))
(prinl "Byte Length of \"" Str "\" is " (size Str)) )</langsyntaxhighlight>
Output:
<pre>Character Length of "møøse" is 5
Byte Length of "møøse" is 7</pre>
 
=={{header|PL/I}}==
<langsyntaxhighlight lang="pli">declare WS widechar (13) initial ('Hello world.');
put ('Character length=', length (WS));
put skip list ('Byte length=', size(WS));
declare SM graphic (13) initial ('Hello world');
put ('Character length=', length(SM));
put skip list ('Byte length=', size(trim(SM)));</syntaxhighlight>
 
=={{header|PL/SQL}}==
LENGTH4 uses UCS4 code points.
===Byte Length===
<langsyntaxhighlight lang="plsql">DECLARE
string VARCHAR2(50) := 'Hello, world!';
stringlength NUMBER;
BEGIN
stringlength := LENGTHB(string);
END;</syntaxhighlight>
 
===Character Length===
<langsyntaxhighlight lang="plsql">DECLARE
string VARCHAR2(50) := 'Hello, world!';
stringlength NUMBER;
ucs2length NUMBER;
ucs4length NUMBER;
BEGIN
stringlength := LENGTH(string);
ucs2length := LENGTH2(string);
ucs4length := LENGTH4(string);
END;</syntaxhighlight>
 
=={{header|Plain English}}==
===Byte Length===
{{libheader|Plain English-output}}
Plain English does not handle Unicode, so strings return their length in bytes.
<syntaxhighlight lang="text">
To run:
Start up.
Put "møøse" into a string.
Write the string's length to the output.
Wait for the escape key.
Shut down.
</syntaxhighlight>
 
=={{header|Pop11}}==
Currently Pop11 supports only strings consisting of 1-byte units. Strings can carry arbitrary binary data, so user can for example use UTF-8 (however builtin procedures will treat each byte as a single character). The length function for strings returns length in bytes:
 
<langsyntaxhighlight lang="pop11">lvars str = 'Hello, world!';
lvars len = length(str);</langsyntaxhighlight>
 
=={{header|PostScript}}==
===Character Length===
<syntaxhighlight lang="text">
(Hello World) length =
11
</syntaxhighlight>
 
=={{header|Potion}}==
===Character Length===
<langsyntaxhighlight lang="potion">"møøse" length print
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢" length print
"J̲o̲s̲é̲" length print</langsyntaxhighlight>
 
=={{header|PowerShell}}==
===Character Length===
<langsyntaxhighlight lang="powershell">$s = "Hëlló Wørłð"
$s.Length</langsyntaxhighlight>
===Byte Length===
{{trans|C#}}
 
For UTF-16, which is the default in .NET and therefore PowerShell:
<langsyntaxhighlight lang="powershell">$s = "Hëlló Wørłð"
[System.Text.Encoding]::Unicode.GetByteCount($s)</langsyntaxhighlight>
For UTF-8:
<langsyntaxhighlight lang="powershell">[System.Text.Encoding]::UTF8.GetByteCount($s)</langsyntaxhighlight>
 
=={{header|PureBasic}}==
===Character Length===
<syntaxhighlight lang="purebasic"> a = Len("Hello World") ;a will be 11</syntaxhighlight>
 
===Byte Length===
Note: The number of bytes returned does not include the terminating Null-Character of the string. The size of the Null-Character is 1 byte for Ascii and UTF8 mode and 2 bytes for Unicode mode.
 
<syntaxhighlight lang="purebasic">a = StringByteLength("ä", #PB_UTF8) ;a will be 2
b = StringByteLength("ä", #PB_Ascii) ;b will be 1
c = StringByteLength("ä", #PB_Unicode) ;c will be 2
</syntaxhighlight>
 
=={{header|Python}}==
===2.x===
====Byte Length====
 
For 8-bit strings, the byte length is the same as the character length:
<langsyntaxhighlight lang="python">print len('ascii')
# 5</langsyntaxhighlight>
 
For Unicode strings, length depends on the internal encoding. Since version 2.2 Python shipped with two build options: it either uses 2 or 4 bytes per character. The internal representation is not interesting for the user.
 
<langsyntaxhighlight lang="python"># The letter Alef
print len(u'\u05d0'.encode('utf-8'))
# 2
print len(u'\u05d0'.encode('iso-8859-8'))
# 1</syntaxhighlight>
 
Example from the problem statement:
<langsyntaxhighlight lang="python">#!/bin/env python
# -*- coding: UTF-8 -*-
s = u"møøse"
assert len(s) == 5
assert len(s.encode('UTF-8')) == 7
assert len(s.encode('UTF-16-BE')) == 10 # There are 3 different UTF-16 encodings: LE and BE are little endian and big endian respectively, the third one (without suffix) adds 2 extra leading bytes: the byte-order mark (BOM).</syntaxhighlight>
====Character Length====
{{works with|Python|2.4}}
len() returns the number of code units (not code points!) in a Unicode string or plain ASCII string. On a wide build, this is the same as the number of code points, but on a narrow one it is not. Most linux distributions install the wide build by default, you can check the build at runtime with:
 
<langsyntaxhighlight lang="python">import sys
sys.maxunicode # 1114111 on a wide build, 65535 on a narrow build </langsyntaxhighlight>
 
To get the length of an encoded string, you have to decode it first:
<langsyntaxhighlight lang="python">print len('ascii')
# 5
print len(u'\u05d0') # the letter Alef as unicode literal
# 1
print hex(sys.maxunicode), len(unichr(0x1F4A9))
# ('0x10ffff', 1)</syntaxhighlight>
 
On a narrow build, len() gives the wrong answer for non-BMP chars
 
<langsyntaxhighlight lang="python">print hex(sys.maxunicode), len(unichr(0x1F4A9))
# ('0xffff', 2)</langsyntaxhighlight>
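
One way to count code points that works on both narrow and wide builds is to encode the string with a fixed-width encoding such as UTF-32 and divide the byte count by 4. This is a small illustrative sketch (not part of the original examples), mirroring the UTF-32 trick shown in the Objective-C section:

<syntaxhighlight lang="python"># Sketch: portable code-point count on narrow and wide 2.x builds.
# UTF-32 is fixed-width, so each code point takes exactly 4 bytes.
s = u'\U0001F4A9'                      # a non-BMP character
print len(s.encode('utf-32-be')) // 4  # 1 on both build types</syntaxhighlight>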
 
===3.x===
====Byte Length====
You can use len() to get the length of a byte sequence.
 
<langsyntaxhighlight lang="python">print(len(b'Hello, World!'))
# 13</langsyntaxhighlight>
 
To get a byte sequence from a string, you have to encode it with the desired encoding:
 
<langsyntaxhighlight lang="python"># The letter Alef
print(len('\u05d0'.encode())) # the default encoding is utf-8 in Python3
# 2
print(len('\u05d0'.encode('iso-8859-8')))
# 1</syntaxhighlight>
 
Example from the problem statement:
<langsyntaxhighlight lang="python">#!/bin/env python
# -*- coding: UTF-8 -*-
s = "møøse"
assert len(s.encode()) == 7
assert len(s.encode('UTF-16-BE')) == 10
u="𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
assert len(u.encode()) == 28
assert len(u.encode('UTF-16-BE')) == 28</syntaxhighlight>
====Character Length====
 
Since Python 3.3, strings use a flexible internal representation (PEP 393): each string is stored with 1, 2 or 4 bytes per character, depending on the widest character it contains.
Thus Python is able to avoid memory overhead when dealing with only ASCII strings, while handling correctly all codepoints in Unicode. len() returns the number of characters/codepoints:
 
<langsyntaxhighlight lang="python">print(len("𝔘𝔫𝔦𝔠𝔬𝔡𝔢"))
# 7</langsyntaxhighlight>
 
Up to Python 3.2, by contrast, the length depended on the internal encoding, since Python shipped with two build options: it used either 2 or 4 bytes per character.
len() returned the number of code units in a string, which could be different from the number of characters. In a narrow build, this is not a reliable way to get the number of characters. You can only easily count code points in a wide build. Most linux distributions install the wide build by default, you can check the build at runtime with:
 
<langsyntaxhighlight lang="python">import sys
sys.maxunicode # 1114111 on a wide build, 65535 on a narrow build</langsyntaxhighlight>
 
<langsyntaxhighlight lang="python">print(len('ascii'))
# 5
print(len('\u05d0')) # the letter Alef as unicode literal
# 1</syntaxhighlight>
 
To get the length of an encoded byte sequence, you have to decode it first:
 
<langsyntaxhighlight lang="python">print(len(b'\xd7\x90'.decode('utf-8'))) # Alef encoded as utf-8 byte sequence
# 1</langsyntaxhighlight>
 
<langsyntaxhighlight lang="python">print(hex(sys.maxunicode), len(unichr(0x1F4A9)))
# ('0x10ffff', 1)</langsyntaxhighlight>
 
On a narrow build, len() gives the wrong answer for non-BMP chars
 
<langsyntaxhighlight lang="python">print(hex(sys.maxunicode), len(unichr(0x1F4A9)))
# ('0xffff', 2)</langsyntaxhighlight>
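
====Grapheme Length====

The standard library has no direct grapheme-cluster counter. A minimal sketch, assuming the third-party <code>regex</code> module (<code>pip install regex</code>) is available, counts matches of the grapheme-cluster pattern <code>\X</code>:

<syntaxhighlight lang="python">import regex  # third-party module; the built-in re module does not support \X

s = "J\u0332o\u0332s\u0332e\u0301\u0332"   # J̲o̲s̲é̲
print(len(regex.findall(r'\X', s)))
# 4</syntaxhighlight>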
 
=={{header|R}}==
 
===Byte length===
<langsyntaxhighlight lang="rsplus">a <- "m\u00f8\u00f8se"
print(nchar(a, type="bytes")) # print 7</langsyntaxhighlight>
 
===Character length===
<langsyntaxhighlight lang="rsplus">print(nchar(a, type="chars")) # print 5</langsyntaxhighlight>
 
=={{header|Racket}}==
 
Using this definition:
<syntaxhighlight lang="racket">(define str "J\u0332o\u0332s\u0332e\u0301\u0332")</syntaxhighlight>
on the REPL, we get the following:
 
===Character length===
<syntaxhighlight lang="racket">-> (printf "str has ~a characters" (string-length str))
str has 9 characters</syntaxhighlight>
 
===Byte length===
<syntaxhighlight lang="racket">-> (printf "str has ~a bytes in utf-8" (bytes-length (string->bytes/utf-8 str)))
str has 14 bytes in utf-8</syntaxhighlight>
 
=={{header|Raku}}==
===Byte Length===
 
<syntaxhighlight lang="raku" perl6line>say 'møøse'.encode('UTF-8').bytes;</langsyntaxhighlight>
 
===Character Length===
 
<syntaxhighlight lang="raku" perl6line>say 'møøse'.codes;</langsyntaxhighlight>
 
===Grapheme Length===
 
<syntaxhighlight lang="raku" perl6line>say 'møøse'.chars;</langsyntaxhighlight>
 
=={{header|REBOL}}==
===Byte Length===
 
<syntaxhighlight lang="rebol">;; r2
length? "møøse"
 
;; r3
length? to-binary "møøse"</syntaxhighlight>
 
===Character length===
 
<syntaxhighlight lang="rebol">;; r3
length? "møøse"</syntaxhighlight>
 
=={{header|ReScript}}==
===Byte Length===
<syntaxhighlight lang="rescript">Js.String2.length("abcd") == 4</syntaxhighlight>
 
=={{header|Retro}}==
===Byte Length===
<syntaxhighlight lang="retro">'møøse s:length n:put</syntaxhighlight>
 
===Character Length===
Retro does not have built-in support for Unicode, but counting of characters can be done with a small amount of effort.
 
<syntaxhighlight lang="retro">chain: UTF8'
{{
: utf+ ( $-$ )
;chain
 
"møøse" ^UTF8'getLength putn</langsyntaxhighlight>
 
=={{header|REXX}}==
All REXX data (including numbers) is stored as character strings.
===Byte Length===
<syntaxhighlight lang="rexx">/*REXX program displays the lengths (in bytes/characters) for various strings. */
/* 1 */ /*a handy-dandy over/under scale.*/
/* 123456789012345 */
sum = 5+1 ; say 'the length of SUM is ' length(sum)
/* [↑] is, of course, 6. */
/*stick a fork in it, we're done.*/</syntaxhighlight>
=={{header|Ring}}==
===Character Length===
<langsyntaxhighlight lang="ring">
aString = "Welcome to the Ring Programming Language"
aStringSize = len(aString)
see "Character lenghts : " + aStringSize
</syntaxhighlight>
 
=={{header|Robotic}}==
===Character Length===
<langsyntaxhighlight lang="robotic">
set "$local1" to "Hello world!"
* "String length: &$local1.length&"
end
</syntaxhighlight>
 
Unfortunately, only character length can be retrieved in this language.
 
=={{header|RPL}}==
RPL strings are all made of 8-bit characters.
"RPL" SIZE
 
=={{header|Ruby}}==
UTF8 is the default encoding in Ruby.
===Byte Length===
<langsyntaxhighlight lang="ruby">"J̲o̲s̲é̲".bytesize</langsyntaxhighlight>
 
===Character Length===
<langsyntaxhighlight lang="ruby">"J̲o̲s̲é̲".chars.length</langsyntaxhighlight>
===Grapheme Length===
<langsyntaxhighlight lang="ruby">"J̲o̲s̲é̲".grapheme_clusters.length</langsyntaxhighlight>
===Code Set Independence===
The next examples show the '''byte length''' and '''character length''' of "møøse" in different encodings.
{| class="wikitable"
! Code
! Output
|-
| <syntaxhighlight lang="ruby"># -*- coding: iso-8859-1 -*-
s = "møøse"
puts "Byte length: %d" % s.bytesize
puts "Character length: %d" % s.length</langsyntaxhighlight>
| <pre>Byte length: 5
Character length: 5</pre>
|-
| <syntaxhighlight lang="ruby"># -*- coding: utf-8 -*-
s = "møøse"
puts "Byte length: %d" % s.bytesize
puts "Character length: %d" % s.length</langsyntaxhighlight>
| <pre>Byte length: 7
Character length: 5</pre>
|-
| <syntaxhighlight lang="ruby"># -*- coding: gb18030 -*-
s = "møøse"
puts "Byte length: %d" % s.bytesize
puts "Character length: %d" % s.length</langsyntaxhighlight>
| <pre>Byte length: 11
Character length: 5</pre>
|}
Then either <code>string.scan(/./u).size</code> or <code>string.gsub(/./u, ' ').size</code> counts the UTF-8 characters in string.
 
<langsyntaxhighlight lang="ruby"># -*- coding: utf-8 -*-
 
class String
s = "文字化け"
puts "Byte length: %d" % s.bytesize
puts "Character length: %d" % s.gsub(/./u, ' ').size</langsyntaxhighlight>
 
=={{header|Run BASIC}}==
<langsyntaxhighlight lang="runbasic">input a$
print len(a$)</langsyntaxhighlight>
 
=={{header|Rust}}==
===Byte Length===
<syntaxhighlight lang="text">
fn main() {
let s = "文字化け"; // UTF-8
println!("Byte Length: {}", s.len());
}
</syntaxhighlight>
===Character Length===
<langsyntaxhighlight lang="rust">
fn main() {
let s = "文字化け"; // UTF-8
println!("Character length: {}", s.chars().count());
}
</syntaxhighlight>
 
=={{header|SAS}}==
<langsyntaxhighlight lang="sas">data _null_;
a="Hello, World!";
b=length(a);
put _all_;
run;</syntaxhighlight>
 
=={{header|Scala}}==
{{libheader|Scala}}
<langsyntaxhighlight lang="scala">
object StringLength extends App {
val s1 = "møøse"
} UTF16bytes= ${s.getBytes("UTF-16LE").size}"))
}
</syntaxhighlight>
{{out}}
<pre>The string: møøse, characterlength= 5 UTF8bytes= 7 UTF16bytes= 10
</pre>

=={{header|Scheme}}==
===Byte Length===
{{works_with|Gauche|0.8.7 [utf-8,pthreads]}}
The '''string-size''' function is a Gauche-only function.
<langsyntaxhighlight lang="scheme">(string-size "Hello world")</langsyntaxhighlight>
 
{{works with|PLT Scheme|4.2.4}}
<langsyntaxhighlight lang="scheme">(bytes-length #"Hello world")</langsyntaxhighlight>
 
===Character Length===
{{works_with|Gauche|0.8.7 [utf-8,pthreads]}}
The '''string-length''' function is defined in [[R5RS]] and [[R6RS]].
<langsyntaxhighlight lang="scheme"> (string-length "Hello world")</langsyntaxhighlight>
 
=={{header|sed}}==
Text is read from standard input e.g. <code>echo "string" | sed -f script.sed</code> or <code>sed -f script.sed file.txt</code> (The solution given would be the contents of a text file <code>script.sed</code> in these cases).
For files with more than one line, sed will give a count for each line.
<syntaxhighlight lang="sed"># create unary numeral (i = 1)
The 'convert to digits' section is based off of [http://unix.stackexchange.com/a/36959/11750 this StackExchange answer].
s/./i/g
<lang sed># Change all characters to '|'.
:loop
s/./\|/g;
# divide by 10 (x = 10)
 
s/i\{10\}/x/g
# Convert to digits
# convert remainder to decimal digit
:convert
/i/!s/[0-9]*$/0&/
s/||||||||||/</g
s/<i\([0-{9]*\)$}/<0\19/g
s/|||||||||i\{8\}/98/g;
s/i\{7\}/7/
s/|||||||||/9/g; s/||||||||/8/g; s/|||||||/7/g; s/||||||/6/g;
s/i\{6\}/6/
s/|||||/5/g; s/||||/4/g; s/|||/3/g; s/||/2/g; s/|/1/g;
s/<iiiii/|5/g
s/iiii/4/
t convert
s/^$iii/03/</lang>
s/ii/2/
s/i/1/
# convert quotient (10s) to 1s
y/x/i/
# start over for the next magnitude (if any)
/i/b loop</syntaxhighlight>
 
=={{header|Seed7}}==
===Character Length===
<langsyntaxhighlight lang="seed7">length("Hello, world!")</langsyntaxhighlight>
 
=={{header|SETL}}==
===Character Length===
<langsyntaxhighlight lang="haskell">print(# "Hello, world!"); -- '#' is the cardinality operator. Works on strings, tuples, and sets.</langsyntaxhighlight>
 
=={{header|Sidef}}==
 
<langsyntaxhighlight lang="ruby">var str = "J\x{332}o\x{332}s\x{332}e\x{301}\x{332}";</langsyntaxhighlight>
 
===Byte Length===
UTF-8 byte length (default):
<langsyntaxhighlight lang="ruby">say str.bytes.len; #=> 14</langsyntaxhighlight>
 
UTF-16 byte length:
<langsyntaxhighlight lang="ruby">say str.encode('UTF-16').bytes.len; #=> 20</langsyntaxhighlight>
 
===Character Length===
<langsyntaxhighlight lang="ruby">say str.chars.len; #=> 9</langsyntaxhighlight>
 
===Grapheme Length===
<langsyntaxhighlight lang="ruby">say str.graphs.len; #=> 4</langsyntaxhighlight>
 
=={{header|Simula}}==
===Byte Length===
<langsyntaxhighlight lang="simula">BEGIN
TEXT LINE;
WHILE NOT LASTITEM DO
END;
END.
</syntaxhighlight>
===Character Length===
To calculate the character length, one can do it manually:
<langsyntaxhighlight lang="simula">BEGIN
 
! NUMBER OF UTF8 CHARACTERS IN STRING ;
END;
 
END.</syntaxhighlight>
{{out}}
<pre>"møøse" CHARACTER LENGTH = 5
</pre>
 
=={{header|Slate}}==
<syntaxhighlight lang ="slate">'Hello, world!' length.</langsyntaxhighlight>
 
=={{header|Slope}}==
 
=== Character Length ===
<syntaxhighlight lang="slope">(length "møøse")</syntaxhighlight>
=== Byte Length ===
<syntaxhighlight lang="slope">(length (string->bytes "møøse"))</syntaxhighlight>
 
=={{header|Smalltalk}}==
Line 2,976 ⟶ 3,396:
 
{{works with|Smalltalk/X}}
<langsyntaxhighlight lang="smalltalk">'hello' size -> 5
'hello' utf8Encoded size -> 5
'hello' utf8Encoded asByteArray -> #[104 101 108 108 111]
Line 2,990 ⟶ 3,410:
'𝔘𝔫𝔦𝔠𝔬𝔡𝔢' utf8Encoded asByteArray -> #[240 157 148 152 240 157 148 171 240 157 148 166 240 157 148 160 240 157 148 172 240 157 148 161 240 157 148 162]
'𝔘𝔫𝔦𝔠𝔬𝔡𝔢' utf16Encoded size -> 14
'𝔘𝔫𝔦𝔠𝔬𝔡𝔢' utf16Encoded asWordArray -> WordArray(55349 56600 55349 56619 55349 56614 55349 56608 55349 56620 55349 56609 55349 56610)</syntaxhighlight>
 
===Byte Length===
{{works with|GNU Smalltalk}}
<langsyntaxhighlight lang="smalltalk">string := 'Hello, world!'.
string size.</langsyntaxhighlight>
===Character Length===
{{works with|GNU Smalltalk}}
<langsyntaxhighlight lang="smalltalk">string := 'Hello, world!'.
string numberOfCharacters.</langsyntaxhighlight>
 
requires loading the Iconv package:
 
<syntaxhighlight lang ="smalltalk">PackageLoader fileInPackage: 'Iconv'</langsyntaxhighlight>
 
=={{header|SNOBOL4}}==
===Byte Length ===
<langsyntaxhighlight lang="snobol4">
output = "Byte length: " size(trim(input))
end
</syntaxhighlight>
 
===Character Length ===
The example works AFAIK only with CSnobol4 by Phil Budne
<langsyntaxhighlight lang="snobol4">
-include "utf.sno"
output = "Char length: " utfsize(trim(input))
end
</syntaxhighlight>
 
=={{header|Sparkling}}==
===Byte length===
<syntaxhighlight lang="sparkling">spn:1> sizeof "Hello, wørld!"
= 14</syntaxhighlight>
 
=={{header|SPL}}==
All strings in SPL are Unicode. See code below.
===Character Length===
<langsyntaxhighlight lang="spl">t = ["abc","J̲o̲s̲é̲","møøse","𝔘𝔫𝔦𝔠𝔬𝔡𝔢"]
 
> i, 1..#.size(t,1)
<
#.output(s)
<</syntaxhighlight>
=={{header|SQL PL}}==
{{works with|Db2 LUW}}
With SQL only:
<langsyntaxhighlight lang="sql pl">
VALUES LENGTH('møøse', CODEUNITS16);
VALUES LENGTH('møøse', CODEUNITS32);
Line 3,101 ⟶ 3,521:
VALUES LENGTH2('J̲o̲s̲é̲');
VALUES LENGTH4('J̲o̲s̲é̲');
</syntaxhighlight>
{{works with|Db2 LUW}}
With SQL only:
<langsyntaxhighlight lang="sql pl">
VALUES LENGTH('møøse');
VALUES LENGTHB('møøse');
Line 3,223 ⟶ 3,643:
VALUES LENGTH('J̲o̲s̲é̲');
VALUES LENGTHB('J̲o̲s̲é̲');
</syntaxhighlight>
=={{header|Standard ML}}==
===Byte Length===
{{works with|Moscow ML|2.01}}
{{works with|MLton|20061107}}
<langsyntaxhighlight lang="sml">val strlen = size "Hello, world!";</langsyntaxhighlight>
===Character Length===
{{works with|Standard ML of New Jersey|SML/NJ|110.74}}
<langsyntaxhighlight lang="sml">val strlen = UTF8.size "Hello, world!";</langsyntaxhighlight>
 
=={{header|Stata}}==
Line 3,286 ⟶ 3,706:
Use '''[https://www.stata.com/help.cgi?f_strlen strlen]''' for byte length, and '''[https://www.stata.com/help.cgi?f_ustrlen ustrlen]''' for the number of Unicode characters in a string.
 
<langsyntaxhighlight lang="stata">scalar s="Ἐν ἀρχῇ ἐποίησεν ὁ θεὸς τὸν οὐρανὸν καὶ τὴν γῆν"
 
di strlen(s)
 
di ustrlen(s)
47</syntaxhighlight>
 
=={{header|Stringle}}==
The only current implementation of Stringle uses 8-bit character sets, meaning character and byte length are always the same.
 
This prints the length of a string from input:
 
<syntaxhighlight lang="stringle">$ #$</syntaxhighlight>
 
=={{header|Swift}}==
===Grapheme Length===
To count "characters" (Unicode grapheme clusters):
{{works with|Swift|2.x}}
<langsyntaxhighlight lang="swift">let numberOfCharacters = "møøse".characters.count // 5</langsyntaxhighlight>
{{works with|Swift|1.2}}
<langsyntaxhighlight lang="swift">let numberOfCharacters = count("møøse") // 5</langsyntaxhighlight>
{{works with|Swift|1.0-1.1}}
<langsyntaxhighlight lang="swift">let numberOfCharacters = countElements("møøse") // 5</langsyntaxhighlight>
 
===Character Length===
To count Unicode code points:
{{works with|Swift|2.x}}
<langsyntaxhighlight lang="swift">let numberOfCodePoints = "møøse".unicodeScalars.count // 5</langsyntaxhighlight>
{{works with|Swift|1.2}}
<langsyntaxhighlight lang="swift">let numberOfCodePoints = count("møøse".unicodeScalars) // 5</langsyntaxhighlight>
{{works with|Swift|1.0-1.1}}
<langsyntaxhighlight lang="swift">let numberOfCodePoints = countElements("møøse".unicodeScalars) // 5</langsyntaxhighlight>
 
===Byte Length===
For length in UTF-8, count the number of UTF-8 code units:
{{works with|Swift|2.x}}
<langsyntaxhighlight lang="swift">let numberOfBytesUTF8 = "møøse".utf8.count // 7</langsyntaxhighlight>
{{works with|Swift|1.2}}
<langsyntaxhighlight lang="swift">let numberOfBytesUTF8 = count("møøse".utf8) // 7</langsyntaxhighlight>
{{works with|Swift|1.0-1.1}}
<langsyntaxhighlight lang="swift">let numberOfBytesUTF8 = countElements("møøse".utf8) // 7</langsyntaxhighlight>
 
For length in UTF-16, count the number of UTF-16 code units, and multiply by 2:
{{works with|Swift|2.x}}
<langsyntaxhighlight lang="swift">let numberOfBytesUTF16 = "møøse".utf16.count * 2 // 10</langsyntaxhighlight>
{{works with|Swift|1.2}}
<langsyntaxhighlight lang="swift">let numberOfBytesUTF16 = count("møøse".utf16) * 2 // 10</langsyntaxhighlight>
{{works with|Swift|1.0-1.1}}
<langsyntaxhighlight lang="swift">let numberOfBytesUTF16 = countElements("møøse".utf16) * 2 // 10</langsyntaxhighlight>
 
=={{header|Symsyn}}==
===Byte Length===
<langsyntaxhighlight lang="symsyn">
c : 'abcdefgh'
#c []
</syntaxhighlight>
=={{header|Tcl}}==
===Byte Length===
Formally, Tcl does not guarantee to use any particular representation for its strings internally (the underlying implementation objects can hold strings in at least three different formats, mutating between them as necessary) so the way to calculate the "byte length" of a string can only be done with respect to some user-selected encoding. This is done this way (for UTF-8):
<langsyntaxhighlight lang="tcl">string length [encoding convertto utf-8 $theString]</langsyntaxhighlight>
<!-- Yes, there's <tt>string bytelength</tt>; don't use it. It's deeply wrong-headed and will probably go away in future releases. [[DKF]] -->
Thus, we have these examples:
<langsyntaxhighlight lang="tcl">set s1 "hello, world"
set s2 "\u304A\u306F\u3088\u3046"
set enc utf-8
puts [format "length of \"%s\" in bytes is %d" \
$s1 [string length [encoding convertto $enc $s1]]]
puts [format "length of \"%s\" in bytes is %d" \
$s2 [string length [encoding convertto $enc $s2]]]</syntaxhighlight>
 
===Character Length===
Basic version:
 
<langsyntaxhighlight lang="tcl">string length "Hello, world!"</langsyntaxhighlight>
 
or, more elaborately (needs any 8.X interpreter; tested on 8.4.12):
 
<langsyntaxhighlight lang="tcl">fconfigure stdout -encoding utf-8; #So that Unicode string will print correctly
set s1 "hello, world"
set s2 "\u304A\u306F\u3088\u3046"
puts [format "length of \"%s\" in characters is %d" $s1 [string length $s1]]
puts [format "length of \"%s\" in characters is %d" $s2 [string length $s2]]</langsyntaxhighlight>
 
=={{header|TI-89 BASIC}}==
The TI-89 uses a fixed 8-bit encoding, so there is no difference between character length and byte length.
 
<langsyntaxhighlight lang="ti89b">■ dim("møøse") 5</langsyntaxhighlight>
 
=={{header|Toka}}==
===Byte Length===
<langsyntaxhighlight lang="toka">" hello, world!" string.getLength</langsyntaxhighlight>
 
=={{header|Trith}}==
===Character Length===
<langsyntaxhighlight lang="trith">"møøse" length</langsyntaxhighlight>
===Byte Length===
<langsyntaxhighlight lang="trith">"møøse" size</langsyntaxhighlight>
 
=={{header|TUSCRIPT}}==
===Character Length ===
<langsyntaxhighlight lang="tuscript">
$$ MODE TUSCRIPT
string="hello, world"
l=LENGTH (string)
PRINT "character length of string '",string,"': ",l
</syntaxhighlight>
Output:
<pre>
character length of string 'hello, world': 12
</pre>
 
=={{header|UNIX Shell}}==
====Byte length via external utility:====
 
{{works with|Bourne Shell}}
<langsyntaxhighlight lang="bash">string='Hello, world!'
length=`expr "x$string" : '.*' - 1`
echo $length # if you want it printed to the terminal</syntaxhighlight>
 
====With [[Unix|SUSv3]] parameter expansion modifier:====
 
This returns the byte count in ash/dash, but the character count in bash, ksh, and zsh:
 
{{works with|Almquist SHell}}
{{works with|Bourne Again SHell|3.2}}
{{works with|Korn Shell|93}}
{{works with|Z SHell}}
<langsyntaxhighlight lang="bash">string='Hello, world!'
length="${#string}"
echo $length # if you want it printed to the terminal</syntaxhighlight>
 
=={{header|Vala}}==
===Character Length===
<langsyntaxhighlight lang="vala">
string s = "Hello, world!";
int characterLength = s.length;
</syntaxhighlight>
 
=={{header|VBA}}==
=={{header|VBScript}}==
===Byte Length===
<syntaxhighlight lang ="vbscript">LenB(string|varname)</langsyntaxhighlight>
 
Returns the number of bytes required to store a string in memory. Returns null if string|varname is null.
===Character Length===
<syntaxhighlight lang ="vbscript">Len(string|varname)</langsyntaxhighlight>
 
Returns the length of the string|varname . Returns null if string|varname is null.
=={{header|Visual Basic .NET}}==
====Byte Length====
One method of Encoding returns the number of bytes required to encode a .NET string in that encoding (encoding objects can be obtained through readonly static [Shared in VB.NET] properties of the Encoding class).
 
<langsyntaxhighlight lang="vbnet">Module ByteLength
Function GetByteLength(s As String, encoding As Text.Encoding) As Integer
Return encoding.GetByteCount(s)
End Function
End Module</syntaxhighlight>
 
====Character Length====
An alternative implementation is to count the number of UTF-16 surrogate pairs in a string and subtract that number from the number of UTF-16 code units in the string.
 
<langsyntaxhighlight lang="vbnet">Module CharacterLength
Function GetUTF16CodeUnitsLength(s As String) As Integer
Return s.Length
Return GetByteLength(s, Text.Encoding.UTF32) \ 4
End Function
End Module</syntaxhighlight>
 
====Grapheme Length====
<code>System.Globalization.StringInfo</code> provides a means of enumerating the text elements of a string, where each "text element" is a Unicode grapheme.
 
<langsyntaxhighlight lang="vbnet">Module GraphemeLength
' Wraps an IEnumerator, allowing it to be used as an IEnumerable.
Private Iterator Function AsEnumerable(enumerator As IEnumerator) As IEnumerable
Return AsEnumerable(elements).OfType(Of String).Count()
End Function
End Module</syntaxhighlight>
 
====Test Code====
The compiler constant <code>PRINT_TESTCASE</code> toggles whether to write the contents of each test case to the console; disable for inputs that may mess with the console.
 
<langsyntaxhighlight lang="vbnet">#Const PRINT_TESTCASE = True
 
Module Program
 
End Sub
End Module</syntaxhighlight>
 
{{out}}
<pre>
bytes (UTF-16) 18
bytes (UTF-32) 36
</pre>
 
=={{header|V (Vlang)}}==
{{trans|go}}
====Byte Length====
<syntaxhighlight lang="v (vlang)">fn main() {
m := "møøse"
u := "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
j := "J̲o̲s̲é̲"
println("$m.len $m ${m.bytes()}")
println("$u.len $u ${u.bytes()}")
println("$j.len $j ${j.bytes()}")
}</syntaxhighlight>
Output:
<pre>
7 møøse [m, 0xc3, 0xb8, 0xc3, 0xb8, s, e]
28 𝔘𝔫𝔦𝔠𝔬𝔡𝔢 [0xf0, 0x9d, 0x94, 0x98, 0xf0, 0x9d, 0x94, 0xab, 0xf0, 0x9d, 0x94, 0xa6, 0xf0, 0x9d, 0x94, 0xa0, 0xf0, 0x9d, 0x94, 0xac, 0xf0, 0x9d, 0x94, 0xa1, 0xf0, 0x9d, 0x94, 0xa2]
13 J̲o̲s̲é̲ [J, 0xcc, 0xb2, o, 0xcc, 0xb2, s, 0xcc, 0xb2, 0xc3, 0xa9, 0xcc, 0xb2]
</pre>
====Character Length====
<syntaxhighlight lang="v (vlang)">fn main() {
m := "møøse"
u := "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
j := "J̲o̲s̲é̲"
println("$m.runes().len $m ${m.runes()}")
println("$u.runes().len $u ${u.runes()}")
println("$j.runes().len $j ${j.runes()}")
}</syntaxhighlight>
Output:
<pre>
5 møøse [`m`, `ø`, `ø`, `s`, `e`]
7 𝔘𝔫𝔦𝔠𝔬𝔡𝔢 [`𝔘`, `𝔫`, `𝔦`, `𝔠`, `𝔬`, `𝔡`, `𝔢`]
8 J̲o̲s̲é̲ [`J`, `̲`, `o`, `̲`, `s`, `̲`, `é`, `̲`]
</pre>
 
=={{header|Wren}}==
===Byte Length===
<syntaxhighlight lang="wren">System.print("møøse".bytes.count)
System.print("𝔘𝔫𝔦𝔠𝔬𝔡𝔢".bytes.count)
System.print("J̲o̲s̲é̲".bytes.count)</langsyntaxhighlight>
 
{{out}}
<pre>
7
28
13
</pre>
 
===Character Length===
<syntaxhighlight lang="wren">System.print("møøse".count)
System.print("𝔘𝔫𝔦𝔠𝔬𝔡𝔢".count)
System.print("J̲o̲s̲é̲".count)</langsyntaxhighlight>
 
{{out}}
<pre>
5
7
8
</pre>
 
===Grapheme Length===
{{libheader|Wren-upc}}
<syntaxhighlight lang="wren">import "./upc" for Graphemes
 
System.print(Graphemes.clusterCount("møøse"))
System.print(Graphemes.clusterCount("𝔘𝔫𝔦𝔠𝔬𝔡𝔢"))
System.print(Graphemes.clusterCount("J̲o̲s̲é̲"))</syntaxhighlight>
 
{{out}}
<pre>
5
7
4
</pre>
 
=={{header|X86 Assembly}}==
The following code uses AT&T syntax and was tested using AS (the portable GNU assembler) under Linux.
 
<syntaxhighlight lang="x86 assembly">
.data
string: .asciz "Test"
Line 3,650 ⟶ 4,126:
leave
ret
</syntaxhighlight>
 
=={{header|XPL0}}==
<syntaxhighlight lang="xpl0">include c:\cxpl\stdlib;
IntOut(0, StrLen("Character length = Byte length = String length = "))</syntaxhighlight>
 
=={{header|XSLT}}==
===Character Length===
<langsyntaxhighlight lang="xml"><?xml version="1.0" encoding="UTF-8"?></langsyntaxhighlight>
...
<langsyntaxhighlight lang="xml"><xsl:value-of select="string-length('møøse')" /> <!-- 5 --></langsyntaxhighlight>
 
=={{header|xTalk}}==
LiveCode fully supports multi-byte Unicode characters since version 7. See the LiveCode section for more information.
 
<langsyntaxhighlight lang="xtalk">put the length of "Hello World" </langsyntaxhighlight>
or
<langsyntaxhighlight lang="xtalk">put the number of characters in "Hello World" -- 'chars' short for characters is also valid</langsyntaxhighlight>
 
===Byte Length===
<syntaxhighlight lang="livecode">put the number of bytes in "Hello World" -- use byte keyword in LiveCode for multi-byte Unicode</syntaxhighlight>
 
=={{header|Yorick}}==
===Character Length===
<langsyntaxhighlight lang="yorick">strlen("Hello, world!")</langsyntaxhighlight>
 
=={{header|Z80 Assembly}}==
===Byte Length===
Code is called as a subroutine, i.e. <code>CALL getStringLength</code>.
<langsyntaxhighlight lang="z80">; input: HL - pointer to the 0th char of a string.
; outputs length to B. HL will point to the last character in the string just before the terminator.
; length is one-indexed and does not include the terminator. A null string will return 0 in B.
inc hl ;next char
inc b ;increment the byte count
jr loop_getStringLength</syntaxhighlight>
 
=={{header|zkl}}==
If your terminal/editor deals with UTF-8 (mine doesn't), you don't need to use the escapes, just put the unicode characters in quotes (ie the editor inserts UTF bytes, which are non zero).
===Byte Length===
<langsyntaxhighlight lang="zkl">"abc".len() //-->3
"\ufeff\u00A2 \u20ac".len() //-->9
Data(0,Int,"\ufeff\u00A2 \u20ac") //-->Data(9) (bytes)
"J\u0332o\u0332s\u0332e\u0301\u0332".len() //-->14
"\U1D518;\U1D52B;\U1D526;\U1D520;\U1D52C;\U1D521;\U1D522;".len() //-->28</langsyntaxhighlight>
===Character Length===
UTF-8 characters are counted, modifiers (such as underscore) are counted as separate characters.
<langsyntaxhighlight lang="zkl">"abc".len(8) //-->3
"\ufeff\u00A2 \u20ac".len(8) //-->4 "BOM¢ €"
"\U1000;".len(8) //-->Exception thrown: ValueError(Invalid UTF-8 string)
"\uD800" //-->SyntaxError : Line 2: Bad Unicode constant (\uD800-\uDFFF)
"J\u0332o\u0332s\u0332e\u0301\u0332".len(8) //-->9 "J̲o̲s̲é̲"
"\U1D518;\U1D52B;\U1D526;\U1D520;\U1D52C;\U1D521;\U1D522;".len(8) //-->7 "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"</langsyntaxhighlight>
[[Wikipedia::https://en.wikipedia.org/wiki/Comparison_of_programming_languages_%28string_functions%29#length]]
 
=={{header|Zig}}==
<syntaxhighlight lang="zig">const std = @import("std");
 
fn printResults(alloc: std.mem.Allocator, string: []const u8) !void {
const cnt_codepts_utf8 = try std.unicode.utf8CountCodepoints(string);
// There is no sane and portable extended ascii, so the best
// we get is counting the bytes and assume regular ascii.
const cnt_bytes_utf8 = string.len;
const stdout_wr = std.io.getStdOut().writer();
try stdout_wr.print("utf8 codepoints = {d}, bytes = {d}\n", .{ cnt_codepts_utf8, cnt_bytes_utf8 });
 
const utf16str = try std.unicode.utf8ToUtf16LeWithNull(alloc, string);
const cnt_codepts_utf16 = try std.unicode.utf16CountCodepoints(utf16str);
const cnt_2bytes_utf16 = try std.unicode.calcUtf16LeLen(string);
try stdout_wr.print("utf16 codepoints = {d}, bytes = {d}\n", .{ cnt_codepts_utf16, 2 * cnt_2bytes_utf16 });
}
 
pub fn main() !void {
var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena_instance.deinit();
const arena = arena_instance.allocator();
const string1: []const u8 = "Hello, world!";
try printResults(arena, string1);
const string2: []const u8 = "møøse";
try printResults(arena, string2);
const string3: []const u8 = "𝔘𝔫𝔦𝔠𝔬𝔡𝔢";
try printResults(arena, string3);
// \u{332} is underscore of previous character, which the browser may not
// copy correctly
const string4: []const u8 = "J\u{332}o\u{332}s\u{332}e\u{301}\u{332}";
try printResults(arena, string4);
}</syntaxhighlight>
 
{{out}}
 
<pre>
utf8 codepoints = 13, bytes = 13
utf16 codepoints = 13, bytes = 26
utf8 codepoints = 5, bytes = 7
utf16 codepoints = 5, bytes = 10
utf8 codepoints = 7, bytes = 28
utf16 codepoints = 7, bytes = 28
utf8 codepoints = 9, bytes = 14
utf16 codepoints = 9, bytes = 18
</pre>