String length: Difference between revisions

Add Ecstasy example
m (→‎{{header|Kotlin}}: made the kotlin example not use java)
(Add Ecstasy example)
 
(15 intermediate revisions by 12 users not shown)
Line 579:
===Character Length===
{{works with|QBasic}}
 
{{works with|Liberty BASIC}}
 
{{works with|PowerBASIC|PB/CC, PB/DOS}}
 
Line 587 ⟶ 585:
<syntaxhighlight lang="qbasic"> INPUT a$
PRINT LEN(a$)</syntaxhighlight>
 
==={{header|ANSI BASIC}}===
ANSI BASIC requires line numbers.
<syntaxhighlight lang="basic">
10 INPUT A$
20 PRINT LEN(A$)
</syntaxhighlight>
 
==={{header|Applesoft BASIC}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|BASIC256}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|Chipmunk Basic}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|MSX Basic}}===
{{works with|MSX BASIC|any}}
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|Quite BASIC}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|True BASIC}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|Yabasic}}===
The [[#GW-BASIC|GW-BASIC]] solution works without any changes.
 
==={{header|ZX Spectrum Basic}}===
Line 1,266 ⟶ 1,293:
# 8
print len "J̲o̲s̲é̲"
# 1
print len "😀"
</syntaxhighlight>
 
=={{header|Ecstasy}}==
<syntaxhighlight lang="ecstasy">
module StrLen {
@Inject Console console;
 
void run(String s = "José") {
console.print($|For the string {s.quoted()}:
| Character length: {s.size}
| UTF-8 byte length: {s.calcUtf8Length()}
);
}
}
</syntaxhighlight>
 
{{out}}
<pre>
For the string "José":
Character length: 4
UTF-8 byte length: 5
</pre>
 
=={{header|Elena}}==
Line 1,336 ⟶ 1,386:
(string-width str)))
;; => (6 18 4) ;; in emacs 23 up</syntaxhighlight>
 
=={{header|EMal}}==
<syntaxhighlight lang="emal">
text moose = "møøse"
text unicode = "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
text jose = "J" + 0U0332 + "o" + 0U0332 + "s" + 0U0332 + "e" + 0U0301 + 0U0332
text emoji = "𠇰😈🎶🔥é-"
</syntaxhighlight>
===Byte Length===
<syntaxhighlight lang="emal">
writeLine((blob!moose).length)
writeLine((blob!unicode).length)
writeLine((blob!jose).length)
writeLine((blob!emoji).length)
</syntaxhighlight>
{{out}}
<pre>
7
28
14
19
</pre>
===Character Length===
<syntaxhighlight lang="emal">
writeLine(moose.codePointsLength)
writeLine(unicode.codePointsLength)
writeLine(jose.codePointsLength)
writeLine(emoji.codePointsLength)
</syntaxhighlight>
{{out}}
<pre>
5
7
9
6
</pre>
===Grapheme Length===
<syntaxhighlight lang="emal">
writeLine(moose.graphemesLength)
writeLine(unicode.graphemesLength)
writeLine(jose.graphemesLength)
writeLine(emoji.graphemesLength)
</syntaxhighlight>
{{out}}
<pre>
5
7
4
6
</pre>
 
=={{header|Erlang}}==
Line 1,698 ⟶ 1,798:
5</syntaxhighlight>
Here we have used 16 bit wide character literals. See also the dictionary page for [http://www.jsoftware.com/help/dictionary/duco.htm u:].
 
=={{header|Jakt}}==
===Character Length===
<syntaxhighlight lang="jakt">
fn character_length(string: String) -> i64 {
mut length = 0
for _ in string.code_points() {
length++
}
return length
}
 
fn main() {
for string in [
"Hello world!"
"møøse"
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
"J̲o̲s̲é̲"
] {
println("\"{}\" {}", string, character_length(string))
}
}
</syntaxhighlight>
{{out}}
<pre>
"Hello world!" 12
"møøse" 5
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢" 7
"J̲o̲s̲é̲" 8
</pre>
 
===Byte Length===
<syntaxhighlight lang="jakt">
fn main() {
for string in [
"Hello world!"
"møøse"
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢"
"J̲o̲s̲é̲"
] {
println("\"{}\" {}", string, string.length())
}
}
</syntaxhighlight>
{{out}}
<pre>
"Hello world!" 12
"møøse" 7
"𝔘𝔫𝔦𝔠𝔬𝔡𝔢" 28
"J̲o̲s̲é̲" 13
</pre>
 
=={{header|Java}}==
Line 1,722 ⟶ 1,873:
int actual_length = str.codePointCount(0, str.length()); // value is 1, which is the length in characters</syntaxhighlight>
===Grapheme Length===
 
Since JDK 20<ref>https://bugs.openjdk.org/browse/JDK-8291660</ref>.
 
<syntaxhighlight lang="java">import java.text.BreakIterator;
 
Line 1,749 ⟶ 1,903:
 
=={{header|JavaScript}}==
 
===Byte Length===
===Byte length===
JavaScript encodes strings in UTF-16, which represents each character with one or two 16-bit values. The length property of string objects gives the number of 16-bit values used to encode a string, so the number of bytes can be determined by doubling that number.
 
<syntaxhighlight lang="javascript">var s = "Hello, world!";
var s = "Hello, world!";
var byteCount = s.length * 2; //26</syntaxhighlight>
var byteCount = s.length * 2; // 26
===Character Length===
</syntaxhighlight>
 
It's easier to use Buffer.byteLength (Node.js-specific, not part of ECMAScript).
 
<syntaxhighlight lang="javascript">
a = '👩‍❤️‍👩'
Buffer.byteLength(a, 'utf16le'); // 16
Buffer.byteLength(a, 'utf8'); // 20
Buffer.byteLength(s, 'utf16le'); // 26
Buffer.byteLength(s, 'utf8'); // 13
</syntaxhighlight>
 
In pure ECMAScript, TextEncoder() can be used to return the UTF-8 byte size:
 
<syntaxhighlight lang="javascript">
(new TextEncoder().encode(a)).length; // 20
(new TextEncoder().encode(s)).length; // 13
</syntaxhighlight>
 
=== Unicode codepoint length ===
 
JavaScript encodes strings in UTF-16, which represents each character with one or two 16-bit values. The most commonly used characters are represented by one 16-bit value, while rarer ones like some mathematical symbols are represented by two.
 
JavaScript has no built-in way to determine how many characters are in a string. However, if the string only contains commonly used characters, the number of characters will be equal to the number of 16-bit values used to represent the characters.
 
<syntaxhighlight lang="javascript">var str1 = "Hello, world!";
<syntaxhighlight lang="javascript">
var len1 = str1.length; //13
var str1 = "Hello, world!";
var len1 = str1.length; // 13
 
var str2 = "\uD834\uDD2A"; // U+1D12A represented by a UTF-16 surrogate pair
var len2 = str2.length; // 2
</syntaxhighlight>
 
More generally, the spread syntax (<code>...</code>) in an array literal can be used to enumerate Unicode code points:
 
<syntaxhighlight lang="javascript">
[...str2].length // 1
</syntaxhighlight>
 
=== Unicode grapheme length ===
 
Counting Unicode code points returns the wrong size when the string uses combining characters such as joining sequences or diacritics, so we must count graphemes instead. The default granularity of Intl.Segmenter() is grapheme.
 
<syntaxhighlight lang="javascript">
[...new Intl.Segmenter().segment(a)].length; // 1
</syntaxhighlight>
 
var str2 = "\uD834\uDD2A"; //U+1D12A represented by a UTF-16 surrogate pair
var len2 = str2.length; //2</syntaxhighlight>
===ES6 destructuring/iterators===
 
ES6 provides several ways to get a string split into an array of code points instead of UTF-16 code units:
<syntaxhighlight lang="javascript">let
Line 1,787 ⟶ 1,982:
}
</syntaxhighlight>
 
=={{header|Joy}}==
;Byte length
<syntaxhighlight lang="joy">"Café" size.</syntaxhighlight>
{{out}}
<pre>5</pre>
 
=={{header|jq}}==
Line 2,048 ⟶ 2,249:
=={{header|M2000 Interpreter}}==
<syntaxhighlight lang="m2000 interpreter">
module String_length {
A$=format$("J\u0332o\u0332s\u0332e\u0301\u0332")
A$=format$("J\u0332o\u0332s\u0332e\u0301\u0332")
Print Len(A$) = 9 ' true Utf-16LE
Print Len.Disp(A$) = 49 \\ display' lengthtrue Utf-16LE
Print Len.Disp(A$) = 4 \\ display length
Buffer Clear Mem as Byte*100
Buffer Clear Mem as Byte*100
\\ Write at memory at offset 0 or address Mem(0)
\\ Write at memory at offset 0 or address Mem(0)
Return Mem, 0:=A$
Print Return Eval$(Mem, 0, 18):=A$
Print Eval$(Mem, 0, 18)
For i=0 to 17 step 2
For i=0 to 17 step 2
\\ print hex value and character
\\ print hex value and character
Hex Eval(Mem, i as integer), ChrCode$(Eval(Mem, i as integer))
Hex Eval(Mem, i as integer), ChrCode$(Eval(Mem, i as integer))
Next i
Next i
Document B$=A$
Document B$=A$
\\ encode to utf-8 with BOM (3 bytes 0xEF,0xBB,0xBF)
\\ encode to utf-8 with BOM (3 bytes 0xEF,0xBB,0xBF)
Save.Doc B$, "Checklen.doc", 2
Print Save.Doc B$, Filelen("Checklen.doc")=17, 2
Print Filelen("Checklen.doc")=17
\\ So length is 14 bytes + 3 the BOM
\\ So length is 14 bytes + 3 the BOM
Mem=Buffer("Checklen.doc")
Print len(Mem)=17 // len works for buffers too - unit byte
// version 12 can handle strings without suffix $
C=eval$(mem, 3, 14) // from 4th byte get 14 bytes in a string
Print len(C)*2=14 ' bytes // len()) for strings return double type of words (can return 0.5)
C=string$(C as utf8dec) ' decode bytes from utf8 to utf16LE
Print len(C)=9, C=A$, Len.Disp(C)=4
Print C
Report 2, C // proportional print on console - for text center justified rendering (2 - center)
}
String_length
</syntaxhighlight>
 
Line 2,893 ⟶ 3,106:
 
Unfortunately, only character length can be retrieved in this language.
 
=={{header|RPL}}==
RPL strings are all made of 8-bit characters.
"RPL" SIZE
 
=={{header|Ruby}}==
Line 3,029 ⟶ 3,246:
Text is read from standard input e.g. <code>echo "string" | sed -f script.sed</code> or <code>sed -f script.sed file.txt</code> (The solution given would be the contents of a text file <code>script.sed</code> in these cases).
For files with more than one line, sed will give a count for each line.
<syntaxhighlight lang="sed"># create unary numeral (i = 1)
The 'convert to digits' section is based off of [http://unix.stackexchange.com/a/36959/11750 this StackExchange answer].
s/./i/g
<syntaxhighlight lang="sed"># Change all characters to '|'.
:loop
s/./\|/g;
# divide by 10 (x = 10)
 
s/i\{10\}/x/g
# Convert to digits
# convert remainder to decimal digit
:convert
/i/!s/[0-9]*$/0&/
s/||||||||||/</g
s/<i\([0-{9]*\)$}/<0\19/g
s/|||||||||i\{8\}/98/g;
s/i\{7\}/7/
s/|||||||||/9/g; s/||||||||/8/g; s/|||||||/7/g; s/||||||/6/g;
s/i\{6\}/6/
s/|||||/5/g; s/||||/4/g; s/|||/3/g; s/||/2/g; s/|/1/g;
s/<iiiii/|5/g
s/iiii/4/
t convert
s/iii/3/
s/^$/0/</syntaxhighlight>
s/ii/2/
s/i/1/
# convert quotient (10s) to 1s
y/x/i/
# start over for the next magnitude (if any)
/i/b loop</syntaxhighlight>
 
=={{header|Seed7}}==
Line 3,490 ⟶ 3,713:
di ustrlen(s)
47</syntaxhighlight>
 
=={{header|Stringle}}==
The only current implementation of Stringle uses 8-bit character sets, meaning character length and byte length are always the same.
 
This prints the length of a string from input:
 
<syntaxhighlight lang="stringle">$ #$</syntaxhighlight>
 
=={{header|Swift}}==
Line 3,600 ⟶ 3,830:
 
=={{header|UNIX Shell}}==
====Byte length via external utility:====
====With external utility:====
 
{{works with|Bourne Shell}}
Line 3,609 ⟶ 3,838:
 
====With [[Unix|SUSv3]] parameter expansion modifier:====
 
This returns the byte count in ash/dash, but the character count in bash, ksh, and zsh:
 
{{works with|Almquist SHell}}
{{works with|Bourne Again SHell|3.2}}
{{works with|pdksh|5.2.14 99/07/13.2}}
{{works with|Korn Shell|93}}
{{works with|Z SHell}}
<syntaxhighlight lang="bash">string='Hello, world!'
length="${#string}"
echo $length # if you want it printed to the terminal</syntaxhighlight>
 
Line 3,828 ⟶ 4,059:
=={{header|Wren}}==
===Byte Length===
<syntaxhighlight lang="wren">System.print("møøse".bytes.count)
System.print("𝔘𝔫𝔦𝔠𝔬𝔡𝔢".bytes.count)
System.print("J̲o̲s̲é̲".bytes.count)</syntaxhighlight>
Line 3,840 ⟶ 4,071:
 
===Character Length===
<syntaxhighlight lang="wren">System.print("møøse".count)
System.print("𝔘𝔫𝔦𝔠𝔬𝔡𝔢".count)
System.print("J̲o̲s̲é̲".count)</syntaxhighlight>
Line 3,853 ⟶ 4,084:
===Grapheme Length===
{{libheader|Wren-upc}}
<syntaxhighlight lang="wren">import "./upc" for Graphemes
 
System.print(Graphemes.clusterCount("møøse"))
162

edits