String length: Difference between revisions
Content added Content deleted
(add Crystal example) |
(Add Zig utf16le; utf16be will follow someday.) |
||
Line 3,841: | Line 3,841: | ||
<syntaxhighlight lang="zig">const std = @import("std"); |
<syntaxhighlight lang="zig">const std = @import("std"); |
||
fn printResults(alloc: std.mem.Allocator, string: []const u8) !void { |
|||
⚫ | |||
⚫ | |||
const cnt_codepts_utf8 = try std.unicode.utf8CountCodepoints(string); |
const cnt_codepts_utf8 = try std.unicode.utf8CountCodepoints(string); |
||
// There is no sane and portable extended ascii, so the best |
// There is no sane and portable extended ascii, so the best |
||
Line 3,848: | Line 3,847: | ||
const cnt_bytes_utf8 = string.len; |
const cnt_bytes_utf8 = string.len; |
||
const stdout_wr = std.io.getStdOut().writer(); |
const stdout_wr = std.io.getStdOut().writer(); |
||
try stdout_wr.print("codepoints = {d}, bytes = {d}\n", .{ cnt_codepts_utf8, cnt_bytes_utf8 }); |
try stdout_wr.print("utf8 codepoints = {d}, bytes = {d}\n", .{ cnt_codepts_utf8, cnt_bytes_utf8 }); |
||
// TODO utf16 |
|||
const utf16str = try std.unicode.utf8ToUtf16LeWithNull(alloc, string); |
|||
const cnt_codepts_utf16 = try std.unicode.utf16CountCodepoints(utf16str); |
|||
const cnt_2bytes_utf16 = try std.unicode.calcUtf16LeLen(string); |
|||
try stdout_wr.print("utf16 codepoints = {d}, bytes = {d}\n", .{ cnt_codepts_utf16, 2 * cnt_2bytes_utf16 }); |
|||
} |
|||
⚫ | |||
var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator); |
|||
defer arena_instance.deinit(); |
|||
const arena = arena_instance.allocator(); |
|||
⚫ | |||
try printResults(arena, string1); |
|||
const string2: []const u8 = "møøse"; |
|||
try printResults(arena, string2); |
|||
const string3: []const u8 = "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"; |
|||
try printResults(arena, string3); |
|||
// \u{332} is the combining low line, which underlines the previous character; |
|||
// it is written as an escape because the browser may not copy it correctly |
|||
const string4: []const u8 = "J\u{332}o\u{332}s\u{332}e\u{301}\u{332}"; |
|||
try printResults(arena, string4); |
|||
// Expected output (one utf8 line and one utf16 line per string): |
|||
// utf8 codepoints = 13, bytes = 13 |
|||
// utf16 codepoints = 13, bytes = 26 |
|||
// utf8 codepoints = 5, bytes = 7 |
|||
// utf16 codepoints = 5, bytes = 10 |
|||
// utf8 codepoints = 7, bytes = 28 |
|||
// utf16 codepoints = 7, bytes = 28 |
|||
// utf8 codepoints = 9, bytes = 14 |
|||
// utf16 codepoints = 9, bytes = 18 |
|||
}</syntaxhighlight> |
}</syntaxhighlight> |