Category talk:Wren-str: Difference between revisions
Content added Content deleted
(Copied over source code from previous 'str' module talk page.) |
(→Source code: Added a Str.indexOf method and a Utf8 class.) |
||
Line 27: | Line 27: | ||
static isWhitespace(c) { (c = code(c)) && (c == 32 || (c >= 9 && c <= 13)) } |
static isWhitespace(c) { (c = code(c)) && (c == 32 || (c >= 9 && c <= 13)) } |
||
/* Rather than use combinations of the above, these only call the 'code' |
/* Rather than use combinations of the above, these only call the 'code' method once. */ |
||
static isLetter(c) { |
static isLetter(c) { |
||
Line 229: | Line 229: | ||
if (!(s is String)) s = "%(s)" |
if (!(s is String)) s = "%(s)" |
||
return (i >= 0 && i < s.count) ? s.toList[i] : null |
return (i >= 0 && i < s.count) ? s.toList[i] : null |
||
} |
|||
// Finds the first occurrence of 'search' in 's' and reports where it starts as a
// codepoint index (not a byte index). Returns -1 if 'search' does not occur in 's'.
// 'search' must be a string, otherwise the fiber is aborted; 's' is converted to a
// string by interpolation if it is not one already.
static indexOf(s, search) {
    if (!(search is String)) Fiber.abort("Search argument must be a string.")
    if (!(s is String)) s = "%(s)"
    var byteIx = s.indexOf(search)  // the built-in search reports a byte offset
    // "Not found" (-1) and a match at the very start (0) need no conversion.
    if (byteIx == -1 || byteIx == 0) return byteIx
    var cpSeen = 0
    var bytesSeen = 0
    for (cp in s.codePoints) {
        bytesSeen = bytesSeen + Utf8.byteCount(cp)
        cpSeen = cpSeen + 1
        // The match begins immediately after the codepoints consumed so far.
        if (bytesSeen == byteIx) return cpSeen
    }
}
} |
||
Line 234: | Line 251: | ||
static change(s, i, t) { |
static change(s, i, t) { |
||
if (!(i is Num && i.isInteger && i >= 0)) Fiber.abort("Index must be a non-negative integer.") |
if (!(i is Num && i.isInteger && i >= 0)) Fiber.abort("Index must be a non-negative integer.") |
||
if (!(t is String)) Fiber.abort(" |
if (!(t is String)) Fiber.abort("Replacement must be a string.") |
||
if (!(s is String)) s = "%(s)" |
if (!(s is String)) s = "%(s)" |
||
var chars = s.toList |
var chars = s.toList |
||
Line 294: | Line 311: | ||
if (final > 0) res.add(sub(s, first..-1)) |
if (final > 0) res.add(sub(s, first..-1)) |
||
return res |
return res |
||
} |
|||
} |
|||
/* |
|||
Utf8 contains routines which are specific to the UTF-8 encoding of a string's bytes or codepoints. |
|||
*/ |
|||
class Utf8 { |
|||
// Reports how many bytes (1 to 4) the UTF-8 encoding of the codepoint 'cp' occupies.
// Aborts the fiber if 'cp' lies outside the range 0 to 0x10ffff.
static byteCount(cp) {
    if (cp < 0 || cp > 0x10ffff) Fiber.abort("Codepoint is out of range.")
    // Exclusive upper bounds of the codepoints which fit in 1, 2 and 3 bytes respectively.
    var limits = [0x80, 0x800, 0x10000]
    for (n in 1..limits.count) {
        if (cp < limits[n - 1]) return n
    }
    // Anything at or above 0x10000 needs the full 4 bytes.
    return 4
}
|||
// Returns, as a list, the UTF-8 bytes which encode the Unicode codepoint 'cp'.
static encode(cp) {
    var ch = String.fromCodePoint(cp)
    return ch.bytes.toList
}
|||
// Converts a list of UTF-8 encoded bytes into the equivalent Unicode codepoint.
// 'b' must be a list of 1 to 4 elements whose first element is an integer. The first
// byte alone selects how many bytes are decoded (1, 2, 3 or 4); surplus bytes are ignored.
// Aborts the fiber if 'b' fails that check or holds fewer bytes than its first byte
// calls for. The first byte is not otherwise checked for being a valid UTF-8 leading byte.
static decode(b) {
    if (!((b is List) && b.count >= 1 && b.count <= 4 && (b[0] is Num) && b[0].isInteger)) {
        Fiber.abort("Argument must be a byte list of length 1 to 4.")
    }
    var b0 = b[0]
    // Number of bytes the leading byte announces, using the same thresholds as the decoding below.
    var needed = (b0 < 0x80) ? 1 : ((b0 < 0xe0) ? 2 : ((b0 < 0xf0) ? 3 : 4))
    // Fail with a clear message rather than an out-of-bounds subscript error further down.
    if (b.count < needed) Fiber.abort("Byte list is too short for its leading byte.")
    var mbMask = 0x3f // non-first bytes start 10 and carry 6 bits of data
    if (b0 < 0x80) {
        return b0
    } else if (b0 < 0xe0) {
        var b2Mask = 0x1f // first byte of a 2-byte encoding starts 110 and carries 5 bits of data
        return (b0 & b2Mask) << 6 | (b[1] & mbMask)
    } else if (b0 < 0xf0) {
        var b3Mask = 0x0f // first byte of a 3-byte encoding starts 1110 and carries 4 bits of data
        return (b0 & b3Mask) << 12 | (b[1] & mbMask) << 6 | (b[2] & mbMask)
    } else {
        var b4Mask = 0x07 // first byte of a 4-byte encoding starts 11110 and carries 3 bits of data
        return (b0 & b4Mask) << 18 | (b[1] & mbMask) << 12 | (b[2] & mbMask) << 6 | (b[3] & mbMask)
    }
}
} |
||
} |
} |
||
// Type aliases for classes in case of any name clashes with other modules. |
// Type aliases for classes in case of any name clashes with other modules. |
||
var |
var Str_Char = Char |
||
var |
var Str_Str = Str |
||
var Str_Utf8 = Utf8</lang> |