Category talk:Wren-str: Difference between revisions

Line 27:

static isWhitespace(c) {
    // Replace the argument with whatever the sibling 'code' helper yields for it
    // (presumably its codepoint - the helper is not shown in this hunk).
    c = code(c)
    // If 'code' produced a falsy value (null/false) that value is passed straight back;
    // otherwise the result is true for 32 (space) or anything in 9..13 (tab to carriage return).
    return c && (c == 32 || (c >= 9 && c <= 13))
}

/* Rather than use combinations of the above, these only call the 'code' ~~nethod~~ once. */

/* Rather than use combinations of the above, these only call the 'code' method once. */

static isLetter(c) {

Line 229:

if (!(s is String)) s = "%(s)"

return (i >= 0 && i < s.count) ? s.toList[i] : null

}

// Returns the codepoint index (not byte index) at which 'search' first occurs in 's'

// or -1 if 'search' is not found.

static indexOf(s, search) {

if (!(search is String)) Fiber.abort("Search argument must be a string.")

if (!(s is String)) s = "%(s)"

var ix = s.indexOf(search)

if (ix == -1) return -1

if (ix == 0) return 0

var cpCount = 1

var byteCount = 0

for (cp in s.codePoints) {

byteCount = byteCount + Utf8.byteCount(cp)

if (ix == byteCount) return cpCount

cpCount = cpCount + 1

}

Line 234:

Line 251:

static change(s, i, t) {

if (!(i is Num && i.isInteger && i >= 0)) Fiber.abort("Index must be a non-negative integer.")

if (!(t is String)) Fiber.abort("~~Replacment~~ must be a string.")

if (!(t is String)) Fiber.abort("Replacement must be a string.")

if (!(s is String)) s = "%(s)"

var chars = s.toList

Line 294:

Line 311:

if (final > 0) res.add(sub(s, first..-1))

return res

}

/*

Utf8 contains routines which are specific to the UTF-8 encoding of a string's bytes or codepoints.

*/

class Utf8 {

// Returns the number of bytes in the UTF-8 encoding of its codepoint argument.

static byteCount(cp) {
    // Only 0..0x10ffff is accepted; anything outside that range aborts the current fiber.
    if (cp < 0 || cp > 0x10ffff) Fiber.abort("Codepoint is out of range.")
    // Walk the encoding-length thresholds in ascending order: below 0x80 -> 1 byte,
    // below 0x800 -> 2 bytes, below 0x10000 -> 3 bytes, everything else -> 4 bytes.
    return (cp < 0x80) ? 1 : ((cp < 0x800) ? 2 : ((cp < 0x10000) ? 3 : 4))
}

// Converts a Unicode codepoint into its constituent UTF-8 bytes.

static encode(cp) {
    // Build the string for this codepoint, then hand back its bytes as a List.
    var text = String.fromCodePoint(cp)
    return text.bytes.toList
}

// Converts a list of UTF-8 encoded bytes into the equivalent Unicode codepoint.

static decode(b) {

if (!((b is List) && b.count >= 1 && b.count <= 4 && (b[0] is Num) && b[0].isInteger)) {

Fiber.abort("Argument must be a byte list of length 1 to 4.")

}

var mbMask = 0x3f // non-first bytes start 10 and carry 6 bits of data

var b0 = b[0]

if (b0 < 0x80) {

return b0

} else if (b0 < 0xe0) {

var b2Mask = 0x1f // first byte of a 2-byte encoding starts 110 and carries 5 bits of data

return (b0 & b2Mask) << 6 | (b[1] & mbMask)

} else if (b0 < 0xf0) {

var b3Mask = 0x0f // first byte of a 3-byte encoding starts 1110 and carries 4 bits of data

return (b0 & b3Mask) << 12 | (b[1] & mbMask) << 6 | (b[2] & mbMask)

} else {

var b4Mask = 0x07 // first byte of a 4-byte encoding starts 11110 and carries 3 bits of data

return (b0 & b4Mask) << 18 | (b[1] & mbMask) << 12 | (b[2] & mbMask) << 6 | (b[3] & mbMask)

}

// Type aliases for classes in case of any name clashes with other modules.

var ~~Fmt_Char~~ = Char

var Str_Char = Char

var ~~Fmt_Str~~ = Str~~</lang>~~

var Str_Str = Str

var Str_Utf8 = Utf8</lang>