Category talk:Wren-str: Difference between revisions

→‎Source code: Added Str.lastIndexOf method.
(→‎Source code: Added Greek class.)
(→‎Source code: Added Str.lastIndexOf method.)
 
(20 intermediate revisions by the same user not shown)
Line 1:
===Source code===
 
<syntaxhighlight lang="wren">/* Module "str.wren" */
 
/*
Line 63:
// Returns whether 'c' is a whitespace character, judged by the value 'd' returned
// by code(c): 32 (space), 9 to 13 inclusive (tab to carriage return) or 160
// (no-break space).
static isWhitespace(c) {
    var d = code(c)
    return d == 32 || (d >= 9 && d <= 13) || d == 160
}
Line 144:
 
// Checks if a string falls into a particular category.
// Returns true if 's' is non-empty and every codepoint is below 128.
static allAscii(s) {
    if (s == "") return false
    return s.codePoints.all { |cp| cp < 128 }
}
// Returns true if 's' is non-empty and every codepoint is below 256.
static allLatin1(s) {
    if (s == "") return false
    return s.codePoints.all { |cp| cp < 256 }
}
// Returns true if 's' is non-empty and every codepoint is an ASCII digit (48 to 57).
static allDigits(s) {
    if (s == "") return false
    return s.codePoints.all { |cp| cp >= 48 && cp <= 57 }
}
// Returns true if 's' is non-empty and every codepoint is in 'a' to 'z' (97 to 122).
static allAsciiLower(s) {
    if (s == "") return false
    return s.codePoints.all { |cp| cp >= 97 && cp <= 122 }
}
// Returns true if 's' is non-empty and every codepoint is in 'A' to 'Z' (65 to 90).
static allAsciiUpper(s) {
    if (s == "") return false
    return s.codePoints.all { |cp| cp >= 65 && cp <= 90 }
}
// Returns true if 's' is non-empty and Char.isAsciiLetter holds for each of its characters.
static allAsciiLetters(s) {
    if (s == "") return false
    return s.toList.all { |ch| Char.isAsciiLetter(ch) }
}
// Returns true if 's' is non-empty and Char.isAsciiAlphaNum holds for each of its characters.
static allAsciiAlphaNum(s) {
    if (s == "") return false
    return s.toList.all { |ch| Char.isAsciiAlphaNum(ch) }
}
// Returns true if 's' is non-empty and Char.isSpace holds for each of its characters.
static allSpace(s) {
    if (s == "") return false
    return s.toList.all { |ch| Char.isSpace(ch) }
}
// Returns true if 's' is non-empty and Char.isLower holds for each of its characters.
static allLower(s) { s != "" && s.toList.all { |c| Char.isLower(c) } }
// Returns true if 's' is non-empty and Char.isUpper holds for each of its characters.
static allUpper(s) { s != "" && s.toList.all { |c| Char.isUpper(c) } }
// Returns true if 's' is non-empty and Char.isLetter holds for each of its characters.
static allLetters(s) { s != "" && s.toList.all { |c| Char.isLetter(c) } }
// Returns true if 's' is non-empty and Char.isAlphanumeric holds for each of its characters.
static allAlphaNumeric(s) { s != "" && s.toList.all { |c| Char.isAlphanumeric(c) } }
// Returns true if 's' is non-empty and Char.isPrintable holds for each of its characters.
static allPrintable(s) { s != "" && s.toList.all { |c| Char.isPrintable(c) } }
// Returns true if 's' is non-empty and Char.isGraphic holds for each of its characters.
static allGraphic(s) { s != "" && s.toList.all { |c| Char.isGraphic(c) } }
// Returns true if 's' is non-empty and Char.isWhitespace holds for each of its characters.
static allWhitespace(s) { s != "" && s.toList.all { |c| Char.isWhitespace(c) } }
 
// Checks whether a string can be parsed to a number, an integer or a non-integer (float).
Line 236:
var words = s.split(" ")
return Strs.join(words.map { |w| capitalize(w) }.toList, " ")
}
 
// Removes accents and cedillas from all Latin-1 supplement characters in a string
// and also expands digraphs before returning the result.
// A non-string argument is first converted to its string representation.
static unaccent(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // accented[j] lists every accented form that is replaced by the letter unaccented[j].
    var accented = [
        "àáâãäå", "ÀÁÂÃÄÅ", "ç", "Ç", "ð", "Ð", "èéêë", "ÈÉÊË", "ìíîï", "ÌÍÎÏ",
        "ñ", "Ñ", "òóôõöø", "ÒÓÔÕÖØ", "ùúûü", "ÙÚÛÜ", "ýÿ", "Ý"
    ]
    var unaccented = "aAcCdDeEiInNoOuUyY"
    var digraphs = { "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss" }
    var chars = s.toList
    var count = chars.count
    // 'i' tracks the position in 'chars' of the codepoint currently being examined.
    var i = 0
    for (c in s.codePoints) {
        // Only codepoints 0xc0 to 0xff can appear in the tables above.
        if (c >= 0xc0 && c <= 0xff) {
            var found = false
            for (j in 0...accented.count) {
                if (accented[j].indexOf(chars[i]) >= 0) {
                    chars[i] = unaccented[j]
                    found = true
                    break
                }
            }
            if (!found && digraphs.containsKey(chars[i])) chars[i] = digraphs[chars[i]]
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
Line 244 ⟶ 276:
}
 
// Performs a circular shift of the characters of 's' 'n' places to the left.
// If 'n' is negative performs a circular right shift by '-n' places instead.
// A non-string 's' is first converted to its string representation.
// Aborts the fiber if 'n' is not an integer.
static lshift(s, n) {
    if (!(s is String)) s = "%(s)"
    if (!(n is Num) || !n.isInteger) Fiber.abort("'n' must be an integer.")
    var chars = s.toList
    var count = chars.count
    // Shifting a string of fewer than two characters cannot change it.
    if (count < 2) return s
    if (n < 0) return rshift(s, -n)
    // A shift by a multiple of 'count' is a no-op, so only the remainder matters.
    n = n % count
    if (n == 0) return s
    for (i in 1..n) {
        // Move the first character to the end and slide the others one place left.
        var t = chars[0]
        for (j in 0..count-2) chars[j] = chars[j+1]
        chars[-1] = t
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Performs a circular shift of the characters of 's' 'n' places to the right.
// If 'n' is negative performs a circular left shift by '-n' places instead.
// A non-string 's' is first converted to its string representation.
// Aborts the fiber if 'n' is not an integer.
static rshift(s, n) {
    if (!(s is String)) s = "%(s)"
    if (!(n is Num) || !n.isInteger) Fiber.abort("'n' must be an integer.")
    var chars = s.toList
    var count = chars.count
    // Shifting a string of fewer than two characters cannot change it.
    if (count < 2) return s
    if (n < 0) return lshift(s, -n)
    // A shift by a multiple of 'count' is a no-op, so only the remainder matters.
    n = n % count
    if (n == 0) return s
    for (i in 1..n) {
        // Move the last character to the front and slide the others one place right.
        var t = chars[-1]
        for (j in count-2..0) chars[j+1] = chars[j]
        chars[0] = t
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Convenience versions of the above methods which shift by just 1 place.
// Circular left shift of 's' by a single place.
static lshift(s) {
    return lshift(s, 1)
}
// Circular right shift of 's' by a single place.
static rshift(s) {
    return rshift(s, 1)
}
 
/* The indices (or ranges thereof) for all the following functions are measured in codepoints
Line 325 ⟶ 375:
var ix = Str.indexOf(ss, search)
return (ix >= 0) ? start + ix : -1
}
 
// Returns the codepoint index (not byte index) at which 'search' last occurs in 's'
// or -1 if 'search' is not found. 'search' may be any number of characters long;
// an empty 'search' is never found.
// A non-string 's' is first converted to its string representation.
// Aborts the fiber if 'search' is not a string.
static lastIndexOf(s, search) {
    if (!(search is String)) Fiber.abort("Search argument must be a string.")
    if (!(s is String)) s = "%(s)"
    var l = s.toList
    var t = search.toList
    var tc = t.count
    // Nothing can match an empty needle or one longer than the haystack
    // (this also covers an empty 's').
    if (tc == 0 || tc > l.count) return -1
    // Try each candidate start position from the rightmost one backwards.
    var i = l.count - tc
    while (i >= 0) {
        var j = 0
        while (j < tc && l[i + j] == t[j]) j = j + 1
        if (j == tc) return i
        i = i - 1
    }
    return -1
}
 
// Counts the non-overlapping occurrences of the string 't' within the string 's'.
// Splitting 's' on 't' yields one more piece than there are occurrences.
static occurs(s, t) {
    var pieces = s.split(t)
    return pieces.count - 1
}
 
// Counts the non-overlapping occurrences of the string 't' within the string 's',
// considering only the part of 's' from codepoint offset 'start' onwards.
static occurs(s, t, start) {
    // A zero offset means the whole string, so no substring needs to be taken.
    var tail = (start == 0) ? s : Str.sub(s, start..-1)
    return occurs(tail, t)
}
 
Line 368 ⟶ 441:
if (i == j) return s
var chars = s.toList
var t = chars[.swap(i], j)
chars[i] = chars[j]
chars[j] = t
return Strs.concat(chars)
}
 
// Returns 's' with 'from' replaced by 'to' up to 'n' times (all times if n is negative)
// but skipping the first 'skip' matches.
// Matches are numbered from 1 and those numbered 'skip' + 1 to 'n' inclusive are
// replaced, so nothing is replaced when 'skip' >= 'n'.
// A non-string 's' is first converted to its string representation.
// Aborts the fiber if 'from' or 'to' is not a string, 'n' is not an integer
// or 'skip' is not a non-negative integer.
static replace(s, from, to, n, skip) {
    if (!(from is String)) Fiber.abort("'from' must be a string.")
    if (!(to is String)) Fiber.abort("'to' must be a string.")
    if (!(n is Num && n.isInteger)) Fiber.abort("'n' must be an integer.")
    if (!(skip is Num && skip.isInteger && skip >= 0)) {
        Fiber.abort("'skip' must be a non-negative integer.")
    }
    if (!(s is String)) s = "%(s)"
    if (n < 0) {
        // Replacing everything without skipping is exactly the built-in replace.
        if (skip == 0) return s.replace(from, to)
        n = Num.maxSafeInteger
    }
    if (n == 0 || skip >= n) return s
    var count = 0
    var split = s.split(from)
    var res = ""
    // Re-join the pieces, choosing for each gap either the original 'from'
    // (match skipped or beyond the limit) or the replacement 'to'.
    for (i in 0...split.count-1) {
        count = count + 1
        res = res + split[i] + ((count <= skip || count > n) ? from : to)
    }
    return res + split[-1]
}
 
// Shorthand for the five argument 'replace' with no matches skipped ('skip' is zero).
static replace(s, from, to, n) {
    return replace(s, from, to, n, 0)
}
 
// Prefixes every line of 's' (lines being separated by "\n") with 'by'
// and returns the re-joined result.
// Aborts the fiber if either argument is not a string.
static indent(s, by) {
    if (!(s is String)) Fiber.abort("First argument must be a string.")
    if (!(by is String)) Fiber.abort("Second argument must be a string.")
    var indented = []
    for (line in s.split("\n")) indented.add(by + line)
    return indented.join("\n")
}
 
// Strips 'by' from the front of every line of 's' that starts with it, leaving
// other lines untouched, and returns the re-joined result.
// Aborts the fiber if either argument is not a string.
static dedent(s, by) {
    if (!(s is String)) Fiber.abort("First argument must be a string.")
    if (!(by is String)) Fiber.abort("Second argument must be a string.")
    // String subscripts are byte offsets, hence the byte length of the prefix.
    var prefixLen = by.bytes.count
    var dedented = []
    for (line in s.split("\n")) {
        dedented.add(line.startsWith(by) ? line[prefixLen..-1] : line)
    }
    return dedented.join("\n")
}
 
// Trims trailing spaces and tabs from each line of 's' (lines being separated
// by "\n") and returns the re-joined result.
// Aborts the fiber if the argument is not a string.
static tidy(s) {
    if (!(s is String)) Fiber.abort("Argument must be a string.")
    var tidied = []
    for (line in s.split("\n")) tidied.add(line.trimEnd(" \t"))
    return tidied.join("\n")
}
 
// Returns 's' repeated 'reps' times.
Line 416 ⟶ 545:
return res
}
 
// Splits 's' on 'sep' and returns a list of the resulting pieces with
// all empty pieces left out.
// A non-string 's' is first converted to its string representation.
// Aborts the fiber if 'sep' is not a non-empty string.
static splitNoEmpty(s, sep) {
    if (!(s is String)) s = "%(s)"
    if (!(sep is String) || sep.isEmpty) Fiber.abort("Separator must be a non-empty string.")
    var kept = []
    for (piece in s.split(sep)) {
        if (!piece.isEmpty) kept.add(piece)
    }
    return kept
}
 
// Splits a CSV 'line' into a list of one or more strings separated by 'sep' which must be
// a single character (except \v). Deals properly with embedded separators in quoted fields.
// Removes leading and trailing quotes from quoted fields if 'dequote' is true.
// A non-string 'line' is first converted to its string representation.
// Aborts the fiber if 'sep' is not a single character string or 'dequote' is not a Bool.
// Works in three steps: a naive split on 'sep'; a walk over the resulting pieces which
// overwrites with "\v" every separator found to lie inside a quoted field; and a second
// split on 'sep' after which each "\v" is turned back into 'sep'.
static splitCsv(line, sep, dequote) {
if (!(line is String)) line = "%(line)"
if (!(sep is String) || sep.count != 1) {
Fiber.abort("Separator must be a single character string.")
}
if (!(dequote is Bool)) Fiber.abort("Dequote must be a boolean.")
var fields = line.split(sep)
// After a piece's length has been added, 'count' is the index in 'chars' of the
// separator which follows that piece.
var count = 0
// True while inside a quoted field whose closing quote has not yet been seen.
var quoted = false
var chars = line.toList
for (i in 0...fields.count) {
var f = fields[i]
var fc = f.count
if (fc > 0) {
count = count + fc
if (!quoted && f[0] == "\"") {
// Opening quote with no closing quote in the same piece: the separator
// after this piece belongs to the field, so mask it.
// NOTE(review): for the last piece 'count' equals chars.count, so an unterminated
// quote looks like it would index past the end of 'chars' here — confirm.
if (f[-1] != "\"") {
quoted = true
chars[count] = "\v"
}
} else if (quoted && f[-1] == "\"") {
// This piece ends the quoted field; the separator after it is a real one.
quoted = false
} else if (quoted) {
// Still inside the quoted field: mask the following separator as well.
chars[count] = "\v"
}
} else if (quoted) {
// An empty piece inside a quoted field (adjacent separators): mask the separator.
chars[count] = "\v"
}
// Step over the separator itself.
count = count + 1
}
// Only unmasked separators remain, so this split respects quoted fields.
fields = chars.join("").split(sep)
// Restore the separators which were masked inside quoted fields.
for (i in 0...fields.count) fields[i] = fields[i].replace("\v", sep)
if (dequote) {
for (i in 0...fields.count) {
var f = fields[i]
var fc = f.count
// A field needs at least two characters to carry a pair of quotes.
if (fc < 2) continue
if (f[0] == "\"" && f[-1] == "\"") fields[i] = f[1...-1]
}
}
return fields
}
 
// Convenience versions of the above method which use default parameters.
// Splits a CSV 'line' on 'sep', always removing the quotes from quoted fields.
static splitCsv(line, sep) {
    return splitCsv(line, sep, true)
}
// Splits a CSV 'line' on commas, always removing the quotes from quoted fields.
static splitCsv(line) {
    return splitCsv(line, ",", true)
}
 
// Cuts the string 's' in two at the first occurrence of 'delim' and returns
// a two element list: the text before 'delim' and the text after it.
// When 'include' is true the second element starts with 'delim' itself.
// If 'delim' does not occur in 's', returns [s, ""].
// A non-string 's' is first converted to its string representation.
// Aborts the fiber if 'delim' is not a string or 'include' is not a Bool.
static bisect(s, delim, include) {
    if (!(delim is String)) Fiber.abort("Delimiter must be a string.")
    if (!(include is Bool)) Fiber.abort("Include must be true or false.")
    if (!(s is String)) s = "%(s)"
    var pos = s.indexOf(delim)
    if (pos == -1) return [s, ""]
    // The second part starts either at the delimiter or just past its bytes.
    var restStart = include ? pos : pos + delim.bytes.count
    return [s[0...pos], s[restStart..-1]]
}
 
// Shorthand for the three argument 'bisect' with the delimiter left out of the second part.
static bisect(s, delim) {
    return bisect(s, delim, false)
}
 
// Creates and returns a string from a list of bytes.
Line 433 ⟶ 639:
var chars = ca.map { |c| String.fromCodePoint(c) }.toList
return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Converts 's' to a number using the longest leading run of characters, after
// whitespace has been trimmed, which Num.fromString accepts.
// A Num argument is returned unchanged and any other non-string argument is first
// converted to its string representation.
// Returns null if no leading run of characters forms a valid number.
static toNum(s) {
    if (s is Num) return s
    if (!(s is String)) s = "%(s)"
    s = s.trim()
    var n = Num.fromString(s)
    if (n) return n
    if (s.count < 2) return null
    var chars = s.toList
    // Drop one trailing character at a time, retrying the conversion after each
    // removal, until only the first character is left.
    var last = chars.count - 1
    while (last >= 1) {
        chars.removeAt(last)
        n = Num.fromString(chars.join())
        if (n) return n
        last = last - 1
    }
    return null
}
 
// Converts a pattern into a list of tokens for processing by the 'isMatch' method.
// Characters within the pattern are represented as follows:
// Non-wildcard characters as themselves (i.e. single character strings);
// * (or **) by the number 0;
// ? (or *?) by the number 1;
// [set] by a list of the tokens within the set:
// single characters by themselves;
// a range of characters, a-b, by a Range of codepoints from 'a' to 'b'.
// If the first character of the set is '!' then the number -1 is inserted
// as a separate token immediately before the list.
// A '[' which is the last character of the pattern, or which has no later ']',
// is treated as an ordinary character.
// Aborts the fiber if a set contains no tokens.
static tokenize(pattern) {
var tokens = []
// Index of the pattern character currently being examined.
var i = 0
// Receives the index of the ']' which closes the current set.
var j
while (i < pattern.count) {
var c = pattern[i]
if (c == "*") {
// A '*' directly after another '*' adds nothing, so only one 0 token is kept.
if (i == 0 || tokens[-1] != 0) tokens.add(0)
} else if (c == "?") {
// "*?" collapses to a single '?': the preceding 0 token is overwritten by 1.
if (i > 0 && tokens[-1] == 0) tokens[-1] = 1 else tokens.add(1)
} else if (c == "[") {
if (i == pattern.count - 1) {
tokens.add(c)
} else if ((j = indexOf(pattern, "]", i + 1)) == -1) {
tokens.add(c)
} else {
// Tokens of the set which lies between the brackets.
var l = []
var s = sub(pattern, i+1...j)
var k = 0
while (k < s.count) {
var d = s[k]
if (d == "!") {
// Only a leading '!' negates the set; elsewhere it is an ordinary member.
if (k == 0) tokens.add(-1) else l.add(d)
} else if (k < s.count - 2 && s[k+1] == "-") {
// "a-b" becomes a Range of codepoints; skip over the '-' and the end character.
l.add(d.codePoints[0]..s[k+2].codePoints[0])
k = k + 2
} else {
l.add(d)
}
k = k + 1
}
if (l.count == 0) Fiber.abort("set cannot be empty.")
tokens.add(l)
// Move to the closing ']'; the increment below then steps past it.
i = i + s.count + 1
}
} else {
tokens.add(c)
}
i = i + 1
}
return tokens
}
 
// Returns whether a string 's' matches a 'pattern' which may already be tokenized
// if many strings are to be matched. Matching is case sensitive.
// Patterns may contain the following wildcards:
// * (or **) matches zero or more characters until the next token (if any) matches
// and doesn't backtrack in the event of subsequent failure;
// ? (or *?) matches exactly one character;
// [set] matches a single character from the set within the brackets e.g. [aeiou].
// The set can also contain ranges of characters separated by '-' e.g. [a-zA-Z].
// If the first character of the set is '!' then only characters NOT within the rest
// of the set are matched e.g. [!0-9] matches any character other than a digit.
// A string 'pattern' is converted with 'tokenize' first.
// Aborts the fiber if 'pattern' is not a non-empty string or list of tokens
// or if an unrecognized token is met.
// NOTE(review): 's.count' counts codepoints whereas 's[i]' is a byte subscript, so this
// presumably assumes 's' holds single-byte characters — confirm.
static isMatch(s, pattern) {
var tokens = pattern
if (tokens is String) tokens = tokenize(tokens)
if (!((tokens is List) && tokens.count > 0)) {
Fiber.abort("'pattern' must be a non-empty string or list of tokens.")
}
// Position in 's'.
var i = 0
// Position in 'tokens'.
var j = 0
// True after a 0 ('*') token until the token which follows it has matched.
var star = false
// True after a -1 token: the next set is negated.
var neg = false
while (i < s.count && j < tokens.count) {
var c = s[i]
var t = tokens[j]
if (t is Num) {
if (t == 0) {
star = true
} else if (t == 1) {
// '?' consumes exactly one character and cancels any pending '*'.
i = i + 1
star = false
} else if (t == -1) {
neg = true
} else {
Fiber.abort("'%(t)' is not a recognized token.")
}
j = j + 1
} else if (t is String) {
if (!star && c != t) return false
if (star && c == t) star = false
// While 'star' is still set the character is absorbed by the '*' and the same
// token is tried again against the next character.
i = i + 1
if (!star) j = j + 1
} else if (t is List) {
// Whether 'c' equals a member of the set or falls within one of its ranges.
var matched = false
for (e in t) {
if (e is String) {
if (e == c) {
matched = true
break
}
} else if (e is Range){
var cp = c.codePoints[0]
if (cp >= e.from && cp <= e.to) {
matched = true
break
}
} else {
Fiber.abort("'%(e)' is not a recognized token within a set.")
}
}
if (!star && !neg && !matched) return false
if (!star && neg && matched) return false
if (star && matched) star = false
i = i + 1
// Negation applies to a single set only.
neg = false
if (!star) j = j + 1
} else {
Fiber.abort("'%(t)' is not a recognized token.")
}
}
// Both the string and the tokens were used up.
if (i == s.count && j == tokens.count) return true
// The tokens were used up and the last one is a '*'.
if (j == tokens.count && tokens[-1] == 0) return true
// Only a final '*' token is left unprocessed.
if (j == tokens.count - 1 && tokens[-1] == 0) return true
return false
}
}
Line 560 ⟶ 911:
return (b0 & b4Mask) << 18 | (b[1] & mbMask) << 12 | (b[2] & mbMask) << 6 | (b[3] & mbMask)
}
}
 
/* The next four methods extend the casing performed by the corresponding 'Str' methods to include
Latin Extended-A, parts of Latin Extended-B, Latin Extended Additional, Greek, Cyrillic,
Armenian and Georgian. */
 
// Converts a UTF-8 string to lower case.
// A non-string argument is first converted to its string representation.
// Each codepoint is tested against a chain of ranges and mapped to its lower case
// counterpart by a fixed offset; codepoints matching no range are left unchanged.
static lower(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    var chars = s.toList
    var count = chars.count
    // 'i' tracks the position in 'chars' of the codepoint currently being examined.
    var i = 0
    for (c in s.codePoints) {
        if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) {
            // ASCII and Latin-1 capitals (215, the multiplication sign, is excluded).
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c < 256) {
            // catch other Latin-1 characters quickly.
        } else if ((c >= 0x0100 && c <= 0x0136) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x0139 && c <= 0x0147) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x014A && c <= 0x0176) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x0178) {
            chars[i] = "ÿ"
        } else if (c == 0x0179 || c == 0x017B || c == 0x017D ||
                   c == 0x01A0 || c == 0x01AF || c == 0x01F4) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x01C4 || c == 0x01C7 || c == 0x01CA || c == 0x01F1) {
            // Upper case digraphs: the lower case form is two places on.
            chars[i] = String.fromCodePoint(c + 2)
        } else if (c == 0x01C5 || c == 0x01C8 || c == 0x01CB || c == 0x01F2) {
            // Title case digraphs: the lower case form is one place on.
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x01DE && c <= 0x01EE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x01F8 && c <= 0x021E) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x1E00 && c <= 0x1E94) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x1E9E) {
            chars[i] = "ß"
        } else if ((c >= 0x1EA0 && c <= 0x1EFE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x0386) {
            chars[i] = "ά"
        } else if (c == 0x0388 || c == 0x0389 || c == 0x038A) {
            chars[i] = String.fromCodePoint(c + 37)
        } else if (c == 0x038C) {
            chars[i] = "ό"
        } else if (c == 0x038E || c == 0x038F) {
            chars[i] = String.fromCodePoint(c + 63)
        } else if (c >= 0x0391 && c <= 0x03A1) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c == 0x03A3) {
            // Capital sigma becomes final sigma only as the last character of the string.
            chars[i] = (i == count - 1) ? "ς" : "σ"
        } else if (c >= 0x03A4 && c <= 0x03AB) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c >= 0x0400 && c <= 0x040F) {
            // Cyrillic U+0400 to U+040F pair with U+0450 to U+045F. The range must stop
            // at U+040F so that U+0410 onwards reach the +32 branch below.
            chars[i] = String.fromCodePoint(c + 80)
        } else if (c >= 0x0410 && c <= 0x042F) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if ((c >= 0x048A && c <= 0x04BE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x04C1 && c <= 0x04CD) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x04D0 && c <= 0x052E) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c >= 0x0531 && c <= 0x0556) {
            chars[i] = String.fromCodePoint(c + 48)
        } else if (c >= 0x10A0 && c <= 0x10C5) {
            chars[i] = String.fromCodePoint(c + 48)
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Converts a UTF-8 string to upper case.
// A non-string argument is first converted to its string representation.
// Each codepoint is tested against a chain of ranges and mapped to its upper case
// counterpart by a fixed offset; codepoints matching no range (which includes
// characters that are already upper case) are left unchanged.
static upper(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    var chars = s.toList
    var count = chars.count
    // 'i' tracks the position in 'chars' of the codepoint currently being examined.
    var i = 0
    for (c in s.codePoints) {
        if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) {
            // ASCII and Latin-1 small letters (247, the division sign, is excluded).
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c == 223) {
            chars[i] = "ẞ"
        } else if (c == 255) {
            // 'ÿ' pairs with U+0178, mirroring the U+0178 -> 'ÿ' mapping used for lower case.
            chars[i] = "Ÿ"
        } else if (c < 255) {
            // catch other Latin-1 characters quickly.
        } else if ((c >= 0x0101 && c <= 0x0137) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x013A && c <= 0x0148) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x014B && c <= 0x0177) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x017A || c == 0x017C || c == 0x017E ||
                   c == 0x01A1 || c == 0x01B0 || c == 0x01F5) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x01C5 || c == 0x01C8 || c == 0x01CB || c == 0x01F2) {
            // Title case digraphs: the upper case form is one place back.
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x01C6 || c == 0x01C9 || c == 0x01CC || c == 0x01F3) {
            // Lower case digraphs: the upper case form is two places back.
            chars[i] = String.fromCodePoint(c - 2)
        } else if ((c >= 0x01DF && c <= 0x01EF) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x01F9 && c <= 0x021F) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x1E01 && c <= 0x1E95) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x1EA1 && c <= 0x1EFF) && (c % 2 == 1)) {
            // U+1E9E (capital sharp s) is already upper case and so, matching no
            // branch, is deliberately left as it is.
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x03AC) {
            chars[i] = "Ά"
        } else if (c == 0x03AD || c == 0x03AE || c == 0x03AF) {
            chars[i] = String.fromCodePoint(c - 37)
        } else if (c >= 0x03B1 && c <= 0x03C1) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c == 0x03C2) {
            // Final sigma shares its capital with ordinary sigma.
            chars[i] = "Σ"
        } else if (c >= 0x03C3 && c <= 0x03CB) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c == 0x03CC) {
            chars[i] = "Ό"
        } else if (c == 0x03CD || c == 0x03CE) {
            chars[i] = String.fromCodePoint(c - 63)
        } else if (c >= 0x0430 && c <= 0x044F) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c >= 0x0450 && c <= 0x045F) {
            chars[i] = String.fromCodePoint(c - 80)
        } else if ((c >= 0x048B && c <= 0x04BF) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x04C2 && c <= 0x04CE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x04D1 && c <= 0x052F) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c >= 0x0561 && c <= 0x0586) {
            chars[i] = String.fromCodePoint(c - 48)
        } else if (c >= 0x10D0 && c <= 0x10F5) {
            chars[i] = String.fromCodePoint(c - 48)
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Upper-cases the first character of a UTF-8 string, or the character after a
// leading "[" when more follows the bracket.
// If the upper case result is one of the four digraphs U+01C4, U+01C7, U+01CA or
// U+01F1, the codepoint one place on (its title case form) is used instead.
// A non-string argument is first converted to its string representation.
static capitalize(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // Position of the character to be capitalized.
    var start = 0
    if (s.startsWith("[") && s.count > 1) start = 1
    var head = upper(s[start])
    var cp = head.codePoints[0]
    var isDigraph = cp == 0x01C4 || cp == 0x01C7 || cp == 0x01CA || cp == 0x01F1
    if (isDigraph) head = String.fromCodePoint(cp + 1)
    var rest = (s.count > start + 1) ? s[start+1..-1] : ""
    var result = head + rest
    return (start == 1) ? "[" + result : result
}
 
// Applies 'capitalize' to every space-separated word of a UTF-8 string and
// re-joins the words with single spaces.
// Title rather than upper case is therefore used for the 4 supported digraphs.
// A non-string argument is first converted to its string representation.
static title(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    var capitalized = []
    for (word in s.split(" ")) capitalized.add(capitalize(word))
    return Strs.join(capitalized, " ")
}
 
// Removes accents and other diacritical marks from all characters in a string,
// expands digraphs and removes all combining characters before returning the result.
// As well as Latin-1 Supplement, coverage includes Latin Extended-A and various
// other characters found in modern European languages which use the Latin alphabet.
// A non-string argument is first converted to its string representation.
static unaccent(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // accented[j] lists every accented form that is replaced by the letter unaccented[j];
    // entries alternate between lower and upper case.
    var accented = [
        "àáâãäåāăą", "ÀÁÂÃÄÅĀĂĄ", "ḃ", "Ḃ", "çćĉċč", "ÇĆĈĊČ", "ðďđḋ", "ÐĎĐḊ",
        "èéêëēĕėęě", "ÈÉÊËĒĔĖĘĚ", "ḟ", "Ḟ", "ĝğġģ", "ĜĞĠĢ", "ĥħ", "ĤĦ",
        "ìíîïĩīĭįı", "ÌÍÎÏĨĪĬĮİ", "ĵ", "Ĵ", "ķĸ", "Ķ", "ĺļľŀł", "ĹĻĽĿŁ",
        "ṁ", "Ṁ", "ñńņň\u0149", "ÑŃŅŇ", "òóôõöøōŏő", "ÒÓÔÕÖØŌŎŐ", "ṗ", "Ṗ",
        "ŕŗř", "ŔŖŘ", "śŝşšșſ", "ŚŜŞŠȘ", "ţťŧṱț", "ŢŤŦṰȚ", "ùúûüũūŭůűų",
        "ÙÚÛÜŨŪŬŮŰŲ", "ŵẁẃẅ", "ŴẀẂẄ", "ýÿỳŷ", "ÝŸỲŶ", "źżž", "ŹŻŽ"
    ]
    var unaccented = "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPrRsStTuUwWyYzZ"
    // The 'ij' ligatures are written as escapes so that each key is a single codepoint.
    var digraphs = {
        "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss", "ẞ": "SS",
        "\u0133": "ij", "\u0132": "IJ", "ŋ": "ng", "Ŋ": "NG", "œ": "oe", "Œ": "OE"
    }
    var chars = s.toList
    var count = chars.count
    // 'i' tracks the position in 'chars' of the codepoint currently being examined.
    var i = 0
    for (c in s.codePoints) {
        // Quick gate: U+00C0 to U+021B and U+1E02 to U+1EF3 span every character
        // listed in the tables above, so nothing else needs to be looked up.
        if ((c >= 0x00c0 && c <= 0x021b) || (c >= 0x1e02 && c <= 0x1ef3)) {
            var found = false
            for (j in 0...accented.count) {
                if (accented[j].indexOf(chars[i]) >= 0) {
                    chars[i] = unaccented[j]
                    found = true
                    break
                }
            }
            if (!found && digraphs.containsKey(chars[i])) chars[i] = digraphs[chars[i]]
        } else if (c >= 0x0300 && c <= 0x036F) {
            // Combining diacritical marks are dropped altogether.
            chars[i] = ""
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Converts a Windows-1252 encoded byte string to a UTF-8 encoded string.
// Each byte becomes one codepoint: bytes 128 to 159 are translated through the
// table below and every other byte maps to the codepoint of the same value.
// Aborts the fiber if the argument is not a string.
static fromWin1252(win1252) {
    // Abort rather than just print: a non-string cannot be processed any further.
    if (!(win1252 is String)) Fiber.abort("Argument must be a byte string.")
    if (win1252.count == 0) return ""
    // mapping for Windows 1252 bytes 128-159.
    // Unused bytes are mapped to the corresponding ISO-8859-1 control codes.
    var bm = [
        0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
        0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
        0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
        0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
    ]
    var bytes = win1252.bytes
    var utf8 = List.filled(bytes.count, 0)
    for (i in 0...bytes.count) {
        var b = bytes[i]
        if (b < 128 || b > 159) {
            utf8[i] = String.fromCodePoint(b)
        } else {
            utf8[i] = String.fromCodePoint(bm[b-128])
        }
    }
    return utf8.join()
}
}
Line 568 ⟶ 1,158:
*/
class Greek {
// The Greek alphabet as a single string: the 25 lower case characters (final
// sigma included) followed by 25 upper case positions, where the escape \u03a2
// fills the position corresponding to final sigma.
static alphabet {
    return "αβγδεζηθικλμνξοπρςστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\u03a2ΣΤΥΦΧΨΩ"
}
 
// Returns a list of the names of all Greek letters in alphabetical order.
// The list has 25 entries because "sigma final" is listed separately, before "sigma".
static names {
    return [
        "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta",
        "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi",
        "rho", "sigma final", "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega"
    ]
}
 
// Looks up the name of the Greek character 'char' from its position in 'alphabet'.
// Returns null if 'char' is not exactly one character long or is not in 'alphabet'.
// Positions below 25 (lower case) give the name as listed in 'names'; later positions
// (upper case) give the name passed through Str.capitalize.
static name(char) {
    if (char.count != 1) return null
    var ix = alphabet.toList.indexOf(char)
    if (ix == -1) return null
    return (ix < 25) ? names[ix] : Str.capitalize(names[ix-25])
}
 
// Finds and returns a Greek lower case character from its name.
static lower(name) {
name = Str.lower(name)
var ix = names.indexOf(name)
if (ix == -1) Fiber.abort("Name not found.")
Line 586 ⟶ 1,190:
// Finds and returns a Greek upper case character from its name.
static upper(name) {
name = Str.lower(name)
var ix = names.indexOf(name)
if (ix == -1) Fiber.abort("Name not found.")
Line 591 ⟶ 1,196:
return String.fromCodePoint(0x0391 + ix)
}
}</syntaxhighlight>
9,476

edits