Category talk:Wren-str: Difference between revisions

→‎Source code: Added Str.lastIndexOf method.
(→‎Source code: Adjustments mostly to extend case change methods to Latin-1 (ASCII only previously).)
(→‎Source code: Added Str.lastIndexOf method.)
 
(25 intermediate revisions by the same user not shown)
Line 1:
===Source code===
 
<syntaxhighlight lang="wren">/* Module "str.wren" */
 
/*
Line 63:
// Returns whether the character 'c' is whitespace: its code is 32 (space),
// in the range 9 to 13 (tab to carriage return) or 160 (no-break space).
static isWhitespace(c) {
    var d = code(c)
    return d == 32 || (d >= 9 && d <= 13) || d == 160
}
Line 133:
static compare(s1, s2) {
if (s1 == s2) return 0
var cp1 = s1.codePoints.toList
var cp2 = s2.codePoints.toList
var len = (cp1.count <= cp2.count) ? cp1.count : cp2.count
for (i in 0...len) {
Line 144:
 
// Checks if a string falls into a particular category.
// Each predicate below is false for the empty string; otherwise it is true only
// when every code point (or character) of 's' satisfies the stated condition.
static allAscii(s) {
    return s != "" && s.codePoints.all { |cp| cp <= 127 }
}
static allLatin1(s) {
    return s != "" && s.codePoints.all { |cp| cp <= 255 }
}
static allDigits(s) {
    return s != "" && s.codePoints.all { |cp| !(cp < 48 || cp > 57) }
}
static allAsciiLower(s) {
    return s != "" && s.codePoints.all { |cp| !(cp < 97 || cp > 122) }
}
static allAsciiUpper(s) {
    return s != "" && s.codePoints.all { |cp| !(cp < 65 || cp > 90) }
}
static allAsciiLetters(s) {
    return s != "" && s.toList.all { |ch| Char.isAsciiLetter(ch) }
}
static allAsciiAlphaNum(s) {
    return s != "" && s.toList.all { |ch| Char.isAsciiAlphaNum(ch) }
}
static allSpace(s) {
    return s != "" && s.toList.all { |ch| Char.isSpace(ch) }
}
// As with the predicates above, the empty string never qualifies; otherwise every
// character of 's' must satisfy the corresponding 'Char' test.
static allLower(s) { s != "" && s.toList.all { |c| Char.isLower(c) } }
static allUpper(s) { s != "" && s.toList.all { |c| Char.isUpper(c) } }
static allLetters(s) { s != "" && s.toList.all { |c| Char.isLetter(c) } }
static allAlphaNumeric(s) { s != "" && s.toList.all { |c| Char.isAlphanumeric(c) } }
static allPrintable(s) { s != "" && s.toList.all { |c| Char.isPrintable(c) } }
static allGraphic(s) { s != "" && s.toList.all { |c| Char.isGraphic(c) } }
static allWhitespace(s) { s != "" && s.toList.all { |c| Char.isWhitespace(c) } }
 
// Checks whether a string can be parsed to a number, an integer or a non-integer (float).
Line 236:
var words = s.split(" ")
return Strs.join(words.map { |w| capitalize(w) }.toList, " ")
}
 
// Removes accents and cedillas from all Latin-1 supplement characters in a string
// and also expands digraphs before returning the result.
static unaccent(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // accented[j] lists every character that is replaced by unaccented[j].
    var accented = [
        "àáâãäå", "ÀÁÂÃÄÅ", "ç", "Ç", "ð", "Ð", "èéêë", "ÈÉÊË", "ìíîï", "ÌÍÎÏ",
        "ñ", "Ñ", "òóôõöø", "ÒÓÔÕÖØ", "ùúûü", "ÙÚÛÜ", "ýÿ", "Ý"
    ]
    var unaccented = "aAcCdDeEiInNoOuUyY"
    var digraphs = { "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss" }
    var chars = s.toList
    var count = chars.count
    var i = 0
    // 'chars' and 's.codePoints' are walked in step: chars[i] is the character for 'c'.
    for (c in s.codePoints) {
        // Only the upper half of Latin-1 Supplement can need replacing.
        if (c >= 0xc0 && c <= 0xff) {
            var found = false
            for (j in 0...accented.count) {
                if (accented[j].indexOf(chars[i]) >= 0) {
                    chars[i] = unaccented[j]
                    found = true
                    break
                }
            }
            if (!found && digraphs.containsKey(chars[i])) chars[i] = digraphs[chars[i]]
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
Line 244 ⟶ 276:
}
 
// Performs a circular shift of the characters of 's' 'n' places to the left.
// If 'n' is negative performs a circular right shift by '-n' places instead.
static lshift(s, n) {
    if (!(s is String)) s = "%(s)"
    if (!(n is Num) || !n.isInteger) Fiber.abort("'n' must be an integer.")
    var chars = s.toList
    var count = chars.count
    // Nothing to rotate for an empty or single character string.
    if (count < 2) return s
    if (n < 0) return rshift(s, -n)
    // A shift by a multiple of the length leaves the string unchanged.
    n = n % count
    if (n == 0) return s
    for (i in 1..n) {
        // Rotate by one place: the first character moves to the end.
        var t = chars[0]
        for (j in 0..count-2) chars[j] = chars[j+1]
        chars[-1] = t
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Performs a circular shift of the characters of 's' 'n' places to the right.
// If 'n' is negative performs a circular left shift by '-n' places instead.
static rshift(s, n) {
    if (!(s is String)) s = "%(s)"
    if (!(n is Num) || !n.isInteger) Fiber.abort("'n' must be an integer.")
    var chars = s.toList
    var count = chars.count
    // Nothing to rotate for an empty or single character string.
    if (count < 2) return s
    if (n < 0) return lshift(s, -n)
    // A shift by a multiple of the length leaves the string unchanged.
    n = n % count
    if (n == 0) return s
    for (i in 1..n) {
        // Rotate by one place: the last character moves to the front.
        var t = chars[-1]
        for (j in count-2..0) chars[j+1] = chars[j]
        chars[0] = t
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Convenience versions of the above methods which shift by just 1 place.
static lshift(s) { lshift(s, 1) }
static rshift(s) { rshift(s, 1) }
 
/* The indices (or ranges thereof) for all the following functions are measured in codepoints
(not bytes). Negative indices count backwards from the end of the string. As with core
library methods, the indices must be within bounds or errors will be generated. */
 
// Extracts the sub-string of 's' over the range 'r'.
Line 316 ⟶ 367:
cpCount = cpCount + 1
}
}
 
// Returns the codepoint index (not byte index) at which 'search' first occurs in 's'
// or -1 if 'search' is not found, starting from codepoint offset 'start'.
static indexOf(s, search, start) {
    // Search only the part of 's' from 'start' onwards (the whole of 's' if 'start' <= 0).
    var tail = s
    if (start > 0) tail = Str.sub(s, start..-1)
    var found = Str.indexOf(tail, search)
    if (found < 0) return -1
    // Convert the offset within the tail back to an offset within 's'.
    return start + found
}
 
// Returns the codepoint index (not byte index) at which 'search' last occurs in 's'
// or -1 if 'search' is not found.
static lastIndexOf(s, search) {
    if (!(search is String)) Fiber.abort("Search argument must be a string.")
    if (!(s is String)) s = "%(s)"
    // Work on lists of characters so that all offsets are codepoint offsets.
    var hay = s.toList
    var needle = search.toList
    var n = needle.count
    // Last offset at which 'search' could still fit inside 's'.
    var i = hay.count - n
    // An empty search string, or one longer than 's', is never found.
    if (n == 0 || i < 0) return -1
    while (i >= 0) {
        var j = 0
        while (j < n && hay[i + j] == needle[j]) j = j + 1
        if (j == n) return i
        i = i - 1
    }
    return -1
}
 
// Returns the number of non-overlapping occurrences of the string 't'
// within the string 's'.
static occurs(s, t) {
    // Splitting on 't' yields one more piece than there are occurrences.
    var pieces = s.split(t)
    return pieces.count - 1
}
 
// Returns the number of non-overlapping occurrences of the string 't'
// within the string 's' starting from codepoint offset 'start'.
static occurs(s, t, start) {
    // Count within the whole string when 'start' is 0, otherwise within the tail from 'start'.
    var target = (start == 0) ? s : Str.sub(s, start..-1)
    return occurs(target, t)
}
 
Line 359 ⟶ 441:
if (i == j) return s
var chars = s.toList
var t = chars[.swap(i], j)
chars[i] = chars[j]
chars[j] = t
return Strs.concat(chars)
}
 
// Returns 's' with 'from' replaced by 'to' up to 'n' times (all times if n is negative)
// but skipping the first 'skip' matches.
static replace(s, from, to, n, skip) {
    if (!(from is String)) Fiber.abort("'from' must be a string.")
    if (!(to is String)) Fiber.abort("'to' must be a string.")
    if (!(n is Num && n.isInteger)) Fiber.abort("'n' must be an integer.")
    if (!(skip is Num && skip.isInteger && skip >= 0)) {
        Fiber.abort("'skip' must be a non-negative integer.")
    }
    if (!(s is String)) s = "%(s)"
    if (n < 0) {
        // Unlimited replacements with nothing skipped is a plain replace.
        if (skip == 0) return s.replace(from, to)
        n = Num.maxSafeInteger
    }
    // Matches are numbered from 1; only those numbered skip+1 to n are replaced.
    if (n == 0 || skip >= n) return s
    var count = 0
    var split = s.split(from)
    var res = ""
    for (i in 0...split.count-1) {
        count = count + 1
        // Re-insert either the original text or its replacement after each piece.
        res = res + split[i] + ((count <= skip || count > n) ? from : to)
    }
    return res + split[-1]
}
 
// Convenience version of 'replace' where 'skip' is always zero.
static replace(s, from, to, n) {
    // No matches are skipped.
    return replace(s, from, to, n, 0)
}
 
// Adds 'by' to the start of each line of 's'
// and returns the result.
static indent(s, by) {
    if (!(s is String)) Fiber.abort("First argument must be a string.")
    if (!(by is String)) Fiber.abort("Second argument must be a string.")
    // Prefix each line in turn, then stitch the lines back together.
    var out = []
    for (line in s.split("\n")) out.add(by + line)
    return out.join("\n")
}
 
// Removes 'by' from the start of each line of 's' which begins with it
// and returns the result.
static dedent(s, by) {
    if (!(s is String)) Fiber.abort("First argument must be a string.")
    if (!(by is String)) Fiber.abort("Second argument must be a string.")
    // Strings are sliced by byte offset, so measure the prefix in bytes.
    var prefixLen = by.bytes.count
    var out = []
    for (line in s.split("\n")) {
        if (line.startsWith(by)) {
            out.add(line[prefixLen..-1])
        } else {
            out.add(line)
        }
    }
    return out.join("\n")
}
 
// Removes all spaces and tabs from the end of each line of s
// and returns the result.
static tidy(s) {
    if (!(s is String)) Fiber.abort("Argument must be a string.")
    // Strip trailing spaces and tabs line by line.
    var out = []
    for (line in s.split("\n")) out.add(line.trimEnd(" \t"))
    return out.join("\n")
}
 
// Returns 's' repeated 'reps' times.
static repeat(s, reps) {
    if (!(s is String)) s = "%(s)"
    if (!(reps is Num && reps.isInteger && reps >= 0)) {
        Fiber.abort("Repetitions must be a non-negative integer.")
    }
    var rs = ""
    if (reps < 10) {
        // Few repetitions: simple concatenation.
        for (i in 0...reps) rs = rs + s
    } else {
        // Many repetitions: repeated doubling. 's' is doubled on each pass and
        // appended to the result whenever the current low bit of 'reps' is set.
        while (true) {
            if (reps % 2 == 1) rs = rs + s
            reps = reps >> 1
            if (reps == 0) break
            s = s + s
        }
    }
    return rs
}

// Three argument form kept so that existing callers continue to work.
// 'chunkSize' is still validated but no longer affects how the result is built.
static repeat(s, reps, chunkSize) {
    if (!(chunkSize is Num && chunkSize.isInteger && chunkSize > 0)) {
        Fiber.abort("Chunk size must be a positive integer.")
    }
    return repeat(s, reps)
}
 
// Splits a string 's' into chunks of not more than 'size' characters.
Line 425 ⟶ 544:
if (final > 0) res.add(sub(s, first..-1))
return res
}
 
// Splits 's' into a list of one or more strings separated by 'sep' but removes
// any empty elements from the list.
static splitNoEmpty(s, sep) {
    if (!(s is String)) s = "%(s)"
    if (!(sep is String) || sep.isEmpty) Fiber.abort("Separator must be a non-empty string.")
    // Keep only the pieces that contain at least one character.
    var kept = []
    for (piece in s.split(sep)) {
        if (!piece.isEmpty) kept.add(piece)
    }
    return kept
}
 
// Splits a CSV 'line' into a list of one or more strings separated by 'sep' which must be
// a single character (except \v). Deals properly with embedded separators in quoted fields.
// Removes leading and trailing quotes from quoted fields if 'dequote' is true.
static splitCsv(line, sep, dequote) {
    if (!(line is String)) line = "%(line)"
    if (!(sep is String) || sep.count != 1) {
        Fiber.abort("Separator must be a single character string.")
    }
    if (!(dequote is Bool)) Fiber.abort("Dequote must be a boolean.")
    // First pass: split naively, then mask (with \v) every separator which
    // turns out to lie inside a quoted field.
    var fields = line.split(sep)
    var count = 0
    var quoted = false
    var chars = line.toList
    var last = fields.count - 1
    for (i in 0...fields.count) {
        var f = fields[i]
        var fc = f.count
        // After adding 'fc', 'count' is the offset in 'chars' of the separator which
        // follows this field. The last field has no such separator, so masking is
        // only attempted when i < last (an unterminated quote would otherwise write
        // one place past the end of 'chars').
        if (fc > 0) {
            count = count + fc
            if (!quoted && f[0] == "\"") {
                if (f[-1] != "\"") {
                    quoted = true
                    if (i < last) chars[count] = "\v"
                }
            } else if (quoted && f[-1] == "\"") {
                quoted = false
            } else if (quoted) {
                if (i < last) chars[count] = "\v"
            }
        } else if (quoted) {
            if (i < last) chars[count] = "\v"
        }
        count = count + 1
    }
    // Second pass: split on the unmasked separators and restore the masked ones.
    fields = chars.join("").split(sep)
    for (i in 0...fields.count) fields[i] = fields[i].replace("\v", sep)
    if (dequote) {
        for (i in 0...fields.count) {
            var f = fields[i]
            var fc = f.count
            if (fc < 2) continue
            if (f[0] == "\"" && f[-1] == "\"") fields[i] = f[1...-1]
        }
    }
    return fields
}
 
// Convenience versions of the above method which use default parameters.
static splitCsv(line, sep) {
    // Quotes are removed by default.
    return splitCsv(line, sep, true)
}
static splitCsv(line) {
    // Comma separated with quotes removed by default.
    return splitCsv(line, ",", true)
}
 
// Splits a string 's' into two parts, before and after the first occurrence
// of 'delim' and returns a list of those parts.
// The 'delim' itself can be optionally included in the second part.
// If 'delim' does not occur in 's', returns [s, ""].
static bisect(s, delim, include) {
    if (!(delim is String)) Fiber.abort("Delimiter must be a string.")
    if (!(include is Bool)) Fiber.abort("Include must be true or false.")
    if (!(s is String)) s = "%(s)"
    var pos = s.indexOf(delim)
    if (pos == -1) return [s, ""]
    var head = s[0...pos]
    // When the delimiter is excluded, the tail starts just past its bytes.
    var skip = include ? 0 : delim.bytes.count
    return [head, s[pos + skip..-1]]
}
 
// Convenience version of bisect method which never includes the delimiter.
static bisect(s, delim) {
    // The delimiter itself is dropped.
    return bisect(s, delim, false)
}
 
// Creates and returns a string from a list of bytes.
static fromBytes(ba) {
    if (!(ba is List)) Fiber.abort("Argument must be list of bytes.")
    var total = ba.count
    if (total == 0) return ""
    // One single-byte string per list element.
    var pieces = []
    for (b in ba) pieces.add(String.fromByte(b))
    if (total < 1000) return Strs.concat_(pieces)
    return Strs.concat(pieces, 1000)
}
 
// Creates and returns a string from a list of code points.
static fromCodePoints(ca) {
    if (!(ca is List)) Fiber.abort("Argument must be list of code points.")
    var total = ca.count
    if (total == 0) return ""
    // One single-character string per list element.
    var pieces = []
    for (cp in ca) pieces.add(String.fromCodePoint(cp))
    if (total < 1000) return Strs.concat_(pieces)
    return Strs.concat(pieces, 1000)
}
 
// After trimming whitespace from the string 's', takes as many characters as possible
// to form a valid number and converts it thereto using the Num.fromString method.
// Returns null if such a conversion is impossible.
static toNum(s) {
    if (s is Num) return s
    if (!(s is String)) s = "%(s)"
    s = s.trim()
    var whole = Num.fromString(s)
    if (whole) return whole
    if (s.count < 2) return null
    // Drop one trailing character at a time until what is left parses as a
    // number or only a single character remains.
    var chars = s.toList
    while (chars.count > 1) {
        chars.removeAt(-1)
        var partial = Num.fromString(chars.join())
        if (partial) return partial
    }
    return null
}
 
// Converts a pattern into a list of tokens for processing by the 'isMatch' method.
// Characters within the pattern are represented as follows:
// Non-wildcard characters as themselves (i.e. single character strings);
// * (or **) by the number 0;
// ? (or *?) by the number 1;
// [set] by a list of the tokens within the set:
// single characters by themselves;
// a range of characters, a-b, by a Range of codepoints from 'a' to 'b'.
// If the first character of the set is '!' then the number -1 is inserted
// as a separate token immediately before the list.
static tokenize(pattern) {
    var tokens = []
    // Index a list of characters rather than the string itself: string subscripts
    // are byte offsets whereas the loop bounds and 'indexOf' use codepoint offsets.
    var chars = pattern.toList
    var i = 0
    var j
    while (i < chars.count) {
        var c = chars[i]
        if (c == "*") {
            // Consecutive stars collapse into a single 0 token.
            if (i == 0 || tokens[-1] != 0) tokens.add(0)
        } else if (c == "?") {
            // '*?' collapses into a single 1 token.
            if (i > 0 && tokens[-1] == 0) tokens[-1] = 1 else tokens.add(1)
        } else if (c == "[") {
            if (i == chars.count - 1) {
                // A trailing '[' cannot open a set so it is an ordinary character.
                tokens.add(c)
            } else if ((j = indexOf(pattern, "]", i + 1)) == -1) {
                // No closing bracket: again an ordinary character.
                tokens.add(c)
            } else {
                var l = []
                var s = chars[i+1...j]
                var k = 0
                while (k < s.count) {
                    var d = s[k]
                    if (d == "!") {
                        // A leading '!' negates the set, elsewhere it is a member.
                        if (k == 0) tokens.add(-1) else l.add(d)
                    } else if (k < s.count - 2 && s[k+1] == "-") {
                        l.add(d.codePoints[0]..s[k+2].codePoints[0])
                        k = k + 2
                    } else {
                        l.add(d)
                    }
                    k = k + 1
                }
                if (l.count == 0) Fiber.abort("set cannot be empty.")
                tokens.add(l)
                // Skip the set's contents; the closing ']' is skipped by the step below.
                i = i + s.count + 1
            }
        } else {
            tokens.add(c)
        }
        i = i + 1
    }
    return tokens
}
 
// Returns whether a string 's' matches a 'pattern' which may already be tokenized
// if many strings are to be matched. Matching is case sensitive.
// Patterns may contain the following wildcards:
// * (or **) matches zero or more characters until the next token (if any) matches
// and doesn't backtrack in the event of subsequent failure;
// ? (or *?) matches exactly one character;
// [set] matches a single character from the set within the brackets e.g. [aeiou].
// The set can also contain ranges of characters separated by '-' e.g. [a-zA-Z].
// If the first character of the set is '!' then only characters NOT within the rest
// of the set are matched e.g. [!0-9] matches any character other than a digit.
static isMatch(s, pattern) {
    var tokens = pattern
    if (tokens is String) tokens = tokenize(tokens)
    if (!((tokens is List) && tokens.count > 0)) {
        Fiber.abort("'pattern' must be a non-empty string or list of tokens.")
    }
    // Index a list of characters rather than 's' itself: string subscripts are byte
    // offsets whereas the loop below counts in characters.
    var chars = s.toList
    var count = chars.count
    var i = 0          // position in 'chars'
    var j = 0          // position in 'tokens'
    var star = false   // true while a '*' is consuming characters
    var neg = false    // true when the next set token is negated
    while (i < count && j < tokens.count) {
        var c = chars[i]
        var t = tokens[j]
        if (t is Num) {
            if (t == 0) {
                star = true
            } else if (t == 1) {
                i = i + 1
                star = false
            } else if (t == -1) {
                neg = true
            } else {
                Fiber.abort("'%(t)' is not a recognized token.")
            }
            j = j + 1
        } else if (t is String) {
            if (!star && c != t) return false
            if (star && c == t) star = false
            i = i + 1
            // While a star is still active the same token is retried on the next character.
            if (!star) j = j + 1
        } else if (t is List) {
            var matched = false
            for (e in t) {
                if (e is String) {
                    if (e == c) {
                        matched = true
                        break
                    }
                } else if (e is Range){
                    var cp = c.codePoints[0]
                    if (cp >= e.from && cp <= e.to) {
                        matched = true
                        break
                    }
                } else {
                    Fiber.abort("'%(e)' is not a recognized token within a set.")
                }
            }
            if (!star && !neg && !matched) return false
            if (!star && neg && matched) return false
            if (star && matched) star = false
            i = i + 1
            neg = false
            if (!star) j = j + 1
        } else {
            Fiber.abort("'%(t)' is not a recognized token.")
        }
    }
    // Success if both are exhausted, or if all that remains is a final '*'.
    if (i == count && j == tokens.count) return true
    if (j == tokens.count && tokens[-1] == 0) return true
    if (j == tokens.count - 1 && tokens[-1] == 0) return true
    return false
}
}
Line 552 ⟶ 911:
return (b0 & b4Mask) << 18 | (b[1] & mbMask) << 12 | (b[2] & mbMask) << 6 | (b[3] & mbMask)
}
}
 
/* The next four methods extend the casing performed by the corresponding 'Str' methods to include
Latin Extended-A, parts of Latin Extended-B, Latin Extended Additional, Greek, Cyrillic,
Armenian and Georgian. */
 
// Converts a UTF-8 string to lower case.
static lower(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    var chars = s.toList
    var count = chars.count
    var i = 0
    // 'chars' and 's.codePoints' are walked in step: chars[i] is the character for 'c'.
    for (c in s.codePoints) {
        if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c < 256) {
            // catch other Latin-1 characters quickly.
        } else if ((c >= 0x0100 && c <= 0x0136) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x0139 && c <= 0x0147) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x014A && c <= 0x0176) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x0178) {
            chars[i] = "ÿ"
        } else if (c == 0x0179 || c == 0x017B || c == 0x017D ||
                   c == 0x01A0 || c == 0x01AF || c == 0x01F4) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x01C4 || c == 0x01C7 || c == 0x01CA || c == 0x01F1) {
            // upper case digraph: the lower case form is two places on.
            chars[i] = String.fromCodePoint(c + 2)
        } else if (c == 0x01C5 || c == 0x01C8 || c == 0x01CB || c == 0x01F2) {
            // title case digraph: the lower case form is one place on.
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x01DE && c <= 0x01EE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x01F8 && c <= 0x021E) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x1E00 && c <= 0x1E94) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x1E9E) {
            chars[i] = "ß"
        } else if ((c >= 0x1EA0 && c <= 0x1EFE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x0386) {
            chars[i] = "ά"
        } else if (c == 0x0388 || c == 0x0389 || c == 0x038A) {
            chars[i] = String.fromCodePoint(c + 37)
        } else if (c == 0x038C) {
            chars[i] = "ό"
        } else if (c == 0x038E || c == 0x038F) {
            chars[i] = String.fromCodePoint(c + 63)
        } else if (c >= 0x0391 && c <= 0x03A1) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c == 0x03A3) {
            // capital sigma becomes final sigma only as the last character of the string.
            chars[i] = (i == count - 1) ? "ς" : "σ"
        } else if (c >= 0x03A4 && c <= 0x03AB) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c >= 0x0400 && c <= 0x040F) {
            // Only U+0400..U+040F are offset by 80. The range must stop at 0x040F,
            // otherwise it swallows U+0410..U+041F which belong to the next branch.
            chars[i] = String.fromCodePoint(c + 80)
        } else if (c >= 0x0410 && c <= 0x042F) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if ((c >= 0x048A && c <= 0x04BE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x04C1 && c <= 0x04CD) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x04D0 && c <= 0x052E) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c >= 0x0531 && c <= 0x0556) {
            chars[i] = String.fromCodePoint(c + 48)
        } else if (c >= 0x10A0 && c <= 0x10C5) {
            chars[i] = String.fromCodePoint(c + 48)
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Converts a UTF-8 string to upper case.
static upper(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    var chars = s.toList
    var count = chars.count
    var i = 0
    // 'chars' and 's.codePoints' are walked in step: chars[i] is the character for 'c'.
    for (c in s.codePoints) {
        if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c == 223) {
            chars[i] = "ẞ"
        } else if (c == 255) {
            // y with diaeresis: the inverse of the 0x0178 -> "ÿ" mapping used by 'lower'.
            chars[i] = "Ÿ"
        } else if (c < 255) {
            // catch other Latin-1 characters quickly.
        } else if ((c >= 0x0101 && c <= 0x0137) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x013A && c <= 0x0148) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x014B && c <= 0x0177) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x017A || c == 0x017C || c == 0x017E ||
                   c == 0x01A1 || c == 0x01B0 || c == 0x01F5) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x01C5 || c == 0x01C8 || c == 0x01CB || c == 0x01F2) {
            // title case digraph: the upper case form is one place back.
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x01C6 || c == 0x01C9 || c == 0x01CC || c == 0x01F3) {
            // lower case digraph: the upper case form is two places back.
            chars[i] = String.fromCodePoint(c - 2)
        } else if ((c >= 0x01DF && c <= 0x01EF) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x01F9 && c <= 0x021F) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x1E01 && c <= 0x1E95) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x1EA1 && c <= 0x1EFF) && (c % 2 == 1)) {
            // 0x1E9E (capital sharp S) is already upper case and is deliberately
            // not handled anywhere in this method.
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x03AC) {
            chars[i] = "Ά"
        } else if (c == 0x03AD || c == 0x03AE || c == 0x03AF) {
            chars[i] = String.fromCodePoint(c - 37)
        } else if (c >= 0x03B1 && c <= 0x03C1) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c == 0x03C2) {
            chars[i] = "Σ"
        } else if (c >= 0x03C3 && c <= 0x03CB) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c == 0x03CC) {
            chars[i] = "Ό"
        } else if (c == 0x03CD || c == 0x03CE) {
            chars[i] = String.fromCodePoint(c - 63)
        } else if (c >= 0x0430 && c <= 0x044F) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c >= 0x0450 && c <= 0x045F) {
            chars[i] = String.fromCodePoint(c - 80)
        } else if ((c >= 0x048B && c <= 0x04BF) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x04C2 && c <= 0x04CE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x04D1 && c <= 0x052F) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c >= 0x0561 && c <= 0x0586) {
            chars[i] = String.fromCodePoint(c - 48)
        } else if (c >= 0x10D0 && c <= 0x10F5) {
            chars[i] = String.fromCodePoint(c - 48)
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Capitalizes the first character of a UTF-8 string.
// Uses title rather than upper case variant if it's one of 4 supported digraphs.
static capitalize(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // A leading '[' is stepped over so that the character after it is capitalized.
    var bracketed = s.startsWith("[") && s.count > 1
    var start = bracketed ? 1 : 0
    var head = upper(s[start])
    var cp = head.codePoints[0]
    // The four upper case digraphs are replaced by their title case forms (one place on).
    var isDigraph = cp == 0x01C4 || cp == 0x01C7 || cp == 0x01CA || cp == 0x01F1
    if (isDigraph) head = String.fromCodePoint(cp + 1)
    var result = head
    if (s.count > start + 1) result = result + s[start+1..-1]
    if (bracketed) result = "[" + result
    return result
}
 
// Capitalizes the first character of each word of a UTF-8 string.
// Uses title rather than upper case variant if it's one of 4 supported digraphs.
static title(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // Words are delimited by single spaces; each one is capitalized separately.
    var capped = []
    for (w in s.split(" ")) capped.add(capitalize(w))
    return Strs.join(capped, " ")
}
 
// Removes accents and other diacritical marks from all characters in a string,
// expands digraphs and removes all combining characters before returning the result.
// As well as Latin-1 Supplement, coverage includes Latin Extended-A and various
// other characters found in modern European languages which use the Latin alphabet.
static unaccent(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // accented[j] lists every character that is replaced by unaccented[j].
    // U+0149 is written as an escape so that it stays a single character.
    var accented = [
        "àáâãäåāăą", "ÀÁÂÃÄÅĀĂĄ", "ḃ", "Ḃ", "çćĉċč", "ÇĆĈĊČ", "ðďđḋ", "ÐĎĐḊ",
        "èéêëēĕėęě", "ÈÉÊËĒĔĖĘĚ", "ḟ", "Ḟ", "ĝğġģ", "ĜĞĠĢ", "ĥħ", "ĤĦ",
        "ìíîïĩīĭįı", "ÌÍÎÏĨĪĬĮİ", "ĵ", "Ĵ", "ķĸ", "Ķ", "ĺļľŀł", "ĹĻĽĿŁ",
        "ṁ", "Ṁ", "ñńņň\u0149", "ÑŃŅŇ", "òóôõöøōŏő", "ÒÓÔÕÖØŌŎŐ", "ṗ", "Ṗ",
        "ŕŗř", "ŔŖŘ", "śŝşšșſ", "ŚŜŞŠȘ", "ţťŧṱț", "ŢŤŦṰȚ", "ùúûüũūŭůűų",
        "ÙÚÛÜŨŪŬŮŰŲ", "ŵẁẃẅ", "ŴẀẂẄ", "ýÿỳŷ", "ÝŸỲŶ", "źżž", "ŹŻŽ"
    ]
    var unaccented = "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPrRsStTuUwWyYzZ"
    // Keys must be single characters because they are looked up one character at a
    // time; the ij ligatures (U+0133, U+0132) are therefore written as escapes.
    var digraphs = {
        "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss", "ẞ": "SS",
        "\u0133": "ij", "\u0132": "IJ", "ŋ": "ng", "Ŋ": "NG", "œ": "oe", "Œ": "OE"
    }
    var chars = s.toList
    var count = chars.count
    var i = 0
    // 'chars' and 's.codePoints' are walked in step: chars[i] is the character for 'c'.
    for (c in s.codePoints) {
        // The two ranges span every character in the tables above: the first runs
        // from 'À' (U+00C0) to 'ț' (U+021B), the second from 'Ḃ' (U+1E02) to 'ỳ' (U+1EF3).
        if ((c >= 0x00c0 && c <= 0x021b) || (c >= 0x1e02 && c <= 0x1ef3)) {
            var found = false
            for (j in 0...accented.count) {
                if (accented[j].indexOf(chars[i]) >= 0) {
                    chars[i] = unaccented[j]
                    found = true
                    break
                }
            }
            if (!found && digraphs.containsKey(chars[i])) chars[i] = digraphs[chars[i]]
        } else if (c >= 0x0300 && c <= 0x036F) {
            // combining characters are removed altogether.
            chars[i] = ""
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
// Converts a Windows-1252 encoded byte string to a UTF-8 encoded string.
static fromWin1252(win1252) {
    // Abort, as the other methods in this module do, rather than printing a message
    // and then carrying on with an argument of the wrong type.
    if (!(win1252 is String)) Fiber.abort("Argument must be a byte string.")
    if (win1252.count == 0) return ""
    // mapping for Windows 1252 bytes 128-159.
    // Unused bytes are mapped to the corresponding ISO-8859-1 control codes.
    var bm = [
        0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
        0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
        0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
        0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
    ]
    var bytes = win1252.bytes
    var utf8 = List.filled(bytes.count, 0)
    for (i in 0...bytes.count) {
        var b = bytes[i]
        if (b < 128 || b > 159) {
            // Outside 128-159 the byte value is used directly as the code point.
            utf8[i] = String.fromCodePoint(b)
        } else {
            utf8[i] = String.fromCodePoint(bm[b-128])
        }
    }
    return utf8.join()
}
}
 
/*
    'Greek' enables characters from the Greek alphabet to be found from their name.
    These characters are often used as mathematical or scientific symbols.
*/
class Greek {
    // Returns the Greek alphabet, lower then upper case characters.
    // Each half has 25 entries: the lower case half includes final sigma and the
    // upper case half has U+03A2 in the corresponding position.
    static alphabet { "αβγδεζηθικλμνξοπρςστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\u03a2ΣΤΥΦΧΨΩ" }

    // Returns a list of the names of all Greek letters in alphabetical order.
    static names {
        return [
            "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta",
            "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi",
            "rho", "sigma final", "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega"
        ]
    }

    // Returns the name of a Greek character or null if not found.
    // Upper case characters are returned with the initial letter capitalized.
    static name(char) {
        if (char.count != 1) return null
        var ix = alphabet.toList.indexOf(char)
        if (ix == -1) return null
        // The first 25 entries of 'alphabet' are lower case, the rest upper case.
        if (ix < 25) return names[ix]
        return Str.capitalize(names[ix-25])
    }

    // Finds and returns a Greek lower case character from its name.
    static lower(name) {
        name = Str.lower(name)
        var ix = names.indexOf(name)
        if (ix == -1) Fiber.abort("Name not found.")
        return String.fromCodePoint(0x03b1 + ix)
    }

    // Finds and returns a Greek upper case character from its name.
    static upper(name) {
        name = Str.lower(name)
        var ix = names.indexOf(name)
        if (ix == -1) Fiber.abort("Name not found.")
        // "sigma final" is moved on one place so that it gives the same result as "sigma".
        if (name == "sigma final") ix = ix + 1
        return String.fromCodePoint(0x0391 + ix)
    }
}

// Type aliases for classes in case of any name clashes with other modules.
var Str_Char = Char
var Str_Str = Str
var Str_Strs = Strs
var Str_Utf8 = Utf8</syntaxhighlight>
9,476

edits