Category talk:Wren-str: Difference between revisions

← Older edit

Category talk:Wren-str (view source)

Revision as of 10:53, 30 March 2024

28,148 bytes added, 1 month ago

→‎Source code: Added Str.lastIndexOf method.

PureFox

9,476

edits

Revision as of 17:35, 18 November 2020 (view source) PureFox (talk | contribs) (→‎Source code: Adjustments mostly to extend case change methods to Latin-1 (ASCII only previously).) ← Older edit — Latest revision as of 10:53, 30 March 2024 (view source) PureFox (talk | contribs) (→‎Source code: Added Str.lastIndexOf method.)
(25 intermediate revisions by the same user not shown)
Line 1:
===Source code===
<syntaxhighlight lang="wren">/* Module "str.wren" */

/*
Line 63:
// Returns whether the character 'c' is whitespace, i.e. its code is 32 (space),
// in the range 9..13 (tab to carriage return) or 160 (no-break space).
static isWhitespace(c) {
    var d = code(c)
    return d == 32 || (d >= 9 && d <= 13) || d == 160
}
Line 133:
static compare(s1, s2) {
    if (s1 == s2) return 0
    var cp1 = s1.codePoints.toList
    var cp2 = s2.codePoints.toList
    var len = (cp1.count <= cp2.count) ? cp1.count : cp2.count
    for (i in 0...len) {
Line 144:
// Checks if a string falls into a particular category.
// Every predicate returns false for the empty string.
static allAscii(s)         { s != "" && s.codePoints.all { |c| c < 128 } }
static allLatin1(s)        { s != "" && s.codePoints.all { |c| c < 256 } }
static allDigits(s)        { s != "" && s.codePoints.all { |c| c >= 48 && c <= 57 } }
static allAsciiLower(s)    { s != "" && s.codePoints.all { |c| c >= 97 && c <= 122 } }
static allAsciiUpper(s)    { s != "" && s.codePoints.all { |c| c >= 65 && c <= 90 } }
static allAsciiLetters(s)  { s != "" && s.toList.all { |c| Char.isAsciiLetter(c) } }
static allAsciiAlphaNum(s) { s != "" && s.toList.all { |c| Char.isAsciiAlphaNum(c) } }
static allSpace(s)         { s != "" && s.toList.all { |c| Char.isSpace(c) } }
static allLower(s)         { s != "" && s.toList.all { |c| Char.isLower(c) } }
static allUpper(s)         { s != "" && s.toList.all { |c| Char.isUpper(c) } }
static allLetters(s)       { s != "" && s.toList.all { |c| Char.isLetter(c) } }
static allAlphaNumeric(s)  { s != "" && s.toList.all { |c| Char.isAlphanumeric(c) } }
static allPrintable(s)     { s != "" && s.toList.all { |c| Char.isPrintable(c) } }
static allGraphic(s)       { s != "" && s.toList.all { |c| Char.isGraphic(c) } }
static allWhitespace(s)    { s != "" && s.toList.all { |c| Char.isWhitespace(c) } }

// Checks whether a string can be parsed to a number, an integer or a non-integer (float).
Line 236:
    var words = s.split(" ")
    return Strs.join(words.map { |w| capitalize(w) }.toList, " ")
}

// Removes accents and cedillas from all Latin-1 supplement characters in a string
// and also expands digraphs before returning the result.
static unaccent(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // accented[j] holds every accented form of the plain letter unaccented[j].
    var accented = [
        "àáâãäå", "ÀÁÂÃÄÅ", "ç", "Ç", "ð", "Ð", "èéêë", "ÈÉÊË", "ìíîï", "ÌÍÎÏ",
        "ñ", "Ñ", "òóôõöø", "ÒÓÔÕÖØ", "ùúûü", "ÙÚÛÜ", "ýÿ", "Ý"
    ]
    var unaccented = "aAcCdDeEiInNoOuUyY"
    var digraphs = { "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss" }
    var chars = s.toList
    var count = chars.count
    var i = 0
    for (c in s.codePoints) {
        // Only code points 0xc0..0xff can appear in the tables above.
        if (c >= 0xc0 && c <= 0xff) {
            var found = false
            for (j in 0...accented.count) {
                if (accented[j].indexOf(chars[i]) >= 0) {
                    chars[i] = unaccented[j]
                    found = true
                    break
                }
            }
            if (!found && digraphs.containsKey(chars[i])) chars[i] = digraphs[chars[i]]
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
Line 244 ⟶ 276:
}

// Performs a circular shift of the characters of 's' 'n' places to the left.
// If 'n' is negative performs a circular right shift by '-n' places instead.
// A non-string 's' is first converted to its string representation.
// Aborts the fiber if 'n' is not an integer.
static lshift(s, n) {
    if (!(s is String)) s = "%(s)"
    if (!(n is Num) || !n.isInteger) Fiber.abort("'n' must be an integer.")
    var chars = s.toList
    var count = chars.count
    if (count < 2) return s
    if (n < 0) return rshift(s, -n)
    n = n % count  // shifting by a multiple of 'count' is a no-op
    if (n == 0) return s
    for (i in 1..n) {
        var t = chars[0]
        for (j in 0..count-2) chars[j] = chars[j+1]
        chars[-1] = t
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}

// Performs a circular shift of the characters of 's' 'n' places to the right.
// If 'n' is negative performs a circular left shift by '-n' places instead.
static rshift(s, n) {
    // A non-string 's' is first converted to its string representation.
    if (!(s is String)) s = "%(s)"
    if (!(n is Num) || !n.isInteger) Fiber.abort("'n' must be an integer.")
    var chars = s.toList
    var count = chars.count
    if (count < 2) return s
    if (n < 0) return lshift(s, -n)
    n = n % count  // shifting by a multiple of 'count' is a no-op
    if (n == 0) return s
    for (i in 1..n) {
        var t = chars[-1]
        for (j in count-2..0) chars[j+1] = chars[j]
        chars[0] = t
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}

// Convenience versions of the above methods which shift by just 1 place.
static lshift(s) { lshift(s, 1) }
static rshift(s) { rshift(s, 1) }

/*
    The indices (or ranges thereof) for all the following functions are measured in codepoints (not bytes).
    Negative indices count backwards from the end of the string.
    As with core library methods, the indices must be within bounds or errors will be generated.
*/

// Extracts the sub-string of 's' over the range 'r'.
Line 316 ⟶ 367:
            cpCount = cpCount + 1
        }
    }

// Returns the codepoint index (not byte index) at which 'search' first occurs in 's'
// or -1 if 'search' is not found, starting from codepoint offset 'start'.
static indexOf(s, search, start) {
    // Search only the tail beginning at 'start', then translate the hit back
    // into an index relative to the whole of 's'.
    var ss = (start > 0) ? Str.sub(s, start..-1) : s
    var ix = Str.indexOf(ss, search)
    return (ix >= 0) ? start + ix : -1
}

// Returns the codepoint index (not byte index) at which 'search' last occurs in 's'
// or -1 if 'search' is not found.
static lastIndexOf(s, search) {
    if (!(search is String)) Fiber.abort("Search argument must be a string.")
    if (!(s is String)) s = "%(s)"
    var chars = s.toList
    var target = search.toList
    var n = target.count
    // An empty 'search' is never found.
    if (n == 0) return -1
    // Try every candidate start position from the right; comparing codepoint by
    // codepoint lets 'search' be longer than one character. The loop does not run
    // at all when 's' is shorter than 'search' (including when 's' is empty).
    var i = chars.count - n
    while (i >= 0) {
        var j = 0
        while (j < n && chars[i + j] == target[j]) j = j + 1
        if (j == n) return i
        i = i - 1
    }
    return -1
}

// Returns the number of non-overlapping occurrences of the string 't'
// within the string 's'.
static occurs(s, t) { s.split(t).count - 1 }

// Returns the number of non-overlapping occurrences of the string 't'
// within the string 's' starting from codepoint offset 'start'.
static occurs(s, t, start) {
    if (start == 0) return occurs(s, t)
    return occurs(Str.sub(s, start..-1), t)
}
Line 359 ⟶ 441:
    if (i == j) return s
    var chars = s.toList
    chars.swap(i, j)
    return Strs.concat(chars)
}

// Returns 's' with 'from' replaced by 'to' up to 'n' times (all times if n is negative)
// but skipping the first 'skip' matches.
// Matches are numbered from 1; only matches numbered skip+1 to n inclusive are replaced.
// A non-string 's' is first converted to its string representation.
static replace(s, from, to, n, skip) {
    if (!(from is String)) Fiber.abort("'from' must be a string.")
    if (!(to is String)) Fiber.abort("'to' must be a string.")
    if (!(n is Num && n.isInteger)) Fiber.abort("'n' must be an integer.")
    if (!(skip is Num && skip.isInteger && skip >= 0)) {
        Fiber.abort("'skip' must be a non-negative integer.")
    }
    if (!(s is String)) s = "%(s)"
    if (n < 0) {
        // No limit and nothing to skip: the core method does the whole job.
        if (skip == 0) return s.replace(from, to)
        n = Num.maxSafeInteger
    }
    if (n == 0 || skip >= n) return s
    var count = 0
    var split = s.split(from)
    var res = ""
    // Re-join the pieces, choosing for each gap whether to put 'from' back
    // (match skipped or beyond the limit) or to substitute 'to'.
    for (i in 0...split.count-1) {
        count = count + 1
        res = res + split[i] + ((count <= skip || count > n) ? from : to)
    }
    return res + split[-1]
}

// Convenience version of 'replace' where 'skip' is always zero.
static replace(s, from, to, n) { replace(s, from, to, n, 0) }

// Adds 'by' to the start of each line of 's'
// and returns the result.
static indent(s, by) {
    if (!(s is String)) Fiber.abort("First argument must be a string.")
    if (!(by is String)) Fiber.abort("Second argument must be a string.")
    var lines = s.split("\n")
    return lines.map { |line| by + line }.join("\n")
}

// Removes 'by' from the start of each line of 's' which begins with it
// and returns the result.
static dedent(s, by) {
    if (!(s is String)) Fiber.abort("First argument must be a string.")
    if (!(by is String)) Fiber.abort("Second argument must be a string.")
    var lines = s.split("\n")
    var c = by.bytes.count  // string subscripts are byte offsets
    return lines.map { |line|
        if (line.startsWith(by)) return line[c..-1]
        return line
    }.join("\n")
}

// Removes all spaces and tabs from the end of each line of s
// and returns the result.
static tidy(s) {
    if (!(s is String)) Fiber.abort("Argument must be a string.")
    var lines = s.split("\n")
    return lines.map { |line| line.trimEnd(" \t") }.join("\n")
}

// Returns 's' repeated 'reps' times.
// A non-string 's' is first converted to its string representation.
// Aborts the fiber unless 'reps' is a non-negative integer.
static repeat(s, reps) {
    if (!(s is String)) s = "%(s)"
    if (!(reps is Num && reps.isInteger && reps >= 0)) {
        Fiber.abort("Repetitions must be a non-negative integer.")
    }
    var rs = ""
    if (reps < 10) {
        for (i in 0...reps) rs = rs + s
    } else {
        // Binary doubling: append the current power-of-two block of 's' for
        // each set bit of 'reps', doubling the block at every step.
        while (true) {
            if (reps % 2 == 1) rs = rs + s
            reps = reps >> 1
            if (reps == 0) break
            s = s + s
        }
    }
    return rs
}

// Splits a string 's' into chunks of not more than 'size' characters.
Line 425 ⟶ 544:
    if (final > 0) res.add(sub(s, first..-1))
    return res
}

// Splits 's' into a list of one or more strings separated by 'sep' but removes
// any empty elements from the list.
static splitNoEmpty(s, sep) {
    if (!(s is String)) s = "%(s)"
    if (!(sep is String) || sep.isEmpty) Fiber.abort("Separator must be a non-empty string.")
    var split = s.split(sep)
    return split.where { |e| !e.isEmpty }.toList
}

// Splits a CSV 'line' into a list of one or more strings separated by 'sep' which must be
// a single character (except \v). Deals properly with embedded separators in quoted fields.
// Removes leading and trailing quotes from quoted fields if 'dequote' is true.
static splitCsv(line, sep, dequote) {
    if (!(line is String)) line = "%(line)"
    if (!(sep is String) || sep.count != 1) {
        Fiber.abort("Separator must be a single character string.")
    }
    if (!(dequote is Bool)) Fiber.abort("Dequote must be a boolean.")
    var fields = line.split(sep)
    var count = 0
    var quoted = false
    var chars = line.toList
    // First pass: walk the naive split and overwrite every separator that lies
    // inside a quoted field with "\v" so that the second split ignores it.
    for (i in 0...fields.count) {
        var f = fields[i]
        var fc = f.count
        if (fc > 0) {
            count = count + fc
            if (!quoted && f[0] == "\"") {
                if (f[-1] != "\"") {
                    quoted = true
                    chars[count] = "\v"
                }
            } else if (quoted && f[-1] == "\"") {
                quoted = false
            } else if (quoted) {
                chars[count] = "\v"
            }
        } else if (quoted) {
            chars[count] = "\v"
        }
        count = count + 1
    }
    // Second pass: split again, then restore the protected separators.
    fields = chars.join("").split(sep)
    for (i in 0...fields.count) fields[i] = fields[i].replace("\v", sep)
    if (dequote) {
        for (i in 0...fields.count) {
            var f = fields[i]
            var fc = f.count
            if (fc < 2) continue
            if (f[0] == "\"" && f[-1] == "\"") fields[i] = f[1...-1]
        }
    }
    return fields
}

// Convenience versions of the above method which use default parameters.
static splitCsv(line, sep) {
    return splitCsv(line, sep, true)
}

static splitCsv(line) {
    return splitCsv(line, ",", true)
}

// Cuts the string 's' in two at the first occurrence of 'delim' and returns
// a two element list: the text before 'delim' and the text after it.
// When 'include' is true the second element starts with 'delim' itself.
// When 'delim' is absent from 's' the result is [s, ""].
static bisect(s, delim, include) {
    if (!(delim is String)) Fiber.abort("Delimiter must be a string.")
    if (!(include is Bool)) Fiber.abort("Include must be true or false.")
    if (!(s is String)) s = "%(s)"
    var pos = s.indexOf(delim)
    if (pos == -1) return [s, ""]
    // Subscripts are byte offsets, hence the byte length of 'delim'.
    var tailStart = pos
    if (!include) tailStart = pos + delim.bytes.count
    var head = s[0...pos]
    return [head, s[tailStart..-1]]
}

// Shorthand for 'bisect' with the delimiter left out of the second part.
static bisect(s, delim) {
    return bisect(s, delim, false)
}

// Builds a string out of the list of bytes 'ba' and returns it.
static fromBytes(ba) {
    if (!(ba is List)) Fiber.abort("Argument must be list of bytes.")
    var n = ba.count
    if (n == 0) return ""
    var pieces = []
    for (b in ba) pieces.add(String.fromByte(b))
    if (n < 1000) return Strs.concat_(pieces)
    return Strs.concat(pieces, 1000)
}

// Builds a string out of the list of code points 'ca' and returns it.
static fromCodePoints(ca) {
    if (!(ca is List)) Fiber.abort("Argument must be list of code points.")
    var n = ca.count
    if (n == 0) return ""
    var pieces = []
    for (cp in ca) pieces.add(String.fromCodePoint(cp))
    if (n < 1000) return Strs.concat_(pieces)
    return Strs.concat(pieces, 1000)
}

// After trimming whitespace from the string 's', takes as many characters as possible
// to form a valid number and converts it thereto using the Num.fromString method.
// Returns null if such a conversion is impossible.
static toNum(s) {
    if (s is Num) return s
    if (!(s is String)) s = "%(s)"
    s = s.trim()
    var n = Num.fromString(s)
    if (n) return n
    if (s.count < 2) return null
    // Drop one trailing character at a time until what remains parses as a number.
    var chars = s.toList
    for (i in chars.count-1..1) {
        chars.removeAt(i)
        if (n = Num.fromString(chars.join())) return n
    }
    return null
}

// Converts a pattern into a list of tokens for processing by the 'isMatch' method.
// Characters within the pattern are represented as follows:
//   Non-wildcard characters as themselves (i.e. single character strings);
//   * (or *) by the number 0;
//   ? (or ?) by the number 1;
//   [set] by a list of the tokens within the set:
//         single characters by themselves;
//         a range of characters, a-b, by a Range of codepoints from 'a' to 'b'.
//         If the first character of the set is '!' then the number -1 is inserted
//         as a separate token immediately before the list.
// A '[' which is the last character of the pattern, or which has no closing ']',
// is kept as an ordinary character. Aborts the fiber if a set is empty.
static tokenize(pattern) {
    var tokens = []
    var i = 0
    var j
    while (i < pattern.count) {
        var c = pattern[i]
        if (c == "*") {
            // Consecutive stars collapse into a single 0 token.
            if (i == 0 || tokens[-1] != 0) tokens.add(0)
        } else if (c == "?") {
            // A '?' directly after a star replaces the star token.
            if (i > 0 && tokens[-1] == 0) tokens[-1] = 1 else tokens.add(1)
        } else if (c == "[") {
            if (i == pattern.count - 1) {
                tokens.add(c)
            } else if ((j = indexOf(pattern, "]", i + 1)) == -1) {
                tokens.add(c)
            } else {
                var l = []
                var s = sub(pattern, i+1...j)
                var k = 0
                while (k < s.count) {
                    var d = s[k]
                    if (d == "!") {
                        if (k == 0) tokens.add(-1) else l.add(d)
                    } else if (k < s.count - 2 && s[k+1] == "-") {
                        l.add(d.codePoints[0]..s[k+2].codePoints[0])
                        k = k + 2
                    } else {
                        l.add(d)
                    }
                    k = k + 1
                }
                if (l.count == 0) Fiber.abort("set cannot be empty.")
                tokens.add(l)
                // Move to the closing ']'; the increment below steps past it.
                i = i + s.count + 1
            }
        } else {
            tokens.add(c)
        }
        i = i + 1
    }
    return tokens
}

// Returns whether a string 's' matches a 'pattern' which may already be tokenized
// if many strings are to be matched. Matching is case sensitive.
// Patterns may contain the following wildcards:
//   * (or *) matches zero or more characters until the next token (if any) matches
//            and doesn't backtrack in the event of subsequent failure;
//   ? (or ?) matches exactly one character;
//   [set] matches a single character from the set within the brackets e.g. [aeiou].
//         The set can also contain ranges of characters separated by '-' e.g. [a-zA-Z].
//         If the first character of the set is '!' then only characters NOT within the rest
//         of the set are matched e.g. [!0-9] matches any character other than a digit.
static isMatch(s, pattern) {
    var tokens = pattern
    if (tokens is String) tokens = tokenize(tokens)
    if (!((tokens is List) && tokens.count > 0)) {
        Fiber.abort("'pattern' must be a non-empty string or list of tokens.")
    }
    var i = 0         // position in 's'
    var j = 0         // position in 'tokens'
    var star = false  // true while a star is still consuming characters
    var neg = false   // true when the next set token is negated
    while (i < s.count && j < tokens.count) {
        var c = s[i]
        var t = tokens[j]
        if (t is Num) {
            if (t == 0) {
                star = true
            } else if (t == 1) {
                i = i + 1
                star = false
            } else if (t == -1) {
                neg = true
            } else {
                Fiber.abort("'%(t)' is not a recognized token.")
            }
            j = j + 1
        } else if (t is String) {
            if (!star && c != t) return false
            if (star && c == t) star = false
            i = i + 1
            if (!star) j = j + 1
        } else if (t is List) {
            var matched = false
            for (e in t) {
                if (e is String) {
                    if (e == c) {
                        matched = true
                        break
                    }
                } else if (e is Range){
                    var cp = c.codePoints[0]
                    if (cp >= e.from && cp <= e.to) {
                        matched = true
                        break
                    }
                } else {
                    Fiber.abort("'%(e)' is not a recognized token within a set.")
                }
            }
            if (!star && !neg && !matched) return false
            if (!star && neg && matched) return false
            if (star && matched) star = false
            i = i + 1
            neg = false
            if (!star) j = j + 1
        } else {
            Fiber.abort("'%(t)' is not a recognized token.")
        }
    }
    if (i == s.count && j == tokens.count) return true
    // A trailing star matches whatever (possibly nothing) is left of 's'.
    if (j == tokens.count && tokens[-1] == 0) return true
    if (j == tokens.count - 1 && tokens[-1] == 0) return true
    return false
}
}
Line 552 ⟶ 911:
        return (b0 & b4Mask) << 18 | (b[1] & mbMask) << 12 | (b[2] & mbMask) << 6 | (b[3] & mbMask)
    }
}

/*
    The next four methods extend the casing performed by the corresponding 'Str' methods to
    include Latin Extended-A, parts of Latin Extended-B, Latin Extended Additional, Greek,
    Cyrillic, Armenian and Georgian.
*/

// Converts a UTF-8 string to lower case.
// A non-string 's' is first converted to its string representation.
static lower(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    var chars = s.toList
    var count = chars.count
    var i = 0
    for (c in s.codePoints) {
        if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c < 256) {
            // catch other Latin-1 characters quickly.
        } else if ((c >= 0x0100 && c <= 0x0136) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x0139 && c <= 0x0147) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x014A && c <= 0x0176) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x0178) {
            chars[i] = "ÿ"
        } else if (c == 0x0179 || c == 0x017B || c == 0x017D || c == 0x01A0 || c == 0x01AF || c == 0x01F4) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x01C4 || c == 0x01C7 || c == 0x01CA || c == 0x01F1) {
            chars[i] = String.fromCodePoint(c + 2)
        } else if (c == 0x01C5 || c == 0x01C8 || c == 0x01CB || c == 0x01F2) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x01DE && c <= 0x01EE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x01F8 && c <= 0x021E) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x1E00 && c <= 0x1E94) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x1E9E) {
            chars[i] = "ß"
        } else if ((c >= 0x1EA0 && c <= 0x1EFE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x0386) {
            chars[i] = "ά"
        } else if (c == 0x0388 || c == 0x0389 || c == 0x038A) {
            chars[i] = String.fromCodePoint(c + 37)
        } else if (c == 0x038C) {
            chars[i] = "ό"
        } else if (c == 0x038E || c == 0x038F) {
            chars[i] = String.fromCodePoint(c + 63)
        } else if (c >= 0x0391 && c <= 0x03A1) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c == 0x03A3) {
            // Capital sigma becomes final sigma only as the last character of the string.
            chars[i] = (i == count - 1) ? "ς" : "σ"
        } else if (c >= 0x03A4 && c <= 0x03AB) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c >= 0x0400 && c <= 0x040F) {
            // This range must stop at 0x040F: 0x0410..0x041F belong to the +32 range
            // below (the inverse of the 0x0430..0x044F range in 'upper').
            chars[i] = String.fromCodePoint(c + 80)
        } else if (c >= 0x0410 && c <= 0x042F) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if ((c >= 0x048A && c <= 0x04BE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x04C1 && c <= 0x04CD) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x04D0 && c <= 0x052E) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c >= 0x0531 && c <= 0x0556) {
            chars[i] = String.fromCodePoint(c + 48)
        } else if (c >= 0x10A0 && c <= 0x10C5) {
            chars[i] = String.fromCodePoint(c + 48)
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}

// Converts a UTF-8 string to upper case.
// A non-string 's' is first converted to its string representation.
static upper(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    var chars = s.toList
    var count = chars.count
    var i = 0
    for (c in s.codePoints) {
        if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c == 223) {
            chars[i] = "ẞ"
        } else if (c == 255) {
            // The inverse of the 0x0178 -> "ÿ" mapping in 'lower'.
            chars[i] = "Ÿ"
        } else if (c < 255) {
            // catch other Latin-1 characters quickly.
        } else if ((c >= 0x0101 && c <= 0x0137) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x013A && c <= 0x0148) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x014B && c <= 0x0177) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x017A || c == 0x017C || c == 0x017E || c == 0x01A1 || c == 0x01B0 || c == 0x01F5) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x01C5 || c == 0x01C8 || c == 0x01CB || c == 0x01F2) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x01C6 || c == 0x01C9 || c == 0x01CC || c == 0x01F3) {
            chars[i] = String.fromCodePoint(c - 2)
        } else if ((c >= 0x01DF && c <= 0x01EF) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x01F9 && c <= 0x021F) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x1E01 && c <= 0x1E95) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x1EA1 && c <= 0x1EFF) && (c % 2 == 1)) {
            // 0x1E9E (capital sharp s) is already upper case and is deliberately
            // not handled by any branch, so it is left unchanged.
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c == 0x03AC) {
            chars[i] = "Ά"
        } else if (c == 0x03AD || c == 0x03AE || c == 0x03AF) {
            chars[i] = String.fromCodePoint(c - 37)
        } else if (c >= 0x03B1 && c <= 0x03C1) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c == 0x03C2) {
            chars[i] = "Σ"
        } else if (c >= 0x03C3 && c <= 0x03CB) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c == 0x03CC) {
            chars[i] = "Ό"
        } else if (c == 0x03CD || c == 0x03CE) {
            chars[i] = String.fromCodePoint(c - 63)
        } else if (c >= 0x0430 && c <= 0x044F) {
            chars[i] = String.fromCodePoint(c - 32)
        } else if (c >= 0x0450 && c <= 0x045F) {
            chars[i] = String.fromCodePoint(c - 80)
        } else if ((c >= 0x048B && c <= 0x04BF) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x04C2 && c <= 0x04CE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if ((c >= 0x04D1 && c <= 0x052F) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c - 1)
        } else if (c >= 0x0561 && c <= 0x0586) {
            chars[i] = String.fromCodePoint(c - 48)
        } else if (c >= 0x10D0 && c <= 0x10F5) {
            chars[i] = String.fromCodePoint(c - 48)
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}

// Capitalizes the first character of a UTF-8 string.
// Uses title rather than upper case variant if it's one of 4 supported digraphs.
// If the string starts with "[" and is longer than one character, the character
// after the bracket is capitalized instead.
static capitalize(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    var start = (s.startsWith("[") && s.count > 1) ? 1 : 0
    var cs = upper(s[start])
    var c = cs.codePoints[0]
    if (c == 0x01C4 || c == 0x01C7 || c == 0x01CA || c == 0x01F1) {
        cs = String.fromCodePoint(c + 1)
    }
    if (s.count > start + 1) cs = cs + s[start+1..-1]
    if (start == 1) cs = "[" + cs
    return cs
}

// Capitalizes the first character of each word of a UTF-8 string.
// Uses title rather than upper case variant if it's one of 4 supported digraphs.
static title(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    var words = s.split(" ")
    return Strs.join(words.map { |w| capitalize(w) }.toList, " ")
}

// Removes accents and other diacritical marks from all characters in a string,
// expands digraphs and removes all combining characters before returning the result.
// As well as Latin-1 Supplement, coverage includes Latin Extended-A and various
// other characters found in modern European languages which use the Latin alphabet.
static unaccent(s) {
    // A non-string 's' is first converted to its string representation.
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // accented[j] holds every accented form of the plain letter unaccented[j];
    // entries alternate lower case, upper case.
    var accented = [
        "àáâãäåāăą", "ÀÁÂÃÄÅĀĂĄ", "ḃ", "Ḃ", "çćĉċč", "ÇĆĈĊČ", "ðďđḋ", "ÐĎĐḊ",
        "èéêëēĕėęě", "ÈÉÊËĒĔĖĘĚ", "ḟ", "Ḟ", "ĝğġģ", "ĜĞĠĢ", "ĥħ", "ĤĦ",
        "ìíîïĩīĭįı", "ÌÍÎÏĨĪĬĮİ", "ĵ", "Ĵ", "ķĸ", "Ķ", "ĺļľŀł", "ĹĻĽĿŁ", "ṁ", "Ṁ",
        "ñńņňŉ", "ÑŃŅŇ", "òóôõöøōŏő", "ÒÓÔÕÖØŌŎŐ", "ṗ", "Ṗ", "ŕŗř", "ŔŖŘ",
        "śŝşšșſ", "ŚŜŞŠȘ", "ţťŧṱț", "ŢŤŦṰȚ", "ùúûüũūŭůűų", "ÙÚÛÜŨŪŬŮŰŲ",
        "ŵẁẃẅ", "ŴẀẂẄ", "ýÿỳŷ", "ÝŸỲŶ", "źżž", "ŹŻŽ"
    ]
    var unaccented = "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPrRsStTuUwWyYzZ"
    var digraphs = {
        "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss", "ẞ": "SS",
        "ĳ": "ij", "Ĳ": "IJ", "ŋ": "ng", "Ŋ": "NG", "œ": "oe", "Œ": "OE"
    }
    var chars = s.toList
    var count = chars.count
    var i = 0
    for (c in s.codePoints) {
        // The two ranges must span every character in the tables above:
        // 0x00c0 (À) to 0x021b (ț) and 0x1e02 (Ḃ) to 0x1ef3 (ỳ).
        if ((c >= 0x00c0 && c <= 0x021b) || (c >= 0x1e02 && c <= 0x1ef3)) {
            var found = false
            for (j in 0...accented.count) {
                if (accented[j].indexOf(chars[i]) >= 0) {
                    chars[i] = unaccented[j]
                    found = true
                    break
                }
            }
            if (!found && digraphs.containsKey(chars[i])) chars[i] = digraphs[chars[i]]
        } else if (c >= 0x0300 && c <= 0x036F) {
            // Combining diacritical marks are removed altogether.
            chars[i] = ""
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}

// Converts a Windows-1252 encoded byte string to a UTF-8 encoded string.
// Aborts the fiber if the argument is not a string.
static fromWin1252(win1252) {
    if (!(win1252 is String)) Fiber.abort("Argument must be a byte string.")
    if (win1252.count == 0) return ""
    // mapping for Windows 1252 bytes 128-159.
    // Unused bytes are mapped to the corresponding ISO-8859-1 control codes.
    var bm = [
        0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
        0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
        0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
        0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
    ]
    var bytes = win1252.bytes
    var utf8 = List.filled(bytes.count, 0)
    for (i in 0...bytes.count) {
        var b = bytes[i]
        if (b < 128 || b > 159) {
            // Outside 128-159 the byte value is used directly as the code point.
            utf8[i] = String.fromCodePoint(b)
        } else {
            utf8[i] = String.fromCodePoint(bm[b-128])
        }
    }
    return utf8.join()
}
}

/*
    'Greek' enables characters from the Greek alphabet to be found from their name.
    These characters are often used as mathematical or scientific symbols.
*/
class Greek {
    // Returns the Greek alphabet, lower then upper case characters.
    // The upper case half contains \u03a2 in the slot matching final sigma so that
    // both halves have 25 characters and line up with 'names'.
    static alphabet { "αβγδεζηθικλμνξοπρςστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\u03a2ΣΤΥΦΧΨΩ" }

    // Returns a list of the names of all Greek letters in alphabetical order.
    static names {
        return [
            "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta", "iota",
            "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi", "rho", "sigma final",
            "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega"
        ]
    }

    // Returns the name of a Greek character or null if not found.
    // Upper case characters are returned with the initial letter capitalized.
    static name(char) {
        if (char.count != 1) return null
        var ix = alphabet.toList.indexOf(char)
        if (ix == -1) return null
        if (ix < 25) return names[ix]
        return Str.capitalize(names[ix-25])
    }

    // Finds and returns a Greek lower case character from its name.
    // The name is matched case-insensitively; aborts the fiber if it is not found.
    static lower(name) {
        name = Str.lower(name)
        var ix = names.indexOf(name)
        if (ix == -1) Fiber.abort("Name not found.")
        return String.fromCodePoint(0x03b1 + ix)
    }

    // Finds and returns a Greek upper case character from its name.
    // The name is matched case-insensitively; aborts the fiber if it is not found.
    static upper(name) {
        name = Str.lower(name)
        var ix = names.indexOf(name)
        if (ix == -1) Fiber.abort("Name not found.")
        // "sigma final" would otherwise land on 0x03a2, so use capital sigma.
        if (name == "sigma final") ix = ix + 1
        return String.fromCodePoint(0x0391 + ix)
    }
}</syntaxhighlight>