Category talk:Wren-str: Difference between revisions

← Older edit

Category talk:Wren-str (view source)

Revision as of 10:53, 30 March 2024

26,896 bytes added, 1 month ago

→‎Source code: Added Str.lastIndexOf method.

PureFox

9,476

edits

Revision as of 09:42, 8 April 2022 (view source) PureFox (talk \| contribs) (→‎Source code: Added Greek class.) ← Older edit		Latest revision as of 10:53, 30 March 2024 (view source) PureFox (talk \| contribs) (→‎Source code: Added Str.lastIndexOf method.)
(20 intermediate revisions by the same user not shown)
Line 1: ===Source code=== <~~lang~~syntaxhighlight ~~ecmascript~~lang="wren">/* Module "str.wren" / /
Line 63:
    // Returns whether the character 'c' is whitespace: code 32, codes 9 to 13 inclusive, or code 160.
    static isWhitespace(c) {
        var d = code(c)
        return d == 32 || (d >= 9 && d <= 13) || d == 160
    }
Line 144:
    // Checks if a string falls into a particular category.
    // Every predicate returns false for the empty string.
    static allAscii(s)        { s != "" && s.codePoints.all { |c| c < 128 } }
    static allLatin1(s)       { s != "" && s.codePoints.all { |c| c < 256 } }
    static allDigits(s)       { s != "" && s.codePoints.all { |c| c >= 48 && c <= 57 } }
    static allAsciiLower(s)   { s != "" && s.codePoints.all { |c| c >= 97 && c <= 122 } }
    static allAsciiUpper(s)   { s != "" && s.codePoints.all { |c| c >= 65 && c <= 90 } }
    static allAsciiLetters(s) { s != "" && s.toList.all { |c| Char.isAsciiLetter(c) } }
    static allAsciiAlphaNum(s) { s != "" && s.toList.all { |c| Char.isAsciiAlphaNum(c) } }
    static allSpace(s)        { s != "" && s.toList.all { |c| Char.isSpace(c) } }
    static allLower(s)        { s != "" && s.toList.all { |c| Char.isLower(c) } }
    static allUpper(s)        { s != "" && s.toList.all { |c| Char.isUpper(c) } }
    static allLetters(s)      { s != "" && s.toList.all { |c| Char.isLetter(c) } }
    static allAlphaNumeric(s) { s != "" && s.toList.all { |c| Char.isAlphanumeric(c) } }
    static allPrintable(s)    { s != "" && s.toList.all { |c| Char.isPrintable(c) } }
    static allGraphic(s)      { s != "" && s.toList.all { |c| Char.isGraphic(c) } }
    static allWhitespace(s)   { s != "" && s.toList.all { |c| Char.isWhitespace(c) } }

    // Checks whether a string can be parsed to a number, an integer or a non-integer (float).
Line 236:
        var words = s.split(" ")
        return Strs.join(words.map { |w| capitalize(w) }.toList, " ")
    }

    // Removes accents and cedillas from all Latin-1 supplement characters in a string
    // and also expands digraphs before returning the result.
static unaccent(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // accented[j] lists every accented form that maps to the plain letter unaccented[j].
    var accented = [
        "àáâãäå", "ÀÁÂÃÄÅ", "ç", "Ç", "ð", "Ð", "èéêë", "ÈÉÊË", "ìíîï", "ÌÍÎÏ",
        "ñ", "Ñ", "òóôõöø", "ÒÓÔÕÖØ", "ùúûü", "ÙÚÛÜ", "ýÿ", "Ý"
    ]
    var unaccented = "aAcCdDeEiInNoOuUyY"
    var digraphs = { "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss" }
    var chars = s.toList
    var count = chars.count
    var i = 0
    for (c in s.codePoints) {
        // Only codepoints 0xc0..0xff can appear in the tables above.
        if (c >= 0xc0 && c <= 0xff) {
            var found = false
            for (j in 0...accented.count) {
                if (accented[j].indexOf(chars[i]) >= 0) {
                    chars[i] = unaccented[j]
                    found = true
                    break
                }
            }
            if (!found && digraphs.containsKey(chars[i])) chars[i] = digraphs[chars[i]]
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
Line 244 ⟶ 276:
    }

    // Performs a circular shift of the characters of 's' 'n' places to the left.
    // If 'n' is negative performs a circular right shift by '-n' places instead.
    // Aborts the fiber if 'n' is not an integer. Non-string 's' is converted to a string first.
    static lshift(s, n) {
        if (!(s is String)) s = "%(s)"
        if (!(n is Num) || !n.isInteger) Fiber.abort("'n' must be an integer.")
        var chars = s.toList
        var count = chars.count
        if (count < 2) return s
        if (n < 0) return rshift(s, -n)
        n = n % count  // shifting by a multiple of 'count' is a no-op
        if (n == 0) return s
        for (i in 1..n) {
            var t = chars[0]
            for (j in 0..count-2) chars[j] = chars[j+1]
            chars[-1] = t
        }
        return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
    }

    // Performs a circular shift of the characters of 's' 'n' places to the right.
    // If 'n' is negative performs a circular left shift by '-n' places instead.
static rshift(s, n) {
    // Aborts the fiber if 'n' is not an integer. Non-string 's' is converted to a string first.
    if (!(s is String)) s = "%(s)"
    if (!(n is Num) || !n.isInteger) Fiber.abort("'n' must be an integer.")
    var chars = s.toList
    var count = chars.count
    if (count < 2) return s
    if (n < 0) return lshift(s, -n)
    n = n % count  // shifting by a multiple of 'count' is a no-op
    if (n == 0) return s
    for (i in 1..n) {
        var t = chars[-1]
        for (j in count-2..0) chars[j+1] = chars[j]
        chars[0] = t
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}

    // Convenience versions of the above methods which shift by just 1 place.
    static lshift(s) { lshift(s, 1) }
    static rshift(s) { rshift(s, 1) }

    /* The indices (or ranges thereof) for all the following functions are measured in codepoints
Line 325 ⟶ 375:
        var ix = Str.indexOf(ss, search)
        return (ix >= 0) ? start + ix : -1
    }

    // Returns the codepoint index (not byte index) at which 'search' last occurs in 's'
    // or -1 if 'search' is not found. 'search' may be more than one character long.
    // An empty 'search', or one longer than 's', is never found.
    static lastIndexOf(s, search) {
        if (!(search is String)) Fiber.abort("Search argument must be a string.")
        if (!(s is String)) s = "%(s)"
        var chars = s.toList
        var target = search.toList
        var n = target.count
        // Guard first: a descending range over an empty list would index out of bounds.
        if (n == 0 || n > chars.count) return -1
        var i = chars.count - n
        while (i >= 0) {
            var j = 0
            while (j < n && chars[i + j] == target[j]) j = j + 1
            if (j == n) return i
            i = i - 1
        }
        return -1
    }

    // Returns the number of non-overlapping occurrences of the string 't'
    // within the string 's'.
    static occurs(s, t) { s.split(t).count - 1 }

    // Returns the number of non-overlapping occurrences of the string 't'
    // within the string 's' starting from codepoint offset 'start'.
    static occurs(s, t, start) {
        if (start == 0) return occurs(s, t)
        return occurs(Str.sub(s, start..-1), t)
    }
Line 368 ⟶ 441:
        if (i == j) return s
        var chars = s.toList
        chars.swap(i, j)
        return Strs.concat(chars)
    }

    // Returns 's' with 'from' replaced by 'to' up to 'n' times (all times if n is negative)
    // but skipping the first 'skip' matches.
static replace(s, from, to, n, skip) {
    // Matches are numbered from 1; those numbered skip+1 up to n are replaced.
    if (!(from is String)) Fiber.abort("'from' must be a string.")
    if (!(to is String)) Fiber.abort("'to' must be a string.")
    if (!(n is Num && n.isInteger)) Fiber.abort("'n' must be an integer.")
    if (!(skip is Num && skip.isInteger && skip >= 0)) {
        Fiber.abort("'skip' must be a non-negative integer.")
    }
    if (!(s is String)) s = "%(s)"
    if (n < 0) {
        // Negative 'n' means "no limit"; with nothing to skip the built-in replace suffices.
        if (skip == 0) return s.replace(from, to)
        n = Num.maxSafeInteger
    }
    if (n == 0 || skip >= n) return s
    var count = 0
    var split = s.split(from)
    var res = ""
    // Re-join the pieces, choosing 'from' or 'to' as the joiner for each match in turn.
    for (i in 0...split.count-1) {
        count = count + 1
        res = res + split[i] + ((count <= skip || count > n) ? from : to)
    }
    return res + split[-1]
}

    // Convenience version of 'replace' where 'skip' is always zero.
    static replace(s, from, to, n) { replace(s, from, to, n, 0) }

    // Adds 'by' to the start of each line of 's'
    // and returns the result.
    static indent(s, by) {
        if (!(s is String)) Fiber.abort("First argument must be a string.")
        if (!(by is String)) Fiber.abort("Second argument must be a string.")
        var lines = s.split("\n")
        return lines.map { |line| by + line }.join("\n")
    }

    // Removes 'by' from the start of each line of 's' which begins with it
    // and returns the result.
    static dedent(s, by) {
        if (!(s is String)) Fiber.abort("First argument must be a string.")
        if (!(by is String)) Fiber.abort("Second argument must be a string.")
        var lines = s.split("\n")
        var c = by.bytes.count  // string range subscripts are byte offsets
        return lines.map { |line|
            if (line.startsWith(by)) return line[c..-1]
            return line
        }.join("\n")
    }

    // Removes all spaces and tabs from the end of each line of s
    // and returns the result.
    static tidy(s) {
        if (!(s is String)) Fiber.abort("Argument must be a string.")
        var lines = s.split("\n")
        return lines.map { |line| line.trimEnd(" \t") }.join("\n")
    }

    // Returns 's' repeated 'reps' times.
Line 416 ⟶ 545:
        return res
    }

    // Splits 's' into a list of one or more strings separated by 'sep' but removes
    // any empty elements from the list.
static splitNoEmpty(s, sep) {
    if (!(s is String)) s = "%(s)"
    if (!(sep is String) || sep.isEmpty) Fiber.abort("Separator must be a non-empty string.")
    var split = s.split(sep)
    return split.where { |e| !e.isEmpty }.toList
}

    // Splits a CSV 'line' into a list of one or more strings separated by 'sep' which must be
    // a single character (except \v). Deals properly with embedded separators in quoted fields.
    // Removes leading and trailing quotes from quoted fields if 'dequote' is true.
    // A quote left open at the end of the line is tolerated: the field is returned as it stands.
    static splitCsv(line, sep, dequote) {
        if (!(line is String)) line = "%(line)"
        if (!(sep is String) || sep.count != 1) {
            Fiber.abort("Separator must be a single character string.")
        }
        if (!(dequote is Bool)) Fiber.abort("Dequote must be a boolean.")
        var fields = line.split(sep)
        var count = 0       // codepoint index of the separator that follows the current field
        var quoted = false  // true while inside a quoted field spanning several raw fields
        var chars = line.toList
        for (i in 0...fields.count) {
            var f = fields[i]
            var fc = f.count
            var embedded = false  // does the separator after this field lie inside quotes?
            if (fc > 0) {
                count = count + fc
                if (!quoted && f[0] == "\"") {
                    if (f[-1] != "\"") {
                        quoted = true
                        embedded = true
                    }
                } else if (quoted && f[-1] == "\"") {
                    quoted = false
                } else if (quoted) {
                    embedded = true
                }
            } else if (quoted) {
                embedded = true
            }
            // Mask embedded separators with \v. After the last field there is no
            // separator, so the index must be checked to avoid subscripting past the end.
            if (embedded && count < chars.count) chars[count] = "\v"
            count = count + 1
        }
        fields = chars.join("").split(sep)
        for (i in 0...fields.count) fields[i] = fields[i].replace("\v", sep)
        if (dequote) {
            for (i in 0...fields.count) {
                var f = fields[i]
                var fc = f.count
                if (fc < 2) continue
                if (f[0] == "\"" && f[-1] == "\"") fields[i] = f[1...-1]
            }
        }
        return fields
    }

    // Convenience versions of the above method which use default parameters.
    static splitCsv(line, sep) { splitCsv(line, sep, true) }
    static splitCsv(line)      { splitCsv(line, ",", true) }

    // Splits a string 's' into two parts, before and after the first occurrence
    // of 'delim' and returns a list of those parts.
    // The 'delim' itself can be optionally included in the second part.
    // If 'delim' does not occur in 's', returns [s, ""].
static bisect(s, delim, include) {
    if (!(delim is String)) Fiber.abort("Delimiter must be a string.")
    if (!(include is Bool)) Fiber.abort("Include must be true or false.")
    if (!(s is String)) s = "%(s)"
    var ix = s.indexOf(delim)  // byte offset, as used by the range subscripts below
    if (ix == -1) return [s, ""]
    if (include) return [s[0...ix], s[ix..-1]]
    var len = delim.bytes.count
    return [s[0...ix], s[ix + len..-1]]
}

    // Convenience version of bisect method which never includes the delimiter.
    static bisect(s, delim) { bisect(s, delim, false) }

    // Creates and returns a string from a list of bytes.
Line 433 ⟶ 639:
        var chars = ca.map { |c| String.fromCodePoint(c) }.toList
        return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
    }

    // After trimming whitespace from the string 's', takes as many characters as possible
    // to form a valid number and converts it thereto using the Num.fromString method.
    // Returns null if such a conversion is impossible.
    static toNum(s) {
        if (s is Num) return s
        if (!(s is String)) s = "%(s)"
        s = s.trim()
        var n = Num.fromString(s)
        if (n) return n
        if (s.count < 2) return null
        var chars = s.toList
        // Drop one trailing character at a time until the remaining prefix parses.
        for (i in chars.count-1..1) {
            chars.removeAt(i)
            if (n = Num.fromString(chars.join())) return n
        }
        return null
    }

    // Converts a pattern into a list of tokens for processing by the 'isMatch' method.
    // Characters within the pattern are represented as follows:
    //   Non-wildcard characters as themselves (i.e. single character strings);
    //   * (or *) by the number 0;
    //   ? (or ?) by the number 1;
    //   [set] by a list of the tokens within the set:
    //     single characters by themselves;
    //     a range of characters, a-b, by a Range of codepoints from 'a' to 'b'.
    //   If the first character of the set is '!' then the number -1 is inserted
    //   as a separate token immediately before the list.
static tokenize(pattern) {
    // NOTE(review): pattern[i] subscripts by byte while pattern.count counts codepoints;
    // this looks safe for ASCII patterns only - confirm for non-ASCII input.
    var tokens = []
    var i = 0
    var j
    while (i < pattern.count) {
        var c = pattern[i]
        if (c == "*") {
            // Consecutive stars collapse into a single 0 token.
            if (i == 0 || tokens[-1] != 0) tokens.add(0)
        } else if (c == "?") {
            // A '?' directly after a star replaces the star token.
            if (i > 0 && tokens[-1] == 0) tokens[-1] = 1 else tokens.add(1)
        } else if (c == "[") {
            if (i == pattern.count - 1) {
                // A trailing '[' cannot open a set, so it is a literal.
                tokens.add(c)
            } else if ((j = indexOf(pattern, "]", i + 1)) == -1) {
                // No closing bracket: treat '[' as a literal.
                tokens.add(c)
            } else {
                var l = []
                var s = sub(pattern, i+1...j)
                var k = 0
                while (k < s.count) {
                    var d = s[k]
                    if (d == "!") {
                        // A leading '!' negates the set; elsewhere it is an ordinary member.
                        if (k == 0) tokens.add(-1) else l.add(d)
                    } else if (k < s.count - 2 && s[k+1] == "-") {
                        l.add(d.codePoints[0]..s[k+2].codePoints[0])
                        k = k + 2
                    } else {
                        l.add(d)
                    }
                    k = k + 1
                }
                if (l.count == 0) Fiber.abort("set cannot be empty.")
                tokens.add(l)
                // Skip the set's contents; the increment below steps over the ']'.
                i = i + s.count + 1
            }
        } else {
            tokens.add(c)
        }
        i = i + 1
    }
    return tokens
}

    // Returns whether a string 's' matches a 'pattern' which may already be tokenized
    // if many strings are to be matched. Matching is case sensitive.
    // Patterns may contain the following wildcards:
    //   * (or *) matches zero or more characters until the next token (if any) matches
    //     and doesn't backtrack in the event of subsequent failure;
    //   ? (or ?) matches exactly one character;
    //   [set] matches a single character from the set within the brackets e.g. [aeiou].
    //     The set can also contain ranges of characters separated by '-' e.g. [a-zA-Z].
    //     If the first character of the set is '!' then only characters NOT within the rest
    //     of the set are matched e.g. [!0-9] matches any character other than a digit.
static isMatch(s, pattern) {
    // NOTE(review): s[i] subscripts by byte while s.count counts codepoints;
    // this looks safe for ASCII subjects only - confirm for non-ASCII input.
    var tokens = pattern
    if (tokens is String) tokens = tokenize(tokens)
    if (!((tokens is List) && tokens.count > 0)) {
        Fiber.abort("'pattern' must be a non-empty string or list of tokens.")
    }
    var i = 0         // position in 's'
    var j = 0         // position in 'tokens'
    var star = false  // a star is pending: skip characters until the next token matches
    var neg = false   // the next set token is negated
    while (i < s.count && j < tokens.count) {
        var c = s[i]
        var t = tokens[j]
        if (t is Num) {
            if (t == 0) {
                star = true
            } else if (t == 1) {
                i = i + 1
                star = false
            } else if (t == -1) {
                neg = true
            } else {
                Fiber.abort("'%(t)' is not a recognized token.")
            }
            j = j + 1
        } else if (t is String) {
            if (!star && c != t) return false
            if (star && c == t) star = false
            i = i + 1
            if (!star) j = j + 1
        } else if (t is List) {
            var matched = false
            for (e in t) {
                if (e is String) {
                    if (e == c) {
                        matched = true
                        break
                    }
                } else if (e is Range){
                    var cp = c.codePoints[0]
                    if (cp >= e.from && cp <= e.to) {
                        matched = true
                        break
                    }
                } else {
                    Fiber.abort("'%(e)' is not a recognized token within a set.")
                }
            }
            if (!star && !neg && !matched) return false
            if (!star && neg && matched) return false
            if (star && matched) star = false
            i = i + 1
            neg = false
            if (!star) j = j + 1
        } else {
            Fiber.abort("'%(t)' is not a recognized token.")
        }
    }
    // Both exhausted, or only a trailing star remains (consumed or not yet consumed).
    if (i == s.count && j == tokens.count) return true
    if (j == tokens.count && tokens[-1] == 0) return true
    if (j == tokens.count - 1 && tokens[-1] == 0) return true
    return false
}
}
Line 560 ⟶ 911:
        return (b0 & b4Mask) << 18 | (b[1] & mbMask) << 12 | (b[2] & mbMask) << 6 | (b[3] & mbMask)
    }
}

/*
    The next four methods extend the casing performed by the corresponding 'Str' methods to include
    Latin Extended-A, parts of Latin Extended-B, Latin Extended Additional, Greek, Cyrillic,
    Armenian and Georgian.
*/

    // Converts a UTF-8 string to lower case.
static lower(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    var chars = s.toList
    var count = chars.count
    var i = 0
    for (c in s.codePoints) {
        if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c < 256) {
            // catch other Latin-1 characters quickly.
        } else if ((c >= 0x0100 && c <= 0x0136) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x0139 && c <= 0x0147) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x014A && c <= 0x0176) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x0178) {
            chars[i] = "ÿ"
        } else if (c == 0x0179 || c == 0x017B || c == 0x017D || c == 0x01A0 || c == 0x01AF || c == 0x01F4) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x01C4 || c == 0x01C7 || c == 0x01CA || c == 0x01F1) {
            chars[i] = String.fromCodePoint(c + 2)
        } else if (c == 0x01C5 || c == 0x01C8 || c == 0x01CB || c == 0x01F2) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x01DE && c <= 0x01EE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x01F8 && c <= 0x021E) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x1E00 && c <= 0x1E94) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x1E9E) {
            chars[i] = "ß"
        } else if ((c >= 0x1EA0 && c <= 0x1EFE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c == 0x0386) {
            chars[i] = "ά"
        } else if (c == 0x0388 || c == 0x0389 || c == 0x038A) {
            chars[i] = String.fromCodePoint(c + 37)
        } else if (c == 0x038C) {
            chars[i] = "ό"
        } else if (c == 0x038E || c == 0x038F) {
            chars[i] = String.fromCodePoint(c + 63)
        } else if (c >= 0x0391 && c <= 0x03A1) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c == 0x03A3) {
            // Sigma takes its final form when it is the last character of the string.
            chars[i] = (i == count - 1) ? "ς" : "σ"
        } else if (c >= 0x03A4 && c <= 0x03AB) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if (c >= 0x0400 && c <= 0x040F) {
            // Only 0x0400..0x040F are offset by 80; this mirrors 'upper', which maps
            // 0x0450..0x045F back by 80 and 0x0430..0x044F back by 32.
            chars[i] = String.fromCodePoint(c + 80)
        } else if (c >= 0x0410 && c <= 0x042F) {
            chars[i] = String.fromCodePoint(c + 32)
        } else if ((c >= 0x048A && c <= 0x04BE) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x04C1 && c <= 0x04CD) && (c % 2 == 1)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if ((c >= 0x04D0 && c <= 0x052E) && (c % 2 == 0)) {
            chars[i] = String.fromCodePoint(c + 1)
        } else if (c >= 0x0531 && c <= 0x0556) {
            chars[i] = String.fromCodePoint(c + 48)
        } else if (c >= 0x10A0 && c <= 0x10C5) {
            chars[i] = String.fromCodePoint(c + 48)
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}

    // Converts a UTF-8 string to upper case.
    static upper(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var chars = s.toList
        var count = chars.count
        var i = 0
        for (c in s.codePoints) {
            if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) {
                chars[i] = String.fromCodePoint(c - 32)
            } else if (c == 223) {
                chars[i] = "ẞ"
            } else if (c == 255) {
                // 'lower' maps 0x0178 to "ÿ", so the upper case of "ÿ" is 0x0178.
                chars[i] = "Ÿ"
            } else if (c < 255) {
                // catch other Latin-1 characters quickly.
            } else if ((c >= 0x0101 && c <= 0x0137) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x013A && c <= 0x0148) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x014B && c <= 0x0177) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if (c == 0x017A || c == 0x017C || c == 0x017E || c == 0x01A1 || c == 0x01B0 || c == 0x01F5) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if (c == 0x01C5 || c == 0x01C8 || c == 0x01CB || c == 0x01F2) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if (c == 0x01C6 || c == 0x01C9 || c == 0x01CC || c == 0x01F3) {
                chars[i] = String.fromCodePoint(c - 2)
            } else if ((c >= 0x01DF && c <= 0x01EF) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x01F9 && c <= 0x021F) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x1E01 && c <= 0x1E95) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x1EA1 && c <= 0x1EFF) && (c % 2 == 1)) {
                // 0x1E9E (capital sharp S) is already upper case and is left unchanged.
                chars[i] = String.fromCodePoint(c - 1)
            } else if (c == 0x03AC) {
                chars[i] = "Ά"
            } else if (c == 0x03AD || c == 0x03AE || c == 0x03AF) {
                chars[i] = String.fromCodePoint(c - 37)
            } else if (c >= 0x03B1 && c <= 0x03C1) {
                chars[i] = String.fromCodePoint(c - 32)
            } else if (c == 0x03C2) {
                chars[i] = "Σ"
            } else if (c >= 0x03C3 && c <= 0x03CB) {
                chars[i] = String.fromCodePoint(c - 32)
            } else if (c == 0x03CC) {
                chars[i] = "Ό"
            } else if (c == 0x03CD || c == 0x03CE) {
                chars[i] = String.fromCodePoint(c - 63)
            } else if (c >= 0x0430 && c <= 0x044F) {
                chars[i] = String.fromCodePoint(c - 32)
            } else if (c >= 0x0450 && c <= 0x045F) {
                chars[i] = String.fromCodePoint(c - 80)
            } else if ((c >= 0x048B && c <= 0x04BF) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x04C2 && c <= 0x04CE) && (c % 2 == 0)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if ((c >= 0x04D1 && c <= 0x052F) && (c % 2 == 1)) {
                chars[i] = String.fromCodePoint(c - 1)
            } else if (c >= 0x0561 && c <= 0x0586) {
                chars[i] = String.fromCodePoint(c - 48)
            } else if (c >= 0x10D0 && c <= 0x10F5) {
                chars[i] = String.fromCodePoint(c - 48)
            }
            i = i + 1
        }
        return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
    }

    // Capitalizes the first character of a UTF-8 string.
    // Uses title rather than upper case variant if it's one of 4 supported digraphs.
    // If the string starts with "[" and has more characters, the character after it is capitalized.
    static capitalize(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var start = (s.startsWith("[") && s.count > 1) ? 1 : 0
        var cs = upper(s[start])
        var c = cs.codePoints[0]
        if (c == 0x01C4 || c == 0x01C7 || c == 0x01CA || c == 0x01F1) {
            // The title case form sits one codepoint after the upper case form.
            cs = String.fromCodePoint(c + 1)
        }
        if (s.count > start + 1) cs = cs + s[start+1..-1]
        if (start == 1) cs = "[" + cs
        return cs
    }

    // Capitalizes the first character of each word of a UTF-8 string.
    // Uses title rather than upper case variant if it's one of 4 supported digraphs.
    static title(s) {
        if (!(s is String)) s = "%(s)"
        if (s == "") return s
        var words = s.split(" ")
        return Strs.join(words.map { |w| capitalize(w) }.toList, " ")
    }

    // Removes accents and other diacritical marks from all characters in a string,
    // expands digraphs and removes all combining characters before returning the result.
    // As well as Latin-1 Supplement, coverage includes Latin Extended-A and various
    // other characters found in modern European languages which use the Latin alphabet.
static unaccent(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // accented[j] lists every accented form that maps to the plain letter unaccented[j].
    var accented = [
        "àáâãäåāăą", "ÀÁÂÃÄÅĀĂĄ", "ḃ", "Ḃ", "çćĉċč", "ÇĆĈĊČ", "ðďđḋ", "ÐĎĐḊ",
        "èéêëēĕėęě", "ÈÉÊËĒĔĖĘĚ", "ḟ", "Ḟ", "ĝğġģ", "ĜĞĠĢ", "ĥħ", "ĤĦ",
        "ìíîïĩīĭįı", "ÌÍÎÏĨĪĬĮİ", "ĵ", "Ĵ", "ķĸ", "Ķ", "ĺļľŀł", "ĹĻĽĿŁ", "ṁ", "Ṁ",
        "ñńņňŉ", "ÑŃŅŇ", "òóôõöøōŏő", "ÒÓÔÕÖØŌŎŐ", "ṗ", "Ṗ", "ŕŗř", "ŔŖŘ",
        "śŝşšșſ", "ŚŜŞŠȘ", "ţťŧṱț", "ŢŤŦṰȚ", "ùúûüũūŭůűų", "ÙÚÛÜŨŪŬŮŰŲ",
        "ŵẁẃẅ", "ŴẀẂẄ", "ýÿỳŷ", "ÝŸỲŶ", "źżž", "ŹŻŽ"
    ]
    var unaccented = "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPrRsStTuUwWyYzZ"
    var digraphs = {
        "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss", "ẞ": "SS",
        "ĳ": "ij", "Ĳ": "IJ", "ŋ": "ng", "Ŋ": "NG", "œ": "oe", "Œ": "OE"
    }
    var chars = s.toList
    var count = chars.count
    var i = 0
    for (c in s.codePoints) {
        // The guard must span every table entry: the first range runs up to 0x021b ("ț")
        // and the second from 0x1e02 ("Ḃ") to 0x1ef3 ("ỳ").
        if ((c >= 0x00c0 && c <= 0x021b) || (c >= 0x1e02 && c <= 0x1ef3)) {
            var found = false
            for (j in 0...accented.count) {
                if (accented[j].indexOf(chars[i]) >= 0) {
                    chars[i] = unaccented[j]
                    found = true
                    break
                }
            }
            if (!found && digraphs.containsKey(chars[i])) chars[i] = digraphs[chars[i]]
        } else if (c >= 0x0300 && c <= 0x036F) {
            // Codepoints in this range are dropped entirely.
            chars[i] = ""
        }
        i = i + 1
    }
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}

    // Converts a Windows-1252 encoded byte string to a UTF-8 encoded string.
    // Aborts the fiber if the argument is not a string.
    static fromWin1252(win1252) {
        if (!(win1252 is String)) Fiber.abort("Argument must be a byte string.")
        if (win1252.count == 0) return ""
        // mapping for Windows 1252 bytes 128-159.
        // Unused bytes are mapped to the corresponding ISO-8859-1 control codes.
        var bm = [
            0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
            0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
            0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
            0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
        ]
        var bytes = win1252.bytes
        var utf8 = List.filled(bytes.count, 0)
        for (i in 0...bytes.count) {
            var b = bytes[i]
            if (b < 128 || b > 159) {
                // Outside 128-159 the byte value is used directly as the codepoint.
                utf8[i] = String.fromCodePoint(b)
            } else {
                utf8[i] = String.fromCodePoint(bm[b-128])
            }
        }
        return utf8.join()
    }
}
Line 568 ⟶ 1,158:
*/
class Greek {
    // Returns the Greek alphabet, lower then upper case characters.
    static alphabet { "αβγδεζηθικλμνξοπρςστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\u03a2ΣΤΥΦΧΨΩ" }

    // Returns a list of the names of all Greek letters in alphabetical order.
    static names {
        return [
            "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta", "iota",
            "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi", "rho", "sigma final",
            "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega"
        ]
    }

    // Returns the name of a Greek character or null if not found.
    // Upper case characters are returned with the initial letter capitalized.
    static name(char) {
        if (char.count != 1) return null
        var ix = alphabet.toList.indexOf(char)
        if (ix == -1) return null
        // The first 25 entries of 'alphabet' are lower case, the next 25 upper case.
        if (ix < 25) return names[ix]
        return Str.capitalize(names[ix-25])
    }

    // Finds and returns a Greek lower case character from its name.
    static lower(name) {
        name = Str.lower(name)
        var ix = names.indexOf(name)
        if (ix == -1) Fiber.abort("Name not found.")
Line 586 ⟶ 1,190:
    // Finds and returns a Greek upper case character from its name.
    static upper(name) {
        name = Str.lower(name)
        var ix = names.indexOf(name)
        if (ix == -1) Fiber.abort("Name not found.")
Line 591 ⟶ 1,196:
        return String.fromCodePoint(0x0391 + ix)
    }
}</~~lang~~syntaxhighlight>