Category talk:Wren-str: Difference between revisions
Content added Content deleted
(→Source code: Changes to make this module more consistent with other Wren modules.) |
(→Source code: Adjustments mostly to extend case change methods to Latin-1 (ASCII only previously).) |
||
Line 5: | Line 5: | ||
/* |
/* |
||
Char contains routines to perform various operations on characters. |
Char contains routines to perform various operations on characters. |
||
A 'character' for this purpose is a single Unicode codepoint. |
|||
⚫ | |||
Categorization and casing are supported for characters < 256 (Latin-1) but no higher. |
|||
The 'symbol' category includes 'other letter', 'other number' and soft hyphen (ªº¹²³¼½¾¯). |
|||
⚫ | |||
as an argument but the methods will only operate on the first character. |
as an argument but the methods will only operate on the first character. |
||
*/ |
*/ |
||
Line 18: | Line 21: | ||
// Checks if the first character of a string falls into a particular category. |
// Checks if the first character of a string falls into a particular category. |
||
static isAscii(c) { code(c) < 128 } |
static isAscii(c) { code(c) < 128 } |
||
static |
static isLatin1(c) { code(c) < 256 } |
||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
static isPrintable(c) { (c = code(c)) && c >= 32 && c < 127 } |
|||
static isSpace(c) { (c = code(c)) && (c == 32 || c == 9 || c == 10 || c == 13) } |
|||
static isWhitespace(c) { (c = code(c)) && (c == 32 || (c >= 9 && c <= 13)) } |
|||
// ASCII categories. |
|||
/* Rather than use combinations of the above, these only call the 'code' method once. */ |
|||
⚫ | |||
⚫ | |||
⚫ | |||
static isAsciiLetter(c) { isAsciiLower(c) || isAsciiUpper(c) } |
|||
static isAsciiAlphaNum(c) { isAsciiLower(c) || isAsciiUpper(c) || isDigit(c) } |
|||
⚫ | |||
// Latin-1 categories. |
|||
static |
static isLower(c) { |
||
var d = code(c) |
var d = code(c) |
||
return (d >= |
return (d >= 97 && d <= 122) || (d == 181) || (d >= 223 && d <= 246) || |
||
(d >= 248 && d <= 255) |
|||
} |
} |
||
static |
static isUpper(c) { |
||
var d = code(c) |
var d = code(c) |
||
return (d >= 65 && d <= 90) || (d >= |
return (d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222) |
||
} |
} |
||
static |
static isLetter(c) { isLower(c) || isUpper(c) } |
||
static isAlphaNumeric(c) { isLower(c) || isUpper(c) || isDigit(c) } |
|||
static isControl(c) { |
|||
var d = code(c) |
var d = code(c) |
||
return d < 32 || (d >= 127 && d < 160) |
|||
⚫ | |||
if ("$+<=>^`|~".contains(c[0])) return false |
|||
⚫ | |||
} |
} |
||
static isPrintable(c) { |
|||
var d = code(c) |
|||
return (d >= 32 && d < 127) || (d >= 160 && d < 256) |
|||
⚫ | |||
static isGraphic(c) { |
|||
var d = code(c) |
|||
return (d >= 33 && d < 127) || (d >= 161 && d < 256) |
|||
⚫ | |||
// Checks whether the first character of 'c' is Latin-1 whitespace: a space (32),
// one of the control characters 9 to 13 (tab, line feed, vertical tab, form feed,
// carriage return) or a no-break space (160).
static isWhitespace(c) {
    var d = code(c)
    // Both bounds must be tested against the code point 'd'; the argument 'c'
    // itself is not a number and cannot be compared with 13.
    return d == 32 || (d >= 9 && d <= 13) || d == 160
}
|||
// Checks whether the first character of 'c' is one of the ASCII or Latin-1
// punctuation marks listed in the string below.
static isPunctuation(c) {
    var marks = "!\"#\%&'()*,-./:;?@[\\]_{}¡§«¶·»¿"
    // 'code(c)' is still evaluated first and is only used as the left operand
    // of '&&' (presumably to validate the argument - confirm against 'code').
    return code(c) && marks.contains(c[0])
}
|||
// Checks whether the first character of 'c' is a symbol, i.e. a graphic character
// which is neither alphanumeric nor punctuation.
static isSymbol(c) { isGraphic(c) && !isAlphaNumeric(c) && !isPunctuation(c) }
|||
static category(c) { |
static category(c) { |
||
var d = code(c) |
var d = code(c) |
||
return (d < 32 |
return (d < 32) ? "control" : |
||
(d == 32) |
(d == 32) ? "space" : |
||
(d >= 48 && d <= 57) |
(d >= 48 && d <= 57) ? "digit" : |
||
(d >= |
(d >= 65 && d <= 90) ? "upper" : |
||
(d >= 97 && d <= 122) |
(d >= 97 && d <= 122) ? "lower" : |
||
(d >= |
(d >= 127 && d <= 159) ? "control" : |
||
(d == 160) ? "space" : |
|||
(d == 181) ? "lower" : |
|||
(d >= 192 && d <= 214) ? "upper" : |
|||
(d >= 216 && d <= 222) ? "upper" : |
|||
(d >= 223 && d <= 246) ? "lower" : |
|||
(d >= 248 && d <= 255) ? "lower" : |
|||
(d >= 256) ? "non-latin1" : |
|||
isPunctuation(c) ? "punctuation" : "symbol" |
|||
} |
} |
||
// |
// Returns the first character of a string converted to lower case. |
||
static lower(c) { |
|||
static upper(c) { ((c = code(c)) && c >= 97 && c <= 122) ? fromCode(c-32) : fromCode(c) } |
|||
var d = code(c) |
|||
static lower(c) { ((c = code(c)) && c >= 65 && c <= 90) ? fromCode(c+32) : fromCode(c) } |
|||
⚫ | |||
return fromCode(d+32) |
|||
} |
|||
⚫ | |||
} |
|||
// Returns the first character of a string converted to upper case. |
|||
// Converts the first character of 'c' to upper case and returns it.
// Lower case ASCII letters (97..122) and the lower case Latin-1 letters in the
// ranges 224..246 and 248..254 are shifted down by 32 code points; any other
// character is returned unchanged.
static upper(c) {
    var cp = code(c)
    var asciiLower  = cp >= 97 && cp <= 122
    var latin1Lower = (cp >= 224 && cp <= 246) || (cp >= 248 && cp <= 254)
    return (asciiLower || latin1Lower) ? fromCode(cp - 32) : c[0]
}
|||
// Swaps the case of the first character in a string. |
|||
static swapCase(c) { |
static swapCase(c) { |
||
var d = code(c) |
var d = code(c) |
||
if (d >= 65 && d <= |
if ((d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222)) { |
||
return fromCode(d+32) |
|||
} |
|||
if ((d >= 97 && d <= 122) || (d >= 224 && d <= 246) || (d >= 248 && d <= 254)) { |
|||
return fromCode(d-32) |
|||
} |
|||
return c[0] |
return c[0] |
||
} |
} |
||
Line 95: | Line 144: | ||
// Checks if a string falls into a particular category. |
// Checks if a string falls into a particular category. |
||
static allAscii(s) { s.codePoints.all { |c| c < 128 |
static allAscii(s) { s.codePoints.all { |c| c < 128 } } |
||
static |
static allLatin1(s) { s.codePoints.all { |c| c < 256 } } |
||
static |
static allDigits(s) { s.codePoints.all { |c| c >= 48 && c <= 57 } } |
||
static |
static allAsciiLower(s) { s.codePoints.all { |c| c >= 97 && c <= 122 } } |
||
static |
static allAsciiUpper(s) { s.codePoints.all { |c| c >= 65 && c <= 90 } } |
||
static |
static allAsciiLetters(s) { s.toList.all { |c| Char.isAsciiLetter(c) } } |
||
static allAsciiAlphaNum(s) { s.toList.all { |c| Char.isAsciiAlphaNum(c) } } |
|||
static |
static allSpace(s) { s.toList.all { |c| Char.isSpace(c) } } |
||
// Checks whether every character of the string 's' is a lower case letter
// according to Char.isLower.
static allLower(s) { s.toList.all { |c| Char.isLower(c) } }
|||
// Checks whether every character of the string 's' is an upper case letter
// according to Char.isUpper.
static allUpper(s) { s.toList.all { |c| Char.isUpper(c) } }
|||
⚫ | |||
// Checks whether every character of the string 's' is a letter
// according to Char.isLetter.
static allLetters(s) { s.toList.all { |c| Char.isLetter(c) } }
|||
static |
// Checks whether every character of the string 's' is a letter or digit
// according to Char.isAlphaNumeric.
static allAlphaNumeric(s) { s.toList.all { |c| Char.isAlphaNumeric(c) } }
||
// Checks whether every character of the string 's' is printable
// according to Char.isPrintable.
static allPrintable(s) { s.toList.all { |c| Char.isPrintable(c) } }
|||
// Checks whether every character of the string 's' is a graphic character
// according to Char.isGraphic.
static allGraphic(s) { s.toList.all { |c| Char.isGraphic(c) } }
|||
⚫ | |||
// Checks whether every character of the string 's' is whitespace
// according to Char.isWhitespace.
static allWhitespace(s) { s.toList.all { |c| Char.isWhitespace(c) } }
|||
// Checks whether a string can be parsed to a number, an integer or a non-integer (float). |
// Checks whether a string can be parsed to a number, an integer or a non-integer (float). |
||
Line 123: | Line 173: | ||
var i = 0 |
var i = 0 |
||
for (c in s.codePoints) { |
for (c in s.codePoints) { |
||
if (c >= 65 && c <= 90) |
if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) { |
||
chars[i] = String.fromCodePoint(c + 32) |
|||
} |
|||
i = i + 1 |
i = i + 1 |
||
} |
} |
||
Line 137: | Line 189: | ||
var i = 0 |
var i = 0 |
||
for (c in s.codePoints) { |
for (c in s.codePoints) { |
||
if (c >= 97 && c <= 122) |
if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) { |
||
chars[i] = String.fromCodePoint(c - 32) |
|||
} |
|||
i = i + 1 |
i = i + 1 |
||
} |
} |
||
Line 151: | Line 205: | ||
var i = 0 |
var i = 0 |
||
for (c in s.codePoints) { |
for (c in s.codePoints) { |
||
if (c >= 65 && c <= 90) { |
if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) { |
||
chars[i] = String.fromCodePoint(c + 32) |
chars[i] = String.fromCodePoint(c + 32) |
||
} else if (c >= 97 && c <= 122) |
} else if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || |
||
(c >= 248 && c <= 254)) { |
|||
chars[i] = String.fromCodePoint(c - 32) |
chars[i] = String.fromCodePoint(c - 32) |
||
} |
} |
||
Line 167: | Line 222: | ||
var start = (s.startsWith("[") && s.count > 1) ? 1 : 0 |
var start = (s.startsWith("[") && s.count > 1) ? 1 : 0 |
||
var c = s[start].codePoints[0] |
var c = s[start].codePoints[0] |
||
if (c >= 97 && c <= 122) { |
if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) { |
||
var cs = String.fromCodePoint(c - 32) + s[start+1..-1] |
var cs = String.fromCodePoint(c - 32) + s[start+1..-1] |
||
if (start == 1) cs = "[" + cs |
if (start == 1) cs = "[" + cs |