Category talk:Wren-str: Difference between revisions

Content added Content deleted
(→‎Source code: Changes to make this module more consistent with other Wren modules.)
(→‎Source code: Adjustments mostly to extend case change methods to Latin-1 (ASCII only previously).)
Line 5: Line 5:
/*
/*
Char contains routines to perform various operations on characters.
Char contains routines to perform various operations on characters.
A 'character' for this purpose is a single Unicode codepoint.
For convenience a string containing more than one character can be passed
Categorization and casing is supported for characters < 256 (Latin-1) but no higher.
The 'symbol' category includes 'other letter', 'other number' and soft hyphen (ªº¹²³¼½¾¯).
For convenience a string containing more than one character can be passed
as an argument but the methods will only operate on the first character.
as an argument but the methods will only operate on the first character.
*/
*/
Line 18: Line 21:
// Checks if the first character of a string falls into a particular category.
// Checks if the first character of a string falls into a particular category.
static isAscii(c) { code(c) < 128 }
static isAscii(c) { code(c) < 128 }
static isSymbol(c) { code(c) && "$+<=>^`|~".contains(c[0]) }
static isLatin1(c) { code(c) < 256 }
static isControl(c) { (c = code(c)) && (c < 32 || c == 127) }
static isDigit(c) { (c = code(c)) && c >= 48 && c <= 57 }
static isLower(c) { (c = code(c)) && c >= 97 && c <= 122 }
static isUpper(c) { (c = code(c)) && c >= 65 && c <= 90 }
static isPrintable(c) { (c = code(c)) && c >= 32 && c < 127 }
static isSpace(c) { (c = code(c)) && (c == 32 || c == 9 || c == 10 || c == 13) }
static isWhitespace(c) { (c = code(c)) && (c == 32 || (c >= 9 && c <= 13)) }


// ASCII categories.
/* Rather than use combinations of the above, these only call the 'code' method once. */
static isDigit(c) { (c = code(c)) && c >= 48 && c <= 57 }
static isAsciiLower(c) { (c = code(c)) && c >= 97 && c <= 122 }
static isAsciiUpper(c) { (c = code(c)) && c >= 65 && c <= 90 }
static isAsciiLetter(c) { isAsciiLower(c) || isAsciiUpper(c) }
static isAsciiAlphaNum(c) { isAsciiLower(c) || isAsciiUpper(c) || isDigit(c) }
static isSpace(c) { (c = code(c)) && (c == 32 || c == 9 || c == 10 || c == 13) }


// Latin-1 categories.
static isLetter(c) {
static isLower(c) {
var d = code(c)
var d = code(c)
return (d >= 65 && d <= 90) || (d >= 97 && d <= 122)
return (d >= 97 && d <= 122) || (d == 181) || (d >= 223 && d <= 246) ||
(d >= 248 && d <= 255)
}
}


static isAlphanumeric(c) {
static isUpper(c) {
var d = code(c)
var d = code(c)
return (d >= 65 && d <= 90) || (d >= 97 && d <= 122) || (d >= 48 && d <= 57)
return (d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222)
}
}


static isPunctuation(c) {
static isLetter(c) { isLower(c) || isUpper(c) }
static isAlphaNumeric(c) { isLower(c) || isUpper(c) || isDigit(c) }

static isControl(c) {
var d = code(c)
var d = code(c)
if (d < 33 || d > 126) return false
return d < 32 || (d >= 127 && d < 160)
if ((d >= 65 && d <= 90) || (d >= 97 && d <= 122) || (d >= 48 && d <= 57)) return false
if ("$+<=>^`|~".contains(c[0])) return false
return true
}
}

// Reports whether the first character of 'c' has a codepoint in 32..126 or 160..255.
static isPrintable(c) {
    var cp = code(c)
    // Lower band first, then the upper band.
    if (cp >= 32 && cp < 127) return true
    return cp >= 160 && cp < 256
}

// Reports whether the first character of 'c' has a codepoint in 33..126 or 161..255,
// i.e. the same bands as isPrintable minus codepoints 32 and 160.
static isGraphic(c) {
    var cp = code(c)
    var inLowerBand = cp >= 33 && cp < 127
    return inLowerBand || (cp >= 161 && cp < 256)
}

// Reports whether the first character of 'c' is whitespace: codepoint 32 (space),
// any codepoint in 9..13, or codepoint 160.
static isWhitespace(c) {
    var d = code(c)
    // Every comparison must use the codepoint 'd'; 'c' is the original argument,
    // not a number, so it cannot take part in the range test.
    return d == 32 || (d >= 9 && d <= 13) || d == 160
}
// Reports whether the first character of 'c' is one of the listed punctuation marks.
// The result is the value of 'code(c)' when that is falsy, otherwise the membership test.
static isPunctuation(c) {
    var marks = "!\"#\%&'()*,-./:;?@[\\]_{}¡§«¶·»¿"
    return code(c) && marks.contains(c[0])
}

// Reports whether the first character of 'c' is a symbol: a graphic character
// that is neither alphanumeric nor punctuation.
static isSymbol(c) { isGraphic(c) && !isAlphaNumeric(c) && !isPunctuation(c) }


static category(c) {
static category(c) {
var d = code(c)
var d = code(c)
return (d < 32 || d == 127) ? "control" :
return (d < 32) ? "control" :
(d == 32) ? "space" :
(d == 32) ? "space" :
(d >= 48 && d <= 57) ? "digit" :
(d >= 48 && d <= 57) ? "digit" :
(d >= 64 && d <= 90) ? "upper" :
(d >= 65 && d <= 90) ? "upper" :
(d >= 97 && d <= 122) ? "lower" :
(d >= 97 && d <= 122) ? "lower" :
(d >=128) ? "non-ascii" :
(d >= 127 && d <= 159) ? "control" :
"$+<=>^`|~".contains(c[0]) ? "symbol" : "punctuation"
(d == 160) ? "space" :
(d == 181) ? "lower" :
(d >= 192 && d <= 214) ? "upper" :
(d >= 216 && d <= 222) ? "upper" :
(d >= 223 && d <= 246) ? "lower" :
(d >= 248 && d <= 255) ? "lower" :
(d >= 256) ? "non-latin1" :
isPunctuation(c) ? "punctuation" : "symbol"
}
}


// Return the first character of a string converted to the appropriate case.
// Returns the first character of a string converted to lower case.
static lower(c) {
static upper(c) { ((c = code(c)) && c >= 97 && c <= 122) ? fromCode(c-32) : fromCode(c) }
var d = code(c)
static lower(c) { ((c = code(c)) && c >= 65 && c <= 90) ? fromCode(c+32) : fromCode(c) }
if ((d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222)) {
return fromCode(d+32)
}
return c[0]
}

// Returns the first character of a string converted to upper case.
// Codepoints in 97..122, 224..246 and 248..254 are shifted down by 32;
// any other first character is returned unchanged.
static upper(c) {
    var cp = code(c)
    var hasUpperForm = (cp >= 97 && cp <= 122) || (cp >= 224 && cp <= 246) ||
                       (cp >= 248 && cp <= 254)
    return hasUpperForm ? fromCode(cp - 32) : c[0]
}


// Swaps the case of the first character in a string.
static swapCase(c) {
static swapCase(c) {
var d = code(c)
var d = code(c)
if (d >= 65 && d <= 90) return fromCode(d+32)
if ((d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222)) {
if (d >= 97 && d <= 122) return fromCode(d-32)
return fromCode(d+32)
}
if ((d >= 97 && d <= 122) || (d >= 224 && d <= 246) || (d >= 248 && d <= 254)) {
return fromCode(d-32)
}
return c[0]
return c[0]
}
}
Line 95: Line 144:


// Checks if a string falls into a particular category.
// Checks if a string falls into a particular category.
static allAscii(s) { s.codePoints.all { |c| c < 128 } }
static allAscii(s) { s.codePoints.all { |c| c < 128 } }
static allDigits(s) { s.codePoints.all { |c| c >= 48 && c <= 57 } }
static allLatin1(s) { s.codePoints.all { |c| c < 256 } }
static allLower(s) { s.codePoints.all { |c| c >= 97 && c <= 122 } }
static allDigits(s) { s.codePoints.all { |c| c >= 48 && c <= 57 } }
static allUpper(s) { s.codePoints.all { |c| c >= 65 && c <= 90 } }
static allAsciiLower(s) { s.codePoints.all { |c| c >= 97 && c <= 122 } }
static allPrintable(s) { s.codePoints.all { |c| c >= 32 && c < 127 } }
static allAsciiUpper(s) { s.codePoints.all { |c| c >= 65 && c <= 90 } }
static allWhitespace(s) { s.codePoints.all { |c| c == 32 || (c >= 9 && c <= 13) } }
static allAsciiLetters(s) { s.toList.all { |c| Char.isAsciiLetter(c) } }
static allAsciiAlphaNum(s) { s.toList.all { |c| Char.isAsciiAlphaNum(c) } }

static allLetters(s) { s.codePoints.all { |c|
static allSpace(s) { s.toList.all { |c| Char.isSpace(c) } }
return (c >= 65 && c <= 90) || (c >= 97 && c <= 122)
// Checks whether every character of 's' satisfies Char.isLower.
// Takes the string as a parameter, like the neighbouring all* methods.
static allLower(s) { s.toList.all { |c| Char.isLower(c) } }
// Checks whether every character of 's' satisfies Char.isUpper.
// Takes the string as a parameter, like the neighbouring all* methods.
static allUpper(s) { s.toList.all { |c| Char.isUpper(c) } }
} }
// Checks whether every character of 's' satisfies Char.isLetter.
// Takes the string as a parameter, like the neighbouring all* methods.
static allLetters(s) { s.toList.all { |c| Char.isLetter(c) } }

static allAlphanumeric(s) { s.codepoints.all { |c|
// Checks whether every character of 's' satisfies Char.isAlphaNumeric.
// Takes the string as a parameter, like the neighbouring all* methods.
static allAlphaNumeric(s) { s.toList.all { |c| Char.isAlphaNumeric(c) } }
return (c >= 65 && c <= 90) || (c >= 97 && c <= 122) || (c >= 48 && c <= 57)
// Checks whether every character of 's' satisfies Char.isPrintable.
// Takes the string as a parameter, like the neighbouring all* methods.
static allPrintable(s) { s.toList.all { |c| Char.isPrintable(c) } }
// Checks whether every character of 's' satisfies Char.isGraphic.
// Takes the string as a parameter, like the neighbouring all* methods.
static allGraphic(s) { s.toList.all { |c| Char.isGraphic(c) } }
} }
// Checks whether every character of 's' satisfies Char.isWhitespace.
// Takes the string as a parameter, like the neighbouring all* methods.
static allWhitespace(s) { s.toList.all { |c| Char.isWhitespace(c) } }


// Checks whether a string can be parsed to a number, an integer or a non-integer (float).
// Checks whether a string can be parsed to a number, an integer or a non-integer (float).
Line 123: Line 173:
var i = 0
var i = 0
for (c in s.codePoints) {
for (c in s.codePoints) {
if (c >= 65 && c <= 90) chars[i] = String.fromCodePoint(c + 32)
if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) {
chars[i] = String.fromCodePoint(c + 32)
}
i = i + 1
i = i + 1
}
}
Line 137: Line 189:
var i = 0
var i = 0
for (c in s.codePoints) {
for (c in s.codePoints) {
if (c >= 97 && c <= 122) chars[i] = String.fromCodePoint(c - 32)
if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) {
chars[i] = String.fromCodePoint(c - 32)
}
i = i + 1
i = i + 1
}
}
Line 151: Line 205:
var i = 0
var i = 0
for (c in s.codePoints) {
for (c in s.codePoints) {
if (c >= 65 && c <= 90) {
if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) {
chars[i] = String.fromCodePoint(c + 32)
chars[i] = String.fromCodePoint(c + 32)
} else if (c >= 97 && c <= 122) {
} else if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) ||
(c >= 248 && c <= 254)) {
chars[i] = String.fromCodePoint(c - 32)
chars[i] = String.fromCodePoint(c - 32)
}
}
Line 167: Line 222:
var start = (s.startsWith("[") && s.count > 1) ? 1 : 0
var start = (s.startsWith("[") && s.count > 1) ? 1 : 0
var c = s[start].codePoints[0]
var c = s[start].codePoints[0]
if (c >= 97 && c <= 122) {
if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) {
var cs = String.fromCodePoint(c - 32) + s[start+1..-1]
var cs = String.fromCodePoint(c - 32) + s[start+1..-1]
if (start == 1) cs = "[" + cs
if (start == 1) cs = "[" + cs