Category talk:Wren-str: Difference between revisions

Line 5:

/*

Char contains routines to perform various operations on characters.

A 'character' for this purpose is a single Unicode codepoint.

⚫

For convenience a string containing more than one character can be passed

Categorization and casing is supported for characters < 256 (Latin-1) but no higher.

The 'symbol' category includes 'other letter', 'other number' and soft hyphen (ªº¹²³¼½¾¯).

⚫

For convenience a string containing more than one character can be passed

as an argument but the methods will only operate on the first character.

*/

Line 18:

Line 21:

// Checks if the first character of a string falls into a particular category.

static isAscii(c) { code(c) < 128 }

static ~~isSymbol~~(c) { code(c) ~~&& "$+~~<~~=>^`|~".contains(c[0])~~ }

static isLatin1(c) { code(c) < 256 }

⚫

static ~~isControl~~(c) { (c = code(c)) && (c < ~~32 ||~~ c == ~~127)~~ }

⚫

static ~~isDigit~~(c) { (c = code(c)) && c >= 48 && c <= 57 }

⚫

static ~~isLower~~(c) { (c = code(c)) && c >= 97 && c <= ~~122~~ }

⚫

static ~~isUpper~~(c) { (c = code(c)) && c >= 65 && c <= 90 }

static isPrintable(c) { (c = code(c)) && c >= 32 && c < 127 }

static isSpace(c) { (c = code(c)) && (c == 32 || c == 9 || c == 10 || c == 13) }

static isWhitespace(c) { (c = code(c)) && (c == 32 || (c >= 9 && c <= 13)) }

// ASCII categories.

/* Rather than use combinations of the above, these only call the 'code' method once. */

⚫

static isDigit(c) { (c = code(c)) && c >= 48 && c <= 57 }

⚫

static isAsciiLower(c) { (c = code(c)) && c >= 97 && c <= 122 }

⚫

static isAsciiUpper(c) { (c = code(c)) && c >= 65 && c <= 90 }

static isAsciiLetter(c) { isAsciiLower(c) || isAsciiUpper(c) }

static isAsciiAlphaNum(c) { isAsciiLower(c) || isAsciiUpper(c) || isDigit(c) }

⚫

static isSpace(c) { (c = code(c)) && (c == 32 || c == 9 || c == 10 || c == 13) }

// Latin-1 categories.

static ~~isLetter~~(c) {

static isLower(c) {

var d = code(c)

return (d >= 65 && d <= 90) || (d >= 97 && d <= ~~122~~)

return (d >= 97 && d <= 122) || (d == 181) || (d >= 223 && d <= 246) ||

(d >= 248 && d <= 255)

}

static ~~isAlphanumeric~~(c) {

static isUpper(c) {

var d = code(c)

return (d >= 65 && d <= 90) || (d >= 97 && d <= ~~122~~) || (d >= 48 && d <= 57)

return (d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222)

}

static ~~isPunctuation~~(c) {

static isLetter(c) { isLower(c) || isUpper(c) }

static isAlphaNumeric(c) { isLower(c) || isUpper(c) || isDigit(c) }

static isControl(c) {

var d = code(c)

if (d < 33 || d > ~~126)~~ ~~return~~ ~~false~~

return d < 32 || (d >= 127 && d < 160)

⚫

if ((d >= 65 && d <= 90) || (d >= 97 && d <= ~~122~~) || (d >= 48 && d <= 57)) ~~return false~~

if ("$+<=>^`|~".contains(c[0])) return false

⚫

return ~~true~~

}

static isPrintable(c) {

var d = code(c)

return (d >= 32 && d < 127) || (d >= 160 && d < 256)

⚫

}

static isGraphic(c) {

var d = code(c)

return (d >= 33 && d < 127) || (d >= 161 && d < 256)

⚫

}

// Checks if the first character of a string is whitespace: space (32),
// one of the code points 9 to 13, or no-break space (160).
static isWhitespace(c) {
    var d = code(c)
    // Every comparison uses the numeric code point 'd'; 'c' is the argument
    // as passed in and must not be compared against a number.
    return d == 32 || (d >= 9 && d <= 13) || d == 160
}

static isPunctuation(c) { code(c) && "!\"#\%&'()*,-./:;?@[\\]_{}¡§«¶·»¿".contains(c[0]) }

// Checks if the first character of a string is a symbol, i.e. it is graphic
// but is neither alphanumeric nor punctuation.
static isSymbol(c) { isGraphic(c) && !isAlphaNumeric(c) && !isPunctuation(c) }

static category(c) {

var d = code(c)

return (d < 32 || d == ~~127)~~ ? "control" :

return (d < 32) ? "control" :

(d == 32) ? "space" :

(d >= 48 && d <= 57) ? "digit" :

(d >= 64 && d <= 90) ? "upper" :

(d >= 65 && d <= 90) ? "upper" :

(d >= 97 && d <= 122) ? "lower" :

(d >=~~128)~~ ~~? "non-ascii"~~ :

(d >= 127 && d <= 159) ? "control" :

~~"$+<=>^`|~".contains~~(~~c[0]~~) ? "~~symbol~~" : ~~"punctuation"~~

(d == 160) ? "space" :

(d == 181) ? "lower" :

(d >= 192 && d <= 214) ? "upper" :

(d >= 216 && d <= 222) ? "upper" :

(d >= 223 && d <= 246) ? "lower" :

(d >= 248 && d <= 255) ? "lower" :

(d >= 256) ? "non-latin1" :

isPunctuation(c) ? "punctuation" : "symbol"

}

// ~~Return~~ the first character of a string converted to ~~the appropriate~~ case.

// Returns the first character of a string converted to lower case.

static lower(c) {

static upper(c) { ((c = code(c)) && c >= 97 && c <= 122) ? fromCode(c-32) : fromCode(c) }

var d = code(c)

static lower(c) { ((c = code(c)) && c >= 65 && c <= 90) ? fromCode(c+32) : fromCode(c) }

⚫

if ((d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222)) {

return fromCode(d+32)

}

⚫

return c[0]

}

// Returns the first character of a string converted to upper case.

static upper(c) {

var d = code(c)

// Lower case ranges handled: 97-122 (a-z), 224-246 and 248-254.
// 247 is deliberately skipped by splitting the upper range in two.
if ((d >= 97 && d <= 122) || (d >= 224 && d <= 246) || (d >= 248 && d <= 254)) {

// Within these ranges the upper case form is taken to be 32 code points lower.
return fromCode(d-32)

}

// Anything outside the ranges above is returned unchanged (first character only).
return c[0]

}

// Swaps the case of the first character in a string.

static swapCase(c) {

var d = code(c)

if (d >= 65 && d <= 90) ~~return~~ ~~fromCode~~(d~~+32~~)

if ((d >= 65 && d <= 90) || (d >= 192 && d <= 214) || (d >= 216 && d <= 222)) {

if ~~(d >= 97 && d~~ <= ~~122)~~ return fromCode(d-32)

return fromCode(d+32)

}

if ((d >= 97 && d <= 122) || (d >= 224 && d <= 246) || (d >= 248 && d <= 254)) {

return fromCode(d-32)

}

return c[0]

}

Line 95:

Line 144:

// Checks if a string falls into a particular category.

static allAscii(s) { s.codePoints.all { |c| c < 128 } }

static ~~allDigits~~(s) { s.codePoints.all { |c| ~~c >= 48 &&~~ c <= 57 } }

static allLatin1(s) { s.codePoints.all { |c| c < 256 } }

static ~~allLower~~(s) { s.codePoints.all { |c| c >= 97 && c <= ~~122~~ } }

static allDigits(s) { s.codePoints.all { |c| c >= 48 && c <= 57 } }

static ~~allUpper~~(s) { s.codePoints.all { |c| c >= 65 && c <= 90 } }

static allAsciiLower(s) { s.codePoints.all { |c| c >= 97 && c <= 122 } }

static ~~allPrintable~~(s) { s.codePoints.all { |c| c >= 32 && c < ~~127~~ } }

static allAsciiUpper(s) { s.codePoints.all { |c| c >= 65 && c <= 90 } }

static ~~allWhitespace~~(s) { s.~~codePoints~~.all { |c| ~~c == 32 ||~~ (c >= ~~9 && c <= 13)~~ } }

static allAsciiLetters(s) { s.toList.all { |c| Char.isAsciiLetter(c) } }

static allAsciiAlphaNum(s) { s.toList.all { |c| Char.isAsciiAlphaNum(c) } }

static ~~allLetters~~(s) { s.~~codePoints~~.all { |c|

static allSpace(s) { s.toList.all { |c| Char.isSpace(c) } }

~~return~~ (c >= 65 && c <= ~~90)~~ || (c >= 97 && c <= ~~122)~~

// Checks if every character of the string 's' satisfies Char.isLower.
static allLower(s) { s.toList.all { |c| Char.isLower(c) } }

// Checks if every character of the string 's' satisfies Char.isUpper.
static allUpper(s) { s.toList.all { |c| Char.isUpper(c) } }

⚫

} }

// Checks if every character of the string 's' satisfies Char.isLetter.
static allLetters(s) { s.toList.all { |c| Char.isLetter(c) } }

static ~~allAlphanumeric(s)~~ { s.~~codepoints~~.all { |c|

// Checks if every character of the string 's' satisfies Char.isAlphaNumeric.
static allAlphaNumeric(s) { s.toList.all { |c| Char.isAlphaNumeric(c) } }

~~return~~ (c >= 65 && c <= ~~90)~~ || (c ~~>= 97 && c <= 122~~) || (c >= 48 && ~~c <= 57)~~

// Checks if every character of the string 's' satisfies Char.isPrintable.
static allPrintable(s) { s.toList.all { |c| Char.isPrintable(c) } }

// Checks if every character of the string 's' satisfies Char.isGraphic.
static allGraphic(s) { s.toList.all { |c| Char.isGraphic(c) } }

⚫

} }

// Checks if every character of the string 's' satisfies Char.isWhitespace.
static allWhitespace(s) { s.toList.all { |c| Char.isWhitespace(c) } }

// Checks whether a string can be parsed to a number, an integer or a non-integer (float).

Line 123:

Line 173:

var i = 0

for (c in s.codePoints) {

if (c >= 65 && c <= 90) ~~chars[i]~~ = ~~String.fromCodePoint~~(c + 32)

if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) {

chars[i] = String.fromCodePoint(c + 32)

}

i = i + 1

}

Line 137:

Line 189:

var i = 0

for (c in s.codePoints) {

if (c >= 97 && c <= 122) ~~chars[i]~~ = ~~String.fromCodePoint~~(c - 32)

if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) {

chars[i] = String.fromCodePoint(c - 32)

}

i = i + 1

}

Line 151:

Line 205:

var i = 0

for (c in s.codePoints) {

if (c >= 65 && c <= 90) {

if ((c >= 65 && c <= 90) || (c >= 192 && c <= 214) || (c >= 216 && c <= 222)) {

chars[i] = String.fromCodePoint(c + 32)

} else if (c >= 97 && c <= 122) {

} else if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) ||

(c >= 248 && c <= 254)) {

chars[i] = String.fromCodePoint(c - 32)

}

Line 167:

Line 222:

var start = (s.startsWith("[") && s.count > 1) ? 1 : 0

var c = s[start].codePoints[0]

if (c >= 97 && c <= 122) {

if ((c >= 97 && c <= 122) || (c >= 224 && c <= 246) || (c >= 248 && c <= 254)) {

var cs = String.fromCodePoint(c - 32) + s[start+1..-1]

if (start == 1) cs = "[" + cs