Category talk:Wren-str: Difference between revisions

Content added Content deleted
(→‎Source code: Now uses Wren S/H lexer.)
(→‎Source code: Added support for wildcard matching to Str class.)
Line 645: Line 645:
}
}
return null
return null
}

// Converts a wildcard 'pattern' string into a list of tokens for processing by the
// 'isMatch' method. Characters within the pattern are represented as follows:
//   Non-wildcard characters as themselves (i.e. single code point strings);
//   * (or any run of *) by the number 0;
//   ? (or *?) by the number 1;
//   [set] by a list of the tokens within the set:
//     single characters by themselves;
//     a range of characters, a-b, by a Range of code points from 'a' to 'b'.
//   If the first character of the set is '!' then the number -1 is inserted
//   as a separate token immediately before the list. A '!' anywhere else in
//   the set is an ordinary member of the set.
// A '[' with no closing ']' is treated as a literal character.
// Aborts the fiber if a set contains no members (e.g. "[]" or "[!]").
static tokenize(pattern) {
    // Scan a list of code points rather than the string itself: 'pattern.count'
    // counts code points but 'pattern[i]' indexes bytes, so indexing the string
    // directly mis-reads (and truncates) any pattern containing non-ASCII text.
    var chars = pattern.toList
    var tokens = []
    var i = 0
    while (i < chars.count) {
        var c = chars[i]
        if (c == "*") {
            // Collapse consecutive stars into a single 0 token.
            if (tokens.count == 0 || tokens[-1] != 0) tokens.add(0)
        } else if (c == "?") {
            // '*?' is represented by a single 1 token, replacing the pending 0.
            if (tokens.count > 0 && tokens[-1] == 0) {
                tokens[-1] = 1
            } else {
                tokens.add(1)
            }
        } else if (c == "[") {
            // Find the first ']' after the opening bracket.
            var j = i + 1
            while (j < chars.count && chars[j] != "]") {
                j = j + 1
            }
            if (j >= chars.count) {
                // No closing bracket: '[' is just a literal character.
                tokens.add(c)
            } else {
                var set = []
                var k = i + 1
                while (k < j) {
                    var d = chars[k]
                    if (d == "!") {
                        // Only a leading '!' negates the set.
                        if (k == i + 1) tokens.add(-1) else set.add(d)
                    } else if (k < j - 2 && chars[k+1] == "-") {
                        // 'a-b' needs a character on both sides of the '-'.
                        set.add(d.codePoints[0]..chars[k+2].codePoints[0])
                        k = k + 2
                    } else {
                        set.add(d)
                    }
                    k = k + 1
                }
                if (set.count == 0) Fiber.abort("set cannot be empty.")
                tokens.add(set)
                // Resume after the closing bracket (the common increment below skips it).
                i = j
            }
        } else {
            tokens.add(c)
        }
        i = i + 1
    }
    return tokens
}

// Returns whether a string 's' matches a 'pattern', which may be either a pattern
// string or a token list previously produced by 'tokenize' (useful when many strings
// are to be matched against the same pattern). Matching is case sensitive and is
// performed code point by code point.
// Patterns may contain the following wildcards:
//   * (or **) matches zero or more characters until the next token (if any) matches
//     and doesn't backtrack in the event of subsequent failure;
//   ? (or *?) matches exactly one character;
//   [set] matches a single character from the set within the brackets e.g. [aeiou].
//     The set can also contain ranges of characters separated by '-' e.g. [a-zA-Z].
//     If the first character of the set is '!' then only characters NOT within the rest
//     of the set are matched e.g. [!0-9] matches any character other than a digit.
// Aborts the fiber if 'pattern' is not a non-empty string or list, or if the token
// list contains an unrecognized token.
static isMatch(s, pattern) {
    var tokens = pattern
    if (tokens is String) tokens = tokenize(tokens)
    if (!((tokens is List) && tokens.count > 0)) {
        Fiber.abort("'pattern' must be a non-empty string or list of tokens.")
    }
    // Walk a list of code points: 's.count' counts code points while 's[i]' indexes
    // bytes, so indexing the string directly breaks on non-ASCII subjects.
    var chars = s.toList
    var i = 0          // position in 'chars'
    var j = 0          // position in 'tokens'
    var star = false   // true while a '*' is still swallowing characters
    var neg = false    // true when the next set token is negated
    while (i < chars.count && j < tokens.count) {
        var c = chars[i]
        var t = tokens[j]
        if (t is Num) {
            if (t == 0) {
                star = true
            } else if (t == 1) {
                i = i + 1
                star = false
            } else if (t == -1) {
                neg = true
            } else {
                Fiber.abort("'%(t)' is not a recognized token.")
            }
            j = j + 1
        } else if (t is String) {
            if (!star && c != t) return false
            if (star && c == t) star = false
            i = i + 1
            // While the star is still active we stay on the same token.
            if (!star) j = j + 1
        } else if (t is List) {
            var matched = false
            for (e in t) {
                if (e is String) {
                    if (e == c) {
                        matched = true
                        break
                    }
                } else if (e is Range) {
                    var cp = c.codePoints[0]
                    if (cp >= e.from && cp <= e.to) {
                        matched = true
                        break
                    }
                } else {
                    Fiber.abort("'%(e)' is not a recognized token within a set.")
                }
            }
            // Apply negation once so that both the plain and the star paths honour it.
            var hit = (matched != neg)
            if (!star && !hit) return false
            if (star && hit) star = false
            i = i + 1
            if (!star) {
                // Only drop the negation flag once the set token has been consumed;
                // a star that is still scanning must keep testing the negated set.
                neg = false
                j = j + 1
            }
        } else {
            Fiber.abort("'%(t)' is not a recognized token.")
        }
    }
    // Both exhausted together: full match.
    if (i == chars.count && j == tokens.count) return true
    // Pattern exhausted but it ended in '*', which absorbs the rest of the string.
    if (j == tokens.count && tokens[-1] == 0) return true
    // String exhausted with only a trailing '*' left, which may match nothing.
    if (j == tokens.count - 1 && tokens[-1] == 0) return true
    return false
}
}
}
}