Category talk:Wren-str: Difference between revisions
Content added Content deleted
(→Source code: Now uses Wren S/H lexer.) |
(→Source code: Added support for wildcard matching to Str class.) |
||
Line 645: | Line 645: | ||
} |
} |
||
return null |
return null |
||
} |
|||
// Converts a pattern into a list of tokens for processing by the 'isMatch' method. |
|||
// Characters within the pattern are represented as follows: |
|||
// Non-wildcard characters as themselves (i.e. single character strings); |
|||
// * (or **) by the number 0; |
|||
// ? (or *?) by the number 1; |
|||
// [set] by a list of the tokens within the set: |
|||
// single characters by themselves; |
|||
// a range of characters, a-b, by a Range of codepoints from 'a' to 'b'. |
|||
// If the first character of the set is '!' then the number -1 is inserted |
|||
// as a separate token immediately before the list. |
|||
static tokenize(pattern) {
    // Converts the string 'pattern' into the list of tokens consumed by 'isMatch':
    //   ordinary character -> that character as a single-codepoint string
    //   * (or a run of *)  -> the number 0
    //   ? (or *?)          -> the number 1
    //   [set]              -> a list holding single characters and/or Ranges of
    //                         codepoints (for a-b); a leading '!' adds the number
    //                         -1 as a separate token just before the list.
    // A '[' with no later ']' is kept as an ordinary character.
    // Aborts the fiber with "set cannot be empty." if a set has no members.
    //
    // The pattern is walked as a list of codepoints rather than by subscripting
    // the string: a String subscript is a byte offset whereas 'count' is a
    // codepoint count, so direct indexing breaks on multi-byte characters.
    var chars = pattern.toList
    var n = chars.count
    var tokens = []
    var i = 0
    while (i < n) {
        var c = chars[i]
        if (c == "*") {
            // Collapse consecutive stars into a single 0 token.
            if (tokens.count == 0 || tokens[-1] != 0) tokens.add(0)
        } else if (c == "?") {
            // '*?' is equivalent to '?': overwrite the pending star token.
            if (tokens.count > 0 && tokens[-1] == 0) {
                tokens[-1] = 1
            } else {
                tokens.add(1)
            }
        } else if (c == "[") {
            // Find the first ']' after the '[' (codepoint index), if any.
            var close = -1
            var p = i + 1
            while (p < n && close == -1) {
                if (chars[p] == "]") close = p
                p = p + 1
            }
            if (close == -1) {
                // Unterminated set: treat '[' literally.
                tokens.add(c)
            } else {
                var set = []
                var first = i + 1
                var k = first
                while (k < close) {
                    var d = chars[k]
                    if (d == "!") {
                        // Only a leading '!' negates; elsewhere it is a member.
                        if (k == first) tokens.add(-1) else set.add(d)
                    } else if (k < close - 2 && chars[k + 1] == "-") {
                        // 'a-b' range: needs a character on both sides of '-'.
                        set.add(d.codePoints[0]..chars[k + 2].codePoints[0])
                        k = k + 2
                    } else {
                        set.add(d)
                    }
                    k = k + 1
                }
                if (set.count == 0) Fiber.abort("set cannot be empty.")
                tokens.add(set)
                // Resume after the closing bracket (the shared increment below
                // steps past ']').
                i = close
            }
        } else {
            tokens.add(c)
        }
        i = i + 1
    }
    return tokens
}
|||
// Returns whether a string 's' matches a 'pattern' which may already be tokenized |
|||
// if many strings are to be matched. Matching is case sensitive. |
|||
// Patterns may contain the following wildcards: |
|||
// * (or **) matches zero or more characters until the next token (if any) matches |
|||
// and doesn't backtrack in the event of subsequent failure; |
|||
// ? (or *?) matches exactly one character; |
|||
// [set] matches a single character from the set within the brackets e.g. [aeiou]. |
|||
// The set can also contain ranges of characters separated by '-' e.g. [a-zA-Z]. |
|||
// If the first character of the set is '!' then only characters NOT within the rest |
|||
// of the set are matched e.g. [!0-9] matches any character other than a digit. |
|||
static isMatch(s, pattern) {
    // Returns whether the string 's' matches 'pattern', which is either a
    // pattern string (tokenized here) or an already tokenized list, useful when
    // many strings are tested against the same pattern. Matching is case
    // sensitive and '*' never backtracks: it skips characters until the next
    // token first matches.
    // Aborts the fiber if 'pattern' is not a non-empty string or list, or if an
    // unrecognized token is met.
    var tokens = pattern
    if (tokens is String) tokens = tokenize(tokens)
    if (!((tokens is List) && tokens.count > 0)) {
        Fiber.abort("'pattern' must be a non-empty string or list of tokens.")
    }
    // Compare codepoint by codepoint: a String subscript is a byte offset while
    // 'count' is a codepoint count, so indexing 's' directly would go wrong on
    // multi-byte characters.
    var chars = s.toList
    var n = chars.count
    var i = 0         // position in 'chars'
    var j = 0         // position in 'tokens'
    var star = false  // a '*' is pending: skip characters until tokens[j] matches
    var neg = false   // the next set token is negated ('[!...]')
    while (i < n && j < tokens.count) {
        var c = chars[i]
        var t = tokens[j]
        if (t is Num) {
            if (t == 0) {
                star = true
            } else if (t == 1) {
                // '?' consumes exactly one character and cancels a pending star.
                i = i + 1
                star = false
            } else if (t == -1) {
                neg = true
            } else {
                Fiber.abort("'%(t)' is not a recognized token.")
            }
            j = j + 1
        } else if (t is String) {
            if (!star && c != t) return false
            if (star && c == t) star = false
            i = i + 1
            // While a star is still pending the token is retried on the next character.
            if (!star) j = j + 1
        } else if (t is List) {
            var inSet = false
            for (e in t) {
                if (e is String) {
                    if (e == c) {
                        inSet = true
                        break
                    }
                } else if (e is Range) {
                    var cp = c.codePoints[0]
                    if (cp >= e.from && cp <= e.to) {
                        inSet = true
                        break
                    }
                } else {
                    Fiber.abort("'%(e)' is not a recognized token within a set.")
                }
            }
            // Apply negation before the star logic so that '*[!x]' stops on the
            // first character NOT in the set rather than the first one in it.
            var hit = (inSet != neg)
            if (!star && !hit) return false
            if (star && hit) star = false
            i = i + 1
            if (!star) {
                // Negation belongs to this set token, so it is only cleared
                // once the token has actually been consumed.
                j = j + 1
                neg = false
            }
        } else {
            Fiber.abort("'%(t)' is not a recognized token.")
        }
    }
    // Both exhausted together: full match.
    if (i == n && j == tokens.count) return true
    // Pattern ended on '*': any remaining characters are absorbed.
    if (j == tokens.count && tokens[-1] == 0) return true
    // String exhausted with only a trailing '*' left: it matches zero characters.
    if (j == tokens.count - 1 && tokens[-1] == 0) return true
    return false
}
} |
||
} |
} |