Category talk:Wren-str: Difference between revisions

Content added Content deleted

Inline

@@ Line 645: / Line 645: @@
         }
         return null
+    }
+    // Converts a pattern into a list of tokens for processing by the 'isMatch' method.
+    // Characters within the pattern are represented as follows:
+    //   Non-wildcard characters as themselves (i.e. single character strings);
+    //   * (or **) by the number 0;
+    //   ? (or *?) by the number 1;
+    //   [set] by a list of the tokens within the set:
+    //     single characters by themselves;
+    //     a range of characters, a-b, by a Range of codepoints from 'a' to 'b'.
+    //   If the first character of the set is '!' then the number -1 is inserted
+    //   as a separate token immediately before the list.
+    // A '[' with no closing ']' after it is treated as an ordinary character.
+    // Aborts the fiber if a set contains no members (e.g. "[]" or "[!]").
+    static tokenize(pattern) {
+        // Work on a list of codepoints rather than on the string itself: a string
+        // subscript is a byte offset whereas 'count' is a codepoint count, so indexing
+        // the string directly truncates and splits non-ASCII patterns.
+        var chars = pattern.toList
+        var tokens = []
+        var i = 0
+        while (i < chars.count) {
+            var c = chars[i]
+            if (c == "*") {
+                // Consecutive stars collapse into a single 0 token.
+                if (tokens.count == 0 || tokens[-1] != 0) tokens.add(0)
+            } else if (c == "?") {
+                // '*?' is equivalent to '?', so a preceding star is overwritten.
+                if (tokens.count > 0 && tokens[-1] == 0) {
+                    tokens[-1] = 1
+                } else {
+                    tokens.add(1)
+                }
+            } else if (c == "[") {
+                // Look for the closing bracket, starting just after the '['.
+                var j = -1
+                var m = i + 1
+                while (m < chars.count) {
+                    if (chars[m] == "]") {
+                        j = m
+                        break
+                    }
+                    m = m + 1
+                }
+                if (j == -1) {
+                    // Unterminated set: keep '[' as a literal character.
+                    tokens.add(c)
+                } else {
+                    var members = []
+                    var k = i + 1
+                    while (k < j) {
+                        var d = chars[k]
+                        if (d == "!") {
+                            // Only a leading '!' negates; elsewhere it is a literal member.
+                            if (k == i + 1) tokens.add(-1) else members.add(d)
+                        } else if (k < j - 2 && chars[k+1] == "-") {
+                            // 'd-x' with both ends inside the brackets is a codepoint range.
+                            members.add(d.codePoints[0]..chars[k+2].codePoints[0])
+                            k = k + 2
+                        } else {
+                            members.add(d)
+                        }
+                        k = k + 1
+                    }
+                    if (members.count == 0) Fiber.abort("set cannot be empty.")
+                    tokens.add(members)
+                    // Resume after the ']' (the increment below steps past it).
+                    i = j
+                }
+            } else {
+                tokens.add(c)
+            }
+            i = i + 1
+        }
+        return tokens
+    }
+    // Returns whether a string 's' matches a 'pattern' which may already be tokenized
+    // if many strings are to be matched. Matching is case sensitive.
+    // Patterns may contain the following wildcards:
+    //   * (or **) matches zero or more characters until the next token (if any) matches
+    //   and doesn't backtrack in the event of subsequent failure;
+    //   ? (or *?) matches exactly one character;
+    //   [set] matches a single character from the set within the brackets e.g. [aeiou].
+    //   The set can also contain ranges of characters separated by '-' e.g. [a-zA-Z].
+    //   If the first character of the set is '!' then only characters NOT within the rest
+    //   of the set are matched e.g. [!0-9] matches any character other than a digit.
+    // Aborts the fiber if 'pattern' is neither a non-empty string nor a non-empty list,
+    // or if the token list contains an unrecognized token.
+    static isMatch(s, pattern) {
+        var tokens = pattern
+        if (tokens is String) tokens = tokenize(tokens)
+        if (!((tokens is List) && tokens.count > 0)) {
+            Fiber.abort("'pattern' must be a non-empty string or list of tokens.")
+        }
+        // Compare codepoint by codepoint: a string subscript is a byte offset whereas
+        // 'count' is a codepoint count, so indexing 's' directly fails on non-ASCII text.
+        var chars = s.toList
+        var i = 0        // index of the next character of 's' to examine
+        var j = 0        // index of the current token
+        var star = false // true while a '*' is still swallowing characters
+        var neg = false  // true when the next set token is negated
+        while (i < chars.count && j < tokens.count) {
+            var c = chars[i]
+            var t = tokens[j]
+            if (t is Num) {
+                if (t == 0) {
+                    star = true
+                } else if (t == 1) {
+                    i = i + 1
+                    star = false
+                } else if (t == -1) {
+                    neg = true
+                } else {
+                    Fiber.abort("'%(t)' is not a recognized token.")
+                }
+                j = j + 1
+            } else if (t is String) {
+                if (!star && c != t) return false
+                if (star && c == t) star = false
+                i = i + 1
+                // While a star is pending, stay on this token until it matches.
+                if (!star) j = j + 1
+            } else if (t is List) {
+                var matched = false
+                for (e in t) {
+                    if (e is String) {
+                        if (e == c) {
+                            matched = true
+                            break
+                        }
+                    } else if (e is Range){
+                        var cp = c.codePoints[0]
+                        if (cp >= e.from && cp <= e.to) {
+                            matched = true
+                            break
+                        }
+                    } else {
+                        Fiber.abort("'%(e)' is not a recognized token within a set.")
+                    }
+                }
+                // A negated set is satisfied exactly when no member matched.
+                var hit = (matched != neg)
+                if (!star && !hit) return false
+                if (star && hit) star = false
+                i = i + 1
+                if (!star) {
+                    // Only drop the negation once the set has been satisfied, so that it
+                    // still applies while a pending star keeps scanning.
+                    neg = false
+                    j = j + 1
+                }
+            } else {
+                Fiber.abort("'%(t)' is not a recognized token.")
+            }
+        }
+        // Both exhausted together, or the only thing left over is covered by a trailing '*'.
+        if (i == chars.count && j == tokens.count) return true
+        if (j == tokens.count && tokens[-1] == 0) return true
+        if (j == tokens.count - 1 && tokens[-1] == 0) return true
+        return false
     }
 }