Category talk:Wren-regex

From Rosetta Code

Regular expressions

Although the 'Wren-pattern' module performs well for something written entirely in Wren, it is not very fast for large amounts of text, uses non-standard syntax and there are certain patterns which it either does not support at all or supports to only a limited extent.

This module aims to remedy that situation by wrapping Go's 'regexp' package so it can be used from Wren, though at present this can only be done from a special embedded application written in Go and not from Wren-cli.

The wrapper is fairly complete though methods which require a Go function to be passed have had to be excluded. Only the 'string' methods have been wrapped as there is little point in using the equivalent 'byte slice' methods from Wren.

I have added a few more 'convenience' methods including a couple which replace some rather than all matches ('regexp' only has the latter).

The 'regexp' package is based on RE2 which is less complicated and less complete than, say, the C library PCRE2. Moreover, from Wren's perspective, the former will be slower as we need to use WrenGo rather than the C embedding API directly. However, it does have the advantage that it is guaranteed to run in linear time relative to the size of the input and, like Wren, uses UTF-8 strings natively.

It is also much easier to wrap and, as it is likely that an optional regular expression module will be added to Wren's standard library in due course, it may prove to be a temporary expedient in any case.

As with most other languages, it is advisable to use 'raw' strings for regular expressions to avoid the need for escaping the metacharacter '\' which frequently occurs in such expressions.

Source Code (Wren)

/* Module "regex.wren" */

/* Regex represents a reference to a compiled Go Regexp object.
   Strings (never byte lists) are passed to or returned from methods where appropriate.
   All 'foreign' methods are implemented by the Go host program, not in Wren.
 */
foreign class Regex {
    // Returns whether 's' contains any match for the (uncompiled) regular expression 'pattern'.
    // Use only for simple queries.
    // The Go host terminates the program (log.Fatal) if 'pattern' cannot be parsed.
    foreign static isMatch(pattern, s)

    // Returns a string that escapes all regular expression metacharacters inside 's'.
    // The 14 metacharacters used are: \.+*?()|[]{}^$
    foreign static quoteMeta(s)

    // Constructs a Go Regexp object compiled from a regular expression 'str'.
    // Go panics if 'str' cannot be parsed.
    construct compile(str) {}

    // See Go docs for fuller explanations of the following methods using the 'string' varieties.
    // An 'index pair' means the 2 element list [start, end] where 'end' is exclusive.
    // Where methods take an 'n' parameter, they return results for at most 'n' matches/submatches
    // or all such matches if 'n' is negative.

    foreign literalPrefix()            // returns the 2 element list [prefix, complete]
    foreign longest()                  // makes future searches prefer the leftmost-longest match
    foreign numSubexp                  // returns number of parenthesized subexpressions in 'this'
    foreign subexpIndex(name)          // returns the index of the first subexpression called 'name'
                                       // or -1 if not found
    foreign subexpNames                // returns a list of the names of the parenthesized
                                       // subexpressions in 'this'

    foreign isMatch(s)                 // returns whether 's' contains any match for 'this'

    foreign find(s)                    // returns text of leftmost match in 's' or an empty string if no match
    foreign findIndex(s)               // returns index pair of leftmost match or an empty list if no match
    foreign findSubmatch(s)            // returns a list of the text of leftmost match in 's'
                                       // and of any subexpressions thereof, or an empty list if no match
    foreign findSubmatchIndex(s)       // returns a flat list [start0, end0, start1, end1, ...] holding the
                                       // index pair of leftmost match in 's' followed by the index pairs of
                                       // any subexpressions thereof, or an empty list if no match

    foreign findAll(s, n)              // returns a list of 'find' results for up to 'n' matches
    foreign findAllIndex(s, n)         // returns a list of 'findIndex' results for up to 'n' matches
    foreign findAllSubmatch(s, n)      // returns a list of 'findSubmatch' results for up to 'n' matches
    foreign findAllSubmatchIndex(s, n) // returns a list of 'findSubmatchIndex' results for up to 'n' matches

    findAll(s)              { findAll(s, -1) }              // as 'findAll' but for every match
    findAllIndex(s)         { findAllIndex(s, -1) }         // as 'findAllIndex' but for every match
    findAllSubmatch(s)      { findAllSubmatch(s, -1) }      // as 'findAllSubmatch' but for every match
    findAllSubmatchIndex(s) { findAllSubmatchIndex(s, -1) } // as 'findAllSubmatchIndex' but for every match

    foreign expand(dst, template, src)     // returns 'dst' appended with 'template' after replacing variables
                                           // therein with corresponding matches of 'src' for 'this'
                                           // (the expanded template is appended once for every match in 'src')
                                           // the 'match' parameter will be supplied by Go

    foreign replaceAll(src, repl)          // returns a copy of 'src' replacing matches of 'this' with 'repl'
                                           // except that within 'repl' $k means the text of the 'k'th submatch
    foreign replaceAllLiteral(src, repl)   // returns a copy of 'src' replacing matches of 'this' with 'repl'
                                           // where 'repl' is interpreted literally

    foreign replaceAll(src, repl, n, skip) // as 'replaceAll' ($k is expanded within 'repl') but only the first 'n'
                                           // matches are candidates and, of those, the first 'skip' are left
                                           // unchanged; a negative 'n' means all matches
    replace(src, repl) {                   // as 'replaceAll' but replaces only the leftmost match
        return replaceAll(src, repl, 1, 0)
    }

    foreign split(s, n)                    // returns a list of at most 'n' substrings of 's' separated by matches
                                           // of 'this', or all such substrings if 'n' is negative
    split(s) { split(s, -1) }              // as 'split' but returns every substring

    foreign toString                       // returns the expression used to compile 'this'
}

/* File contains routines for performing simple operations on text files.
   All methods are implemented by the Go host program; if the underlying operation
   fails, the host logs the error and terminates the program (log.Fatal).
 */
class File {
    foreign static read(path)                 // opens the file, reads and returns all its text, then closes it
    foreign static write(path, text)          // creates the file or truncates it if it already exists, writes
                                              // 'text' to it and closes it
    foreign static rename(oldPath, newPath)   // renames the file from 'oldPath' to 'newPath'
    foreign static remove(path)               // removes the file
}

Source Code (Go)

/* go build wren-regex.go */

package main

import (
    wren "github.com/crazyinfin8/WrenGo"
    "log"
    "math"
    "os"
    "regexp"
    "strings"
)

// type any = interface{}

// null is an empty struct value; foreign methods return it when they have
// nothing to hand back, which produces 'null' on the Wren side.
var null struct{}

// check logs err and terminates the program via log.Fatal when err is
// non-nil; it does nothing for a nil error.
func check(err error) {
    if err == nil {
        return
    }
    log.Fatal(err)
}

/* Regex methods */

// Regex_isMatch_static implements the static Regex.isMatch(pattern, s).
// parameters[1] is the uncompiled pattern and parameters[2] the text to
// search; the result is whether the text contains any match. A pattern that
// cannot be parsed terminates the program via log.Fatal.
func Regex_isMatch_static(vm *wren.VM, parameters []any) (any, error) {
    expr, text := parameters[1].(string), parameters[2].(string)
    found, err := regexp.MatchString(expr, text)
    if err != nil {
        log.Fatal(err)
    }
    return found, nil
}

// Regex_quoteMeta implements the static Regex.quoteMeta(s). It returns
// parameters[1] with every regular expression metacharacter escaped, as
// produced by regexp.QuoteMeta.
func Regex_quoteMeta(vm *wren.VM, parameters []any) (any, error) {
    return regexp.QuoteMeta(parameters[1].(string)), nil
}

// Regex_compile is the foreign constructor behind Regex.compile(str). It
// compiles parameters[1] with regexp.MustCompile, which panics when the
// expression cannot be parsed, and returns a pointer to the resulting
// *regexp.Regexp; the instance methods expect to get back exactly that
// **regexp.Regexp from the receiver handle.
func Regex_compile(vm *wren.VM, parameters []any) (any, error) {
    compiled := regexp.MustCompile(parameters[1].(string))
    return &compiled, nil
}

// Regex_literalPrefix implements Regex.literalPrefix(). It calls
// LiteralPrefix on the receiver's Regexp and returns a new Wren list holding
// the prefix string followed by the 'complete' flag. The receiver handle is
// freed before returning.
func Regex_literalPrefix(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    lit, whole := rx.LiteralPrefix()
    pair, _ := vm.NewList()
    pair.Insert(lit)
    pair.Insert(whole)
    recv.Free()
    return pair, nil
}

// Regex_longest implements Regex.longest(). It calls Longest on the
// receiver's Regexp (the same object every later call on this Wren instance
// will use), frees the receiver handle and returns null to Wren.
func Regex_longest(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    rx.Longest()
    recv.Free()
    return null, nil
}

// Regex_numSubexp implements the Regex.numSubexp getter. It returns the
// value of NumSubexp for the receiver's Regexp and frees the receiver handle.
func Regex_numSubexp(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    count := rx.NumSubexp()
    recv.Free()
    return count, nil
}

// Regex_subexpIndex implements Regex.subexpIndex(name). It returns the
// result of SubexpIndex for the subexpression name in parameters[1] and frees
// the receiver handle.
func Regex_subexpIndex(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    group := parameters[1].(string)
    pos := rx.SubexpIndex(group)
    recv.Free()
    return pos, nil
}

// Regex_subexpNames implements the Regex.subexpNames getter. It copies every
// entry of SubexpNames, in order, into a new Wren list, frees the receiver
// handle and returns the list.
func Regex_subexpNames(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    out, _ := vm.NewList()
    for _, group := range rx.SubexpNames() {
        out.Insert(group)
    }
    recv.Free()
    return out, nil
}

// Regex_isMatch implements Regex.isMatch(s). It reports whether the string
// in parameters[1] contains any match for the receiver's Regexp and frees the
// receiver handle.
func Regex_isMatch(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    found := rx.MatchString(text)
    recv.Free()
    return found, nil
}

// Regex_find implements Regex.find(s). It returns the FindString result for
// the string in parameters[1] (an empty string when nothing matches) and
// frees the receiver handle.
func Regex_find(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    hit := rx.FindString(text)
    recv.Free()
    return hit, nil
}

// Regex_findIndex implements Regex.findIndex(s). It copies the
// FindStringIndex result for the string in parameters[1] into a new Wren
// list, which therefore stays empty when there is no match, frees the
// receiver handle and returns the list.
func Regex_findIndex(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    out, _ := vm.NewList()
    for _, pos := range rx.FindStringIndex(text) {
        out.Insert(pos)
    }
    recv.Free()
    return out, nil
}

// Regex_findSubmatch implements Regex.findSubmatch(s). It copies the
// FindStringSubmatch result for the string in parameters[1] into a new Wren
// list (empty when there is no match), frees the receiver handle and returns
// the list.
func Regex_findSubmatch(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    out, _ := vm.NewList()
    for _, group := range rx.FindStringSubmatch(text) {
        out.Insert(group)
    }
    recv.Free()
    return out, nil
}

// Regex_findSubmatchIndex implements Regex.findSubmatchIndex(s). It copies
// the FindStringSubmatchIndex result for the string in parameters[1], element
// by element, into a single flat Wren list (empty when there is no match),
// frees the receiver handle and returns the list.
func Regex_findSubmatchIndex(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    out, _ := vm.NewList()
    for _, pos := range rx.FindStringSubmatchIndex(text) {
        out.Insert(pos)
    }
    recv.Free()
    return out, nil
}

// Regex_findAll implements Regex.findAll(s, n). parameters[1] is the text and
// parameters[2] the match limit, which arrives as a float64 and is truncated
// to int. The FindAllString results are copied into a new Wren list; the
// receiver handle is freed before the list is returned.
func Regex_findAll(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    limit := int(parameters[2].(float64))
    out, _ := vm.NewList()
    for _, hit := range rx.FindAllString(text, limit) {
        out.Insert(hit)
    }
    recv.Free()
    return out, nil
}

// Regex_findAllIndex implements Regex.findAllIndex(s, n). parameters[1] is
// the text and parameters[2] the match limit (float64 truncated to int). Each
// FindAllStringIndex entry becomes its own inner Wren list, and the inner
// lists are collected in an outer list that is returned after the receiver
// handle has been freed.
func Regex_findAllIndex(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    limit := int(parameters[2].(float64))
    outer, _ := vm.NewList()
    for _, loc := range rx.FindAllStringIndex(text, limit) {
        inner, _ := vm.NewList()
        for _, pos := range loc {
            inner.Insert(pos)
        }
        outer.Insert(inner)
    }
    recv.Free()
    return outer, nil
}

// Regex_findAllSubmatch implements Regex.findAllSubmatch(s, n).
// parameters[1] is the text and parameters[2] the match limit (float64
// truncated to int). Each FindAllStringSubmatch entry becomes its own inner
// Wren list of strings; the outer list of those is returned after the
// receiver handle has been freed.
func Regex_findAllSubmatch(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    limit := int(parameters[2].(float64))
    outer, _ := vm.NewList()
    for _, groups := range rx.FindAllStringSubmatch(text, limit) {
        inner, _ := vm.NewList()
        for _, group := range groups {
            inner.Insert(group)
        }
        outer.Insert(inner)
    }
    recv.Free()
    return outer, nil
}

// Regex_findAllSubmatchIndex implements Regex.findAllSubmatchIndex(s, n).
// parameters[1] is the text and parameters[2] the match limit (float64
// truncated to int). Each FindAllStringSubmatchIndex entry becomes its own
// flat inner Wren list of integers; the outer list of those is returned after
// the receiver handle has been freed.
func Regex_findAllSubmatchIndex(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    limit := int(parameters[2].(float64))
    outer, _ := vm.NewList()
    for _, locs := range rx.FindAllStringSubmatchIndex(text, limit) {
        inner, _ := vm.NewList()
        for _, pos := range locs {
            inner.Insert(pos)
        }
        outer.Insert(inner)
    }
    recv.Free()
    return outer, nil
}

// Regex_expand implements Regex.expand(dst, template, src). Starting from the
// bytes of parameters[1], it appends the expansion of the template
// (parameters[2]) once for every match of the receiver's Regexp in
// parameters[3], using ExpandString with that match's submatch indices. The
// accumulated byte slice is returned after the receiver handle is freed.
func Regex_expand(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    out := []byte(parameters[1].(string))
    tmpl := parameters[2].(string)
    text := parameters[3].(string)
    allLocs := rx.FindAllStringSubmatchIndex(text, -1)
    for i := range allLocs {
        out = rx.ExpandString(out, tmpl, text, allLocs[i])
    }
    recv.Free()
    return out, nil
}

// Regex_replaceAll implements Regex.replaceAll(src, repl). It returns the
// ReplaceAllString result for the source text in parameters[1] and the
// replacement in parameters[2], then frees the receiver handle.
func Regex_replaceAll(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    with := parameters[2].(string)
    out := rx.ReplaceAllString(text, with)
    recv.Free()
    return out, nil
}

// Regex_replaceAllLiteral implements Regex.replaceAllLiteral(src, repl). It
// returns the ReplaceAllLiteralString result for the source text in
// parameters[1] and the replacement in parameters[2], then frees the receiver
// handle.
func Regex_replaceAllLiteral(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    with := parameters[2].(string)
    out := rx.ReplaceAllLiteralString(text, with)
    recv.Free()
    return out, nil
}

// Regex_replaceAllSpecial implements Regex.replaceAll(src, repl, n, skip).
//
// parameters[1] is the source text, parameters[2] the replacement (in which
// $k is expanded as for ReplaceAllString), parameters[3] is n and
// parameters[4] is skip; both numbers arrive as float64 and are truncated.
//
// Only the first n matches are candidates for replacement and, of those, the
// first skip are left untouched. A negative n means "all matches" and a
// negative skip is treated as 0. The source is returned unchanged when n is 0,
// when skip >= n, or when there are no more than skip matches.
//
// The replacement is expanded against the submatch indices found in the full
// source string. Re-running the regex on each matched substring in isolation
// loses the surrounding context, so patterns containing assertions such as
// \b, \B, ^ or $ could fail to re-match and the replacement would silently be
// skipped.
func Regex_replaceAllSpecial(vm *wren.VM, parameters []any) (any, error) {
    handle := parameters[0].(*wren.ForeignHandle)
    ifc, _ := handle.Get()
    re := *ifc.(**regexp.Regexp)
    src := parameters[1].(string)
    repl := parameters[2].(string)
    n := int(parameters[3].(float64))
    skip := int(parameters[4].(float64))
    handle.Free()
    if skip < 0 {
        skip = 0
    }
    if n < 0 {
        if skip == 0 {
            // Nothing to limit or skip: this is plain replaceAll.
            return re.ReplaceAllString(src, repl), nil
        }
        n = math.MaxInt
    }
    if n == 0 || skip >= n {
        return src, nil
    }
    // Matches beyond the n-th are never replaced, so there is no need to
    // look for them.
    matches := re.FindAllStringSubmatchIndex(src, n)
    if len(matches) <= skip {
        return src, nil
    }
    buf := make([]byte, 0, len(src))
    last := 0 // end of the text already copied to buf
    for _, m := range matches[skip:] {
        // Copy everything up to this match verbatim; for the first replaced
        // match this includes the skipped matches.
        buf = append(buf, src[last:m[0]]...)
        buf = re.ExpandString(buf, repl, src, m)
        last = m[1]
    }
    buf = append(buf, src[last:]...)
    return string(buf), nil
}

// Regex_split implements Regex.split(s, n). parameters[1] is the text and
// parameters[2] the limit passed to Split (float64 truncated to int). The
// resulting substrings are copied into a new Wren list, which is returned
// after the receiver handle has been freed.
func Regex_split(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    text := parameters[1].(string)
    limit := int(parameters[2].(float64))
    out, _ := vm.NewList()
    for _, part := range rx.Split(text, limit) {
        out.Insert(part)
    }
    recv.Free()
    return out, nil
}

// Regex_toString implements the Regex.toString getter. It returns the String
// form of the receiver's Regexp, i.e. the expression it was compiled from,
// and frees the receiver handle.
func Regex_toString(vm *wren.VM, parameters []any) (any, error) {
    recv := parameters[0].(*wren.ForeignHandle)
    obj, _ := recv.Get()
    rx := *obj.(**regexp.Regexp)
    expr := rx.String()
    recv.Free()
    return expr, nil
}

/* File methods */

// File_read implements the static File.read(path). It reads the whole file
// named by parameters[1] with os.ReadFile and returns the bytes read; any
// error terminates the program via log.Fatal.
func File_read(vm *wren.VM, parameters []any) (any, error) {
    path := parameters[1].(string)
    contents, err := os.ReadFile(path)
    if err != nil {
        log.Fatal(err)
    }
    return contents, nil
}

// File_write implements the static File.write(path, text). It writes the
// string in parameters[2] to the file named by parameters[1] using
// os.WriteFile with permission bits 0o666 and returns null to Wren; any error
// terminates the program via log.Fatal.
func File_write(vm *wren.VM, parameters []any) (any, error) {
    path := parameters[1].(string)
    text := parameters[2].(string)
    if err := os.WriteFile(path, []byte(text), 0o666); err != nil {
        log.Fatal(err)
    }
    return null, nil
}

// File_rename implements the static File.rename(oldPath, newPath). It renames
// parameters[1] to parameters[2] with os.Rename and returns null to Wren; any
// error terminates the program via log.Fatal.
func File_rename(vm *wren.VM, parameters []any) (any, error) {
    from, to := parameters[1].(string), parameters[2].(string)
    if err := os.Rename(from, to); err != nil {
        log.Fatal(err)
    }
    return null, nil
}

// File_remove implements the static File.remove(path). It deletes the file
// named by parameters[1] with os.Remove and returns null to Wren; any error
// terminates the program via log.Fatal.
func File_remove(vm *wren.VM, parameters []any) (any, error) {
    path := parameters[1].(string)
    if err := os.Remove(path); err != nil {
        log.Fatal(err)
    }
    return null, nil
}

// moduleFn is the VM's module loader hook. Module names other than "meta" and
// "random" that do not already end in ".wren" get that suffix appended, and
// the resulting name is handed to wren.DefaultModuleLoader.
func moduleFn(vm *wren.VM, name string) (string, bool) {
    switch {
    case name == "meta", name == "random", strings.HasSuffix(name, ".wren"):
        // Use the name exactly as given.
    default:
        name += ".wren"
    }
    return wren.DefaultModuleLoader(vm, name)
}

// main runs the Wren script named by the single command line argument in a
// VM whose "./regex" module has the foreign classes 'Regex' and 'File' bound
// to the Go functions in this file. The script's own module name is also
// registered, with an empty class map.
//
// The program terminates via log.Fatal when the argument count is wrong or
// when interpreting the script reports an error.
func main() {
    args := os.Args
    if len(args) != 2 {
        log.Fatal("There should be a single command line argument, namely the Wren file name.")
    }
    fileName := args[1]
    cfg := wren.NewConfig()
    cfg.LoadModuleFn = moduleFn
    vm := cfg.NewVM()

    // Wren method signatures mapped to the Go functions implementing them.
    regexMethodMap := wren.MethodMap{
        "static isMatch(_,_)":       Regex_isMatch_static,
        "static quoteMeta(_)":       Regex_quoteMeta,
        "literalPrefix()":           Regex_literalPrefix,
        "longest()":                 Regex_longest,
        "numSubexp":                 Regex_numSubexp,
        "subexpIndex(_)":            Regex_subexpIndex,
        "subexpNames":               Regex_subexpNames,
        "isMatch(_)":                Regex_isMatch,
        "find(_)":                   Regex_find,
        "findIndex(_)":              Regex_findIndex,
        "findSubmatch(_)":           Regex_findSubmatch,
        "findSubmatchIndex(_)":      Regex_findSubmatchIndex,
        "findAll(_,_)":              Regex_findAll,
        "findAllIndex(_,_)":         Regex_findAllIndex,
        "findAllSubmatch(_,_)":      Regex_findAllSubmatch,
        "findAllSubmatchIndex(_,_)": Regex_findAllSubmatchIndex,
        "expand(_,_,_)":             Regex_expand,
        "replaceAll(_,_)":           Regex_replaceAll,
        "replaceAllLiteral(_,_)":    Regex_replaceAllLiteral,
        "replaceAll(_,_,_,_)":       Regex_replaceAllSpecial,
        "split(_,_)":                Regex_split,
        "toString":                  Regex_toString,
    }

    fileMethodMap := wren.MethodMap{
        "static read(_)":     File_read,
        "static write(_,_)":  File_write,
        "static rename(_,_)": File_rename,
        "static remove(_)":   File_remove,
    }

    // Regex has a foreign constructor; File only has static methods.
    classMap := wren.ClassMap{
        "Regex": wren.NewClass(Regex_compile, nil, regexMethodMap),
        "File":  wren.NewClass(nil, nil, fileMethodMap),
    }

    classMap2 := wren.ClassMap{}

    module := wren.NewModule(classMap)
    module2 := wren.NewModule(classMap2)
    vm.SetModule("./regex", module)
    vm.SetModule(fileName, module2)

    // Do not discard the interpreter's error: otherwise a script that cannot
    // be read or that fails would make the program exit silently with a
    // success status. The VM is released first because log.Fatal does not
    // return.
    err := vm.InterpretFile(fileName)
    vm.Free()
    if err != nil {
        log.Fatal(err)
    }
}