Category talk:Wren-regex
Regular expression
Although the 'Wren-pattern' module performs well for something written entirely in Wren, it is not very fast for large amounts of text, uses non-standard syntax and there are certain patterns which it either does not support at all or supports to only a limited extent.
This module aims to remedy that situation by wrapping Go's 'regexp' package so it can be used from Wren, though at present this can only be done from a special embedded application written in Go and not from Wren-cli.
The wrapper is fairly complete, though methods which require a Go function to be passed have had to be excluded. Only the 'string' methods have been wrapped as there is little point in using the equivalent 'byte slice' methods from Wren.
I have added a few more 'convenience' methods including a couple which replace some rather than all matches ('regexp' only has the latter).
The 'regexp' package is based on RE2 which is less complicated and less complete than, say, the C library PCRE2. Moreover, from Wren's perspective, the former will be slower as we need to use WrenGo rather than the C embedding API directly. However, it does have the advantage that it is guaranteed to run in linear time relative to the size of the input and, like Wren, uses UTF-8 strings natively.
It is also much easier to wrap and, as it is likely that an optional regular expression module will be added to Wren's standard library in due course, it may prove to be a temporary expedient in any case.
As with most other languages, it is advisable to use 'raw' strings for regular expressions to avoid the need for escaping the metacharacter '\' which frequently occurs in such expressions.
Source Code (Wren)
<lang ecmascript>/* Module "regex.wren" */
/* Regex represents a reference to a compiled Go Regexp object.
Strings (never byte lists) are passed to or returned from methods where appropriate. */
foreign class Regex {
    // Returns whether 's' contains any match for the (uncompiled) regular expression 'pattern'.
    // Use only for simple queries.
    foreign static isMatch(pattern, s)

    // Returns a string that escapes all regular expression metacharacters inside 's'.
    // The 14 metacharacters used are: \.+*?()|[]{}^$
    foreign static quoteMeta(s)

    // Constructs a Go Regexp object compiled from a regular expression 'str'.
    // Go panics if 'str' cannot be parsed.
    construct compile(str) {}

    // See Go docs for fuller explanations of the following methods using the 'string' varieties.
    // An 'index pair' means the 2 element list [start, end] where 'end' is exclusive.
    // Where methods take an 'n' parameter, they return results for at most 'n' matches/submatches
    // or all such matches if 'n' is negative.

    foreign literalPrefix()    // returns the 2 element list [prefix, complete]
    foreign longest()          // makes future searches prefer the leftmost-longest match
    foreign numSubexp          // returns number of parenthesized subexpressions in 'this'
    foreign subexpIndex(name)  // returns the index of the first subexpression called 'name'
                               // or -1 if not found
    foreign subexpNames        // returns a list of the names of the parenthesized
                               // subexpressions in 'this'

    foreign isMatch(s)         // returns whether 's' contains any match for 'this'

    foreign find(s)               // returns text of leftmost match in 's' or an empty string if no match
    foreign findIndex(s)          // returns index pair of leftmost match or an empty list if no match
    foreign findSubmatch(s)       // returns a list of the text of leftmost match in 's'
                                  // and of any subexpressions thereof, or an empty list if no match
    foreign findSubmatchIndex(s)  // returns a list of the index pair of leftmost match in 's'
                                  // and of any subexpressions thereof, or an empty list if no match

    foreign findAll(s, n)               // returns a list of 'find' results for up to 'n' matches
    foreign findAllIndex(s, n)          // returns a list of 'findIndex' results for up to 'n' matches
    foreign findAllSubmatch(s, n)       // returns a list of 'findSubmatch' results for up to 'n' matches
    foreign findAllSubmatchIndex(s, n)  // returns a list of 'findSubmatchIndex' results for up to 'n' matches

    findAll(s)              { findAll(s, -1) }               // as 'findAll' but for every match
    findAllIndex(s)         { findAllIndex(s, -1) }          // as 'findAllIndex' but for every match
    findAllSubmatch(s)      { findAllSubmatch(s, -1) }       // as 'findAllSubmatch' but for every match
    findAllSubmatchIndex(s) { findAllSubmatchIndex(s, -1) }  // as 'findAllSubmatchIndex' but for every match

    foreign expand(dst, template, src)  // returns 'dst' appended with 'template' after replacing variables
                                        // therein with corresponding matches of 'src' for 'this'
                                        // the 'match' parameter will be supplied by Go

    foreign replaceAll(src, repl)         // returns a copy of 'src' replacing matches of 'this' with 'repl'
                                          // except that within 'repl' $k means the text of the 'k'th submatch
    foreign replaceAllLiteral(src, repl)  // returns a copy of 'src' replacing matches of 'this' with 'repl'
                                          // where 'repl' is interpreted literally

    foreign replaceAll(src, repl, n, skip)  // as 'replaceAll(src, repl)' ($k is expanded within 'repl') but
                                            // only looks at the first 'n' matches (every match if 'n' is
                                            // negative) and leaves the first 'skip' of those unreplaced
    replace(src, repl) {                    // as 'replaceAll(src, repl)' but replaces only the leftmost match
        return replaceAll(src, repl, 1, 0)
    }

    foreign split(s, n)         // returns a list of the substrings between up to 'n' matches of 'this'
    split(s) { split(s, -1) }   // as 'split' but for every match

    foreign toString            // returns the expression used to compile 'this'
}
/* File contains routines for performing simple operations on text files. */
class File {
    foreign static read(path)                // opens the file, reads and returns all its text, then closes it
    foreign static write(path, text)         // creates the file or truncates it if it already exists, writes
                                             // 'text' to it and closes it
    foreign static rename(oldPath, newPath)  // renames the file
    foreign static remove(path)              // removes the file
}</lang>
Source Code (Go)
<lang go>/* go build wren-regex.go */
package main
import (
wren "github.com/crazyinfin8/WrenGo" "log" "math" "os" "regexp" "strings"
)
// type any = interface{}
// null is the value the foreign methods in this file return when they have
// no result of their own to hand back (for example Regex_longest and the
// File write/rename/remove bindings).
var null = struct{}{} // produces 'null' when returned to Wren
// check is the program's catch-all error handler: a nil error is ignored,
// anything else is handed to log.Fatal, which logs it and terminates the
// process.
func check(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
/* Regex methods */
func Regex_isMatch_static(vm *wren.VM, parameters []any) (any, error) {
pattern := parameters[1].(string) s := parameters[2].(string) matched, err := regexp.MatchString(pattern, s) check(err) return matched, nil
}
func Regex_quoteMeta(vm *wren.VM, parameters []any) (any, error) {
s := parameters[1].(string) quoted := regexp.QuoteMeta(s) return quoted, nil
}
func Regex_compile(vm *wren.VM, parameters []any) (any, error) {
str := parameters[1].(string) re := regexp.MustCompile(str) return &re, nil
}
func Regex_literalPrefix(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) prefix, complete := (*re).LiteralPrefix() lh, _ := vm.NewList() lh.Insert(prefix) lh.Insert(complete) handle.Free() return lh, nil
}
func Regex_longest(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) (*re).Longest() handle.Free() return null, nil
}
func Regex_numSubexp(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) n := (*re).NumSubexp() handle.Free() return n, nil
}
func Regex_subexpIndex(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) name := parameters[1].(string) ix := (*re).SubexpIndex(name) handle.Free() return ix, nil
}
func Regex_subexpNames(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) names := (*re).SubexpNames() lh, _ := vm.NewList() for i := 0; i < len(names); i++ { lh.Insert(names[i]) } handle.Free() return lh, nil
}
func Regex_isMatch(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) s := parameters[1].(string) matched := (*re).MatchString(s) handle.Free() return matched, nil
}
func Regex_find(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) s := parameters[1].(string) match := (*re).FindString(s) handle.Free() return match, nil
}
func Regex_findIndex(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) s := parameters[1].(string) loc := (*re).FindStringIndex(s) lh, _ := vm.NewList() for i := 0; i < len(loc); i++ { lh.Insert(loc[i]) } handle.Free() return lh, nil
}
func Regex_findSubmatch(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) s := parameters[1].(string) matches := (*re).FindStringSubmatch(s) lh, _ := vm.NewList() for i := 0; i < len(matches); i++ { lh.Insert(matches[i]) } handle.Free() return lh, nil
}
func Regex_findSubmatchIndex(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) s := parameters[1].(string) ixs := (*re).FindStringSubmatchIndex(s) lh, _ := vm.NewList() for i := 0; i < len(ixs); i++ { lh.Insert(ixs[i]) } handle.Free() return lh, nil
}
func Regex_findAll(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) s := parameters[1].(string) n := int(parameters[2].(float64)) matches := (*re).FindAllString(s, n) lh, _ := vm.NewList() for i := 0; i < len(matches); i++ { lh.Insert(matches[i]) } handle.Free() return lh, nil
}
func Regex_findAllIndex(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) s := parameters[1].(string) n := int(parameters[2].(float64)) ixs2d := (*re).FindAllStringIndex(s, n) lh, _ := vm.NewList() for i := 0; i < len(ixs2d); i++ { lh2, _ := vm.NewList() for j := 0; j < len(ixs2d[i]); j++ { lh2.Insert(ixs2d[i][j]) } lh.Insert(lh2) } handle.Free() return lh, nil
}
func Regex_findAllSubmatch(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) s := parameters[1].(string) n := int(parameters[2].(float64)) matches2d := (*re).FindAllStringSubmatch(s, n) lh, _ := vm.NewList() for i := 0; i < len(matches2d); i++ { lh2, _ := vm.NewList() for j := 0; j < len(matches2d[i]); j++ { lh2.Insert(matches2d[i][j]) } lh.Insert(lh2) } handle.Free() return lh, nil
}
func Regex_findAllSubmatchIndex(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) s := parameters[1].(string) n := int(parameters[2].(float64)) ixs2d := (*re).FindAllStringSubmatchIndex(s, n) lh, _ := vm.NewList() for i := 0; i < len(ixs2d); i++ { lh2, _ := vm.NewList() for j := 0; j < len(ixs2d[i]); j++ { lh2.Insert(ixs2d[i][j]) } lh.Insert(lh2) } handle.Free() return lh, nil
}
func Regex_expand(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) dest := []byte(parameters[1].(string)) template := parameters[2].(string) src := parameters[3].(string) for _, submatches := range (*re).FindAllStringSubmatchIndex(src, -1) { dest = (*re).ExpandString(dest, template, src, submatches) } handle.Free() return dest, nil
}
func Regex_replaceAll(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) src := parameters[1].(string) repl := parameters[2].(string) res := (*re).ReplaceAllString(src, repl) handle.Free() return res, nil
}
func Regex_replaceAllLiteral(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) src := parameters[1].(string) repl := parameters[2].(string) res := (*re).ReplaceAllLiteralString(src, repl) handle.Free() return res, nil
}
func Regex_replaceAllSpecial(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) src := parameters[1].(string) repl := parameters[2].(string) n := int(parameters[3].(float64)) skip := int(parameters[4].(float64)) handle.Free() if skip < 0 { skip = 0 } if n < 0 { if skip == 0 { return (*re).ReplaceAllString(src, repl), nil } n = math.MaxInt } if n == 0 || skip >= n { return src, nil } count := 0 res := (*re).ReplaceAllStringFunc(src, func(s string) string { count++ if count <= skip || count > n { return s } return (*re).ReplaceAllString(s, repl) }) return res, nil
}
func Regex_split(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) s := parameters[1].(string) n := int(parameters[2].(float64)) res := (*re).Split(s, n) lh, _ := vm.NewList() for i := 0; i < len(res); i++ { lh.Insert(res[i]) } handle.Free() return lh, nil
}
func Regex_toString(vm *wren.VM, parameters []any) (any, error) {
handle := parameters[0].(*wren.ForeignHandle) ifc, _ := handle.Get() re := ifc.(**regexp.Regexp) res := (*re).String() handle.Free() return res, nil
}
/* File methods */
func File_read(vm *wren.VM, parameters []any) (any, error) {
name := parameters[1].(string) data, err := os.ReadFile(name) check(err) return data, nil
}
func File_write(vm *wren.VM, parameters []any) (any, error) {
name := parameters[1].(string) data := parameters[2].(string) err := os.WriteFile(name, []byte(data), 0o666) check(err) return null, nil
}
func File_rename(vm *wren.VM, parameters []any) (any, error) {
oldpath := parameters[1].(string) newpath := parameters[2].(string) err := os.Rename(oldpath, newpath) check(err) return null, nil
}
func File_remove(vm *wren.VM, parameters []any) (any, error) {
name := parameters[1].(string) err := os.Remove(name) check(err) return null, nil
}
func moduleFn(vm *wren.VM, name string) (string, bool) {
if name != "meta" && name != "random" && !strings.HasSuffix(name, ".wren") { name += ".wren" } return wren.DefaultModuleLoader(vm, name)
}
func main() {
args := os.Args if len(args) != 2 { log.Fatal("There should be a single command line argument, namely the Wren fiie name.") } fileName := args[1] cfg := wren.NewConfig() cfg.LoadModuleFn = moduleFn vm := cfg.NewVM()
regexMethodMap := wren.MethodMap{ "static isMatch(_,_)": Regex_isMatch_static, "static quoteMeta(_)": Regex_quoteMeta, "literalPrefix()": Regex_literalPrefix, "longest()": Regex_longest, "numSubexp": Regex_numSubexp, "subexpIndex(_)": Regex_subexpIndex, "subexpNames": Regex_subexpNames, "isMatch(_)": Regex_isMatch, "find(_)": Regex_find, "findIndex(_)": Regex_findIndex, "findSubmatch(_)": Regex_findSubmatch, "findSubmatchIndex(_)": Regex_findSubmatchIndex, "findAll(_,_)": Regex_findAll, "findAllIndex(_,_)": Regex_findAllIndex, "findAllSubmatch(_,_)": Regex_findAllSubmatch, "findAllSubmatchIndex(_,_)": Regex_findAllSubmatchIndex, "expand(_,_,_)": Regex_expand, "replaceAll(_,_)": Regex_replaceAll, "replaceAllLiteral(_,_)": Regex_replaceAllLiteral, "replaceAll(_,_,_,_)": Regex_replaceAllSpecial, "split(_,_)": Regex_split, "toString": Regex_toString, }
fileMethodMap := wren.MethodMap{ "static read(_)": File_read, "static write(_,_)": File_write, "static rename(_,_)": File_rename, "static remove(_)": File_remove, }
classMap := wren.ClassMap{ "Regex": wren.NewClass(Regex_compile, nil, regexMethodMap), "File": wren.NewClass(nil, nil, fileMethodMap), }
classMap2 := wren.ClassMap{}
module := wren.NewModule(classMap) module2 := wren.NewModule(classMap2) vm.SetModule("./regex", module) vm.SetModule(fileName, module2) vm.InterpretFile(fileName) vm.Free()
}</lang>