McNaughton-Yamada-Thompson algorithm
- Description
The McNaughton-Yamada-Thompson algorithm (aka. Thompson algorithm) is designed to transform a regular expression into a Non-Deterministic Finite Automaton (NDFA) that recognizes the language represented by the regular expression.
As the workings of the algorithm are clearly described in the linked article they will not be repeated here.
- Task
Implement Thompson's Construction Algorithm to convert regular expressions into NFAs and match strings. The implementation should:
1. Support the following operations:
- Concatenation (.) - Union/Alternation (|) - Kleene Star (*) - Plus (+) - Optional (?) - Basic character literals
2. Include the following components:
- A regex-to-postfix converter using Shunting Yard algorithm - NFA state and transition representation - Epsilon closure computation - Pattern matching functionality
3. Implement the following features:
- State structure with: * Character label * Two possible transitions (edge1, edge2) * Support for epsilon transitions - NFA structure containing: * Initial state pointer * Accept state pointer
- String matching that: * Follows epsilon transitions * Tracks current possible states * Processes input characters * Determines if final state is accepting
4. Handle test cases demonstrating:
- Basic concatenation (a.b.c) - Star operations (c*) - Alternation (b|d) - Complex combinations ((a.(b|d))*) - Empty strings and various input patterns
The implementation should efficiently construct NFAs from regular expressions and correctly determine whether input strings match the given patterns.
C#
using System;
using System.Collections.Generic;
namespace NFARegex
{
class Program
{
static void Main(string[] args)
{
string[] infixes = { "a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c" };
string[] strings = { "", "abc", "abbc", "abcc", "abad", "abbbc" };
foreach (string infix in infixes)
{
foreach (string s in strings)
{
bool result = MatchRegex(infix, s);
Console.WriteLine((result ? "True " : "False ") + infix + " " + s);
}
Console.WriteLine();
}
}
class State
{
public char? Label; // Nullable char to represent character label, null for epsilon
public State Edge1;
public State Edge2;
public State(char? label = null)
{
Label = label;
Edge1 = null;
Edge2 = null;
}
}
class NFA
{
public State Initial;
public State Accept;
public NFA(State initial = null, State accept = null)
{
Initial = initial;
Accept = accept;
}
}
static string Shunt(string infix)
{
Dictionary<char, int> specials = new Dictionary<char, int>
{
{ '*', 60 },
{ '+', 55 },
{ '?', 50 },
{ '.', 40 },
{ '|', 20 }
};
string postfix = "";
Stack<char> stack = new Stack<char>();
foreach (char c in infix)
{
if (c == '(')
{
stack.Push(c);
}
else if (c == ')')
{
while (stack.Count > 0 && stack.Peek() != '(')
{
postfix += stack.Pop();
}
if (stack.Count > 0)
{
stack.Pop(); // Remove '('
}
}
else if (specials.ContainsKey(c))
{
while (stack.Count > 0 && specials.ContainsKey(stack.Peek()) && specials[c] <= specials[stack.Peek()])
{
postfix += stack.Pop();
}
stack.Push(c);
}
else
{
postfix += c;
}
}
while (stack.Count > 0)
{
postfix += stack.Pop();
}
return postfix;
}
static HashSet<State> Followes(State state)
{
HashSet<State> states = new HashSet<State>();
Stack<State> stack = new Stack<State>();
stack.Push(state);
while (stack.Count > 0)
{
State s = stack.Pop();
if (!states.Contains(s))
{
states.Add(s);
if (s.Label == null) // Epsilon transition
{
if (s.Edge1 != null)
{
stack.Push(s.Edge1);
}
if (s.Edge2 != null)
{
stack.Push(s.Edge2);
}
}
}
}
return states;
}
static NFA CompileRegex(string postfix)
{
Stack<NFA> nfaStack = new Stack<NFA>();
foreach (char c in postfix)
{
if (c == '*')
{
NFA nfa1 = nfaStack.Pop();
State initial = new State();
State accept = new State();
initial.Edge1 = nfa1.Initial;
initial.Edge2 = accept;
nfa1.Accept.Edge1 = nfa1.Initial;
nfa1.Accept.Edge2 = accept;
nfaStack.Push(new NFA(initial, accept));
}
else if (c == '.')
{
NFA nfa2 = nfaStack.Pop();
NFA nfa1 = nfaStack.Pop();
nfa1.Accept.Edge1 = nfa2.Initial;
nfaStack.Push(new NFA(nfa1.Initial, nfa2.Accept));
}
else if (c == '|')
{
NFA nfa2 = nfaStack.Pop();
NFA nfa1 = nfaStack.Pop();
State initial = new State();
State accept = new State();
initial.Edge1 = nfa1.Initial;
initial.Edge2 = nfa2.Initial;
nfa1.Accept.Edge1 = accept;
nfa2.Accept.Edge1 = accept;
nfaStack.Push(new NFA(initial, accept));
}
else if (c == '+')
{
NFA nfa1 = nfaStack.Pop();
State initial = new State();
State accept = new State();
initial.Edge1 = nfa1.Initial;
nfa1.Accept.Edge1 = nfa1.Initial;
nfa1.Accept.Edge2 = accept;
nfaStack.Push(new NFA(initial, accept));
}
else if (c == '?')
{
NFA nfa1 = nfaStack.Pop();
State initial = new State();
State accept = new State();
initial.Edge1 = nfa1.Initial;
initial.Edge2 = accept;
nfa1.Accept.Edge1 = accept;
nfaStack.Push(new NFA(initial, accept));
}
else // Literal character
{
State initial = new State(c);
State accept = new State();
initial.Edge1 = accept;
nfaStack.Push(new NFA(initial, accept));
}
}
return nfaStack.Pop();
}
static bool MatchRegex(string infix, string s)
{
string postfix = Shunt(infix);
// Uncomment the next line to see the postfix expression
// Console.WriteLine("Postfix: " + postfix);
NFA nfa = CompileRegex(postfix);
HashSet<State> current = Followes(nfa.Initial);
HashSet<State> nextStates = new HashSet<State>();
foreach (char c in s)
{
foreach (State state in current)
{
if (state.Label == c)
{
HashSet<State> follow = Followes(state.Edge1);
nextStates.UnionWith(follow);
}
}
current = new HashSet<State>(nextStates);
nextStates.Clear();
}
return current.Contains(nfa.Accept);
}
}
}
C++
#include <iostream>
#include <string>
#include <unordered_map>
#include <stack>
#include <vector>
#include <set>
// Define the State structure
struct State {
char label; // Character label, '\0' for epsilon
State* edge1; // First transition
State* edge2; // Second transition
State(char lbl = '\0') : label(lbl), edge1(nullptr), edge2(nullptr) {}
};
// Define the NFA structure
struct NFA {
State* initial;
State* accept;
NFA(State* init = nullptr, State* acc = nullptr) : initial(init), accept(acc) {}
};
// Function to convert infix regex to postfix using the Shunting Yard algorithm
std::string shunt(const std::string& infix) {
std::unordered_map<char, int> specials = {
{'*', 60},
{'+', 55},
{'?', 50},
{'.', 40},
{'|', 20}
};
std::string postfix;
std::stack<char> stack;
for (char c : infix) {
if (c == '(') {
stack.push(c);
}
else if (c == ')') {
while (!stack.empty() && stack.top() != '(') {
postfix += stack.top();
stack.pop();
}
if (!stack.empty()) {
stack.pop(); // Remove '('
}
}
else if (specials.find(c) != specials.end()) {
while (!stack.empty() && specials.find(stack.top()) != specials.end() &&
specials[c] <= specials[stack.top()]) {
postfix += stack.top();
stack.pop();
}
stack.push(c);
}
else {
postfix += c;
}
}
while (!stack.empty()) {
postfix += stack.top();
stack.pop();
}
return postfix;
}
// Function to compute the epsilon closure of a state
std::set<State*> followes(State* state) {
std::set<State*> states;
std::stack<State*> stack;
stack.push(state);
while (!stack.empty()) {
State* s = stack.top();
stack.pop();
if (states.find(s) == states.end()) {
states.insert(s);
if (s->label == '\0') { // Epsilon transition
if (s->edge1 != nullptr) {
stack.push(s->edge1);
}
if (s->edge2 != nullptr) {
stack.push(s->edge2);
}
}
}
}
return states;
}
// Function to compile postfix regex into an NFA
NFA compileRegex(const std::string& postfix) {
std::stack<NFA> nfaStack;
for (char c : postfix) {
if (c == '*') {
NFA nfa1 = nfaStack.top(); nfaStack.pop();
State* initial = new State();
State* accept = new State();
initial->edge1 = nfa1.initial;
initial->edge2 = accept;
nfa1.accept->edge1 = nfa1.initial;
nfa1.accept->edge2 = accept;
nfaStack.push(NFA(initial, accept));
}
else if (c == '.') {
NFA nfa2 = nfaStack.top(); nfaStack.pop();
NFA nfa1 = nfaStack.top(); nfaStack.pop();
nfa1.accept->edge1 = nfa2.initial;
nfaStack.push(NFA(nfa1.initial, nfa2.accept));
}
else if (c == '|') {
NFA nfa2 = nfaStack.top(); nfaStack.pop();
NFA nfa1 = nfaStack.top(); nfaStack.pop();
State* initial = new State();
State* accept = new State();
initial->edge1 = nfa1.initial;
initial->edge2 = nfa2.initial;
nfa1.accept->edge1 = accept;
nfa2.accept->edge1 = accept;
nfaStack.push(NFA(initial, accept));
}
else if (c == '+') {
NFA nfa1 = nfaStack.top(); nfaStack.pop();
State* initial = new State();
State* accept = new State();
initial->edge1 = nfa1.initial;
nfa1.accept->edge1 = nfa1.initial;
nfa1.accept->edge2 = accept;
nfaStack.push(NFA(initial, accept));
}
else if (c == '?') {
NFA nfa1 = nfaStack.top(); nfaStack.pop();
State* initial = new State();
State* accept = new State();
initial->edge1 = nfa1.initial;
initial->edge2 = accept;
nfa1.accept->edge1 = accept;
nfaStack.push(NFA(initial, accept));
}
else { // Literal character
State* initial = new State(c);
State* accept = new State();
initial->edge1 = accept;
nfaStack.push(NFA(initial, accept));
}
}
return nfaStack.top();
}
// Function to match a string against the regex
bool matchRegex(const std::string& infix, const std::string& str) {
std::string postfix = shunt(infix);
// Uncomment the next line to see the postfix expression
// std::cout << "Postfix: " << postfix << std::endl;
NFA nfa = compileRegex(postfix);
std::set<State*> current = followes(nfa.initial);
std::set<State*> nextStates;
for (char c : str) {
for (State* state : current) {
if (state->label == c) {
std::set<State*> follow = followes(state->edge1);
nextStates.insert(follow.begin(), follow.end());
}
}
current = nextStates;
nextStates.clear();
}
return current.find(nfa.accept) != current.end();
}
int main() {
std::vector<std::string> infixes = {"a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"};
std::vector<std::string> strings = {"", "abc", "abbc", "abcc", "abad", "abbbc"};
for (const auto& infix : infixes) {
for (const auto& str : strings) {
bool result = matchRegex(infix, str);
std::cout << (result ? "True " : "False ") << infix << " " << str << std::endl;
}
std::cout<<"\n";
}
return 0;
}
FreeBASIC
Const NULL As Any Ptr = 0
Type State
label As String * 1
edge1 As State Ptr
edge2 As State Ptr
End Type
Function createState(label As String, e1 As State Ptr = NULL, e2 As State Ptr = NULL) As State Ptr
Dim As State Ptr s = New State
s->label = label
s->edge1 = e1
s->edge2 = e2
Return s
End Function
Sub addState(state As State Ptr, states() As State Ptr, count As Integer)
If state = NULL Then Exit Sub
For i As Integer = 0 To count - 1
If states(i) = state Then Exit Sub
Next
states(count) = state
count += 1
If state->label = "" Then
addState(state->edge1, states(), count)
addState(state->edge2, states(), count)
End If
End Sub
Type NFA
initial As State Ptr
accept As State Ptr
End Type
Function createNFA(initial As State Ptr, accept As State Ptr) As NFA Ptr
Dim As NFA Ptr nfa = New NFA
nfa->initial = initial
nfa->accept = accept
Return nfa
End Function
Type StateSet
states(1023) As State Ptr
count As Integer = 0
End Type
Sub addToStateSet(Byref st As StateSet, s As State Ptr)
If st.count >= 1024 Then Exit Sub
For i As Integer = 0 To st.count - 1
If st.states(i) = s Then Exit Sub
Next
st.states(st.count) = s
st.count += 1
End Sub
Function containsState(Byref st As StateSet, s As State Ptr) As Boolean
For i As Integer = 0 To st.count - 1
If st.states(i) = s Then Return True
Next
Return False
End Function
Sub clearStateSet(Byref st As StateSet)
st.count = 0
End Sub
Sub addStateWithEpsilon(state As State Ptr, Byref st As StateSet)
If state = NULL Orelse containsState(st, state) Then Exit Sub
addToStateSet(st, state)
If state->label = "" Then
addStateWithEpsilon(state->edge1, st)
addStateWithEpsilon(state->edge2, st)
End If
End Sub
'Convert infix to postfix notation
Function shunt(infix As String) As String
Dim As String postfix = "", stack = ""
For i As Integer = 1 To Len(infix)
Dim As String c = Mid(infix, i, 1)
Select Case c
Case "("
stack = "(" + stack
Case ")"
While Len(stack) > 0 Andalso Left(stack, 1) <> "("
postfix += Left(stack, 1)
stack = Mid(stack, 2)
Wend
If Len(stack) > 0 Then stack = Mid(stack, 2)
Case "*", "+", "?", ".", "|"
Dim As Integer prec
Select Case c
Case "*": prec = 60
Case "+": prec = 55
Case "?": prec = 50
Case ".": prec = 40
Case "|": prec = 20
End Select
While Len(stack) > 0 Andalso Left(stack, 1) <> "("
Dim As Integer topPrec
Select Case Left(stack,1)
Case "*": topPrec = 60
Case "+": topPrec = 55
Case "?": topPrec = 50
Case ".": topPrec = 40
Case "|": topPrec = 20
End Select
If prec <= topPrec Then
postfix += Left(stack, 1)
stack = Mid(stack, 2)
Else
Exit While
End If
Wend
stack = c + stack
Case Else
postfix += c
End Select
Next
Return postfix + stack
End Function
' Function to match string against regex
Function matchRegex(infix As String, text As String) As Boolean
Dim As String postfix = shunt(infix)
Dim As NFA Ptr nfaStack(1023)
Dim As Integer nfaTop = 0
For i As Integer = 1 To Len(postfix)
Dim As String c = Mid(postfix, i, 1)
Select Case c
Case "*"
Dim As NFA Ptr nfa = nfaStack(nfaTop - 1)
Dim As State Ptr initial = createState("")
Dim As State Ptr accept = createState("")
initial->edge1 = nfa->initial
initial->edge2 = accept
nfa->accept->edge1 = nfa->initial
nfa->accept->edge2 = accept
nfaStack(nfaTop - 1) = createNFA(initial, accept)
Case "."
nfaTop -= 1
Dim As NFA Ptr nfa2 = nfaStack(nfaTop)
Dim As NFA Ptr nfa1 = nfaStack(nfaTop - 1)
nfa1->accept->edge1 = nfa2->initial
nfaStack(nfaTop - 1) = createNFA(nfa1->initial, nfa2->accept)
Case "|"
nfaTop -= 1
Dim As NFA Ptr nfa2 = nfaStack(nfaTop)
Dim As NFA Ptr nfa1 = nfaStack(nfaTop - 1)
Dim As State Ptr initial = createState("")
Dim As State Ptr accept = createState("")
initial->edge1 = nfa1->initial
initial->edge2 = nfa2->initial
nfa1->accept->edge1 = accept
nfa2->accept->edge1 = accept
nfaStack(nfaTop - 1) = createNFA(initial, accept)
Case Else ' Literal character
Dim As State Ptr initial = createState(c)
Dim As State Ptr accept = createState("")
initial->edge1 = accept
nfaStack(nfaTop) = createNFA(initial, accept)
nfaTop += 1
End Select
Next
Dim As NFA Ptr nfa = nfaStack(0)
Dim As StateSet current, sgte
addStateWithEpsilon(nfa->initial, current)
For i As Integer = 1 To Len(text)
Dim As String c = Mid(text, i, 1)
clearStateSet(sgte)
For j As Integer = 0 To current.count - 1
If current.states(j)->label = c Then
addStateWithEpsilon(current.states(j)->edge1, sgte)
End If
Next
current = sgte
Next
Return containsState(current, nfa->accept)
End Function
'Test the implementation
Dim As String patterns(3) = {"a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"}
Dim As String tests(5) = {"", "abc", "abbc", "abcc", "abad", "abbbc"}
For i As Integer = 0 To 3
For j As Integer = 0 To 5
Print Iif(matchRegex(patterns(i), tests(j)), "True ", "False "); patterns(i); " "; tests(j)
Next
Print
Next
Sleep
- Output:
False a.b.c* True a.b.c* abc False a.b.c* abbc True a.b.c* abcc False a.b.c* abad False a.b.c* abbbc False a.(b|d).c* True a.(b|d).c* abc False a.(b|d).c* abbc True a.(b|d).c* abcc False a.(b|d).c* abad False a.(b|d).c* abbbc True (a.(b|d))* False (a.(b|d))* abc False (a.(b|d))* abbc False (a.(b|d))* abcc True (a.(b|d))* abad False (a.(b|d))* abbbc False a.(b.b)*.c False a.(b.b)*.c abc True a.(b.b)*.c abbc False a.(b.b)*.c abcc False a.(b.b)*.c abad False a.(b.b)*.c abbbc
Go
package main
import (
"fmt"
)
// Define the State structure
type State struct {
label rune // Character label, 0 for epsilon
edge1 *State // First transition
edge2 *State // Second transition
}
// Define the NFA structure
type NFA struct {
initial *State
accept *State
}
// Function to convert infix regex to postfix using the Shunting Yard algorithm
func shunt(infix string) string {
specials := map[rune]int{
'*': 60,
'+': 55,
'?': 50,
'.': 40,
'|': 20,
}
var postfix []rune
var stack []rune
for _, c := range infix {
if c == '(' {
stack = append(stack, c)
} else if c == ')' {
for len(stack) > 0 && stack[len(stack)-1] != '(' {
postfix = append(postfix, stack[len(stack)-1])
stack = stack[:len(stack)-1]
}
if len(stack) > 0 {
stack = stack[:len(stack)-1] // Remove '('
}
} else if _, ok := specials[c]; ok {
for len(stack) > 0 {
top := stack[len(stack)-1]
if _, ok := specials[top]; ok && specials[c] <= specials[top] {
postfix = append(postfix, top)
stack = stack[:len(stack)-1]
} else {
break
}
}
stack = append(stack, c)
} else {
postfix = append(postfix, c)
}
}
for len(stack) > 0 {
postfix = append(postfix, stack[len(stack)-1])
stack = stack[:len(stack)-1]
}
return string(postfix)
}
// Function to compute the epsilon closure of a state
func followes(state *State) map[*State]bool {
states := make(map[*State]bool)
stack := []*State{state}
for len(stack) > 0 {
s := stack[len(stack)-1]
stack = stack[:len(stack)-1]
if !states[s] {
states[s] = true
if s.label == 0 { // Epsilon transition
if s.edge1 != nil {
stack = append(stack, s.edge1)
}
if s.edge2 != nil {
stack = append(stack, s.edge2)
}
}
}
}
return states
}
// Function to compile postfix regex into an NFA
func compileRegex(postfix string) NFA {
var nfaStack []NFA
for _, c := range postfix {
switch c {
case '*':
nfa1 := nfaStack[len(nfaStack)-1]
nfaStack = nfaStack[:len(nfaStack)-1]
initial := &State{}
accept := &State{}
initial.edge1 = nfa1.initial
initial.edge2 = accept
nfa1.accept.edge1 = nfa1.initial
nfa1.accept.edge2 = accept
nfaStack = append(nfaStack, NFA{initial, accept})
case '+':
nfa1 := nfaStack[len(nfaStack)-1]
nfaStack = nfaStack[:len(nfaStack)-1]
initial := &State{}
accept := &State{}
initial.edge1 = nfa1.initial
nfa1.accept.edge1 = nfa1.initial
nfa1.accept.edge2 = accept
nfaStack = append(nfaStack, NFA{initial, accept})
case '?':
nfa1 := nfaStack[len(nfaStack)-1]
nfaStack = nfaStack[:len(nfaStack)-1]
initial := &State{}
accept := &State{}
initial.edge1 = nfa1.initial
initial.edge2 = accept
nfa1.accept.edge1 = accept
nfaStack = append(nfaStack, NFA{initial, accept})
case '.':
nfa2 := nfaStack[len(nfaStack)-1]
nfaStack = nfaStack[:len(nfaStack)-1]
nfa1 := nfaStack[len(nfaStack)-1]
nfaStack = nfaStack[:len(nfaStack)-1]
nfa1.accept.edge1 = nfa2.initial
nfaStack = append(nfaStack, NFA{nfa1.initial, nfa2.accept})
case '|':
nfa2 := nfaStack[len(nfaStack)-1]
nfaStack = nfaStack[:len(nfaStack)-1]
nfa1 := nfaStack[len(nfaStack)-1]
nfaStack = nfaStack[:len(nfaStack)-1]
initial := &State{}
accept := &State{}
initial.edge1 = nfa1.initial
initial.edge2 = nfa2.initial
nfa1.accept.edge1 = accept
nfa2.accept.edge1 = accept
nfaStack = append(nfaStack, NFA{initial, accept})
default: // Literal character
initial := &State{label: c}
accept := &State{}
initial.edge1 = accept
nfaStack = append(nfaStack, NFA{initial, accept})
}
}
return nfaStack[len(nfaStack)-1]
}
// Function to match a string against the regex
func matchRegex(infix, str string) bool {
postfix := shunt(infix)
// Uncomment the next line to see the postfix expression
// fmt.Println("Postfix:", postfix)
nfa := compileRegex(postfix)
currentStates := followes(nfa.initial)
var nextStates map[*State]bool
for _, c := range str {
nextStates = make(map[*State]bool)
for state := range currentStates {
if state.label == c {
follow := followes(state.edge1)
for s := range follow {
nextStates[s] = true
}
}
}
currentStates = nextStates
}
return currentStates[nfa.accept]
}
func main() {
infixes := []string{"a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"}
strings := []string{"", "abc", "abbc", "abcc", "abad", "abbbc"}
for _, infix := range infixes {
for _, str := range strings {
result := matchRegex(infix, str)
if result {
fmt.Println("True", infix, str)
} else {
fmt.Println("False", infix, str)
}
}
fmt.Println()
}
}
Java
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
public final class McNaughtonYamadaThompsonAlgorithm {
public static void main(String[] args) {
List<String> infixes = List.of( "a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c" );
List<String> strings = List.of( "", "abc", "abbc", "abcc", "abad", "abbbc" );
for ( String infix : infixes ) {
for ( String string : strings ) {
final boolean result = matchRegex(string, infix);
System.out.println(( result ? "True " : "False " ) + infix + " " + string);
}
System.out.println();
}
}
// Match the given string against the given infix regex
private static boolean matchRegex(String text, String infix) {
String postfix = shunt(infix);
// Uncomment the next line to see the postfix expression
// System.out.println("Postfix: " + postfix);
NFA nfa = compileRegex(postfix);
Set<State> current = followes(nfa.initial);
Set<State> nextStates = new HashSet<State>();
for ( char ch : text.toCharArray() ) {
for ( State state : current ) {
if ( state.label == ch ) {
Set<State> follow = followes(state.edge1);
nextStates.addAll(follow);
}
}
current = new HashSet<State>(nextStates);
nextStates.clear();
}
return current.contains(nfa.accept);
}
// Compile the given postfix regex into an NFA
private static NFA compileRegex(String postfix) {
Stack<NFA> nfaStack = new Stack<NFA>();
for ( char ch : postfix.toCharArray() ) {
switch ( ch ) {
case '*' -> {
NFA nfa1 = nfaStack.pop();
State initial = new State();
State accept = new State();
initial.edge1 = nfa1.initial;
initial.edge2 = accept;
nfa1.accept.edge1 = nfa1.initial;
nfa1.accept.edge2 = accept;
nfaStack.push( new NFA(initial, accept) );
}
case '.' -> {
NFA nfa2 = nfaStack.pop();
NFA nfa1 = nfaStack.pop();
nfa1.accept.edge1 = nfa2.initial;
nfaStack.push( new NFA(nfa1.initial, nfa2.accept) );
}
case '|' -> {
NFA nfa2 = nfaStack.pop();
NFA nfa1 = nfaStack.pop();
State initial = new State();
State accept = new State();
initial.edge1 = nfa1.initial;
initial.edge2 = nfa2.initial;
nfa1.accept.edge1 = accept;
nfa2.accept.edge1 = accept;
nfaStack.push( new NFA(initial, accept) );
}
case '+' -> {
NFA nfa1 = nfaStack.pop();
State initial = new State();
State accept = new State();
initial.edge1 = nfa1.initial;
nfa1.accept.edge1 = nfa1.initial;
nfa1.accept.edge2 = accept;
nfaStack.push( new NFA(initial, accept) );
}
case '?' -> {
NFA nfa1 = nfaStack.pop();
State initial = new State();
State accept = new State();
initial.edge1 = nfa1.initial;
initial.edge2 = accept;
nfa1.accept.edge1 = accept;
nfaStack.push( new NFA(initial, accept) );
}
default -> { // Literal character
State initial = new State(ch);
State accept = new State();
initial.edge1 = accept;
nfaStack.push( new NFA(initial, accept) );
}
}
}
return nfaStack.peek();
}
// Compute the epsilon closure of the given state
private static Set<State> followes(State state) {
Set<State> states = new HashSet<State>();
Stack<State> stack = new Stack<State>();
stack.push(state);
while ( ! stack.isEmpty() ) {
State current = stack.pop();
if ( ! states.contains(current) ) {
states.add(current);
if ( current.label == '\0' ) { // Epsilon transition
if ( current.edge1 != null ) {
stack.push(current.edge1);
}
if ( current.edge2 != null ) {
stack.push(current.edge2);
}
}
}
}
return states;
}
// Convert the given infix regex to postfix regex using the Shunting Yard algorithm
private static String shunt(String infix) {
Map<Character, Integer> specials = Map.ofEntries( Map.entry( '*', 60 ), Map.entry( '+', 55 ),
Map.entry( '?', 50 ), Map.entry( '.', 40 ), Map.entry( '|', 20 ) );
Stack<Character> stack = new Stack<Character>();
String postfix = "";
for ( char ch : infix.toCharArray() ) {
if ( ch == '(' ) {
stack.push(ch);
}
else if ( ch == ')' ) {
while ( ! stack.isEmpty() && stack.peek() != '(' ) {
postfix += stack.pop();
}
if ( ! stack.isEmpty() ) {
stack.pop(); // Remove '('
}
}
else if ( specials.containsKey(ch) ) {
while ( ! stack.isEmpty() && specials.containsKey(stack.peek()) &&
specials.get(ch) <= specials.get(stack.peek()) ) {
postfix += stack.pop();
}
stack.push(ch);
}
else {
postfix += ch;
}
}
while ( ! stack.isEmpty() ) {
postfix += stack.pop();
}
return postfix;
}
private static final class State {
public State(char aLabel) {
label = aLabel;
}
public State() {
label = '\0';
}
private State edge1 = null; // First transition
private State edge2 = null; // Second transition
private final char label; // Character label, '\0' for epsilon
}
private static final class NFA {
public NFA(State aInitial, State aAccept) {
initial = aInitial;
accept = aAccept;
}
private State initial;
private State accept;
}
}
JavaScript
class McNaughtonYamadaThompsonAlgorithm {
static main() {
const infixes = ["a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"];
const strings = ["", "abc", "abbc", "abcc", "abad", "abbbc"];
for (const infix of infixes) {
for (const string of strings) {
const result = McNaughtonYamadaThompsonAlgorithm.matchRegex(string, infix);
console.log(`${result ? "True" : "False"} ${infix} ${string}`);
}
console.log();
}
}
static matchRegex(text, infix) {
const postfix = McNaughtonYamadaThompsonAlgorithm.shunt(infix);
// Uncomment the next line to see the postfix expression
// console.log("Postfix: " + postfix);
const nfa = McNaughtonYamadaThompsonAlgorithm.compileRegex(postfix);
let current = McNaughtonYamadaThompsonAlgorithm.followes(nfa.initial);
let nextStates = new Set();
for (const ch of text) {
for (const state of current) {
if (state.label === ch) {
const follow = McNaughtonYamadaThompsonAlgorithm.followes(state.edge1);
for (const f of follow) {
nextStates.add(f);
}
}
}
current = new Set(nextStates);
nextStates.clear();
}
return current.has(nfa.accept);
}
static compileRegex(postfix) {
const nfaStack = [];
for (const ch of postfix) {
switch (ch) {
case '*': {
const nfa1 = nfaStack.pop();
const initial = new State();
const accept = new State();
initial.edge1 = nfa1.initial;
initial.edge2 = accept;
nfa1.accept.edge1 = nfa1.initial;
nfa1.accept.edge2 = accept;
nfaStack.push(new NFA(initial, accept));
break;
}
case '.': {
const nfa2 = nfaStack.pop();
const nfa1 = nfaStack.pop();
nfa1.accept.edge1 = nfa2.initial;
nfaStack.push(new NFA(nfa1.initial, nfa2.accept));
break;
}
case '|': {
const nfa2 = nfaStack.pop();
const nfa1 = nfaStack.pop();
const initial = new State();
const accept = new State();
initial.edge1 = nfa1.initial;
initial.edge2 = nfa2.initial;
nfa1.accept.edge1 = accept;
nfa2.accept.edge1 = accept;
nfaStack.push(new NFA(initial, accept));
break;
}
case '+': {
const nfa1 = nfaStack.pop();
const initial = new State();
const accept = new State();
initial.edge1 = nfa1.initial;
nfa1.accept.edge1 = nfa1.initial;
nfa1.accept.edge2 = accept;
nfaStack.push(new NFA(initial, accept));
break;
}
case '?': {
const nfa1 = nfaStack.pop();
const initial = new State();
const accept = new State();
initial.edge1 = nfa1.initial;
initial.edge2 = accept;
nfa1.accept.edge1 = accept;
nfaStack.push(new NFA(initial, accept));
break;
}
default: {
const initial = new State(ch);
const accept = new State();
initial.edge1 = accept;
nfaStack.push(new NFA(initial, accept));
break;
}
}
}
return nfaStack.pop();
}
static followes(state) {
const states = new Set();
const stack = [state];
while (stack.length > 0) {
const current = stack.pop();
if (!states.has(current)) {
states.add(current);
if (current.label === '\0') {
if (current.edge1) stack.push(current.edge1);
if (current.edge2) stack.push(current.edge2);
}
}
}
return states;
}
static shunt(infix) {
const specials = { '*': 60, '+': 55, '?': 50, '.': 40, '|': 20 };
const stack = [];
let postfix = "";
for (const ch of infix) {
if (ch === '(') {
stack.push(ch);
} else if (ch === ')') {
while (stack.length > 0 && stack[stack.length - 1] !== '(') {
postfix += stack.pop();
}
if (stack.length > 0) {
stack.pop(); // Remove '('
}
} else if (specials[ch]) {
while (stack.length > 0 && specials[stack[stack.length - 1]] &&
specials[ch] <= specials[stack[stack.length - 1]]) {
postfix += stack.pop();
}
stack.push(ch);
} else {
postfix += ch;
}
}
while (stack.length > 0) {
postfix += stack.pop();
}
return postfix;
}
}
class State {
constructor(label = '\0') {
this.label = label;
this.edge1 = null;
this.edge2 = null;
}
}
class NFA {
constructor(initial, accept) {
this.initial = initial;
this.accept = accept;
}
}
// Run the main method
McNaughtonYamadaThompsonAlgorithm.main();
Julia
abstract type MYT end
struct NIL_MYT <: MYT end
const None = NIL_MYT()
mutable struct State <: MYT
label::Char
edge1::MYT
edge2::MYT
State(ch = '\0') = new(ch, None, None)
end
mutable struct NFA
initial::State
accept::State
NFA(initial = None, accept = None) = new(initial, accept)
end
function shunt(infix)
specials = Dict(
'*' => 60,
'+' => 55,
'?' => 50,
'.' => 40,
'|' => 20,
)
postfix = Char[]
stack = Char[]
for c in infix
if c == '('
push!(stack, c)
elseif c == ')'
while !isempty(stack) && stack[end] != '('
push!(postfix, pop!(stack))
end
if !isempty(stack)
pop!(stack) # Remove '('
end
elseif haskey(specials, c)
while !isempty(stack) && haskey(specials, stack[end]) && specials[c] <= specials[stack[end]]
push!(postfix, pop!(stack))
end
push!(stack, c)
else
push!(postfix, c)
end
end
while !isempty(stack)
push!(postfix, pop!(stack))
end
return postfix
end
function followes(state)
states = Set{State}()
stack = [state]
while !isempty(stack)
s = pop!(stack)
if s ∉ states
push!(states, s)
if s.label == '\0' # Epsilon transition
if s.edge1 != None
push!(stack, s.edge1)
end
if s.edge2 != None
push!(stack, s.edge2)
end
end
end
end
return states
end
function compileRegex(postfix)
nfaStack = NFA[]
for c in postfix
if c == '*'
nfa1 = pop!(nfaStack)
initial = State()
accept = State()
initial.edge1 = nfa1.initial
initial.edge2 = accept
nfa1.accept.edge1 = nfa1.initial
nfa1.accept.edge2 = accept
push!(nfaStack, NFA(initial, accept))
elseif c == '.'
nfa2 = pop!(nfaStack)
nfa1 = pop!(nfaStack)
nfa1.accept.edge1 = nfa2.initial
push!(nfaStack, NFA(nfa1.initial, nfa2.accept))
elseif c == '|'
nfa2 = pop!(nfaStack)
nfa1 = pop!(nfaStack)
initial = State()
accept = State()
initial.edge1 = nfa1.initial
initial.edge2 = nfa2.initial
nfa1.accept.edge1 = accept
nfa2.accept.edge1 = accept
push!(nfaStack, NFA(initial, accept))
elseif c == '+'
nfa1 = pop!(nfaStack)
initial = State()
accept = State()
initial.edge1 = nfa1.initial
nfa1.accept.edge1 = nfa1.initial
nfa1.accept.edge2 = accept
push!(nfaStack, NFA(initial, accept))
elseif c == '?'
nfa1 = pop!(nfaStack)
initial = State()
accept = State()
initial.edge1 = nfa1.initial
initial.edge2 = accept
nfa1.accept.edge1 = accept
push!(nfaStack, NFA(initial, accept))
else # Literal character
initial = State(c)
accept = State()
initial.edge1 = accept
push!(nfaStack, NFA(initial, accept))
end
end
return pop!(nfaStack)
end
function matchRegex(infix, s)
postfix = shunt(infix)
# Uncomment the next line to see the postfix expression
# print("Postfix:", postfix)
nfa = compileRegex(postfix)
current = Set{State}(followes(nfa.initial))
nextStates = Set{State}()
for c in s
for state in current
if state.label == c
follow = followes(state.edge1)
union!(nextStates, follow)
end
end
current = nextStates
nextStates = Set{State}()
end
return nfa.accept ∈ current
end
const infixes = ["a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"]
const strings = ["", "abc", "abbc", "abcc", "abad", "abbbc"]
println("Pattern String Matched?\n", "_"^30)
for infix in infixes
for s in strings
println(rpad(infix, 14), rpad(s, 8), matchRegex(infix, s))
end
println()
end
- Output:
Pattern String Matched? ______________________________ a.b.c* false a.b.c* abc true a.b.c* abbc false a.b.c* abcc true a.b.c* abad false a.b.c* abbbc false
a.(b|d).c* false a.(b|d).c* abc true a.(b|d).c* abbc false a.(b|d).c* abcc true a.(b|d).c* abad false a.(b|d).c* abbbc false
(a.(b|d))* true (a.(b|d))* abc false (a.(b|d))* abbc false (a.(b|d))* abcc false (a.(b|d))* abad true (a.(b|d))* abbbc false
a.(b.b)*.c false a.(b.b)*.c abc false a.(b.b)*.c abbc true a.(b.b)*.c abcc false a.(b.b)*.c abad false a.(b.b)*.c abbbc false
Mathematica /Wolfram Language
(* Initialize global variables *)
ClearAll[createState, setEdge1, setEdge2, shunt, compileRegex, followes, matchRegex];
stateCounter = 0;
stateList = <||>;
(* Function to create a new state *)
createState[label_: Null] := Module[{id},
id = "state" <> ToString[++stateCounter];
stateList[id] = <|"label" -> label, "edge1" -> Null, "edge2" -> Null|>;
id
];
(* Functions to set edges for a state *)
setEdge1[stateID_, edgeID_] := (stateList[stateID, "edge1"] = edgeID);
setEdge2[stateID_, edgeID_] := (stateList[stateID, "edge2"] = edgeID);
(* Function to convert infix regex to postfix using the Shunting Yard algorithm *)
shunt[infix_String] := Module[{specials, postfix = "", stack = {}, cList, c},
specials = <|"*" -> 60, "+" -> 55, "?" -> 50, "." -> 40, "|" -> 20|>;
cList = Characters[infix];
Do[
c = cList[[i]];
If[c == "(",
AppendTo[stack, c],
If[c == ")",
While[stack =!= {} && Last[stack] != "(",
postfix = postfix <> Last[stack];
stack = Most[stack];
];
If[stack =!= {},
stack = Most[stack]; (* Remove "(" *)
],
If[KeyExistsQ[specials, c],
While[stack =!= {} && KeyExistsQ[specials, Last[stack]] && specials[c] <= specials[Last[stack]],
postfix = postfix <> Last[stack];
stack = Most[stack];
];
AppendTo[stack, c],
postfix = postfix <> c
]
]
],
{i, Length[cList]}
];
While[stack =!= {},
postfix = postfix <> Last[stack];
stack = Most[stack];
];
postfix
];
(* Function to compile postfix regex into an NFA *)
compileRegex[postfix_String] := Module[{nfaStack = {}, cList, c, initial, accept, nfa1, nfa2},
cList = Characters[postfix];
Do[
c = cList[[i]];
Switch[c,
"*",
nfa1 = Last[nfaStack]; nfaStack = Most[nfaStack];
initial = createState[];
accept = createState[];
setEdge1[initial, nfa1["initial"]];
setEdge2[initial, accept];
setEdge1[nfa1["accept"], nfa1["initial"]];
setEdge2[nfa1["accept"], accept];
AppendTo[nfaStack, <|"initial" -> initial, "accept" -> accept|>],
".",
nfa2 = Last[nfaStack]; nfaStack = Most[nfaStack];
nfa1 = Last[nfaStack]; nfaStack = Most[nfaStack];
setEdge1[nfa1["accept"], nfa2["initial"]];
AppendTo[nfaStack, <|"initial" -> nfa1["initial"], "accept" -> nfa2["accept"]|>],
"|",
nfa2 = Last[nfaStack]; nfaStack = Most[nfaStack];
nfa1 = Last[nfaStack]; nfaStack = Most[nfaStack];
initial = createState[];
accept = createState[];
setEdge1[initial, nfa1["initial"]];
setEdge2[initial, nfa2["initial"]];
setEdge1[nfa1["accept"], accept];
setEdge1[nfa2["accept"], accept];
AppendTo[nfaStack, <|"initial" -> initial, "accept" -> accept|>],
"+",
nfa1 = Last[nfaStack]; nfaStack = Most[nfaStack];
initial = createState[];
accept = createState[];
setEdge1[initial, nfa1["initial"]];
setEdge1[nfa1["accept"], nfa1["initial"]];
setEdge2[nfa1["accept"], accept];
AppendTo[nfaStack, <|"initial" -> initial, "accept" -> accept|>],
"?",
nfa1 = Last[nfaStack]; nfaStack = Most[nfaStack];
initial = createState[];
accept = createState[];
setEdge1[initial, nfa1["initial"]];
setEdge2[initial, accept];
setEdge1[nfa1["accept"], accept];
AppendTo[nfaStack, <|"initial" -> initial, "accept" -> accept|>],
_, (* Literal character *)
initial = createState[c];
accept = createState[];
setEdge1[initial, accept];
AppendTo[nfaStack, <|"initial" -> initial, "accept" -> accept|>]
],
{i, Length[cList]}
];
Last[nfaStack]
];
(* Function to compute the epsilon closure of a state *)
followes[stateID_] := Module[{states = {}, stack = {stateID}, s},
While[stack =!= {},
s = Last[stack]; stack = Most[stack];
If[! MemberQ[states, s],
AppendTo[states, s];
If[stateList[s, "label"] === Null,
If[stateList[s, "edge1"] =!= Null, AppendTo[stack, stateList[s, "edge1"]]];
If[stateList[s, "edge2"] =!= Null, AppendTo[stack, stateList[s, "edge2"]]];
]
]
];
states
];
(* Function to match a string against the regex *)
matchRegex[infix_String, str_String] := Module[{postfix, nfa, current, nextStates, cList, c},
postfix = shunt[infix];
stateCounter = 0; (* Reset stateCounter *)
stateList = <||>; (* Reset stateList *)
nfa = compileRegex[postfix];
current = followes[nfa["initial"]];
cList = Characters[str];
Do[
nextStates = {};
c = cList[[i]];
Do[
If[stateList[state, "label"] === c,
nextStates = Union[nextStates, followes[stateList[state, "edge1"]]]
],
{state, current}
];
current = nextStates;
,
{i, Length[cList]}
];
MemberQ[current, nfa["accept"]]
];
(* Test cases *)
infixes = {"a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"};
strings = {"", "abc", "abbc", "abcc", "abad", "abbbc"};
Do[
Do[
result = matchRegex[infix, str];
Print[If[result, "True ", "False "], infix, " ", str];
,
{str, strings}
];
Print["\n"];
,
{infix, infixes}
];
Phix
sequence stateList = {} -- entries as follows:
enum LABEL, -- character label - null for epsilon
EDGE1, -- first transition
EDGE2 -- second transition
enum INITIAL, ACCEPTS
type nfaentry(sequence n) return length(n)=2 end type
function createState()
stateList &= {{null, null, null}}
return length(stateList)
end function
function compileRegex(string postfix)
stateList = {} // reset
sequence nfa = {}
nfaentry nfa1, nfa2
integer initial, accepts
for c in postfix do
if c!='.' then
initial = createState()
accepts = createState()
end if
if find(c,".|") then
{nfa,nfa2} = {nfa[1..$-1],nfa[$]}
end if
if find(c,"*.|+?") then
{nfa,nfa1} = {nfa[1..$-1],nfa[$]}
end if
if find(c,"*|+?") then
stateList[initial][EDGE1] = nfa1[INITIAL]
end if
if find(c,"*?") then
stateList[initial][EDGE2] = accepts
end if
if find(c,"*+") then
stateList[nfa1[ACCEPTS]][EDGE1] = nfa1[INITIAL]
stateList[nfa1[ACCEPTS]][EDGE2] = accepts
elsif c='.' then
stateList[nfa1[ACCEPTS]][EDGE1] = nfa2[INITIAL]
initial = nfa1[INITIAL]
accepts = nfa2[ACCEPTS]
elsif c='|' then
stateList[initial][EDGE2] = nfa2[INITIAL]
stateList[nfa1[ACCEPTS]][EDGE1] = accepts
stateList[nfa2[ACCEPTS]][EDGE1] = accepts
elsif c='?' then
stateList[nfa1[ACCEPTS]][EDGE1] = accepts
else // Literal character
stateList[initial][LABEL] = c
stateList[initial][EDGE1] = accepts
end if
nfa &= {{initial, accepts}}
end for
return nfa[$]
end function
function followes(integer id)
// compute the epsilon closure of a state
sequence states = {}, stack = {id}
while length(stack) do
id = stack[$]; stack = stack[1..$-1]
if not find(id,states) then
states &= id
integer {label,e1,e2} = stateList[id]
if label=null then
if e1!=null then stack &= e1 end if
if e2!=null then stack &= e2 end if
end if
end if
end while
return states
end function
function shunt(string infix)
// convert infix into postfix notation,
// using the shunting yard algorithm
string postfix = "", stack = "", ops = "|.?+*"
for c in infix do
if c='(' then
stack &= c
elsif c=')' then
while stack[$]!='(' do
postfix &= stack[$]
stack = stack[1..-2]
end while
assert(stack[$]='(')
stack = stack[1..-2] // remove '('
else
integer priority = find(c,ops)
if priority then
while length(stack)
and priority<=find(stack[$],ops) do
postfix &= stack[$]
stack = stack[1..-2]
end while
stack &= c
else
postfix &= c
end if
end if
end for
if length(stack) then
postfix &= reverse(stack)
end if
return postfix
end function
include sets.e -- for union()
procedure matchRegex(string infix, sequence tests, string expected)
string postfix = shunt(infix)
nfaentry nfa = compileRegex(postfix)
string res = ""
for s in tests do
sequence current = followes(nfa[INITIAL]),
nextStates = {}
for c in s do
for state in current do
integer {label,e1,e2} = stateList[state]
if label=c then
nextStates = union(nextStates,followes(e1))
end if
end for
current = nextStates
nextStates = {}
end for
res &= iff(find(nfa[ACCEPTS],current)?'T':'F')
end for
assert(res=expected)
printf(1," infix:%-10s => postfix:%-8s => results:%s\n",{infix,postfix,res})
end procedure
constant tests = {"", "abc", "abbc", "abcc", "abad", "abbbc"},
expressions = {{"a.b.c*", "FTFTFF"},
{"a.(b|d).c*","FTFTFF"},
{"(a.(b|d))*","TFFFTF"},
{"a.(b.b)*.c","FFTFFF"}}
printf(1,"For tests in %v:\n",{tests})
for ie in expressions do
string {infix,expected} = ie
matchRegex(infix,tests,expected)
end for
- Output:
For tests in {"","abc","abbc","abcc","abad","abbbc"}: infix:a.b.c* => postfix:ab.c*. => results:FTFTFF infix:a.(b|d).c* => postfix:abd|.c*. => results:FTFTFF infix:(a.(b|d))* => postfix:abd|.* => results:TFFFTF infix:a.(b.b)*.c => postfix:abb.*.c. => results:FFTFFF
Python
class State:
def __init__(self, label=None):
self.label = label # Character label, None for epsilon
self.edge1 = None # First transition
self.edge2 = None # Second transition
class NFA:
def __init__(self, initial=None, accept=None):
self.initial = initial
self.accept = accept
def shunt(infix):
specials = {
'*': 60,
'+': 55,
'?': 50,
'.': 40,
'|': 20
}
postfix = ''
stack = []
for c in infix:
if c == '(':
stack.append(c)
elif c == ')':
while stack and stack[-1] != '(':
postfix += stack.pop()
if stack:
stack.pop() # Remove '('
elif c in specials:
while stack and stack[-1] in specials and specials[c] <= specials[stack[-1]]:
postfix += stack.pop()
stack.append(c)
else:
postfix += c
while stack:
postfix += stack.pop()
return postfix
def followes(state):
states = set()
stack = [state]
while stack:
s = stack.pop()
if s not in states:
states.add(s)
if s.label is None: # Epsilon transition
if s.edge1 is not None:
stack.append(s.edge1)
if s.edge2 is not None:
stack.append(s.edge2)
return states
def compileRegex(postfix):
nfaStack = []
for c in postfix:
if c == '*':
nfa1 = nfaStack.pop()
initial = State()
accept = State()
initial.edge1 = nfa1.initial
initial.edge2 = accept
nfa1.accept.edge1 = nfa1.initial
nfa1.accept.edge2 = accept
nfaStack.append(NFA(initial, accept))
elif c == '.':
nfa2 = nfaStack.pop()
nfa1 = nfaStack.pop()
nfa1.accept.edge1 = nfa2.initial
nfaStack.append(NFA(nfa1.initial, nfa2.accept))
elif c == '|':
nfa2 = nfaStack.pop()
nfa1 = nfaStack.pop()
initial = State()
accept = State()
initial.edge1 = nfa1.initial
initial.edge2 = nfa2.initial
nfa1.accept.edge1 = accept
nfa2.accept.edge1 = accept
nfaStack.append(NFA(initial, accept))
elif c == '+':
nfa1 = nfaStack.pop()
initial = State()
accept = State()
initial.edge1 = nfa1.initial
nfa1.accept.edge1 = nfa1.initial
nfa1.accept.edge2 = accept
nfaStack.append(NFA(initial, accept))
elif c == '?':
nfa1 = nfaStack.pop()
initial = State()
accept = State()
initial.edge1 = nfa1.initial
initial.edge2 = accept
nfa1.accept.edge1 = accept
nfaStack.append(NFA(initial, accept))
else: # Literal character
initial = State(c)
accept = State()
initial.edge1 = accept
nfaStack.append(NFA(initial, accept))
return nfaStack.pop()
def matchRegex(infix, s):
postfix = shunt(infix)
# Uncomment the next line to see the postfix expression
# print("Postfix:", postfix)
nfa = compileRegex(postfix)
current = set(followes(nfa.initial))
nextStates = set()
for c in s:
for state in current:
if state.label == c:
follow = followes(state.edge1)
nextStates.update(follow)
current = nextStates
nextStates = set()
return nfa.accept in current
def main():
infixes = ["a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"]
strings = ["", "abc", "abbc", "abcc", "abad", "abbbc"]
for infix in infixes:
for s in strings:
result = matchRegex(infix, s)
print(("True " if result else "False ") + infix + " " + s)
print()
if __name__ == "__main__":
main()
R
# Initialize global variables
stateCounter <- 0
stateList <- list()
# Function to create a new state
createState <- function(label = NULL) {
stateCounter <<- stateCounter + 1
id <- paste0("state", stateCounter)
stateList[[id]] <<- list(label = label, edge1 = NULL, edge2 = NULL)
id
}
# Functions to set edges for a state
setEdge1 <- function(stateID, edgeID) {
stateList[[stateID]]$edge1 <<- edgeID
}
setEdge2 <- function(stateID, edgeID) {
stateList[[stateID]]$edge2 <<- edgeID
}
# Function to convert infix regex to postfix using the Shunting Yard algorithm
shunt <- function(infix) {
specials <- list('*' = 60, '+' = 55, '?' = 50, '.' = 40, '|' = 20)
postfix <- ""
stack <- c()
cList <- strsplit(infix, "")[[1]]
for (c in cList) {
if (c == "(") {
stack <- c(stack, c)
} else if (c == ")") {
while (length(stack) > 0 && tail(stack, 1) != "(") {
postfix <- paste0(postfix, tail(stack, 1))
stack <- head(stack, -1)
}
if (length(stack) > 0) {
stack <- head(stack, -1) # Remove "("
}
} else if (c %in% names(specials)) {
while (length(stack) > 0 && tail(stack, 1) %in% names(specials) &&
specials[[c]] <= specials[[tail(stack, 1)]]) {
postfix <- paste0(postfix, tail(stack, 1))
stack <- head(stack, -1)
}
stack <- c(stack, c)
} else {
postfix <- paste0(postfix, c)
}
}
while (length(stack) > 0) {
postfix <- paste0(postfix, tail(stack, 1))
stack <- head(stack, -1)
}
postfix
}
# Function to compile postfix regex into an NFA
compileRegex <- function(postfix) {
nfaStack <- list()
cList <- strsplit(postfix, "")[[1]]
for (c in cList) {
if (c == '*') {
nfa1 <- tail(nfaStack, 1)[[1]]
nfaStack <- head(nfaStack, -1)
initial <- createState()
accept <- createState()
setEdge1(initial, nfa1$initial)
setEdge2(initial, accept)
setEdge1(nfa1$accept, nfa1$initial)
setEdge2(nfa1$accept, accept)
nfaStack <- c(nfaStack, list(list(initial = initial, accept = accept)))
} else if (c == '.') {
nfa2 <- tail(nfaStack, 1)[[1]]
nfaStack <- head(nfaStack, -1)
nfa1 <- tail(nfaStack, 1)[[1]]
nfaStack <- head(nfaStack, -1)
setEdge1(nfa1$accept, nfa2$initial)
nfaStack <- c(nfaStack, list(list(initial = nfa1$initial, accept = nfa2$accept)))
} else if (c == '|') {
nfa2 <- tail(nfaStack, 1)[[1]]
nfaStack <- head(nfaStack, -1)
nfa1 <- tail(nfaStack, 1)[[1]]
nfaStack <- head(nfaStack, -1)
initial <- createState()
accept <- createState()
setEdge1(initial, nfa1$initial)
setEdge2(initial, nfa2$initial)
setEdge1(nfa1$accept, accept)
setEdge1(nfa2$accept, accept)
nfaStack <- c(nfaStack, list(list(initial = initial, accept = accept)))
} else if (c == '+') {
nfa1 <- tail(nfaStack, 1)[[1]]
nfaStack <- head(nfaStack, -1)
initial <- createState()
accept <- createState()
setEdge1(initial, nfa1$initial)
setEdge1(nfa1$accept, nfa1$initial)
setEdge2(nfa1$accept, accept)
nfaStack <- c(nfaStack, list(list(initial = initial, accept = accept)))
} else if (c == '?') {
nfa1 <- tail(nfaStack, 1)[[1]]
nfaStack <- head(nfaStack, -1)
initial <- createState()
accept <- createState()
setEdge1(initial, nfa1$initial)
setEdge2(initial, accept)
setEdge1(nfa1$accept, accept)
nfaStack <- c(nfaStack, list(list(initial = initial, accept = accept)))
} else {
# Literal character
initial <- createState(c)
accept <- createState()
setEdge1(initial, accept)
nfaStack <- c(nfaStack, list(list(initial = initial, accept = accept)))
}
}
tail(nfaStack, 1)[[1]]
}
# Function to compute the epsilon closure of a state
followes <- function(stateID) {
states <- c()
stack <- c(stateID)
while (length(stack) > 0) {
s <- tail(stack, 1)
stack <- head(stack, -1)
if (!(s %in% states)) {
states <- c(states, s)
if (is.null(stateList[[s]]$label)) {
if (!is.null(stateList[[s]]$edge1)) {
stack <- c(stack, stateList[[s]]$edge1)
}
if (!is.null(stateList[[s]]$edge2)) {
stack <- c(stack, stateList[[s]]$edge2)
}
}
}
}
states
}
# Function to match a string against the regex
matchRegex <- function(infix, str) {
postfix <- shunt(infix)
stateCounter <<- 0 # Reset stateCounter
stateList <<- list() # Reset stateList
nfa <- compileRegex(postfix)
current <- followes(nfa$initial)
cList <- strsplit(str, "")[[1]]
for (c in cList) {
nextStates <- c()
for (state in current) {
if (!is.null(stateList[[state]]$label) && stateList[[state]]$label == c) {
nextStates <- union(nextStates, followes(stateList[[state]]$edge1))
}
}
current <- nextStates
}
nfa$accept %in% current
}
# Test cases
infixes <- c("a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c")
strings <- c("", "abc", "abbc", "abcc", "abad", "abbbc")
for (infix in infixes) {
for (str in strings) {
result <- matchRegex(infix, str)
cat(ifelse(result, "True ", "False "), infix, " ", str, "\n")
}
cat("\n")
}
Rust
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use std::hash::{Hash, Hasher};
use std::rc::Rc;
// Define the State structure
#[derive(Debug)]
struct State {
label: Option<char>, // None represents epsilon transitions
edge1: Option<Rc<RefCell<State>>>,
edge2: Option<Rc<RefCell<State>>>,
}
impl State {
fn new(label: Option<char>) -> Self {
State {
label,
edge1: None,
edge2: None,
}
}
}
// Wrapper struct for Rc<RefCell<State>>
#[derive(Debug, Clone)]
struct RcState(Rc<RefCell<State>>);
impl PartialEq for RcState {
fn eq(&self, other: &Self) -> bool {
Rc::ptr_eq(&self.0, &other.0)
}
}
impl Eq for RcState {}
impl Hash for RcState {
fn hash<H: Hasher>(&self, state: &mut H) {
// Hash the pointer address
let ptr = Rc::as_ptr(&self.0);
ptr.hash(state);
}
}
// Define the NFA structure
#[derive(Debug)]
struct NFA {
initial: Option<Rc<RefCell<State>>>,
accept: Option<Rc<RefCell<State>>>,
}
impl NFA {
fn new(initial: Option<Rc<RefCell<State>>>, accept: Option<Rc<RefCell<State>>>) -> Self {
NFA { initial, accept }
}
}
// Function to convert infix regex to postfix using the Shunting Yard algorithm
fn shunt(infix: &str) -> String {
let mut specials = HashMap::new();
specials.insert('*', 60);
specials.insert('+', 55);
specials.insert('?', 50);
specials.insert('.', 40);
specials.insert('|', 20);
let mut postfix = String::new();
let mut stack: Vec<char> = Vec::new();
for c in infix.chars() {
if c == '(' {
stack.push(c);
} else if c == ')' {
while let Some(&top) = stack.last() {
if top == '(' {
break;
} else {
postfix.push(stack.pop().unwrap());
}
}
if let Some('(') = stack.last() {
stack.pop();
}
} else if specials.contains_key(&c) {
while let Some(&top) = stack.last() {
if let Some(&top_prec) = specials.get(&top) {
if specials[&c] <= top_prec {
postfix.push(stack.pop().unwrap());
continue;
}
}
break;
}
stack.push(c);
} else {
postfix.push(c);
}
}
while let Some(top) = stack.pop() {
postfix.push(top);
}
postfix
}
// Function to compute the epsilon closure of a state
fn followes(state: Option<Rc<RefCell<State>>>) -> HashSet<RcState> {
let mut states = HashSet::new();
let mut stack = Vec::new();
if let Some(state_rc) = state {
stack.push(RcState(state_rc));
}
while let Some(s_rcstate) = stack.pop() {
if !states.contains(&s_rcstate) {
states.insert(s_rcstate.clone());
let s = s_rcstate.0.borrow();
if s.label.is_none() {
if let Some(ref edge1) = s.edge1 {
stack.push(RcState(edge1.clone()));
}
if let Some(ref edge2) = s.edge2 {
stack.push(RcState(edge2.clone()));
}
}
}
}
states
}
// Function to compile postfix regex into an NFA
fn compile_regex(postfix: &str) -> NFA {
let mut nfa_stack: Vec<NFA> = Vec::new();
for c in postfix.chars() {
if c == '*' {
let nfa1 = nfa_stack.pop().unwrap();
let initial = Rc::new(RefCell::new(State::new(None)));
let accept = Rc::new(RefCell::new(State::new(None)));
initial.borrow_mut().edge1 = nfa1.initial.clone();
initial.borrow_mut().edge2 = Some(accept.clone());
if let Some(ref accept_state) = nfa1.accept {
accept_state.borrow_mut().edge1 = nfa1.initial.clone();
accept_state.borrow_mut().edge2 = Some(accept.clone());
}
nfa_stack.push(NFA::new(Some(initial), Some(accept)));
} else if c == '.' {
let nfa2 = nfa_stack.pop().unwrap();
let nfa1 = nfa_stack.pop().unwrap();
if let Some(ref accept_state) = nfa1.accept {
accept_state.borrow_mut().edge1 = nfa2.initial.clone();
}
nfa_stack.push(NFA::new(nfa1.initial.clone(), nfa2.accept.clone()));
} else if c == '|' {
let nfa2 = nfa_stack.pop().unwrap();
let nfa1 = nfa_stack.pop().unwrap();
let initial = Rc::new(RefCell::new(State::new(None)));
let accept = Rc::new(RefCell::new(State::new(None)));
initial.borrow_mut().edge1 = nfa1.initial.clone();
initial.borrow_mut().edge2 = nfa2.initial.clone();
if let Some(ref accept_state1) = nfa1.accept {
accept_state1.borrow_mut().edge1 = Some(accept.clone());
}
if let Some(ref accept_state2) = nfa2.accept {
accept_state2.borrow_mut().edge1 = Some(accept.clone());
}
nfa_stack.push(NFA::new(Some(initial), Some(accept)));
} else if c == '+' {
let nfa1 = nfa_stack.pop().unwrap();
let initial = Rc::new(RefCell::new(State::new(None)));
let accept = Rc::new(RefCell::new(State::new(None)));
initial.borrow_mut().edge1 = nfa1.initial.clone();
if let Some(ref accept_state) = nfa1.accept {
accept_state.borrow_mut().edge1 = nfa1.initial.clone();
accept_state.borrow_mut().edge2 = Some(accept.clone());
}
nfa_stack.push(NFA::new(Some(initial), Some(accept)));
} else if c == '?' {
let nfa1 = nfa_stack.pop().unwrap();
let initial = Rc::new(RefCell::new(State::new(None)));
let accept = Rc::new(RefCell::new(State::new(None)));
initial.borrow_mut().edge1 = nfa1.initial.clone();
initial.borrow_mut().edge2 = Some(accept.clone());
if let Some(ref accept_state) = nfa1.accept {
accept_state.borrow_mut().edge1 = Some(accept.clone());
}
nfa_stack.push(NFA::new(Some(initial), Some(accept)));
} else {
let initial = Rc::new(RefCell::new(State::new(Some(c))));
let accept = Rc::new(RefCell::new(State::new(None)));
initial.borrow_mut().edge1 = Some(accept.clone());
nfa_stack.push(NFA::new(Some(initial), Some(accept)));
}
}
nfa_stack.pop().unwrap()
}
// Function to match a string against the regex
fn match_regex(infix: &str, s: &str) -> bool {
let postfix = shunt(infix);
let nfa = compile_regex(&postfix);
let mut current = followes(nfa.initial.clone());
let mut next_states = HashSet::new();
for c in s.chars() {
for state_rcstate in ¤t {
let state = state_rcstate.0.borrow();
if state.label == Some(c) {
let follow = followes(state.edge1.clone());
next_states.extend(follow);
}
}
current = next_states;
next_states = HashSet::new();
}
if let Some(accept_state) = nfa.accept {
current.contains(&RcState(accept_state))
} else {
false
}
}
fn main() {
let infixes = vec!["a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"];
let strings = vec!["", "abc", "abbc", "abcc", "abad", "abbbc"];
for infix in &infixes {
for s in &strings {
let result = match_regex(infix, s);
println!("{} {} {}", if result { "True" } else { "False" }, infix, s);
}
println!("");
}
}
Wren
import "./dynamic" for Struct
// label, character label - null for epsilon
// edge1, first transition
// edge2, second transition
var State = Struct.create("State", ["label", "edge1", "edge2"])
var NFA = Struct.create("NFA", ["initial", "accept"])
var shunt = Fn.new { |infix|
var specials = { "*": 60, "+": 55, "?": 50, ".": 40, "|": 20 }
var postfix = ""
var stack = []
for (c in infix) {
if (c == "(") {
stack.add(c)
} else if (c == ")") {
while (!stack.isEmpty && stack[-1] != "(") {
postfix = postfix + stack.removeAt(-1)
}
if (!stack.isEmpty) stack.removeAt(-1) // Remove "("
} else if (specials.containsKey(c)) {
while (!stack.isEmpty && specials.containsKey(stack[-1]) &&
specials[c] <= specials[stack[-1]]) {
postfix = postfix + stack.removeAt(-1)
}
stack.add(c)
} else {
postfix = postfix + c
}
}
while (!stack.isEmpty) postfix = postfix + stack.removeAt(-1)
return postfix
}
var followes = Fn.new { |state|
var states = []
var stack = [state]
while (!stack.isEmpty) {
var s = stack.removeAt(-1)
if (!states.contains(s)) {
states.add(s)
if (!s.label) { // Epsilon transition
if (s.edge1) stack.add(s.edge1)
if (s.edge2) stack.add(s.edge2)
}
}
}
return states
}
var compileRegex = Fn.new { |postfix|
var nfaStack = []
for (c in postfix) {
if (c == "*") {
var nfa1 = nfaStack.removeAt(-1)
var initial = State.new(null, null, null)
var accept = State.new(null, null, null)
initial.edge1 = nfa1.initial
initial.edge2 = accept
nfa1.accept.edge1 = nfa1.initial
nfa1.accept.edge2 = accept
nfaStack.add(NFA.new(initial, accept))
} else if (c == ".") {
var nfa2 = nfaStack.removeAt(-1)
var nfa1 = nfaStack.removeAt(-1)
nfa1.accept.edge1 = nfa2.initial
nfaStack.add(NFA.new(nfa1.initial, nfa2.accept))
} else if (c == "|") {
var nfa2 = nfaStack.removeAt(-1)
var nfa1 = nfaStack.removeAt(-1)
var initial = State.new(null, null, null)
var accept = State.new(null, null, null)
initial.edge1 = nfa1.initial
initial.edge2 = nfa2.initial
nfa1.accept.edge1 = accept
nfa2.accept.edge1 = accept
nfaStack.add(NFA.new(initial, accept))
} else if (c == "+") {
var nfa1 = nfaStack.removeAt(-1)
var initial = State.new(null, null, null)
var accept = State.new(null, null, null)
initial.edge1 = nfa1.initial
nfa1.accept.edge1 = nfa1.initial
nfa1.accept.edge2 = accept
nfaStack.add(NFA.new(initial, accept))
} else if (c == "?") {
var nfa1 = nfaStack.removeAt(-1)
var initial = State.new(null, null, null)
var accept = State.new(null, null, null)
initial.edge1 = nfa1.initial
initial.edge2 = accept
nfa1.accept.edge1 = accept
nfaStack.add(NFA.new(initial, accept))
} else { // Literal character
var initial = State.new(c, null, null)
var accept = State.new(null, null, null)
initial.edge1 = accept
nfaStack.add(NFA.new(initial, accept))
}
}
return nfaStack.removeAt(-1)
}
var matchRegex = Fn.new { |infix, s|
var postfix = shunt.call(infix)
// Uncomment the next line to see postfix expression
// System.print("Postfix: %(postfix)")
var nfa = compileRegex.call(postfix)
var current = followes.call(nfa.initial)
var nextStates = []
for (c in s) {
for (state in current) {
if (state.label == c) {
var follow = followes.call(state.edge1)
for (st in follow) {
if (!nextStates.contains(st)) nextStates.add(st)
}
}
}
current = nextStates
nextStates = []
}
return current.contains(nfa.accept)
}
var infixes = ["a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"]
var strings = ["", "abc", "abbc", "abcc", "abad", "abbbc"]
for (infix in infixes) {
for (s in strings) {
var result = matchRegex.call(infix, s)
System.print((result ? "True " : "False ") + infix + " " + s)
}
System.print()
}
- Output:
False a.b.c* True a.b.c* abc False a.b.c* abbc True a.b.c* abcc False a.b.c* abad False a.b.c* abbbc False a.(b|d).c* True a.(b|d).c* abc False a.(b|d).c* abbc True a.(b|d).c* abcc False a.(b|d).c* abad False a.(b|d).c* abbbc True (a.(b|d))* False (a.(b|d))* abc False (a.(b|d))* abbc False (a.(b|d))* abcc True (a.(b|d))* abad False (a.(b|d))* abbbc False a.(b.b)*.c False a.(b.b)*.c abc True a.(b.b)*.c abbc False a.(b.b)*.c abcc False a.(b.b)*.c abad False a.(b.b)*.c abbbc
Zig
const std = @import("std");
const mem = std.mem;
pub fn main() !void {
// ------------------------------------------------ allocator
// https://ziglang.org/documentation/master/std/#std.heap.ArenaAllocator
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const allocator = arena.allocator();
var state_pool = StatePool.init(std.heap.page_allocator);
defer state_pool.deinit();
// ----------------------------------------------------------
const stdout = std.io.getStdOut();
const writer = stdout.writer();
const infixes = [_][]const u8{ "a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c" };
const strings = [_][]const u8{ "", "abc", "abbc", "abcc", "abad", "abbbc" };
for (infixes) |infix| {
for (strings) |str| {
const result = try matchRegex(allocator, &state_pool, infix, str);
_ = arena.reset(.retain_capacity); // reset allocated memory
_ = state_pool.reset(.retain_capacity);
try writer.print("{} {s} {s}\n", .{ result, infix, str });
}
try writer.writeByte('\n');
}
}
// Function to match a string against the regex
fn matchRegex(allocator: mem.Allocator, state_pool: *StatePool, infix: []const u8, str: []const u8) !bool {
const postfix = try shunt(allocator, infix);
// Uncomment the next line to see the postfix expression
// std.debug.print("Postfix: {s}\n", .{postfix});
const nfa = try compileRegex(allocator, state_pool, postfix);
var current: StateSet = try followes(allocator, nfa.initial.?);
var nextStates = StateSet.init(allocator);
for (str) |c| {
for (current.keys()) |state|
if (state.label == c) {
const follow = try followes(allocator, state.edge1.?);
for (follow.keys()) |state2|
try nextStates.put(state2, {});
};
current = nextStates;
nextStates.clearRetainingCapacity();
}
return current.contains(nfa.accept.?);
}
/// Function to convert infix regex to postfix using the Shunting Yard algorithm
fn shunt(allocator: mem.Allocator, infix: []const u8) ![]const u8 {
var specials = blk: {
const specialsK = [_]u8{ '*', '+', '?', '.', '|' };
const specialsV = [specialsK.len]u8{ 60, 55, 50, 40, 20 };
var specials = std.AutoArrayHashMap(u8, u8).init(allocator);
for (specialsK, specialsV) |k, v|
try specials.put(k, v);
break :blk specials;
};
var postfix = std.ArrayList(u8).init(allocator);
var stack = Stack(u8).init(allocator);
for (infix) |c| {
if (c == '(')
try stack.push(c)
else if (c == ')') {
while (!stack.isEmpty() and stack.top() != '(')
try postfix.append(stack.pop());
if (!stack.isEmpty())
_ = stack.pop(); // Remove '('
} else if (specials.contains(c)) {
while (!stack.isEmpty() and specials.contains(stack.top()) and specials.get(c).? <= specials.get(stack.top()).?)
try postfix.append(stack.pop());
try stack.push(c);
} else {
try postfix.append(c);
}
}
while (!stack.isEmpty())
try postfix.append(stack.pop());
return postfix.toOwnedSlice();
}
const StatePool = std.heap.MemoryPoolExtra(State, .{});
const StateSet = std.AutoArrayHashMap(*State, void);
const State = struct {
label: u8 = 0, // Character label, '\0' for epsilon
edge1: ?*State = null, // First transition
edge2: ?*State = null, // Second transition
fn init(label: u8) State {
return .{ .label = label };
}
};
const NFA = struct {
initial: ?*State,
accept: ?*State,
fn init(initial: ?*State, accept: ?*State) NFA {
return .{ .initial = initial, .accept = accept };
}
};
/// Function to compute the epsilon closure of a state
fn followes(allocator: mem.Allocator, state: *State) !StateSet {
var states = StateSet.init(allocator);
var stack = Stack(*State).init(allocator);
try stack.push(state);
while (!stack.isEmpty()) {
const s = stack.pop();
if (!states.contains(s)) {
try states.put(s, {});
if (s.label == 0) { // Epsilon transition
if (s.edge1) |edge1| try stack.push(edge1);
if (s.edge2) |edge2| try stack.push(edge2);
}
}
}
return states;
}
// Function to compile postfix regex into an NFA
fn compileRegex(allocator: mem.Allocator, state_pool: *StatePool, postfix: []const u8) !NFA {
var nfa_stack = Stack(NFA).init(allocator);
for (postfix) |c| {
switch (c) {
'*' => {
var nfa1 = nfa_stack.pop();
var initial = try state_pool.create();
const accept = try state_pool.create();
initial.* = State{};
accept.* = State{};
initial.edge1 = nfa1.initial;
initial.edge2 = accept;
nfa1.accept.?.edge1 = nfa1.initial;
nfa1.accept.?.edge2 = accept;
try nfa_stack.push(NFA.init(initial, accept));
},
'.' => {
const nfa2 = nfa_stack.pop();
const nfa1 = nfa_stack.pop();
nfa1.accept.?.edge1 = nfa2.initial;
try nfa_stack.push(NFA.init(nfa1.initial, nfa2.accept));
},
'|' => {
const nfa2 = nfa_stack.pop();
const nfa1 = nfa_stack.pop();
var initial = try state_pool.create();
const accept = try state_pool.create();
initial.* = State{};
accept.* = State{};
initial.edge1 = nfa1.initial;
initial.edge2 = nfa2.initial;
nfa1.accept.?.edge1 = accept;
nfa2.accept.?.edge1 = accept;
try nfa_stack.push(NFA.init(initial, accept));
},
'+' => {
const nfa1 = nfa_stack.pop();
var initial = try state_pool.create();
const accept = try state_pool.create();
initial.* = State{};
accept.* = State{};
initial.edge1 = nfa1.initial;
nfa1.accept.?.edge1 = nfa1.initial;
nfa1.accept.?.edge2 = accept;
try nfa_stack.push(NFA.init(initial, accept));
},
'?' => {
const nfa1 = nfa_stack.pop();
var initial = try state_pool.create();
const accept = try state_pool.create();
initial.* = State{};
accept.* = State{};
initial.edge1 = nfa1.initial;
initial.edge2 = accept;
nfa1.accept.?.edge1 = accept;
try nfa_stack.push(NFA.init(initial, accept));
},
else => {
// Literal character
var initial = try state_pool.create();
const accept = try state_pool.create();
initial.* = State.init(c);
accept.* = State{};
initial.edge1 = accept;
try nfa_stack.push(NFA.init(initial, accept));
},
}
}
return nfa_stack.pop();
}
// An ad hoc generic stack implementation.
fn Stack(comptime T: type) type {
return struct {
const Self = @This();
stack: std.ArrayList(T),
fn init(allocator: mem.Allocator) Self {
return Self{
.stack = std.ArrayList(T).init(allocator),
};
}
// fn deinit(self: *Self) void {
// self.stack.deinit();
// }
fn push(self: *Self, node: T) !void {
return try self.stack.append(node);
}
fn pop(self: *Self) T {
return self.stack.pop();
}
fn top(self: *const Self) T {
return self.stack.items[self.stack.items.len - 1];
}
fn isEmpty(self: *const Self) bool {
return self.stack.items.len == 0;
}
};
}