Jump to content

McNaughton-Yamada-Thompson algorithm

From Rosetta Code
McNaughton-Yamada-Thompson algorithm is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Description

The McNaughton-Yamada-Thompson algorithm (aka. Thompson algorithm) is designed to transform a regular expression into a Non-Deterministic Finite Automaton (NDFA) that recognizes the language represented by the regular expression.

As the workings of the algorithm are clearly described in the linked article they will not be repeated here.

Task

Implement Thompson's Construction Algorithm to convert regular expressions into NFAs and match strings. The implementation should:

1. Support the following operations:

  - Concatenation (.)
  - Union/Alternation (|)
  - Kleene Star (*)
  - Plus (+)
  - Optional (?)
  - Basic character literals

2. Include the following components:

  - A regex-to-postfix converter using Shunting Yard algorithm
  - NFA state and transition representation
  - Epsilon closure computation
  - Pattern matching functionality

3. Implement the following features:

  - State structure with:
    * Character label
    * Two possible transitions (edge1, edge2)
    * Support for epsilon transitions
  
  - NFA structure containing:
    * Initial state pointer
    * Accept state pointer
  - String matching that:
    * Follows epsilon transitions
    * Tracks current possible states
    * Processes input characters
    * Determines if final state is accepting

4. Handle test cases demonstrating:

  - Basic concatenation (a.b.c)
  - Star operations (c*)
  - Alternation (b|d)
  - Complex combinations ((a.(b|d))*)
  - Empty strings and various input patterns

The implementation should efficiently construct NFAs from regular expressions and correctly determine whether input strings match the given patterns.


C#

using System;
using System.Collections.Generic;

namespace NFARegex
{
    class Program
    {
        static void Main(string[] args)
        {
            string[] infixes = { "a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c" };
            string[] strings = { "", "abc", "abbc", "abcc", "abad", "abbbc" };

            foreach (string infix in infixes)
            {
                foreach (string s in strings)
                {
                    bool result = MatchRegex(infix, s);
                    Console.WriteLine((result ? "True " : "False ") + infix + " " + s);
                }
                Console.WriteLine();
            }
        }

        class State
        {
            public char? Label; // Nullable char to represent character label, null for epsilon
            public State Edge1;
            public State Edge2;

            public State(char? label = null)
            {
                Label = label;
                Edge1 = null;
                Edge2 = null;
            }
        }

        class NFA
        {
            public State Initial;
            public State Accept;

            public NFA(State initial = null, State accept = null)
            {
                Initial = initial;
                Accept = accept;
            }
        }

        static string Shunt(string infix)
        {
            Dictionary<char, int> specials = new Dictionary<char, int>
            {
                { '*', 60 },
                { '+', 55 },
                { '?', 50 },
                { '.', 40 },
                { '|', 20 }
            };
            string postfix = "";
            Stack<char> stack = new Stack<char>();

            foreach (char c in infix)
            {
                if (c == '(')
                {
                    stack.Push(c);
                }
                else if (c == ')')
                {
                    while (stack.Count > 0 && stack.Peek() != '(')
                    {
                        postfix += stack.Pop();
                    }
                    if (stack.Count > 0)
                    {
                        stack.Pop(); // Remove '('
                    }
                }
                else if (specials.ContainsKey(c))
                {
                    while (stack.Count > 0 && specials.ContainsKey(stack.Peek()) && specials[c] <= specials[stack.Peek()])
                    {
                        postfix += stack.Pop();
                    }
                    stack.Push(c);
                }
                else
                {
                    postfix += c;
                }
            }

            while (stack.Count > 0)
            {
                postfix += stack.Pop();
            }

            return postfix;
        }

        static HashSet<State> Followes(State state)
        {
            HashSet<State> states = new HashSet<State>();
            Stack<State> stack = new Stack<State>();
            stack.Push(state);

            while (stack.Count > 0)
            {
                State s = stack.Pop();
                if (!states.Contains(s))
                {
                    states.Add(s);
                    if (s.Label == null) // Epsilon transition
                    {
                        if (s.Edge1 != null)
                        {
                            stack.Push(s.Edge1);
                        }
                        if (s.Edge2 != null)
                        {
                            stack.Push(s.Edge2);
                        }
                    }
                }
            }
            return states;
        }

        static NFA CompileRegex(string postfix)
        {
            Stack<NFA> nfaStack = new Stack<NFA>();

            foreach (char c in postfix)
            {
                if (c == '*')
                {
                    NFA nfa1 = nfaStack.Pop();
                    State initial = new State();
                    State accept = new State();
                    initial.Edge1 = nfa1.Initial;
                    initial.Edge2 = accept;
                    nfa1.Accept.Edge1 = nfa1.Initial;
                    nfa1.Accept.Edge2 = accept;
                    nfaStack.Push(new NFA(initial, accept));
                }
                else if (c == '.')
                {
                    NFA nfa2 = nfaStack.Pop();
                    NFA nfa1 = nfaStack.Pop();
                    nfa1.Accept.Edge1 = nfa2.Initial;
                    nfaStack.Push(new NFA(nfa1.Initial, nfa2.Accept));
                }
                else if (c == '|')
                {
                    NFA nfa2 = nfaStack.Pop();
                    NFA nfa1 = nfaStack.Pop();
                    State initial = new State();
                    State accept = new State();
                    initial.Edge1 = nfa1.Initial;
                    initial.Edge2 = nfa2.Initial;
                    nfa1.Accept.Edge1 = accept;
                    nfa2.Accept.Edge1 = accept;
                    nfaStack.Push(new NFA(initial, accept));
                }
                else if (c == '+')
                {
                    NFA nfa1 = nfaStack.Pop();
                    State initial = new State();
                    State accept = new State();
                    initial.Edge1 = nfa1.Initial;
                    nfa1.Accept.Edge1 = nfa1.Initial;
                    nfa1.Accept.Edge2 = accept;
                    nfaStack.Push(new NFA(initial, accept));
                }
                else if (c == '?')
                {
                    NFA nfa1 = nfaStack.Pop();
                    State initial = new State();
                    State accept = new State();
                    initial.Edge1 = nfa1.Initial;
                    initial.Edge2 = accept;
                    nfa1.Accept.Edge1 = accept;
                    nfaStack.Push(new NFA(initial, accept));
                }
                else // Literal character
                {
                    State initial = new State(c);
                    State accept = new State();
                    initial.Edge1 = accept;
                    nfaStack.Push(new NFA(initial, accept));
                }
            }

            return nfaStack.Pop();
        }

        static bool MatchRegex(string infix, string s)
        {
            string postfix = Shunt(infix);
            // Uncomment the next line to see the postfix expression
            // Console.WriteLine("Postfix: " + postfix);

            NFA nfa = CompileRegex(postfix);

            HashSet<State> current = Followes(nfa.Initial);
            HashSet<State> nextStates = new HashSet<State>();

            foreach (char c in s)
            {
                foreach (State state in current)
                {
                    if (state.Label == c)
                    {
                        HashSet<State> follow = Followes(state.Edge1);
                        nextStates.UnionWith(follow);
                    }
                }
                current = new HashSet<State>(nextStates);
                nextStates.Clear();
            }

            return current.Contains(nfa.Accept);
        }
    }
}

C++

#include <iostream>
#include <string>
#include <unordered_map>
#include <stack>
#include <vector>
#include <set>

// Define the State structure
struct State {
    char label;       // Character label, '\0' for epsilon
    State* edge1;     // First transition
    State* edge2;     // Second transition

    State(char lbl = '\0') : label(lbl), edge1(nullptr), edge2(nullptr) {}
};

// Define the NFA structure
struct NFA {
    State* initial;
    State* accept;

    NFA(State* init = nullptr, State* acc = nullptr) : initial(init), accept(acc) {}
};

// Function to convert infix regex to postfix using the Shunting Yard algorithm
std::string shunt(const std::string& infix) {
    std::unordered_map<char, int> specials = {
        {'*', 60},
        {'+', 55},
        {'?', 50},
        {'.', 40},
        {'|', 20}
    };
    std::string postfix;
    std::stack<char> stack;

    for (char c : infix) {
        if (c == '(') {
            stack.push(c);
        }
        else if (c == ')') {
            while (!stack.empty() && stack.top() != '(') {
                postfix += stack.top();
                stack.pop();
            }
            if (!stack.empty()) {
                stack.pop(); // Remove '('
            }
        }
        else if (specials.find(c) != specials.end()) {
            while (!stack.empty() && specials.find(stack.top()) != specials.end() &&
                   specials[c] <= specials[stack.top()]) {
                postfix += stack.top();
                stack.pop();
            }
            stack.push(c);
        }
        else {
            postfix += c;
        }
    }

    while (!stack.empty()) {
        postfix += stack.top();
        stack.pop();
    }

    return postfix;
}

// Function to compute the epsilon closure of a state
std::set<State*> followes(State* state) {
    std::set<State*> states;
    std::stack<State*> stack;
    stack.push(state);

    while (!stack.empty()) {
        State* s = stack.top();
        stack.pop();
        if (states.find(s) == states.end()) {
            states.insert(s);
            if (s->label == '\0') { // Epsilon transition
                if (s->edge1 != nullptr) {
                    stack.push(s->edge1);
                }
                if (s->edge2 != nullptr) {
                    stack.push(s->edge2);
                }
            }
        }
    }

    return states;
}

// Function to compile postfix regex into an NFA
NFA compileRegex(const std::string& postfix) {
    std::stack<NFA> nfaStack;

    for (char c : postfix) {
        if (c == '*') {
            NFA nfa1 = nfaStack.top(); nfaStack.pop();
            State* initial = new State();
            State* accept = new State();
            initial->edge1 = nfa1.initial;
            initial->edge2 = accept;
            nfa1.accept->edge1 = nfa1.initial;
            nfa1.accept->edge2 = accept;
            nfaStack.push(NFA(initial, accept));
        }
        else if (c == '.') {
            NFA nfa2 = nfaStack.top(); nfaStack.pop();
            NFA nfa1 = nfaStack.top(); nfaStack.pop();
            nfa1.accept->edge1 = nfa2.initial;
            nfaStack.push(NFA(nfa1.initial, nfa2.accept));
        }
        else if (c == '|') {
            NFA nfa2 = nfaStack.top(); nfaStack.pop();
            NFA nfa1 = nfaStack.top(); nfaStack.pop();
            State* initial = new State();
            State* accept = new State();
            initial->edge1 = nfa1.initial;
            initial->edge2 = nfa2.initial;
            nfa1.accept->edge1 = accept;
            nfa2.accept->edge1 = accept;
            nfaStack.push(NFA(initial, accept));
        }
        else if (c == '+') {
            NFA nfa1 = nfaStack.top(); nfaStack.pop();
            State* initial = new State();
            State* accept = new State();
            initial->edge1 = nfa1.initial;
            nfa1.accept->edge1 = nfa1.initial;
            nfa1.accept->edge2 = accept;
            nfaStack.push(NFA(initial, accept));
        }
        else if (c == '?') {
            NFA nfa1 = nfaStack.top(); nfaStack.pop();
            State* initial = new State();
            State* accept = new State();
            initial->edge1 = nfa1.initial;
            initial->edge2 = accept;
            nfa1.accept->edge1 = accept;
            nfaStack.push(NFA(initial, accept));
        }
        else { // Literal character
            State* initial = new State(c);
            State* accept = new State();
            initial->edge1 = accept;
            nfaStack.push(NFA(initial, accept));
        }
    }

    return nfaStack.top();
}

// Function to match a string against the regex
bool matchRegex(const std::string& infix, const std::string& str) {
    std::string postfix = shunt(infix);
    // Uncomment the next line to see the postfix expression
    // std::cout << "Postfix: " << postfix << std::endl;

    NFA nfa = compileRegex(postfix);

    std::set<State*> current = followes(nfa.initial);
    std::set<State*> nextStates;

    for (char c : str) {
        for (State* state : current) {
            if (state->label == c) {
                std::set<State*> follow = followes(state->edge1);
                nextStates.insert(follow.begin(), follow.end());
            }
        }
        current = nextStates;
        nextStates.clear();
    }

    return current.find(nfa.accept) != current.end();
}

int main() {
    std::vector<std::string> infixes = {"a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"};
    std::vector<std::string> strings = {"", "abc", "abbc", "abcc", "abad", "abbbc"};

    for (const auto& infix : infixes) {
        for (const auto& str : strings) {
            bool result = matchRegex(infix, str);
            std::cout << (result ? "True " : "False ") << infix << " " << str << std::endl;
        }
        std::cout<<"\n";
    }

    return 0;
}

FreeBASIC

Const NULL As Any Ptr = 0

Type State
    label As String * 1
    edge1 As State Ptr
    edge2 As State Ptr
End Type

Function createState(label As String, e1 As State Ptr = NULL, e2 As State Ptr = NULL) As State Ptr
    Dim As State Ptr s = New State
    s->label = label
    s->edge1 = e1
    s->edge2 = e2
    Return s
End Function

Sub addState(state As State Ptr, states() As State Ptr, count As Integer)
    If state = NULL Then Exit Sub
    
    For i As Integer = 0 To count - 1
        If states(i) = state Then Exit Sub
    Next
    
    states(count) = state
    count += 1
    
    If state->label = "" Then
        addState(state->edge1, states(), count)
        addState(state->edge2, states(), count)
    End If
End Sub

Type NFA
    initial As State Ptr
    accept As State Ptr
End Type

Function createNFA(initial As State Ptr, accept As State Ptr) As NFA Ptr
    Dim As NFA Ptr nfa = New NFA
    nfa->initial = initial
    nfa->accept = accept
    Return nfa
End Function

Type StateSet
    states(1023) As State Ptr
    count As Integer = 0
End Type

Sub addToStateSet(Byref st As StateSet, s As State Ptr)
    If st.count >= 1024 Then Exit Sub
    For i As Integer = 0 To st.count - 1
        If st.states(i) = s Then Exit Sub
    Next
    st.states(st.count) = s
    st.count += 1
End Sub

Function containsState(Byref st As StateSet, s As State Ptr) As Boolean
    For i As Integer = 0 To st.count - 1
        If st.states(i) = s Then Return True
    Next
    Return False
End Function

Sub clearStateSet(Byref st As StateSet)
    st.count = 0
End Sub

Sub addStateWithEpsilon(state As State Ptr, Byref st As StateSet)
    If state = NULL Orelse containsState(st, state) Then Exit Sub
    
    addToStateSet(st, state)
    If state->label = "" Then
        addStateWithEpsilon(state->edge1, st)
        addStateWithEpsilon(state->edge2, st)
    End If
End Sub

'Convert infix to postfix notation
Function shunt(infix As String) As String
    Dim As String postfix = "", stack = ""
    
    For i As Integer = 1 To Len(infix)
        Dim As String c = Mid(infix, i, 1)
        Select Case c
        Case "("
            stack = "(" + stack
        Case ")"
            While Len(stack) > 0 Andalso Left(stack, 1) <> "("
                postfix += Left(stack, 1)
                stack = Mid(stack, 2)
            Wend
            If Len(stack) > 0 Then stack = Mid(stack, 2)
        Case "*", "+", "?", ".", "|"
            Dim As Integer prec
            Select Case c
            Case "*": prec = 60
            Case "+": prec = 55
            Case "?": prec = 50
            Case ".": prec = 40
            Case "|": prec = 20
            End Select
            
            While Len(stack) > 0 Andalso Left(stack, 1) <> "("
                Dim As Integer topPrec
                Select Case Left(stack,1)
                Case "*": topPrec = 60
                Case "+": topPrec = 55
                Case "?": topPrec = 50
                Case ".": topPrec = 40
                Case "|": topPrec = 20
                End Select
                
                If prec <= topPrec Then
                    postfix += Left(stack, 1)
                    stack = Mid(stack, 2)
                Else
                    Exit While
                End If
            Wend
            stack = c + stack
        Case Else
            postfix += c
        End Select
    Next
    Return postfix + stack
End Function

' Function to match string against regex
Function matchRegex(infix As String, text As String) As Boolean
    Dim As String postfix = shunt(infix)
    Dim As NFA Ptr nfaStack(1023)
    Dim As Integer nfaTop = 0
    
    For i As Integer = 1 To Len(postfix)
        Dim As String c = Mid(postfix, i, 1)
        Select Case c
        Case "*"
            Dim As NFA Ptr nfa = nfaStack(nfaTop - 1)
            Dim As State Ptr initial = createState("")
            Dim As State Ptr accept = createState("")
            
            initial->edge1 = nfa->initial
            initial->edge2 = accept
            nfa->accept->edge1 = nfa->initial
            nfa->accept->edge2 = accept
            
            nfaStack(nfaTop - 1) = createNFA(initial, accept)
            
        Case "."
            nfaTop -= 1
            Dim As NFA Ptr nfa2 = nfaStack(nfaTop)
            Dim As NFA Ptr nfa1 = nfaStack(nfaTop - 1)
            nfa1->accept->edge1 = nfa2->initial
            nfaStack(nfaTop - 1) = createNFA(nfa1->initial, nfa2->accept)
            
        Case "|"
            nfaTop -= 1
            Dim As NFA Ptr nfa2 = nfaStack(nfaTop)
            Dim As NFA Ptr nfa1 = nfaStack(nfaTop - 1)
            Dim As State Ptr initial = createState("")
            Dim As State Ptr accept = createState("")
            
            initial->edge1 = nfa1->initial
            initial->edge2 = nfa2->initial
            nfa1->accept->edge1 = accept
            nfa2->accept->edge1 = accept
            
            nfaStack(nfaTop - 1) = createNFA(initial, accept)
            
        Case Else ' Literal character
            Dim As State Ptr initial = createState(c)
            Dim As State Ptr accept = createState("")
            initial->edge1 = accept
            nfaStack(nfaTop) = createNFA(initial, accept)
            nfaTop += 1
        End Select
    Next
    
    Dim As NFA Ptr nfa = nfaStack(0)
    Dim As StateSet current, sgte
    
    addStateWithEpsilon(nfa->initial, current)
    
    For i As Integer = 1 To Len(text)
        Dim As String c = Mid(text, i, 1)
        clearStateSet(sgte)
        
        For j As Integer = 0 To current.count - 1
            If current.states(j)->label = c Then
                addStateWithEpsilon(current.states(j)->edge1, sgte)
            End If
        Next
        
        current = sgte
    Next
    
    Return containsState(current, nfa->accept)
End Function

'Test the implementation
Dim As String patterns(3) = {"a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"}
Dim As String tests(5) = {"", "abc", "abbc", "abcc", "abad", "abbbc"}

For i As Integer = 0 To 3
    For j As Integer = 0 To 5
        Print Iif(matchRegex(patterns(i), tests(j)), "True  ", "False "); patterns(i); " "; tests(j)
    Next
    Print
Next

Sleep
Output:
False a.b.c*
True  a.b.c* abc
False a.b.c* abbc
True  a.b.c* abcc
False a.b.c* abad
False a.b.c* abbbc

False a.(b|d).c*
True  a.(b|d).c* abc
False a.(b|d).c* abbc
True  a.(b|d).c* abcc
False a.(b|d).c* abad
False a.(b|d).c* abbbc

True  (a.(b|d))*
False (a.(b|d))* abc
False (a.(b|d))* abbc
False (a.(b|d))* abcc
True  (a.(b|d))* abad
False (a.(b|d))* abbbc

False a.(b.b)*.c
False a.(b.b)*.c abc
True  a.(b.b)*.c abbc
False a.(b.b)*.c abcc
False a.(b.b)*.c abad
False a.(b.b)*.c abbbc

Go

package main

import (
	"fmt"
)

// Define the State structure
type State struct {
	label rune    // Character label, 0 for epsilon
	edge1 *State  // First transition
	edge2 *State  // Second transition
}

// Define the NFA structure
type NFA struct {
	initial *State
	accept  *State
}

// Function to convert infix regex to postfix using the Shunting Yard algorithm
func shunt(infix string) string {
	specials := map[rune]int{
		'*': 60,
		'+': 55,
		'?': 50,
		'.': 40,
		'|': 20,
	}

	var postfix []rune
	var stack []rune

	for _, c := range infix {
		if c == '(' {
			stack = append(stack, c)
		} else if c == ')' {
			for len(stack) > 0 && stack[len(stack)-1] != '(' {
				postfix = append(postfix, stack[len(stack)-1])
				stack = stack[:len(stack)-1]
			}
			if len(stack) > 0 {
				stack = stack[:len(stack)-1] // Remove '('
			}
		} else if _, ok := specials[c]; ok {
			for len(stack) > 0 {
				top := stack[len(stack)-1]
				if _, ok := specials[top]; ok && specials[c] <= specials[top] {
					postfix = append(postfix, top)
					stack = stack[:len(stack)-1]
				} else {
					break
				}
			}
			stack = append(stack, c)
		} else {
			postfix = append(postfix, c)
		}
	}

	for len(stack) > 0 {
		postfix = append(postfix, stack[len(stack)-1])
		stack = stack[:len(stack)-1]
	}

	return string(postfix)
}

// Function to compute the epsilon closure of a state
func followes(state *State) map[*State]bool {
	states := make(map[*State]bool)
	stack := []*State{state}

	for len(stack) > 0 {
		s := stack[len(stack)-1]
		stack = stack[:len(stack)-1]
		if !states[s] {
			states[s] = true
			if s.label == 0 { // Epsilon transition
				if s.edge1 != nil {
					stack = append(stack, s.edge1)
				}
				if s.edge2 != nil {
					stack = append(stack, s.edge2)
				}
			}
		}
	}

	return states
}

// Function to compile postfix regex into an NFA
func compileRegex(postfix string) NFA {
	var nfaStack []NFA

	for _, c := range postfix {
		switch c {
		case '*':
			nfa1 := nfaStack[len(nfaStack)-1]
			nfaStack = nfaStack[:len(nfaStack)-1]
			initial := &State{}
			accept := &State{}
			initial.edge1 = nfa1.initial
			initial.edge2 = accept
			nfa1.accept.edge1 = nfa1.initial
			nfa1.accept.edge2 = accept
			nfaStack = append(nfaStack, NFA{initial, accept})
		case '+':
			nfa1 := nfaStack[len(nfaStack)-1]
			nfaStack = nfaStack[:len(nfaStack)-1]
			initial := &State{}
			accept := &State{}
			initial.edge1 = nfa1.initial
			nfa1.accept.edge1 = nfa1.initial
			nfa1.accept.edge2 = accept
			nfaStack = append(nfaStack, NFA{initial, accept})
		case '?':
			nfa1 := nfaStack[len(nfaStack)-1]
			nfaStack = nfaStack[:len(nfaStack)-1]
			initial := &State{}
			accept := &State{}
			initial.edge1 = nfa1.initial
			initial.edge2 = accept
			nfa1.accept.edge1 = accept
			nfaStack = append(nfaStack, NFA{initial, accept})
		case '.':
			nfa2 := nfaStack[len(nfaStack)-1]
			nfaStack = nfaStack[:len(nfaStack)-1]
			nfa1 := nfaStack[len(nfaStack)-1]
			nfaStack = nfaStack[:len(nfaStack)-1]
			nfa1.accept.edge1 = nfa2.initial
			nfaStack = append(nfaStack, NFA{nfa1.initial, nfa2.accept})
		case '|':
			nfa2 := nfaStack[len(nfaStack)-1]
			nfaStack = nfaStack[:len(nfaStack)-1]
			nfa1 := nfaStack[len(nfaStack)-1]
			nfaStack = nfaStack[:len(nfaStack)-1]
			initial := &State{}
			accept := &State{}
			initial.edge1 = nfa1.initial
			initial.edge2 = nfa2.initial
			nfa1.accept.edge1 = accept
			nfa2.accept.edge1 = accept
			nfaStack = append(nfaStack, NFA{initial, accept})
		default: // Literal character
			initial := &State{label: c}
			accept := &State{}
			initial.edge1 = accept
			nfaStack = append(nfaStack, NFA{initial, accept})
		}
	}

	return nfaStack[len(nfaStack)-1]
}

// Function to match a string against the regex
func matchRegex(infix, str string) bool {
	postfix := shunt(infix)
	// Uncomment the next line to see the postfix expression
	// fmt.Println("Postfix:", postfix)

	nfa := compileRegex(postfix)

	currentStates := followes(nfa.initial)
	var nextStates map[*State]bool

	for _, c := range str {
		nextStates = make(map[*State]bool)
		for state := range currentStates {
			if state.label == c {
				follow := followes(state.edge1)
				for s := range follow {
					nextStates[s] = true
				}
			}
		}
		currentStates = nextStates
	}

	return currentStates[nfa.accept]
}

func main() {
	infixes := []string{"a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"}
	strings := []string{"", "abc", "abbc", "abcc", "abad", "abbbc"}

	for _, infix := range infixes {
		for _, str := range strings {
			result := matchRegex(infix, str)
			if result {
				fmt.Println("True", infix, str)
			} else {
				fmt.Println("False", infix, str)
			}
		}
		fmt.Println()
	}
}

Java

import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;

public final class McNaughtonYamadaThompsonAlgorithm {

	public static void main(String[] args) {
		List<String> infixes = List.of( "a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c" );
	    List<String> strings = List.of( "", "abc", "abbc", "abcc", "abad", "abbbc" );

	    for ( String infix : infixes ) {
	        for ( String string : strings ) {
	            final boolean result = matchRegex(string, infix);
	            System.out.println(( result ? "True " : "False " ) + infix + " " + string);
	        }
	        System.out.println();
	    }

	}
	
	// Match the given string against the given infix regex
	private static boolean matchRegex(String text, String infix) {
	    String postfix = shunt(infix);
	    // Uncomment the next line to see the postfix expression
	    // System.out.println("Postfix: " + postfix);

	    NFA nfa = compileRegex(postfix);

	    Set<State> current = followes(nfa.initial);
	    Set<State> nextStates = new HashSet<State>();

	    for ( char ch : text.toCharArray() ) {
	        for ( State state : current ) {
	            if ( state.label == ch ) {
	                Set<State> follow = followes(state.edge1);
	                nextStates.addAll(follow);
	            }
	        }
	        current = new HashSet<State>(nextStates);
	        nextStates.clear();
	    }

	    return current.contains(nfa.accept);
	}
		
	// Compile the given postfix regex into an NFA
	private static NFA compileRegex(String postfix) {
	    Stack<NFA> nfaStack = new Stack<NFA>();

	    for ( char ch : postfix.toCharArray() ) {
	    	switch ( ch ) {
		    	case '*' -> {
		            NFA nfa1 = nfaStack.pop();
		            State initial = new State();
		            State accept = new State();
		            initial.edge1 = nfa1.initial;
		            initial.edge2 = accept;
		            nfa1.accept.edge1 = nfa1.initial;
		            nfa1.accept.edge2 = accept;
		            nfaStack.push( new NFA(initial, accept) );
		        }
		    	case '.' -> {
		            NFA nfa2 = nfaStack.pop();
		            NFA nfa1 = nfaStack.pop();
		            nfa1.accept.edge1 = nfa2.initial;
		            nfaStack.push( new NFA(nfa1.initial, nfa2.accept) );
		        }
		    	case '|' -> {
		            NFA nfa2 = nfaStack.pop();
		            NFA nfa1 = nfaStack.pop();
		            State initial = new State();
		            State accept = new State();
		            initial.edge1 = nfa1.initial;
		            initial.edge2 = nfa2.initial;
		            nfa1.accept.edge1 = accept;
		            nfa2.accept.edge1 = accept;
		            nfaStack.push( new NFA(initial, accept) );
		        }
		    	case '+' -> {
		            NFA nfa1 = nfaStack.pop();
		            State initial = new State();
		            State accept = new State();
		            initial.edge1 = nfa1.initial;
		            nfa1.accept.edge1 = nfa1.initial;
		            nfa1.accept.edge2 = accept;
		            nfaStack.push( new NFA(initial, accept) );
		        }
		    	case '?' -> {
		            NFA nfa1 = nfaStack.pop();
		            State initial = new State();
		            State accept = new State();
		            initial.edge1 = nfa1.initial;
		            initial.edge2 = accept;
		            nfa1.accept.edge1 = accept;
		            nfaStack.push( new NFA(initial, accept) );
		        }
		    	default -> { // Literal character
		            State initial = new State(ch);
		            State accept = new State();
		            initial.edge1 = accept;
		            nfaStack.push( new NFA(initial, accept) );
		        }
	    	}	        
	    }  
	    
	    return nfaStack.peek(); 	        
	}
	
	// Compute the epsilon closure of the given state
	private static Set<State> followes(State state) {
	    Set<State> states = new HashSet<State>();
	    Stack<State> stack = new Stack<State>();
	    stack.push(state);

	    while ( ! stack.isEmpty() ) {
	        State current = stack.pop();
	        if ( ! states.contains(current) ) {
	            states.add(current);
	            if ( current.label == '\0' ) { // Epsilon transition
	                if ( current.edge1 != null ) {
	                    stack.push(current.edge1);
	                }
	                if ( current.edge2 != null ) {
	                    stack.push(current.edge2);
	                }
	            }
	        }
	    }

	    return states;
	}
		
	// Convert the given infix regex to postfix regex using the Shunting Yard algorithm
	private static String shunt(String infix) {
	    Map<Character, Integer> specials = Map.ofEntries( Map.entry( '*', 60 ), Map.entry( '+', 55 ),
	    						    Map.entry( '?', 50 ), Map.entry( '.', 40 ), Map.entry( '|', 20 ) );	 
	   
	    Stack<Character> stack = new Stack<Character>();
	    String postfix = "";

	    for ( char ch : infix.toCharArray() ) {
	        if ( ch == '(' ) {
	            stack.push(ch);
	        }
	        else if ( ch == ')' ) {
	            while ( ! stack.isEmpty() && stack.peek() != '(' ) {
	                postfix += stack.pop();
	            }
	            if ( ! stack.isEmpty() ) {
	                stack.pop(); // Remove '('
	            }
	        }
	        else if ( specials.containsKey(ch) ) {
	            while ( ! stack.isEmpty() && specials.containsKey(stack.peek()) &&
	            	specials.get(ch) <= specials.get(stack.peek()) ) {
	                postfix += stack.pop();
	            }
	            stack.push(ch);
	        }
	        else {
	            postfix += ch;
	        }	        
	    }
	    
	    while ( ! stack.isEmpty() ) {
	        postfix += stack.pop();
	    }

	    return postfix;
	}
	
	private static final class State {
		
		public State(char aLabel) {
			label = aLabel;
		}
		
		public State() {
			label = '\0';
		}
	    
	    private State edge1 = null; // First transition
	    private State edge2 = null; // Second transition
	    
	    private final char label;   // Character label, '\0' for epsilon
	    
	}
	
	private static final class NFA {
		
		public NFA(State aInitial, State aAccept) {
			initial = aInitial;
			accept = aAccept;
		}
		
	    private State initial;
	    private State accept;
	    
	}

}


JavaScript

class McNaughtonYamadaThompsonAlgorithm {
  static main() {
    const infixes = ["a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"];
    const strings = ["", "abc", "abbc", "abcc", "abad", "abbbc"];

    for (const infix of infixes) {
      for (const string of strings) {
        const result = McNaughtonYamadaThompsonAlgorithm.matchRegex(string, infix);
        console.log(`${result ? "True" : "False"} ${infix} ${string}`);
      }
      console.log();
    }
  }

  static matchRegex(text, infix) {
    const postfix = McNaughtonYamadaThompsonAlgorithm.shunt(infix);
    // Uncomment the next line to see the postfix expression
    // console.log("Postfix: " + postfix);

    const nfa = McNaughtonYamadaThompsonAlgorithm.compileRegex(postfix);

    let current = McNaughtonYamadaThompsonAlgorithm.followes(nfa.initial);
    let nextStates = new Set();

    for (const ch of text) {
      for (const state of current) {
        if (state.label === ch) {
          const follow = McNaughtonYamadaThompsonAlgorithm.followes(state.edge1);
          for (const f of follow) {
            nextStates.add(f);
          }
        }
      }
      current = new Set(nextStates);
      nextStates.clear();
    }

    return current.has(nfa.accept);
  }

  static compileRegex(postfix) {
    const nfaStack = [];

    for (const ch of postfix) {
      switch (ch) {
        case '*': {
          const nfa1 = nfaStack.pop();
          const initial = new State();
          const accept = new State();
          initial.edge1 = nfa1.initial;
          initial.edge2 = accept;
          nfa1.accept.edge1 = nfa1.initial;
          nfa1.accept.edge2 = accept;
          nfaStack.push(new NFA(initial, accept));
          break;
        }
        case '.': {
          const nfa2 = nfaStack.pop();
          const nfa1 = nfaStack.pop();
          nfa1.accept.edge1 = nfa2.initial;
          nfaStack.push(new NFA(nfa1.initial, nfa2.accept));
          break;
        }
        case '|': {
          const nfa2 = nfaStack.pop();
          const nfa1 = nfaStack.pop();
          const initial = new State();
          const accept = new State();
          initial.edge1 = nfa1.initial;
          initial.edge2 = nfa2.initial;
          nfa1.accept.edge1 = accept;
          nfa2.accept.edge1 = accept;
          nfaStack.push(new NFA(initial, accept));
          break;
        }
        case '+': {
          const nfa1 = nfaStack.pop();
          const initial = new State();
          const accept = new State();
          initial.edge1 = nfa1.initial;
          nfa1.accept.edge1 = nfa1.initial;
          nfa1.accept.edge2 = accept;
          nfaStack.push(new NFA(initial, accept));
          break;
        }
        case '?': {
          const nfa1 = nfaStack.pop();
          const initial = new State();
          const accept = new State();
          initial.edge1 = nfa1.initial;
          initial.edge2 = accept;
          nfa1.accept.edge1 = accept;
          nfaStack.push(new NFA(initial, accept));
          break;
        }
        default: {
          const initial = new State(ch);
          const accept = new State();
          initial.edge1 = accept;
          nfaStack.push(new NFA(initial, accept));
          break;
        }
      }
    }

    return nfaStack.pop();
  }

  static followes(state) {
    const states = new Set();
    const stack = [state];

    while (stack.length > 0) {
      const current = stack.pop();
      if (!states.has(current)) {
        states.add(current);
        if (current.label === '\0') {
          if (current.edge1) stack.push(current.edge1);
          if (current.edge2) stack.push(current.edge2);
        }
      }
    }

    return states;
  }

  static shunt(infix) {
    const specials = { '*': 60, '+': 55, '?': 50, '.': 40, '|': 20 };
    const stack = [];
    let postfix = "";

    for (const ch of infix) {
      if (ch === '(') {
        stack.push(ch);
      } else if (ch === ')') {
        while (stack.length > 0 && stack[stack.length - 1] !== '(') {
          postfix += stack.pop();
        }
        if (stack.length > 0) {
          stack.pop(); // Remove '('
        }
      } else if (specials[ch]) {
        while (stack.length > 0 && specials[stack[stack.length - 1]] &&
          specials[ch] <= specials[stack[stack.length - 1]]) {
          postfix += stack.pop();
        }
        stack.push(ch);
      } else {
        postfix += ch;
      }
    }

    while (stack.length > 0) {
      postfix += stack.pop();
    }

    return postfix;
  }
}

class State {
  constructor(label = '\0') {
    this.label = label;
    this.edge1 = null;
    this.edge2 = null;
  }
}

class NFA {
  constructor(initial, accept) {
    this.initial = initial;
    this.accept = accept;
  }
}

// Run the main method
McNaughtonYamadaThompsonAlgorithm.main();


Julia

abstract type MYT end

struct NIL_MYT <: MYT end
const None = NIL_MYT()

mutable struct State <: MYT
    label::Char
    edge1::MYT
    edge2::MYT
    State(ch = '\0') = new(ch, None, None)
end

mutable struct NFA
    initial::State
    accept::State
    NFA(initial = None, accept = None) = new(initial, accept)
end

function shunt(infix)
    specials = Dict(
        '*' => 60,
        '+' => 55,
        '?' => 50,
        '.' => 40,
        '|' => 20,
    )
    postfix = Char[]
    stack = Char[]

    for c in infix
        if c == '('
            push!(stack, c)
        elseif c == ')'
            while !isempty(stack) && stack[end] != '('
                push!(postfix, pop!(stack))
            end
            if !isempty(stack)
                pop!(stack)  # Remove '('
            end
        elseif haskey(specials, c)
            while !isempty(stack) && haskey(specials, stack[end]) && specials[c] <= specials[stack[end]]
                push!(postfix, pop!(stack))
            end
            push!(stack, c)
        else
            push!(postfix, c)
        end
    end
    while !isempty(stack)
        push!(postfix, pop!(stack))
    end
    return postfix
end

function followes(state)
    states = Set{State}()
    stack = [state]

    while !isempty(stack)
        s = pop!(stack)
        if s  states
            push!(states, s)
            if s.label == '\0'  # Epsilon transition
                if s.edge1 != None
                    push!(stack, s.edge1)
                end
                if s.edge2 != None
                    push!(stack, s.edge2)
                end
            end
        end
    end
    return states
end

function compileRegex(postfix)
    nfaStack = NFA[]
    for c in postfix
        if c == '*'
            nfa1 = pop!(nfaStack)
            initial = State()
            accept = State()
            initial.edge1 = nfa1.initial
            initial.edge2 = accept
            nfa1.accept.edge1 = nfa1.initial
            nfa1.accept.edge2 = accept
            push!(nfaStack, NFA(initial, accept))
        elseif c == '.'
            nfa2 = pop!(nfaStack)
            nfa1 = pop!(nfaStack)
            nfa1.accept.edge1 = nfa2.initial
            push!(nfaStack, NFA(nfa1.initial, nfa2.accept))
        elseif c == '|'
            nfa2 = pop!(nfaStack)
            nfa1 = pop!(nfaStack)
            initial = State()
            accept = State()
            initial.edge1 = nfa1.initial
            initial.edge2 = nfa2.initial
            nfa1.accept.edge1 = accept
            nfa2.accept.edge1 = accept
            push!(nfaStack, NFA(initial, accept))
        elseif c == '+'
            nfa1 = pop!(nfaStack)
            initial = State()
            accept = State()
            initial.edge1 = nfa1.initial
            nfa1.accept.edge1 = nfa1.initial
            nfa1.accept.edge2 = accept
            push!(nfaStack, NFA(initial, accept))
        elseif c == '?'
            nfa1 = pop!(nfaStack)
            initial = State()
            accept = State()
            initial.edge1 = nfa1.initial
            initial.edge2 = accept
            nfa1.accept.edge1 = accept
            push!(nfaStack, NFA(initial, accept))
        else  # Literal character
            initial = State(c)
            accept = State()
            initial.edge1 = accept
            push!(nfaStack, NFA(initial, accept))
        end
    end
    return pop!(nfaStack)
end

function matchRegex(infix, s)
    postfix = shunt(infix)
    # Uncomment the next line to see the postfix expression
    # print("Postfix:", postfix)

    nfa = compileRegex(postfix)

    current = Set{State}(followes(nfa.initial))
    nextStates = Set{State}()

    for c in s
        for state in current
            if state.label == c
                follow = followes(state.edge1)
                union!(nextStates, follow)
            end
        end
        current = nextStates
        nextStates = Set{State}()
    end
    return nfa.accept  current
end

const infixes = ["a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"]
const strings = ["", "abc", "abbc", "abcc", "abad", "abbbc"]

println("Pattern      String  Matched?\n", "_"^30)
for infix in infixes
    for s in strings
        println(rpad(infix, 14), rpad(s, 8), matchRegex(infix, s))
    end
    println()
end
Output:

Pattern String Matched? ______________________________ a.b.c* false a.b.c* abc true a.b.c* abbc false a.b.c* abcc true a.b.c* abad false a.b.c* abbbc false

a.(b|d).c* false a.(b|d).c* abc true a.(b|d).c* abbc false a.(b|d).c* abcc true a.(b|d).c* abad false a.(b|d).c* abbbc false

(a.(b|d))* true (a.(b|d))* abc false (a.(b|d))* abbc false (a.(b|d))* abcc false (a.(b|d))* abad true (a.(b|d))* abbbc false

a.(b.b)*.c false a.(b.b)*.c abc false a.(b.b)*.c abbc true a.(b.b)*.c abcc false a.(b.b)*.c abad false a.(b.b)*.c abbbc false

Mathematica /Wolfram Language

(* Initialize global variables *)
ClearAll[createState, setEdge1, setEdge2, shunt, compileRegex, followes, matchRegex];
stateCounter = 0;
stateList = <||>;

(* Function to create a new state *)
createState[label_: Null] := Module[{id},
  id = "state" <> ToString[++stateCounter];
  stateList[id] = <|"label" -> label, "edge1" -> Null, "edge2" -> Null|>;
  id
];

(* Functions to set edges for a state *)
setEdge1[stateID_, edgeID_] := (stateList[stateID, "edge1"] = edgeID);
setEdge2[stateID_, edgeID_] := (stateList[stateID, "edge2"] = edgeID);

(* Function to convert infix regex to postfix using the Shunting Yard algorithm *)
shunt[infix_String] := Module[{specials, postfix = "", stack = {}, cList, c},
  specials = <|"*" -> 60, "+" -> 55, "?" -> 50, "." -> 40, "|" -> 20|>;
  cList = Characters[infix];
  Do[
    c = cList[[i]];
    If[c == "(",
      AppendTo[stack, c],
      If[c == ")",
        While[stack =!= {} && Last[stack] != "(",
          postfix = postfix <> Last[stack];
          stack = Most[stack];
        ];
        If[stack =!= {},
          stack = Most[stack]; (* Remove "(" *)
        ],
        If[KeyExistsQ[specials, c],
          While[stack =!= {} && KeyExistsQ[specials, Last[stack]] && specials[c] <= specials[Last[stack]],
            postfix = postfix <> Last[stack];
            stack = Most[stack];
          ];
          AppendTo[stack, c],
          postfix = postfix <> c
        ]
      ]
    ],
    {i, Length[cList]}
  ];
  While[stack =!= {},
    postfix = postfix <> Last[stack];
    stack = Most[stack];
  ];
  postfix
];

(* Function to compile postfix regex into an NFA *)
compileRegex[postfix_String] := Module[{nfaStack = {}, cList, c, initial, accept, nfa1, nfa2},
  cList = Characters[postfix];
  Do[
    c = cList[[i]];
    Switch[c,
      "*",
      nfa1 = Last[nfaStack]; nfaStack = Most[nfaStack];
      initial = createState[];
      accept = createState[];
      setEdge1[initial, nfa1["initial"]];
      setEdge2[initial, accept];
      setEdge1[nfa1["accept"], nfa1["initial"]];
      setEdge2[nfa1["accept"], accept];
      AppendTo[nfaStack, <|"initial" -> initial, "accept" -> accept|>],
      
      ".",
      nfa2 = Last[nfaStack]; nfaStack = Most[nfaStack];
      nfa1 = Last[nfaStack]; nfaStack = Most[nfaStack];
      setEdge1[nfa1["accept"], nfa2["initial"]];
      AppendTo[nfaStack, <|"initial" -> nfa1["initial"], "accept" -> nfa2["accept"]|>],
      
      "|",
      nfa2 = Last[nfaStack]; nfaStack = Most[nfaStack];
      nfa1 = Last[nfaStack]; nfaStack = Most[nfaStack];
      initial = createState[];
      accept = createState[];
      setEdge1[initial, nfa1["initial"]];
      setEdge2[initial, nfa2["initial"]];
      setEdge1[nfa1["accept"], accept];
      setEdge1[nfa2["accept"], accept];
      AppendTo[nfaStack, <|"initial" -> initial, "accept" -> accept|>],
      
      "+",
      nfa1 = Last[nfaStack]; nfaStack = Most[nfaStack];
      initial = createState[];
      accept = createState[];
      setEdge1[initial, nfa1["initial"]];
      setEdge1[nfa1["accept"], nfa1["initial"]];
      setEdge2[nfa1["accept"], accept];
      AppendTo[nfaStack, <|"initial" -> initial, "accept" -> accept|>],
      
      "?",
      nfa1 = Last[nfaStack]; nfaStack = Most[nfaStack];
      initial = createState[];
      accept = createState[];
      setEdge1[initial, nfa1["initial"]];
      setEdge2[initial, accept];
      setEdge1[nfa1["accept"], accept];
      AppendTo[nfaStack, <|"initial" -> initial, "accept" -> accept|>],
      
      _, (* Literal character *)
      initial = createState[c];
      accept = createState[];
      setEdge1[initial, accept];
      AppendTo[nfaStack, <|"initial" -> initial, "accept" -> accept|>]
    ],
    {i, Length[cList]}
  ];
  Last[nfaStack]
];

(* Function to compute the epsilon closure of a state *)
followes[stateID_] := Module[{states = {}, stack = {stateID}, s},
  While[stack =!= {},
    s = Last[stack]; stack = Most[stack];
    If[! MemberQ[states, s],
      AppendTo[states, s];
      If[stateList[s, "label"] === Null,
        If[stateList[s, "edge1"] =!= Null, AppendTo[stack, stateList[s, "edge1"]]];
        If[stateList[s, "edge2"] =!= Null, AppendTo[stack, stateList[s, "edge2"]]];
      ]
    ]
  ];
  states
];

(* Function to match a string against the regex *)
matchRegex[infix_String, str_String] := Module[{postfix, nfa, current, nextStates, cList, c},
  postfix = shunt[infix];
  stateCounter = 0; (* Reset stateCounter *)
  stateList = <||>; (* Reset stateList *)
  nfa = compileRegex[postfix];
  current = followes[nfa["initial"]];
  cList = Characters[str];
  Do[
    nextStates = {};
    c = cList[[i]];
    Do[
      If[stateList[state, "label"] === c,
        nextStates = Union[nextStates, followes[stateList[state, "edge1"]]]
      ],
      {state, current}
    ];
    current = nextStates;
    ,
    {i, Length[cList]}
  ];
  MemberQ[current, nfa["accept"]]
];

(* Test cases *)
infixes = {"a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"};
strings = {"", "abc", "abbc", "abcc", "abad", "abbbc"};

Do[
  Do[
    result = matchRegex[infix, str];
    Print[If[result, "True ", "False "], infix, " ", str];
    ,
    {str, strings}
  ];
  Print["\n"];
  ,
  {infix, infixes}
];


Phix

sequence stateList = {} -- entries as follows:

enum LABEL, -- character label - null for epsilon
     EDGE1, -- first transition
     EDGE2  -- second transition

enum INITIAL, ACCEPTS
type nfaentry(sequence n) return length(n)=2 end type

function createState()
  stateList &= {{null, null, null}}
  return length(stateList)
end function

function compileRegex(string postfix)
    stateList = {} // reset
    sequence nfa = {}
    nfaentry nfa1, nfa2
    integer initial, accepts
    for c in postfix do
        if c!='.' then
            initial = createState()
            accepts = createState()
        end if
        if find(c,".|") then
            {nfa,nfa2} = {nfa[1..$-1],nfa[$]}
        end if
        if find(c,"*.|+?") then
            {nfa,nfa1} = {nfa[1..$-1],nfa[$]}
        end if
        if find(c,"*|+?") then
            stateList[initial][EDGE1] = nfa1[INITIAL]
        end if
        if find(c,"*?") then
            stateList[initial][EDGE2] = accepts
        end if
        if find(c,"*+") then
            stateList[nfa1[ACCEPTS]][EDGE1] = nfa1[INITIAL]
            stateList[nfa1[ACCEPTS]][EDGE2] = accepts
        elsif c='.' then
            stateList[nfa1[ACCEPTS]][EDGE1] = nfa2[INITIAL]
            initial = nfa1[INITIAL]
            accepts = nfa2[ACCEPTS]
        elsif c='|' then
            stateList[initial][EDGE2] = nfa2[INITIAL]
            stateList[nfa1[ACCEPTS]][EDGE1] = accepts
            stateList[nfa2[ACCEPTS]][EDGE1] = accepts
        elsif c='?' then
            stateList[nfa1[ACCEPTS]][EDGE1] = accepts
        else // Literal character
            stateList[initial][LABEL] = c
            stateList[initial][EDGE1] = accepts
        end if
        nfa &= {{initial, accepts}}
    end for
    return nfa[$]
end function

function followes(integer id)
    // compute the epsilon closure of a state
    sequence states = {}, stack = {id}
    while length(stack) do
        id = stack[$]; stack = stack[1..$-1]
        if not find(id,states) then
            states &= id
            integer {label,e1,e2} = stateList[id]
            if label=null then
                if e1!=null then stack &= e1 end if
                if e2!=null then stack &= e2 end if
            end if
        end if
    end while
    return states
end function

function shunt(string infix)
    // convert infix into postfix notation,
    //  using the shunting yard algorithm
    string postfix = "", stack = "", ops = "|.?+*"
    for c in infix do
        if c='(' then
            stack &= c
        elsif c=')' then
            while stack[$]!='(' do
                postfix &= stack[$]
                stack = stack[1..-2]
            end while
            assert(stack[$]='(')
            stack = stack[1..-2] // remove '('          
        else
            integer priority = find(c,ops)
            if priority then
                while length(stack) 
                  and priority<=find(stack[$],ops) do
                    postfix &= stack[$]
                    stack = stack[1..-2]
                end while
                stack &= c
            else
                postfix &= c
            end if
        end if
    end for
    if length(stack) then
        postfix &= reverse(stack)
    end if
    return postfix
end function

include sets.e -- for union()

procedure matchRegex(string infix, sequence tests, string expected)
    string postfix = shunt(infix)
    nfaentry nfa = compileRegex(postfix)
    string res = ""
    for s in tests do
        sequence current = followes(nfa[INITIAL]),
              nextStates = {}
        for c in s do
            for state in current do
                integer {label,e1,e2} = stateList[state]
                if label=c then
                    nextStates = union(nextStates,followes(e1))
                end if
            end for
            current = nextStates
            nextStates = {}
        end for
        res &= iff(find(nfa[ACCEPTS],current)?'T':'F')
    end for
    assert(res=expected)
    printf(1," infix:%-10s => postfix:%-8s => results:%s\n",{infix,postfix,res})
end procedure

constant tests = {"", "abc", "abbc", "abcc", "abad", "abbbc"},
   expressions = {{"a.b.c*",    "FTFTFF"},
                  {"a.(b|d).c*","FTFTFF"},
                  {"(a.(b|d))*","TFFFTF"},
                  {"a.(b.b)*.c","FFTFFF"}}
printf(1,"For tests in %v:\n",{tests})
for ie in expressions do
    string {infix,expected} = ie
    matchRegex(infix,tests,expected)
end for
Output:
For tests in {"","abc","abbc","abcc","abad","abbbc"}:
 infix:a.b.c*     => postfix:ab.c*.   => results:FTFTFF
 infix:a.(b|d).c* => postfix:abd|.c*. => results:FTFTFF
 infix:(a.(b|d))* => postfix:abd|.*   => results:TFFFTF
 infix:a.(b.b)*.c => postfix:abb.*.c. => results:FFTFFF

Python

class State:
    def __init__(self, label=None):
        self.label = label  # Character label, None for epsilon
        self.edge1 = None   # First transition
        self.edge2 = None   # Second transition

class NFA:
    def __init__(self, initial=None, accept=None):
        self.initial = initial
        self.accept = accept

def shunt(infix):
    specials = {
        '*': 60,
        '+': 55,
        '?': 50,
        '.': 40,
        '|': 20
    }
    postfix = ''
    stack = []

    for c in infix:
        if c == '(':
            stack.append(c)
        elif c == ')':
            while stack and stack[-1] != '(':
                postfix += stack.pop()
            if stack:
                stack.pop()  # Remove '('
        elif c in specials:
            while stack and stack[-1] in specials and specials[c] <= specials[stack[-1]]:
                postfix += stack.pop()
            stack.append(c)
        else:
            postfix += c

    while stack:
        postfix += stack.pop()

    return postfix

def followes(state):
    states = set()
    stack = [state]

    while stack:
        s = stack.pop()
        if s not in states:
            states.add(s)
            if s.label is None:  # Epsilon transition
                if s.edge1 is not None:
                    stack.append(s.edge1)
                if s.edge2 is not None:
                    stack.append(s.edge2)
    return states

def compileRegex(postfix):
    nfaStack = []

    for c in postfix:
        if c == '*':
            nfa1 = nfaStack.pop()
            initial = State()
            accept = State()
            initial.edge1 = nfa1.initial
            initial.edge2 = accept
            nfa1.accept.edge1 = nfa1.initial
            nfa1.accept.edge2 = accept
            nfaStack.append(NFA(initial, accept))
        elif c == '.':
            nfa2 = nfaStack.pop()
            nfa1 = nfaStack.pop()
            nfa1.accept.edge1 = nfa2.initial
            nfaStack.append(NFA(nfa1.initial, nfa2.accept))
        elif c == '|':
            nfa2 = nfaStack.pop()
            nfa1 = nfaStack.pop()
            initial = State()
            accept = State()
            initial.edge1 = nfa1.initial
            initial.edge2 = nfa2.initial
            nfa1.accept.edge1 = accept
            nfa2.accept.edge1 = accept
            nfaStack.append(NFA(initial, accept))
        elif c == '+':
            nfa1 = nfaStack.pop()
            initial = State()
            accept = State()
            initial.edge1 = nfa1.initial
            nfa1.accept.edge1 = nfa1.initial
            nfa1.accept.edge2 = accept
            nfaStack.append(NFA(initial, accept))
        elif c == '?':
            nfa1 = nfaStack.pop()
            initial = State()
            accept = State()
            initial.edge1 = nfa1.initial
            initial.edge2 = accept
            nfa1.accept.edge1 = accept
            nfaStack.append(NFA(initial, accept))
        else:  # Literal character
            initial = State(c)
            accept = State()
            initial.edge1 = accept
            nfaStack.append(NFA(initial, accept))

    return nfaStack.pop()

def matchRegex(infix, s):
    postfix = shunt(infix)
    # Uncomment the next line to see the postfix expression
    # print("Postfix:", postfix)

    nfa = compileRegex(postfix)

    current = set(followes(nfa.initial))
    nextStates = set()

    for c in s:
        for state in current:
            if state.label == c:
                follow = followes(state.edge1)
                nextStates.update(follow)
        current = nextStates
        nextStates = set()

    return nfa.accept in current

def main():
    infixes = ["a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"]
    strings = ["", "abc", "abbc", "abcc", "abad", "abbbc"]

    for infix in infixes:
        for s in strings:
            result = matchRegex(infix, s)
            print(("True " if result else "False ") + infix + " " + s)
        print()

if __name__ == "__main__":
    main()

R

# Initialize global variables
stateCounter <- 0
stateList <- list()

# Function to create a new state
createState <- function(label = NULL) {
  stateCounter <<- stateCounter + 1
  id <- paste0("state", stateCounter)
  stateList[[id]] <<- list(label = label, edge1 = NULL, edge2 = NULL)
  id
}

# Functions to set edges for a state
setEdge1 <- function(stateID, edgeID) {
  stateList[[stateID]]$edge1 <<- edgeID
}

setEdge2 <- function(stateID, edgeID) {
  stateList[[stateID]]$edge2 <<- edgeID
}

# Function to convert infix regex to postfix using the Shunting Yard algorithm
shunt <- function(infix) {
  specials <- list('*' = 60, '+' = 55, '?' = 50, '.' = 40, '|' = 20)
  postfix <- ""
  stack <- c()
  cList <- strsplit(infix, "")[[1]]
  for (c in cList) {
    if (c == "(") {
      stack <- c(stack, c)
    } else if (c == ")") {
      while (length(stack) > 0 && tail(stack, 1) != "(") {
        postfix <- paste0(postfix, tail(stack, 1))
        stack <- head(stack, -1)
      }
      if (length(stack) > 0) {
        stack <- head(stack, -1)  # Remove "("
      }
    } else if (c %in% names(specials)) {
      while (length(stack) > 0 && tail(stack, 1) %in% names(specials) &&
             specials[[c]] <= specials[[tail(stack, 1)]]) {
        postfix <- paste0(postfix, tail(stack, 1))
        stack <- head(stack, -1)
      }
      stack <- c(stack, c)
    } else {
      postfix <- paste0(postfix, c)
    }
  }
  while (length(stack) > 0) {
    postfix <- paste0(postfix, tail(stack, 1))
    stack <- head(stack, -1)
  }
  postfix
}

# Function to compile postfix regex into an NFA
compileRegex <- function(postfix) {
  nfaStack <- list()
  cList <- strsplit(postfix, "")[[1]]
  for (c in cList) {
    if (c == '*') {
      nfa1 <- tail(nfaStack, 1)[[1]]
      nfaStack <- head(nfaStack, -1)
      initial <- createState()
      accept <- createState()
      setEdge1(initial, nfa1$initial)
      setEdge2(initial, accept)
      setEdge1(nfa1$accept, nfa1$initial)
      setEdge2(nfa1$accept, accept)
      nfaStack <- c(nfaStack, list(list(initial = initial, accept = accept)))
    } else if (c == '.') {
      nfa2 <- tail(nfaStack, 1)[[1]]
      nfaStack <- head(nfaStack, -1)
      nfa1 <- tail(nfaStack, 1)[[1]]
      nfaStack <- head(nfaStack, -1)
      setEdge1(nfa1$accept, nfa2$initial)
      nfaStack <- c(nfaStack, list(list(initial = nfa1$initial, accept = nfa2$accept)))
    } else if (c == '|') {
      nfa2 <- tail(nfaStack, 1)[[1]]
      nfaStack <- head(nfaStack, -1)
      nfa1 <- tail(nfaStack, 1)[[1]]
      nfaStack <- head(nfaStack, -1)
      initial <- createState()
      accept <- createState()
      setEdge1(initial, nfa1$initial)
      setEdge2(initial, nfa2$initial)
      setEdge1(nfa1$accept, accept)
      setEdge1(nfa2$accept, accept)
      nfaStack <- c(nfaStack, list(list(initial = initial, accept = accept)))
    } else if (c == '+') {
      nfa1 <- tail(nfaStack, 1)[[1]]
      nfaStack <- head(nfaStack, -1)
      initial <- createState()
      accept <- createState()
      setEdge1(initial, nfa1$initial)
      setEdge1(nfa1$accept, nfa1$initial)
      setEdge2(nfa1$accept, accept)
      nfaStack <- c(nfaStack, list(list(initial = initial, accept = accept)))
    } else if (c == '?') {
      nfa1 <- tail(nfaStack, 1)[[1]]
      nfaStack <- head(nfaStack, -1)
      initial <- createState()
      accept <- createState()
      setEdge1(initial, nfa1$initial)
      setEdge2(initial, accept)
      setEdge1(nfa1$accept, accept)
      nfaStack <- c(nfaStack, list(list(initial = initial, accept = accept)))
    } else {
      # Literal character
      initial <- createState(c)
      accept <- createState()
      setEdge1(initial, accept)
      nfaStack <- c(nfaStack, list(list(initial = initial, accept = accept)))
    }
  }
  tail(nfaStack, 1)[[1]]
}

# Function to compute the epsilon closure of a state
followes <- function(stateID) {
  states <- c()
  stack <- c(stateID)
  while (length(stack) > 0) {
    s <- tail(stack, 1)
    stack <- head(stack, -1)
    if (!(s %in% states)) {
      states <- c(states, s)
      if (is.null(stateList[[s]]$label)) {
        if (!is.null(stateList[[s]]$edge1)) {
          stack <- c(stack, stateList[[s]]$edge1)
        }
        if (!is.null(stateList[[s]]$edge2)) {
          stack <- c(stack, stateList[[s]]$edge2)
        }
      }
    }
  }
  states
}

# Function to match a string against the regex
matchRegex <- function(infix, str) {
  postfix <- shunt(infix)
  stateCounter <<- 0  # Reset stateCounter
  stateList <<- list()  # Reset stateList
  nfa <- compileRegex(postfix)
  current <- followes(nfa$initial)
  cList <- strsplit(str, "")[[1]]
  for (c in cList) {
    nextStates <- c()
    for (state in current) {
      if (!is.null(stateList[[state]]$label) && stateList[[state]]$label == c) {
        nextStates <- union(nextStates, followes(stateList[[state]]$edge1))
      }
    }
    current <- nextStates
  }
  nfa$accept %in% current
}

# Test cases
infixes <- c("a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c")
strings <- c("", "abc", "abbc", "abcc", "abad", "abbbc")

for (infix in infixes) {
  for (str in strings) {
    result <- matchRegex(infix, str)
    cat(ifelse(result, "True ", "False "), infix, " ", str, "\n")
  }
  cat("\n")
}


Rust

use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use std::hash::{Hash, Hasher};
use std::rc::Rc;

// Define the State structure
#[derive(Debug)]
struct State {
    label: Option<char>, // None represents epsilon transitions
    edge1: Option<Rc<RefCell<State>>>,
    edge2: Option<Rc<RefCell<State>>>,
}

impl State {
    fn new(label: Option<char>) -> Self {
        State {
            label,
            edge1: None,
            edge2: None,
        }
    }
}

// Wrapper struct for Rc<RefCell<State>>
#[derive(Debug, Clone)]
struct RcState(Rc<RefCell<State>>);

impl PartialEq for RcState {
    fn eq(&self, other: &Self) -> bool {
        Rc::ptr_eq(&self.0, &other.0)
    }
}

impl Eq for RcState {}

impl Hash for RcState {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Hash the pointer address
        let ptr = Rc::as_ptr(&self.0);
        ptr.hash(state);
    }
}

// Define the NFA structure
#[derive(Debug)]
struct NFA {
    initial: Option<Rc<RefCell<State>>>,
    accept: Option<Rc<RefCell<State>>>,
}

impl NFA {
    fn new(initial: Option<Rc<RefCell<State>>>, accept: Option<Rc<RefCell<State>>>) -> Self {
        NFA { initial, accept }
    }
}

// Function to convert infix regex to postfix using the Shunting Yard algorithm
fn shunt(infix: &str) -> String {
    let mut specials = HashMap::new();
    specials.insert('*', 60);
    specials.insert('+', 55);
    specials.insert('?', 50);
    specials.insert('.', 40);
    specials.insert('|', 20);

    let mut postfix = String::new();
    let mut stack: Vec<char> = Vec::new();

    for c in infix.chars() {
        if c == '(' {
            stack.push(c);
        } else if c == ')' {
            while let Some(&top) = stack.last() {
                if top == '(' {
                    break;
                } else {
                    postfix.push(stack.pop().unwrap());
                }
            }
            if let Some('(') = stack.last() {
                stack.pop();
            }
        } else if specials.contains_key(&c) {
            while let Some(&top) = stack.last() {
                if let Some(&top_prec) = specials.get(&top) {
                    if specials[&c] <= top_prec {
                        postfix.push(stack.pop().unwrap());
                        continue;
                    }
                }
                break;
            }
            stack.push(c);
        } else {
            postfix.push(c);
        }
    }

    while let Some(top) = stack.pop() {
        postfix.push(top);
    }

    postfix
}

// Function to compute the epsilon closure of a state
fn followes(state: Option<Rc<RefCell<State>>>) -> HashSet<RcState> {
    let mut states = HashSet::new();
    let mut stack = Vec::new();

    if let Some(state_rc) = state {
        stack.push(RcState(state_rc));
    }

    while let Some(s_rcstate) = stack.pop() {
        if !states.contains(&s_rcstate) {
            states.insert(s_rcstate.clone());
            let s = s_rcstate.0.borrow();
            if s.label.is_none() {
                if let Some(ref edge1) = s.edge1 {
                    stack.push(RcState(edge1.clone()));
                }
                if let Some(ref edge2) = s.edge2 {
                    stack.push(RcState(edge2.clone()));
                }
            }
        }
    }

    states
}

// Function to compile postfix regex into an NFA
fn compile_regex(postfix: &str) -> NFA {
    let mut nfa_stack: Vec<NFA> = Vec::new();

    for c in postfix.chars() {
        if c == '*' {
            let nfa1 = nfa_stack.pop().unwrap();
            let initial = Rc::new(RefCell::new(State::new(None)));
            let accept = Rc::new(RefCell::new(State::new(None)));
            initial.borrow_mut().edge1 = nfa1.initial.clone();
            initial.borrow_mut().edge2 = Some(accept.clone());
            if let Some(ref accept_state) = nfa1.accept {
                accept_state.borrow_mut().edge1 = nfa1.initial.clone();
                accept_state.borrow_mut().edge2 = Some(accept.clone());
            }
            nfa_stack.push(NFA::new(Some(initial), Some(accept)));
        } else if c == '.' {
            let nfa2 = nfa_stack.pop().unwrap();
            let nfa1 = nfa_stack.pop().unwrap();
            if let Some(ref accept_state) = nfa1.accept {
                accept_state.borrow_mut().edge1 = nfa2.initial.clone();
            }
            nfa_stack.push(NFA::new(nfa1.initial.clone(), nfa2.accept.clone()));
        } else if c == '|' {
            let nfa2 = nfa_stack.pop().unwrap();
            let nfa1 = nfa_stack.pop().unwrap();
            let initial = Rc::new(RefCell::new(State::new(None)));
            let accept = Rc::new(RefCell::new(State::new(None)));
            initial.borrow_mut().edge1 = nfa1.initial.clone();
            initial.borrow_mut().edge2 = nfa2.initial.clone();
            if let Some(ref accept_state1) = nfa1.accept {
                accept_state1.borrow_mut().edge1 = Some(accept.clone());
            }
            if let Some(ref accept_state2) = nfa2.accept {
                accept_state2.borrow_mut().edge1 = Some(accept.clone());
            }
            nfa_stack.push(NFA::new(Some(initial), Some(accept)));
        } else if c == '+' {
            let nfa1 = nfa_stack.pop().unwrap();
            let initial = Rc::new(RefCell::new(State::new(None)));
            let accept = Rc::new(RefCell::new(State::new(None)));
            initial.borrow_mut().edge1 = nfa1.initial.clone();
            if let Some(ref accept_state) = nfa1.accept {
                accept_state.borrow_mut().edge1 = nfa1.initial.clone();
                accept_state.borrow_mut().edge2 = Some(accept.clone());
            }
            nfa_stack.push(NFA::new(Some(initial), Some(accept)));
        } else if c == '?' {
            let nfa1 = nfa_stack.pop().unwrap();
            let initial = Rc::new(RefCell::new(State::new(None)));
            let accept = Rc::new(RefCell::new(State::new(None)));
            initial.borrow_mut().edge1 = nfa1.initial.clone();
            initial.borrow_mut().edge2 = Some(accept.clone());
            if let Some(ref accept_state) = nfa1.accept {
                accept_state.borrow_mut().edge1 = Some(accept.clone());
            }
            nfa_stack.push(NFA::new(Some(initial), Some(accept)));
        } else {
            let initial = Rc::new(RefCell::new(State::new(Some(c))));
            let accept = Rc::new(RefCell::new(State::new(None)));
            initial.borrow_mut().edge1 = Some(accept.clone());
            nfa_stack.push(NFA::new(Some(initial), Some(accept)));
        }
    }

    nfa_stack.pop().unwrap()
}

// Function to match a string against the regex
fn match_regex(infix: &str, s: &str) -> bool {
    let postfix = shunt(infix);
    let nfa = compile_regex(&postfix);

    let mut current = followes(nfa.initial.clone());
    let mut next_states = HashSet::new();

    for c in s.chars() {
        for state_rcstate in &current {
            let state = state_rcstate.0.borrow();
            if state.label == Some(c) {
                let follow = followes(state.edge1.clone());
                next_states.extend(follow);
            }
        }
        current = next_states;
        next_states = HashSet::new();
    }

    if let Some(accept_state) = nfa.accept {
        current.contains(&RcState(accept_state))
    } else {
        false
    }
}

fn main() {
    let infixes = vec!["a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"];
    let strings = vec!["", "abc", "abbc", "abcc", "abad", "abbbc"];

    for infix in &infixes {
        for s in &strings {
            let result = match_regex(infix, s);
            println!("{} {} {}", if result { "True" } else { "False" }, infix, s);
        }
        println!("");
    }
}

Wren

Translation of: Python
Library: Wren-dynamic
import "./dynamic" for Struct

// label, character label - null for epsilon
// edge1, first transition
// edge2, second transition
var State = Struct.create("State", ["label", "edge1", "edge2"])

var NFA = Struct.create("NFA", ["initial", "accept"])

var shunt = Fn.new { |infix|
    var specials = { "*": 60, "+": 55, "?": 50, ".": 40, "|": 20 }
    var postfix = ""
    var stack = []
    for (c in infix) {
        if (c == "(") {
            stack.add(c)
        } else if (c == ")") {
            while (!stack.isEmpty && stack[-1] != "(") {
                postfix = postfix + stack.removeAt(-1)
            }
            if (!stack.isEmpty) stack.removeAt(-1) // Remove "("
        } else if (specials.containsKey(c)) {
            while (!stack.isEmpty && specials.containsKey(stack[-1]) &&
                   specials[c] <= specials[stack[-1]]) {
                postfix = postfix + stack.removeAt(-1)
            }
            stack.add(c)
        } else {
            postfix = postfix + c
        }
    }
    while (!stack.isEmpty) postfix = postfix + stack.removeAt(-1)
    return postfix
}

var followes = Fn.new { |state|
    var states = []
    var stack = [state]
    while (!stack.isEmpty) {
        var s = stack.removeAt(-1)
        if (!states.contains(s)) {
            states.add(s)
            if (!s.label) {  // Epsilon transition
                if (s.edge1) stack.add(s.edge1)
                if (s.edge2) stack.add(s.edge2)
            }
        }
    }
    return states
}

var compileRegex = Fn.new { |postfix|
    var nfaStack = []
    for (c in postfix) {
        if (c == "*") {
            var nfa1 = nfaStack.removeAt(-1)
            var initial = State.new(null, null, null)
            var accept  = State.new(null, null, null)
            initial.edge1 = nfa1.initial
            initial.edge2 = accept
            nfa1.accept.edge1 = nfa1.initial
            nfa1.accept.edge2 = accept
            nfaStack.add(NFA.new(initial, accept))
        } else if (c == ".") {
            var nfa2 = nfaStack.removeAt(-1)
            var nfa1 = nfaStack.removeAt(-1)
            nfa1.accept.edge1 = nfa2.initial
            nfaStack.add(NFA.new(nfa1.initial, nfa2.accept))
        } else if (c == "|") {
            var nfa2 = nfaStack.removeAt(-1)
            var nfa1 = nfaStack.removeAt(-1)
            var initial = State.new(null, null, null)
            var accept  = State.new(null, null, null)
            initial.edge1 = nfa1.initial
            initial.edge2 = nfa2.initial
            nfa1.accept.edge1 = accept
            nfa2.accept.edge1 = accept
            nfaStack.add(NFA.new(initial, accept))
        } else if (c == "+") {
            var nfa1 = nfaStack.removeAt(-1)
            var initial = State.new(null, null, null)
            var accept  = State.new(null, null, null)
            initial.edge1 = nfa1.initial
            nfa1.accept.edge1 = nfa1.initial
            nfa1.accept.edge2 = accept
            nfaStack.add(NFA.new(initial, accept))
        } else if (c == "?") {
            var nfa1 = nfaStack.removeAt(-1)
            var initial = State.new(null, null, null)
            var accept  = State.new(null, null, null)
            initial.edge1 = nfa1.initial
            initial.edge2 = accept
            nfa1.accept.edge1 = accept
            nfaStack.add(NFA.new(initial, accept))
        } else {  // Literal character
            var initial = State.new(c, null, null)
            var accept  = State.new(null, null, null)
            initial.edge1 = accept
            nfaStack.add(NFA.new(initial, accept))
        }
    }
    return nfaStack.removeAt(-1)
}

var matchRegex = Fn.new { |infix, s|
    var postfix = shunt.call(infix)
    // Uncomment the next line to see postfix expression
    // System.print("Postfix: %(postfix)")
    var nfa = compileRegex.call(postfix)
    var current = followes.call(nfa.initial)
    var nextStates = []
    for (c in s) {
        for (state in current) {
            if (state.label == c) {
                var follow = followes.call(state.edge1)
                for (st in follow) {
                   if (!nextStates.contains(st)) nextStates.add(st)
                }
            }
        }
        current = nextStates
        nextStates = []
    }
    return current.contains(nfa.accept)
}

var infixes = ["a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c"]
var strings = ["", "abc", "abbc", "abcc", "abad", "abbbc"]

for (infix in infixes) {
    for (s in strings) {
        var result = matchRegex.call(infix, s)
        System.print((result ? "True  " : "False ") + infix + " " + s)
    }
    System.print()
}
Output:
False a.b.c* 
True  a.b.c* abc
False a.b.c* abbc
True  a.b.c* abcc
False a.b.c* abad
False a.b.c* abbbc

False a.(b|d).c* 
True  a.(b|d).c* abc
False a.(b|d).c* abbc
True  a.(b|d).c* abcc
False a.(b|d).c* abad
False a.(b|d).c* abbbc

True  (a.(b|d))* 
False (a.(b|d))* abc
False (a.(b|d))* abbc
False (a.(b|d))* abcc
True  (a.(b|d))* abad
False (a.(b|d))* abbbc

False a.(b.b)*.c 
False a.(b.b)*.c abc
True  a.(b.b)*.c abbc
False a.(b.b)*.c abcc
False a.(b.b)*.c abad
False a.(b.b)*.c abbbc

Zig

Works with: Zig version 0.14dev
Translation of: C++
const std = @import("std");
const mem = std.mem;

pub fn main() !void {
    // ------------------------------------------------ allocator
    // https://ziglang.org/documentation/master/std/#std.heap.ArenaAllocator
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    var state_pool = StatePool.init(std.heap.page_allocator);
    defer state_pool.deinit();
    // ----------------------------------------------------------
    const stdout = std.io.getStdOut();
    const writer = stdout.writer();

    const infixes = [_][]const u8{ "a.b.c*", "a.(b|d).c*", "(a.(b|d))*", "a.(b.b)*.c" };
    const strings = [_][]const u8{ "", "abc", "abbc", "abcc", "abad", "abbbc" };

    for (infixes) |infix| {
        for (strings) |str| {
            const result = try matchRegex(allocator, &state_pool, infix, str);
            _ = arena.reset(.retain_capacity); // reset allocated memory
            _ = state_pool.reset(.retain_capacity);
            try writer.print("{} {s} {s}\n", .{ result, infix, str });
        }
        try writer.writeByte('\n');
    }
}

// Function to match a string against the regex
fn matchRegex(allocator: mem.Allocator, state_pool: *StatePool, infix: []const u8, str: []const u8) !bool {
    const postfix = try shunt(allocator, infix);
    // Uncomment the next line to see the postfix expression
    // std.debug.print("Postfix: {s}\n", .{postfix});

    const nfa = try compileRegex(allocator, state_pool, postfix);

    var current: StateSet = try followes(allocator, nfa.initial.?);
    var nextStates = StateSet.init(allocator);

    for (str) |c| {
        for (current.keys()) |state|
            if (state.label == c) {
                const follow = try followes(allocator, state.edge1.?);
                for (follow.keys()) |state2|
                    try nextStates.put(state2, {});
            };
        current = nextStates;
        nextStates.clearRetainingCapacity();
    }
    return current.contains(nfa.accept.?);
}

/// Function to convert infix regex to postfix using the Shunting Yard algorithm
fn shunt(allocator: mem.Allocator, infix: []const u8) ![]const u8 {
    var specials = blk: {
        const specialsK = [_]u8{ '*', '+', '?', '.', '|' };
        const specialsV = [specialsK.len]u8{ 60, 55, 50, 40, 20 };

        var specials = std.AutoArrayHashMap(u8, u8).init(allocator);
        for (specialsK, specialsV) |k, v|
            try specials.put(k, v);
        break :blk specials;
    };
    var postfix = std.ArrayList(u8).init(allocator);
    var stack = Stack(u8).init(allocator);

    for (infix) |c| {
        if (c == '(')
            try stack.push(c)
        else if (c == ')') {
            while (!stack.isEmpty() and stack.top() != '(')
                try postfix.append(stack.pop());
            if (!stack.isEmpty())
                _ = stack.pop(); // Remove '('
        } else if (specials.contains(c)) {
            while (!stack.isEmpty() and specials.contains(stack.top()) and specials.get(c).? <= specials.get(stack.top()).?)
                try postfix.append(stack.pop());
            try stack.push(c);
        } else {
            try postfix.append(c);
        }
    }
    while (!stack.isEmpty())
        try postfix.append(stack.pop());
    return postfix.toOwnedSlice();
}

const StatePool = std.heap.MemoryPoolExtra(State, .{});
const StateSet = std.AutoArrayHashMap(*State, void);

const State = struct {
    label: u8 = 0, // Character label, '\0' for epsilon
    edge1: ?*State = null, // First transition
    edge2: ?*State = null, // Second transition

    fn init(label: u8) State {
        return .{ .label = label };
    }
};

const NFA = struct {
    initial: ?*State,
    accept: ?*State,

    fn init(initial: ?*State, accept: ?*State) NFA {
        return .{ .initial = initial, .accept = accept };
    }
};

/// Function to compute the epsilon closure of a state
fn followes(allocator: mem.Allocator, state: *State) !StateSet {
    var states = StateSet.init(allocator);
    var stack = Stack(*State).init(allocator);
    try stack.push(state);
    while (!stack.isEmpty()) {
        const s = stack.pop();
        if (!states.contains(s)) {
            try states.put(s, {});
            if (s.label == 0) { // Epsilon transition
                if (s.edge1) |edge1| try stack.push(edge1);
                if (s.edge2) |edge2| try stack.push(edge2);
            }
        }
    }
    return states;
}

// Function to compile postfix regex into an NFA
fn compileRegex(allocator: mem.Allocator, state_pool: *StatePool, postfix: []const u8) !NFA {
    var nfa_stack = Stack(NFA).init(allocator);

    for (postfix) |c| {
        switch (c) {
            '*' => {
                var nfa1 = nfa_stack.pop();
                var initial = try state_pool.create();
                const accept = try state_pool.create();
                initial.* = State{};
                accept.* = State{};
                initial.edge1 = nfa1.initial;
                initial.edge2 = accept;
                nfa1.accept.?.edge1 = nfa1.initial;
                nfa1.accept.?.edge2 = accept;
                try nfa_stack.push(NFA.init(initial, accept));
            },
            '.' => {
                const nfa2 = nfa_stack.pop();
                const nfa1 = nfa_stack.pop();
                nfa1.accept.?.edge1 = nfa2.initial;
                try nfa_stack.push(NFA.init(nfa1.initial, nfa2.accept));
            },
            '|' => {
                const nfa2 = nfa_stack.pop();
                const nfa1 = nfa_stack.pop();
                var initial = try state_pool.create();
                const accept = try state_pool.create();
                initial.* = State{};
                accept.* = State{};
                initial.edge1 = nfa1.initial;
                initial.edge2 = nfa2.initial;
                nfa1.accept.?.edge1 = accept;
                nfa2.accept.?.edge1 = accept;
                try nfa_stack.push(NFA.init(initial, accept));
            },
            '+' => {
                const nfa1 = nfa_stack.pop();
                var initial = try state_pool.create();
                const accept = try state_pool.create();
                initial.* = State{};
                accept.* = State{};
                initial.edge1 = nfa1.initial;
                nfa1.accept.?.edge1 = nfa1.initial;
                nfa1.accept.?.edge2 = accept;
                try nfa_stack.push(NFA.init(initial, accept));
            },
            '?' => {
                const nfa1 = nfa_stack.pop();
                var initial = try state_pool.create();
                const accept = try state_pool.create();
                initial.* = State{};
                accept.* = State{};
                initial.edge1 = nfa1.initial;
                initial.edge2 = accept;
                nfa1.accept.?.edge1 = accept;
                try nfa_stack.push(NFA.init(initial, accept));
            },
            else => {
                // Literal character
                var initial = try state_pool.create();
                const accept = try state_pool.create();
                initial.* = State.init(c);
                accept.* = State{};
                initial.edge1 = accept;
                try nfa_stack.push(NFA.init(initial, accept));
            },
        }
    }
    return nfa_stack.pop();
}

// An ad hoc generic stack implementation.
fn Stack(comptime T: type) type {
    return struct {
        const Self = @This();
        stack: std.ArrayList(T),

        fn init(allocator: mem.Allocator) Self {
            return Self{
                .stack = std.ArrayList(T).init(allocator),
            };
        }
        // fn deinit(self: *Self) void {
        //     self.stack.deinit();
        // }
        fn push(self: *Self, node: T) !void {
            return try self.stack.append(node);
        }
        fn pop(self: *Self) T {
            return self.stack.pop();
        }
        fn top(self: *const Self) T {
            return self.stack.items[self.stack.items.len - 1];
        }
        fn isEmpty(self: *const Self) bool {
            return self.stack.items.len == 0;
        }
    };
}
Cookies help us deliver our services. By using our services, you agree to our use of cookies.