Execute a Markov algorithm
| This page uses content from Wikipedia. The original article was at Markov_algorithm. The list of authors can be seen in the page history. As with Rosetta Code, the text of Wikipedia is available under the GNU FDL. (See links for details on variance) |
You are encouraged to solve this task according to the task description, using any language you may know.
Create an interpreter for a Markov Algorithm. Rules have the syntax:
<ruleset> ::= ((<comment> | <rule>) <newline>+)*
<comment> ::= # {<any character>}
<rule> ::= <pattern> <whitespace> -> <whitespace> [.] <replacement>
<whitespace> ::= (<tab> | <space>) [<whitespace>]
There is one rule per line. If there is a . present before the <replacement>, then this is a terminating rule in which case the interpreter must halt execution. A ruleset consists of a sequence of rules, with optional comments.
Contents |
[edit] Rulesets
Use the following tests on entries:
[edit] Ruleset 1
# This rules file is extracted from Wikipedia: # http://en.wikipedia.org/wiki/Markov_Algorithm A -> apple B -> bag S -> shop T -> the the shop -> my brother a never used -> .terminating rule
Sample text of:
-
I bought a B of As from T S.
Should generate the output:
-
I bought a bag of apples from my brother.
[edit] Ruleset 2
A test of the terminating rule
# Slightly modified from the rules on Wikipedia A -> apple B -> bag S -> .shop T -> the the shop -> my brother a never used -> .terminating rule
Sample text of:
-
I bought a B of As from T S.
Should generate:
-
I bought a bag of apples from T shop.
[edit] Ruleset 3
This tests for correct substitution order and may trap simple regexp based replacement routines if special regexp characters are not escaped.
# BNF Syntax testing rules A -> apple WWWW -> with Bgage -> ->.* B -> bag ->.* -> money W -> WW S -> .shop T -> the the shop -> my brother a never used -> .terminating rule
Sample text of:
-
I bought a B of As W my Bgage from T S.
Should generate:
-
I bought a bag of apples with my money from T shop.
[edit] Ruleset 4
This tests for correct order of scanning of rules, and may trap replacement routines that scan in the wrong order. It implements a general unary multiplication engine. (Note that the input expression must be placed within underscores in this implementation.)
### Unary Multiplication Engine, for testing Markov Algorithm implementations ### By Donal Fellows. # Unary addition engine _+1 -> _1+ 1+1 -> 11+ # Pass for converting from the splitting of multiplication into ordinary # addition 1! -> !1 ,! -> !+ _! -> _ # Unary multiplication by duplicating left side, right side times 1*1 -> x,@y 1x -> xX X, -> 1,1 X1 -> 1X _x -> _X ,x -> ,X y1 -> 1y y_ -> _ # Next phase of applying 1@1 -> x,@y 1@_ -> @_ ,@_ -> !_ ++ -> + # Termination cleanup for addition _1 -> 1 1+_ -> 1 _+_ ->
Sample text of:
-
_1111*11111_
should generate the output:
-
11111111111111111111
[edit] Ruleset 5
A simple Turing machine, implementing a three-state busy beaver. The tape consists of 0s and 1s, the states are A, B, C and H (for Halt), and the head position is indicated by writing the state letter before the character where the head is. All parts of the initial tape the machine operates on have to be given in the input.
Besides demonstrating that the Markov algorithm is Turing-complete, it also made me catch a bug in the C++ implementation which wasn't caught by the first four rulesets.
# Turing machine: three-state busy beaver # # state A, symbol 0 => write 1, move right, new state B A0 -> 1B # state A, symbol 1 => write 1, move left, new state C 0A1 -> C01 1A1 -> C11 # state B, symbol 0 => write 1, move left, new state A 0B0 -> A01 1B0 -> A11 # state B, symbol 1 => write 1, move right, new state B B1 -> 1B # state C, symbol 0 => write 1, move left, new state B 0C0 -> B01 1C0 -> B11 # state C, symbol 1 => write 1, move left, halt 0C1 -> H01 1C1 -> H11
This ruleset should turn
-
000000A000000
into
-
00011H1111000
[edit] Examples
[edit] Ada
markov.ads:
with Ada.Strings.Unbounded;
package Markov is
use Ada.Strings.Unbounded;
type Ruleset (Length : Natural) is private;
type String_Array is array (Positive range <>) of Unbounded_String;
function Parse (S : String_Array) return Ruleset;
function Apply (R : Ruleset; S : String) return String;
private
type Entry_Kind is (Comment, Rule);
type Set_Entry (Kind : Entry_Kind := Rule) is record
case Kind is
when Rule =>
Source : Unbounded_String;
Target : Unbounded_String;
Is_Terminating : Boolean;
when Comment =>
Text : Unbounded_String;
end case;
end record;
subtype Rule_Entry is Set_Entry (Kind => Rule);
type Entry_Array is array (Positive range <>) of Set_Entry;
type Ruleset (Length : Natural) is record
Entries : Entry_Array (1 .. Length);
end record;
end Markov;
markov.adb:
package body Markov is
function Parse (S : String_Array) return Ruleset is
Result : Ruleset (Length => S'Length);
begin
for I in S'Range loop
if Length (S (I)) = 0 or else Element (S (I), 1) = '#' then
Result.Entries (I) := (Kind => Comment, Text => S (I));
else
declare
Separator : Natural;
Terminating : Boolean;
Target : Unbounded_String;
begin
Separator := Index (S (I), " -> ");
if Separator = 0 then
raise Constraint_Error;
end if;
Target :=
Unbounded_Slice
(Source => S (I),
Low => Separator + 4,
High => Length (S (I)));
Terminating := Length (Target) > 0
and then Element (Target, 1) = '.';
if Terminating then
Delete (Source => Target, From => 1, Through => 1);
end if;
Result.Entries (I) :=
(Kind => Rule,
Source => Unbounded_Slice
(Source => S (I),
Low => 1,
High => Separator - 1),
Target => Target,
Is_Terminating => Terminating);
end;
end if;
end loop;
return Result;
end Parse;
procedure Apply
(R : Rule_Entry;
S : in out Unbounded_String;
Modified : in out Boolean)
is
Pattern : String := To_String (R.Source);
Where : Natural := Index (S, Pattern);
begin
while Where /= 0 loop
Modified := True;
Replace_Slice
(Source => S,
Low => Where,
High => Where + Pattern'Length - 1,
By => To_String (R.Target));
Where := Index (S, Pattern, Where + Length (R.Target));
end loop;
end Apply;
function Apply (R : Ruleset; S : String) return String is
Result : Unbounded_String := To_Unbounded_String (S);
Current_Rule : Set_Entry;
Modified : Boolean := False;
begin
loop
Modified := False;
for I in R.Entries'Range loop
Current_Rule := R.Entries (I);
if Current_Rule.Kind = Rule then
Apply (Current_Rule, Result, Modified);
exit when Current_Rule.Is_Terminating or else Modified;
end if;
end loop;
exit when not Modified;
end loop;
return To_String (Result);
end Apply;
end Markov;
test_markov.adb:
with Ada.Command_Line;
with Ada.Text_IO.Unbounded_IO;
with Ada.Strings.Unbounded;
with Markov;
procedure Test_Markov is
use Ada.Strings.Unbounded;
package IO renames Ada.Text_IO.Unbounded_IO;
Rule_File : Ada.Text_IO.File_Type;
Line_Count : Natural := 0;
begin
if Ada.Command_Line.Argument_Count /= 2 then
Ada.Text_IO.Put_Line ("Usage: test_markov ruleset_file source_file");
return;
end if;
Ada.Text_IO.Open
(File => Rule_File,
Mode => Ada.Text_IO.In_File,
Name => Ada.Command_Line.Argument (1));
while not Ada.Text_IO.End_Of_File (Rule_File) loop
Ada.Text_IO.Skip_Line (Rule_File);
Line_Count := Line_Count + 1;
end loop;
declare
Lines : Markov.String_Array (1 .. Line_Count);
begin
Ada.Text_IO.Reset (Rule_File);
for I in Lines'Range loop
Lines (I) := IO.Get_Line (Rule_File);
end loop;
Ada.Text_IO.Close (Rule_File);
declare
Ruleset : Markov.Ruleset := Markov.Parse (Lines);
Source_File : Ada.Text_IO.File_Type;
begin
Ada.Text_IO.Open
(File => Source_File,
Mode => Ada.Text_IO.In_File,
Name => Ada.Command_Line.Argument (2));
while not Ada.Text_IO.End_Of_File (Source_File) loop
Ada.Text_IO.Put_Line
(Markov.Apply (Ruleset, Ada.Text_IO.Get_Line (Source_File)));
end loop;
Ada.Text_IO.Close (Source_File);
end;
end;
end Test_Markov;
Output (rulesX contains the ruleset of above examples and testX the example text):
$ ./test_markov rules1 test1 I bought a bag of apples from my brother. $ ./test_markov rules2 test2 I bought a bag of apples from T shop. $ ./test_markov rules3 test3 I bought a bag of apples with my money from T shop. $ ./test_markov rules4 test4 11111111111111111111 $ ./test_markov rules5 test5 00011H1111000
[edit] AutoHotkey
;---------------------------------------------------------------------------
; Markov Algorithm.ahk
; by wolf_II
;---------------------------------------------------------------------------
; interpreter for a Markov Algorithm
;---------------------------------------------------------------------------
;---------------------------------------------------------------------------
AutoExecute: ; auto-execute section of the script
;---------------------------------------------------------------------------
#SingleInstance, Force ; only one instance allowed
#NoEnv ; don't check empty variables
StartupDir := A_WorkingDir ; remember startup directory
SetWorkingDir, %A_ScriptDir% ; change directoy
StringCaseSense, On ; case sensitive comparisons
;-----------------------------------------------------------------------
AppName := "Markov Algorithm"
Gosub, GuiCreate
Gui, Show,, %AppName%
Return
;---------------------------------------------------------------------------
GuiCreate: ; create the GUI
;---------------------------------------------------------------------------
; GUI options
Gui, -MinimizeBox
Gui, Add, Edit, y0 h0 ; catch the focus
; Ruleset
Gui, Add, GroupBox, w445 h145 Section, Ruleset
Gui, Add, Edit, xs+15 ys+20 w300 r8 vRuleset
Gui, Add, Button, x+15 w100, Load Ruleset
Gui, Add, Button, wp, Save Ruleset
Gui, Add, Button, w30, 1
Gui, Add, Button, x+5 wp, 2
Gui, Add, Button, x+5 wp, 3
Gui, Add, Button, xs+330 y+6 wp, 4
Gui, Add, Button, x+5 wp, 5
; String
Gui, Add, GroupBox, xs w445 h75 Section, String
Gui, Add, Edit, xs+15 ys+20 w300 vString
Gui, Add, Button, x+15 w100, Apply Ruleset
Gui, Add, Button, xp wp Hidden, Stop
Gui, Add, CheckBox, xs+15 yp+30 vSingleStepping, Single Stepping?
; Output
Gui, Add, GroupBox, xs w445 h235 Section, Output
Gui, Add, Edit, xs+15 ys+20 w415 r15 ReadOnly vOutput HwndhOut
Return
;---------------------------------------------------------------------------
GuiClose:
;---------------------------------------------------------------------------
ExitApp
Return
;---------------------------------------------------------------------------
ButtonLoadRuleset: ; load ruleset from file
;---------------------------------------------------------------------------
Gui, +OwnDialogs
FileSelectFile, RulesetFile,,, Load Ruleset, *.markov
If Not SubStr(RulesetFile, -6) = ".markov"
RulesetFile .= ".markov"
If FileExist(RulesetFile) {
FileRead, Ruleset, %RulesetFile%
GuiControl,, Ruleset, %Ruleset%
} Else
MsgBox, 16, Error - %AppName%, File not found:`n`n"%RulesetFile%"
Return
;---------------------------------------------------------------------------
ButtonSaveRuleset: ; save ruleset to file
;---------------------------------------------------------------------------
Gui, +OwnDialogs
Gui, Submit, NoHide
FileSelectFile, RulesetFile, S16,, Save Ruleset, *.markov
If Not SubStr(RulesetFile, -6) = ".markov"
RulesetFile .= ".markov"
FileDelete, %RulesetFile%
FileAppend, %Ruleset%, %RulesetFile%
Gui, Show
Return
;---------------------------------------------------------------------------
Button1: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm#Ruleset_1
;---------------------------------------------------------------------------
GuiControl,, Output ; clear output
GuiControl,, String, I bought a B of As from T S.
GuiControl,, Ruleset,
(LTrim
# This rules file is extracted from Wikipedia:
# http://en.wikipedia.org/wiki/Markov_Algorithm
A -> apple
B -> bag
S -> shop
T -> the
the shop -> my brother
a never used -> .terminating rule
)
Return
;---------------------------------------------------------------------------
Button2: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm#Ruleset_2
;---------------------------------------------------------------------------
GuiControl,, Output ; clear output
GuiControl,, String, I bought a B of As from T S.
GuiControl,, Ruleset,
(LTrim
# Slightly modified from the rules on Wikipedia
A -> apple
B -> bag
S -> .shop
T -> the
the shop -> my brother
a never used -> .terminating rule
)
Return
;---------------------------------------------------------------------------
Button3: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm#Ruleset_3
;---------------------------------------------------------------------------
GuiControl,, Output ; clear output
GuiControl,, String, I bought a B of As W my Bgage from T S.
GuiControl,, Ruleset,
(LTrim
# BNF Syntax testing rules
A -> apple
WWWW -> with
Bgage -> ->.*
B -> bag
->.* -> money
W -> WW
S -> .shop
T -> the
the shop -> my brother
a never used -> .terminating rule
)
Return
;---------------------------------------------------------------------------
Button4: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm#Ruleset_4
;---------------------------------------------------------------------------
GuiControl,, Output ; clear output
GuiControl,, String, _1111*11111_
GuiControl,, Ruleset,
(LTrim
### Unary Multiplication Engine, for testing Markov Algorithm implementations
### By Donal Fellows.
# Unary addition engine
_+1 -> _1+
1+1 -> 11+
# Pass for converting from the splitting of multiplication into ordinary
# addition
1! -> !1
,! -> !+
_! -> _
# Unary multiplication by duplicating left side, right side times
1*1 -> x,@y
1x -> xX
X, -> 1,1
X1 -> 1X
_x -> _X
,x -> ,X
y1 -> 1y
y_ -> _
# Next phase of applying
1@1 -> x,@y
1@_ -> @_
,@_ -> !_
++ -> +
# Termination cleanup for addition
_1 -> 1
1+_ -> 1
_+_ ->
)
Return
;---------------------------------------------------------------------------
Button5: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm#Ruleset_5
;---------------------------------------------------------------------------
GuiControl,, Output ; clear output
GuiControl,, String, 000000A000000
GuiControl,, Ruleset,
(LTrim
# Turing machine: three-state busy beaver
#
# state A, symbol 0 => write 1, move right, new state B
A0 -> 1B
# state A, symbol 1 => write 1, move left, new state C
0A1 -> C01
1A1 -> C11
# state B, symbol 0 => write 1, move left, new state A
0B0 -> A01
1B0 -> A11
# state B, symbol 1 => write 1, move right, new state B
B1 -> 1B
# state C, symbol 0 => write 1, move left, new state B
0C0 -> B01
1C0 -> B11
# state C, symbol 1 => write 1, move left, halt
0C1 -> H01
1C1 -> H11
)
Return
;---------------------------------------------------------------------------
ButtonApplyRuleset: ; flow control for Algorithm
;---------------------------------------------------------------------------
; prepare
Gui, Submit, NoHide
GuiControl,, Output ; clear
Controls(False) ; disable
Count := 0
Subst := True
Stop := False
; keep substituting for as long as necessary
While, Subst {
Subst := False ; reset control variable
IfEqual, Stop, 1, Break
Gosub, Algorithm
}
; clean up
Output("Substitution count: " Count)
Controls(True) ; re-enable
Return
;---------------------------------------------------------------------------
ButtonStop: ; this button is initially hidden
;---------------------------------------------------------------------------
Stop := True
Return
;---------------------------------------------------------------------------
Algorithm: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm
;---------------------------------------------------------------------------
; Parse the ruleset and apply each rule to the string. Whenever a rule
; has changed the string goto first rule. Continue until a encountering
; a terminating rule, or until no further changes to the strings are
; made.
;-----------------------------------------------------------------------
Loop, Parse, Ruleset, `n, `r ; always start from the beginning
{
; check for comment
If SubStr(A_LoopField, 1, 1) = "#"
Continue ; get next line
; split a rule into $Search, $Terminator and $Replace
LookFor := "(?P<Search>.+) -> (?P<Terminator>\.?)(?P<Replace>.+)"
RegExMatch(A_LoopField, LookFor, $)
; single stepping through possible substitutions
If SingleStepping
MsgBox,, %AppName%, % ""
. "Rule = """ A_LoopField """`n`n"
. "Search`t= """ $Search """`n"
. "Replace`t= """ $Replace """`n"
. "Termintor`t= """ ($Terminator ? "True" : "False") """`n"
; try to substitute
StringReplace, String, String, %$Search%, %$Replace%, UseErrorLevel
; any success?
If ErrorLevel { ; yes, substitution done
Count++ ; keep count
Subst := True ; set control variable
Output(String) ; write new string to output
}
; terminate?
If $Terminator { ; yes, terminate
Stop := True ; set control variable
Break ; back to flow control
}
; we are not yet terminated ...
If Subst ; but we just did a substitution
Break ; back to flow control
}
Return
;---------------------------------------------------------------------------
Controls(Bool) { ; [en|dis]able controls
;---------------------------------------------------------------------------
Enable := Bool ? "+" : "-"
Disable := Bool ? "-" : "+"
Loop, 2
GuiControl, %Disable%ReadOnly, % "Edit" A_Index + 1
Loop, 7
GuiControl, %Disable%Disabled, % "Button" A_Index + 1
GuiControl, %Disable%Disabled, Edit4
GuiControl, %Disable%Hidden, Button10
GuiControl, %Enable%Hidden, Button11
GuiControl, %Disable%Disabled, Button12
}
;---------------------------------------------------------------------------
Output(Text) { ; append text to output
;---------------------------------------------------------------------------
static EM_REPLACESEL = 0xC2
global hOut
Sleep, 100
Text .= "`r`n"
SendMessage, EM_REPLACESEL,, &Text,, ahk_id %hOut%
}
;---------- end of file ----------------------------------------------------
[edit] C
#include <stdio.h>output
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
typedef struct { char * s; size_t alloc_len; } string;
typedef struct {
char *pat, *repl;
int terminate;
} rule_t;
typedef struct {
int n;
rule_t *rules;
char *buf;
} ruleset_t;
void ruleset_del(ruleset_t *r)
{
if (r->rules) free(r->rules);
if (r->buf) free(r->buf);
free(r);
}
string * str_new(char *s)
{
int l = strlen(s);
string *str = malloc(sizeof(string));
str->s = malloc(l + 1);
strcpy(str->s, s);
str->alloc_len = l + 1;
return str;
}
void str_append(string *str, char *s, int len)
{
int l = strlen(str->s);
if (len == -1) len = strlen(s);
if (str->alloc_len < l + len + 1) {
str->alloc_len = l + len + 1;
str->s = realloc(str->s, str->alloc_len);
}
memcpy(str->s + l, s, len);
str->s[l + len] = '\0';
}
/* swap content of dest and src, and truncate src string */
void str_transfer(string *dest, string *src)
{
size_t tlen = dest->alloc_len;
dest->alloc_len = src->alloc_len;
src->alloc_len = tlen;
char *ts = dest->s;
dest->s = src->s;
src->s = ts;
src->s[0] = '\0';
}
void str_del(string *s)
{
if (s->s) free(s->s);
free(s);
}
void str_markov(string *str, ruleset_t *r)
{
int i, j, sl, pl;
int changed = 0, done = 0;
string *tmp = str_new("");
while (!done) {
changed = 0;
for (i = 0; !done && !changed && i < r->n; i++) {
pl = strlen(r->rules[i].pat);
sl = strlen(str->s);
for (j = 0; j < sl; j++) {
if (strncmp(str->s + j, r->rules[i].pat, pl))
continue;
str_append(tmp, str->s, j);
str_append(tmp, r->rules[i].repl, -1);
str_append(tmp, str->s + j + pl, -1);
str_transfer(str, tmp);
changed = 1;
if (r->rules[i].terminate)
done = 1;
break;
}
}
if (!changed) break;
}
str_del(tmp);
return;
}
ruleset_t* read_rules(char *name)
{
struct stat s;
char *buf;
size_t i, j, k, tmp;
rule_t *rules = 0;
int n = 0; /* number of rules */
int fd = open(name, O_RDONLY);
if (fd == -1) return 0;
fstat(fd, &s);
buf = malloc(s.st_size + 2);
read(fd, buf, s.st_size);
buf[s.st_size] = '\n';
buf[s.st_size + 1] = '\0';
close(fd);
for (i = j = 0; buf[i] != '\0'; i++) {
if (buf[i] != '\n') continue;
/* skip comments */
if (buf[j] == '#' || i == j) {
j = i + 1;
continue;
}
/* find the '->' */
for (k = j + 1; k < i - 3; k++)
if (isspace(buf[k]) && !strncmp(buf + k + 1, "->", 2))
break;
if (k >= i - 3) {
printf("parse error: no -> in %.*s\n", i - j, buf + j);
break;
}
/* left side: backtrack through whitespaces */
for (tmp = k; tmp > j && isspace(buf[--tmp]); );
if (tmp < j) {
printf("left side blank? %.*s\n", i - j, buf + j);
break;
}
buf[++tmp] = '\0';
/* right side */
for (k += 3; k < i && isspace(buf[++k]););
buf[i] = '\0';
rules = realloc(rules, sizeof(rule_t) * (1 + n));
rules[n].pat = buf + j;
if (buf[k] == '.') {
rules[n].terminate = 1;
rules[n].repl = buf + k + 1;
} else {
rules[n].terminate = 0;
rules[n].repl = buf + k;
}
n++;
j = i + 1;
}
ruleset_t *r = malloc(sizeof(ruleset_t));
r->buf = buf;
r->rules = rules;
r->n = n;
return r;
}
int test_rules(char *s, char *file)
{
ruleset_t * r = read_rules(file);
if (!r) return 0;
printf("Rules from '%s' ok\n", file);
string *ss = str_new(s);
printf("text: %s\n", ss->s);
str_markov(ss, r);
printf("markoved: %s\n", ss->s);
str_del(ss);
ruleset_del(r);
return printf("\n");
}
int main()
{
/* rule 1-5 are files containing rules from page top */
test_rules("I bought a B of As from T S.", "rule1");
test_rules("I bought a B of As from T S.", "rule2");
test_rules("I bought a B of As W my Bgage from T S.", "rule3");
test_rules("_1111*11111_", "rule4");
test_rules("000000A000000", "rule5");
return 0;
}
Rules from 'rule1' ok
text: I bought a B of As from T S.
markoved: I bought a bag of apples from my brother.
Rules from 'rule2' ok
text: I bought a B of As from T S.
markoved: I bought a bag of apples from T shop.
Rules from 'rule3' ok
text: I bought a B of As W my Bgage from T S.
markoved: I bought a bag of apples with my money from T shop.
Rules from 'rule4' ok
text: _1111*11111_
markoved: 11111111111111111111
Rules from 'rule5' ok
text: 000000A000000
markoved: 00011H1111000
[edit] C++
Note: Non-use of iswhite is intentional, since depending on the locale, other chars besides space and tab might be detected by that function.
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
struct rule
{
std::string pattern;
std::string replacement;
bool terminal;
rule(std::string pat, std::string rep, bool term):
pattern(pat),
replacement(rep),
terminal(term)
{
}
};
std::string const whitespace = " \t";
std::string::size_type const npos = std::string::npos;
bool is_whitespace(char c)
{
return whitespace.find(c) != npos;
}
std::vector<rule> read_rules(std::ifstream& rulefile)
{
std::vector<rule> rules;
std::string line;
while (std::getline(rulefile, line))
{
std::string::size_type pos;
// remove comments
pos = line.find('#');
if (pos != npos)
line.resize(pos);
// ignore lines consisting only of whitespace
if (line.find_first_not_of(whitespace) == npos)
continue;
// find "->" surrounded by whitespace
pos = line.find("->");
while (pos != npos && (pos == 0 || !is_whitespace(line[pos-1])))
pos = line.find("->", pos+1);
if (pos == npos || line.length() < pos+3 || !is_whitespace(line[pos+2]))
{
std::cerr << "invalid rule: " << line << "\n";
std::exit(EXIT_FAILURE);
}
std::string pattern = line.substr(0, pos-1);
std::string replacement = line.substr(pos+3);
// remove additional separating whitespace
pattern.erase(pattern.find_last_not_of(whitespace)+1);
replacement.erase(0, replacement.find_first_not_of(whitespace));
// test for terminal rule
bool terminal = !replacement.empty() && replacement[0] == '.';
if (terminal)
replacement.erase(0,1);
rules.push_back(rule(pattern, replacement, terminal));
}
return rules;
}
std::string markov(std::vector<rule> rules, std::string input)
{
std::string& output = input;
std::vector<rule>::iterator iter = rules.begin();
// Loop through each rule, transforming our current version
// with each rule.
while (iter != rules.end())
{
std::string::size_type pos = output.find(iter->pattern);
if (pos != npos)
{
output.replace(pos, iter->pattern.length(), iter->replacement);
if (iter->terminal)
break;
iter = rules.begin();
}
else
++iter;
}
return output;
}
int main(int argc, char* argv[])
{
if (argc != 3)
{
std::cout << "usage:\n " << argv[0] << " rulefile text\n";
return EXIT_FAILURE;
}
std::ifstream rulefile(argv[1]);
std::vector<rule> rules = read_rules(rulefile);
std::string input(argv[2]);
std::string output = markov(rules, input);
std::cout << output << "\n";
}
[edit] D
import std.stdio, std.array, std.file, std.regex, std.string;
void main() {
string[][] rules = readText("markov_rules.txt").splitLines().
split("");
string[] tests = readText("markov_tests.txt").splitLines();
assert(tests.length == rules.length);
// Faster to compile run-time regex
//auto regex = regex(r"^([^#]*?)\s+->\s+(\.?)(.*)");
// Compile-time regex creation
// DMD 2.059 uses about 130 MB RAM to compile this
enum regex = ctRegex!(r"^([^#]*?)\s+->\s+(\.?)(.*)");
foreach (i, test; tests) {
string[][] capt;
foreach (line; rules[i]) {
auto m = line.match(regex);
if (!m.empty)
capt ~= m.captures.array()[1 .. $];
}
REDO:
auto copy = test;
foreach (c; capt) {
test = test.replace(c[0], c[2]);
if (c[1] == ".")
break;
if (test != copy)
goto REDO;
}
writeln(test);
}
}
- Output:
I bought a bag of apples from my brother. I bought a bag of apples from T shop. I bought a bag of apples with my money from T shop. 11111111111111111111 00011H1111000
[edit] Go
package main
import (
"fmt"
"strings"
)
type testCase struct {
ruleSet, sample, output string
}
var testSet []testCase // initialized in separate source file
func main() {
fmt.Println("validating", len(testSet), "test cases")
var failures bool
for i, tc := range testSet {
if r := ma(tc.ruleSet, tc.sample); r != tc.output {
fmt.Println("test", i+1, "fail")
failures = true
}
}
if !failures {
fmt.Println("no failures")
}
}
type rule struct {
pat string
rep string
term bool
}
func ma(rs, s string) string {
// compile rules per task description
var rules []rule
for _, line := range strings.Split(rs, "\n") {
if line == "" || line[0] == '#' {
continue
}
a := strings.Index(line, "->")
if a == -1 {
fmt.Println("invalid rule:", line)
return ""
}
pat := line[:a]
for {
if pat == "" {
b := strings.Index(line[a+2:], "->")
if b == -1 {
fmt.Println("invalid rule:", line)
return ""
}
a += 2 + b
pat = line[:a]
continue
}
last := pat[len(pat)-1]
if last != ' ' && last != '\t' {
break
}
pat = pat[:len(pat)-1]
}
rep := line[a+2:]
for rep > "" && (rep[0] == ' ' || rep[0] == '\t') {
rep = rep[1:]
}
var term bool
if rep > "" && rep[0] == '.' {
term = true
rep = rep[1:]
}
rules = append(rules, rule{pat, rep, term})
}
// execute algorithm per WP
for r := 0; r < len(rules); {
pat := rules[r].pat
if f := strings.Index(s, pat); f == -1 {
r++
} else {
s = s[:f] + rules[r].rep + s[f+len(pat):]
if rules[r].term {
break
}
r = 0
}
}
return s
}
The rule set source file contains all the strings as literals, packaged into a data structure. It starts like this,
package main
func init() {
testSet = []testCase{
{
`# This rules file is extracted from Wikipedia:
# http://en.wikipedia.org/wiki/Markov_Algorithm
A -> apple
B -> bag
S -> shop
T -> the
the shop -> my brother
a never used -> .terminating rule`,
"I bought a B of As from T S.",
"I bought a bag of apples from my brother."},
{
`# Slightly modified from the rules on Wikipedia
A -> apple
B -> bag
...
Compile both files, link, and run. Output:
validating 5 test cases no failures
[edit] Haskell
This program expects a source file as an argument and uses the standard input and output devices for the algorithm's I/O.
import Data.List (isPrefixOf)
import Data.Maybe (catMaybes)
import Control.Monad
import Text.ParserCombinators.Parsec
import System.IO
import System.Environment (getArgs)
main = do
args <- getArgs
unless (length args == 1) $
fail "Please provide exactly one source file as an argument."
let sourcePath = head args
source <- readFile sourcePath
input <- getContents
case parse markovParser sourcePath source of
Right rules -> putStr $ runMarkov rules input
Left err -> hPutStrLn stderr $ "Parse error at " ++ show err
data Rule = Rule
{from :: String, terminating :: Bool, to :: String}
markovParser :: Parser [Rule]
markovParser = liftM catMaybes $
(comment <|> rule) `sepEndBy` many1 newline
where comment = char '#' >> skipMany nonnl >> return Nothing
rule = liftM Just $ liftM3 Rule
(manyTill (nonnl <?> "pattern character") $ try arrow)
(succeeds $ char '.')
(many nonnl)
arrow = ws >> string "->" >> ws <?> "whitespace-delimited arrow"
nonnl = noneOf "\n"
ws = many1 $ oneOf " \t"
succeeds p = option False $ p >> return True
runMarkov :: [Rule] -> String -> String
runMarkov rules s = f rules s
where f [] s = s
f (Rule from terminating to : rs) s = g "" s
where g _ "" = f rs s
g before ahead@(a : as) = if from `isPrefixOf` ahead
then let new = reverse before ++ to ++ drop (length from) ahead
in if terminating then new else f rules new
else g (a : before) as
[edit] Icon and Unicon
procedure main(A)
rules := loadRules(open(A[1],"r"))
every write(line := !&input, " -> ",apply(rules, line))
end
record rule(pat, term, rep)
procedure loadRules(f)
rules := []
every !f ? if not ="#" then put(rules,
rule(1(trim(tab(find("->"))),move(2),tab(many(' \t'))),
(="."|&null), trim(tab(0))))
return rules
end
procedure apply(rules, line)
s := line
repeat {
s ?:= tab(find((r := !rules).pat)) || r.rep || (move(*r.pat),tab(0))
if (s == line) | \r.term then return s else line := s
}
end
Sample runs using above rule sets and test strings:
->ma mars.1 I bought a B of As from T S. I bought a B of As from T S. -> I bought a bag of apples from my brother. ->ma mars.2 I bought a B of As from T S. I bought a B of As from T S. -> I bought a bag of apples from T shop. ->ma mars.3 I bought a B of As W my Bgage from T S. I bought a B of As W my Bgage from T S. -> I bought a bag of apples with my money from T shop. ->ma mars.4 _1111*11111_ _1111*11111_ -> 11111111111111111111 ->ma mars.5 000000A000000 000000A000000 -> 00011H1111000
[edit] J
Solution:require'strings regex'Example:
markovLexer =: verb define
rules =. LF cut TAB&=`(,:&' ')}y
rules =. a: -.~ (dltb@:{.~ i:&'#')&.> rules
rules =. 0 _1 {"1 '\s+->\s+' (rxmatch rxcut ])S:0 rules
(,. ] (}.&.>~ ,. ]) ('.'={.)&.>)/ |: rules
)
replace =: dyad define
'index patternLength replacement'=. x
'head tail' =. index split y
head, replacement, patternLength }. tail
)
matches =: E. i. 1:
markov =: dyad define
ruleIdx =. 0 [ rules =. markovLexer x
while. ruleIdx < #rules do.
'pattern replacement terminating' =. ruleIdx { rules
ruleIdx =. 1 + ruleIdx
if. (#y) > index =. pattern matches y do.
y =. (index ; (#pattern) ; replacement) replace y
ruleIdx =. _ * terminating
end.
end.
y
)
m1 =. noun define
# This rules file is extracted from Wikipedia:
# http://en.wikipedia.org/wiki/Markov_Algorithm
A -> apple
B -> bag
S -> shop
T -> the
the shop -> my brother
a never used -> .terminating rule
)
m1 markov 'I bought a B of As from T S.'
I bought a bag of apples from my brother.
Discussion: The J implementation correctly processes all the rulesets. More details are available on the the talk page.
[edit] Perl
This program expects a source file as an argument and uses the standard input and output devices for the algorithm's I/O.
@ARGV == 1 or die "Please provide exactly one source file as an argument.\n";
open my $source, '<', $ARGV[0] or die "I couldn't open \"$ARGV[0]\" for reading. ($!.)\n";
my @rules;
while (<$source>)
{/\A#/ and next;
my @a = /(.*?)\s+->\s+(\.?)(.*)/ or die "Syntax error: $_";
push @rules, \@a;}
close $source;
my $input = do {local $/; <STDIN>;};
OUTER:
{foreach (@rules)
{my ($from, $terminating, $to) = @$_;
$input =~ s/\Q$from\E/$to/
and ($terminating ? last OUTER : redo OUTER);}}
print $input;
[edit] PicoLisp
(de markov (File Text)
(use (@A @Z R)
(let Rules
(make
(in File
(while (skip "#")
(when (match '(@A " " "-" ">" " " @Z) (replace (line) "@" "#"))
(link (cons (clip @A) (clip @Z))) ) ) ) )
(setq Text (chop Text))
(pack
(loop
(NIL
(find
'((R) (match (append '(@A) (car R) '(@Z)) Text))
Rules )
Text )
(T (= "." (cadr (setq R @)))
(append @A (cddr R) @Z) )
(setq Text (append @A (cdr R) @Z)) ) ) ) ) )
Output:
: (markov "r1" "I bought a B of As from T S.") -> "I bought a bag of apples from my brother." : (markov "r2" "I bought a B of As from T S.") -> "I bought a bag of apples from T shop." : (markov "r3" "I bought a B of As W my Bgage from T S.") -> "I bought a bag of apples with my money from T shop." : (markov "r4" "_1111*11111_") -> "11111111111111111111" : (markov "r5" "000000A000000") -> "00011H1111000"
[edit] PureBasic
The GUI used here allows a ruleset to be loaded from a text file or manually added one rule at a time. Symbol input can be tested anytime by selecting 'Interpret'.
Structure mRule
pattern.s
replacement.s
isTerminal.i
EndStructure
Procedure parseRule(text.s, List rules.mRule())
#tab = 9: #space = 32: #whiteSpace$ = Chr(#space) + Chr(#tab)
Protected tLen, cPtr, nChar.c, pEnd, pLast, pattern.s
cPtr = 1
If FindString(#whiteSpace$, Left(text, cPtr), 1): ProcedureReturn 0: EndIf ;parse error
If Left(text, cPtr) = "#": ProcedureReturn 2: EndIf ;comment skipped
tLen = Len(text)
Repeat
cPtr + 1
If cPtr > tLen: ProcedureReturn 0: EndIf ;parse error
nChar = Asc(Mid(text, cPtr, 1))
Select nChar
Case #space, #tab
Select pEnd
Case 0 To 2
pEnd = 1
pLast = cPtr - 1
Case 3
pattern = Left(text, pLast)
EndSelect
Case '-'
If pEnd = 1: pEnd = 2: EndIf
Case '>'
If pEnd = 2: pEnd = 3: EndIf
EndSelect
Until pattern <> ""
Repeat
cPtr + 1
Until Not FindString(#whiteSpace$, Mid(text, cPtr, 1), 1)
Protected isTerminal
If Mid(text, cPtr, 1) = "."
isTerminal = #True: cPtr + 1
EndIf
LastElement(rules()): AddElement(rules())
rules()\pattern = pattern
rules()\replacement = Right(text, tLen - cPtr + 1)
rules()\isTerminal = isTerminal
ProcedureReturn 1 ;processed rule
EndProcedure
Procedure.s interpretMarkov(text.s, List rules.mRule())
Repeat
madeReplacement = #False
ForEach rules()
If FindString(text, rules()\pattern, 1)
text = ReplaceString(text, rules()\pattern, rules()\replacement)
madeReplacement = #True: isFinished = rules()\isTerminal
Break
EndIf
Next
Until Not madeReplacement Or isFinished
ProcedureReturn text
EndProcedure
Procedure addRule(text.s, List rules.mRule())
Protected result = parseRule(text, rules())
Select result
Case 0: AddGadgetItem(7, -1, "Invalid rule: " + #DQUOTE$ + text + #DQUOTE$)
Case 1: AddGadgetItem(7, -1, "Added: " + #DQUOTE$ + text + #DQUOTE$)
Case 2: AddGadgetItem(7, -1, "Comment: " + #DQUOTE$ + text + #DQUOTE$)
EndSelect
EndProcedure
OpenWindow(0, 0, 0, 350, 300, "Markov Algorithm Interpreter", #PB_Window_SystemMenu)
ButtonGadget(0, 45, 10, 75, 20, "Load Ruleset")
ButtonGadget(1, 163, 10, 65, 20, "Add Rule")
ButtonGadget(2, 280, 10, 65, 20, "Interpret")
TextGadget(3, 5, 40, 30, 20, "Input:")
StringGadget(4, 45, 40, 300, 20,"")
TextGadget(5, 5, 100, 35, 20, "Output:")
ButtonGadget(6, 160, 70, 70, 20, "Clear Output")
EditorGadget(7, 45, 100, 300, 195, #PB_Editor_ReadOnly)
NewList rules.mRule()
Define event, isDone, text.s, result, file.s
Repeat
event = WaitWindowEvent()
Select event
Case #PB_Event_Gadget
Select EventGadget()
Case 0
Define file.s, rule.s
file = OpenFileRequester("Select rule set", "*.txt", "Text (*.txt)|*.txt", 0)
If file
ClearList(rules())
ReadFile(0, file)
While Not(Eof(0))
addRule(ReadString(0), rules())
Wend
AddGadgetItem(7, -1, "Loaded " + Str(ListSize(rules())) + " rules."): AddGadgetItem(7, -1, "")
EndIf
Case 1
addRule(GetGadgetText(4), rules())
Case 2
text = GetGadgetText(4): AddGadgetItem(7, -1, "Interpret: " + #DQUOTE$ + text + #DQUOTE$)
AddGadgetItem(7, -1, "Result: " + #DQUOTE$ + interpretMarkov(text, rules()) + #DQUOTE$): AddGadgetItem(7, -1, "")
Case 6
ClearGadgetItems(7)
EndSelect
Case #PB_Event_CloseWindow
isDone = #True
EndSelect
Until isDone
Sample output from loading Ruleset 1 and interpreting a symbol:
Comment: "# This rules file is extracted from Wikipedia:" Comment: "# http://en.wikipedia.org/wiki/Markov_Algorithm" Added: "A -> apple" Added: "B -> bag" Added: "S -> shop" Added: "T -> the" Added: "the shop -> my brother" Added: "a never used -> .terminating rule" Loaded 6 rules. Interpret: "I bought a B of As from T S." Result: "I bought a bag of apples from my brother."
[edit] Python
The example uses a regexp to parse the syntax of the grammar. This regexp is multi-line and verbose, and uses named groups to aid in understanding the regexp and to allow more meaningful group names to be used when extracting the replacement data from the grammars in function extractreplacements.
The example gains flexibility by not being tied to specific files. The functions may be imported into other programs which then can provide textual input from their sources without the need to pass 'file handles' around.
import re
def extractreplacements(grammar):
return [ (matchobj.group('pat'), matchobj.group('repl'), bool(matchobj.group('term')))
for matchobj in re.finditer(syntaxre, grammar)
if matchobj.group('rule')]
def replace(text, replacements):
while True:
for pat, repl, term in replacements:
if pat in text:
text = text.replace(pat, repl, 1)
if term:
return text
break
else:
return text
syntaxre = r"""(?mx)
^(?:
(?: (?P<comment> \# .* ) ) |
(?: (?P<blank> \s* ) (?: \n | $ ) ) |
(?: (?P<rule> (?P<pat> .+? ) \s+ -> \s+ (?P<term> \.)? (?P<repl> .+) ) )
)$
"""
grammar1 = """\
# This rules file is extracted from Wikipedia:
# http://en.wikipedia.org/wiki/Markov_Algorithm
A -> apple
B -> bag
S -> shop
T -> the
the shop -> my brother
a never used -> .terminating rule
"""
grammar2 = '''\
# Slightly modified from the rules on Wikipedia
A -> apple
B -> bag
S -> .shop
T -> the
the shop -> my brother
a never used -> .terminating rule
'''
grammar3 = '''\
# BNF Syntax testing rules
A -> apple
WWWW -> with
Bgage -> ->.*
B -> bag
->.* -> money
W -> WW
S -> .shop
T -> the
the shop -> my brother
a never used -> .terminating rule
'''
grammar4 = '''\
### Unary Multiplication Engine, for testing Markov Algorithm implementations
### By Donal Fellows.
# Unary addition engine
_+1 -> _1+
1+1 -> 11+
# Pass for converting from the splitting of multiplication into ordinary
# addition
1! -> !1
,! -> !+
_! -> _
# Unary multiplication by duplicating left side, right side times
1*1 -> x,@y
1x -> xX
X, -> 1,1
X1 -> 1X
_x -> _X
,x -> ,X
y1 -> 1y
y_ -> _
# Next phase of applying
1@1 -> x,@y
1@_ -> @_
,@_ -> !_
++ -> +
# Termination cleanup for addition
_1 -> 1
1+_ -> 1
_+_ ->
'''
grammar5 = '''\
# Turing machine: three-state busy beaver
#
# state A, symbol 0 => write 1, move right, new state B
A0 -> 1B
# state A, symbol 1 => write 1, move left, new state C
0A1 -> C01
1A1 -> C11
# state B, symbol 0 => write 1, move left, new state A
0B0 -> A01
1B0 -> A11
# state B, symbol 1 => write 1, move right, new state B
B1 -> 1B
# state C, symbol 0 => write 1, move left, new state B
0C0 -> B01
1C0 -> B11
# state C, symbol 1 => write 1, move left, halt
0C1 -> H01
1C1 -> H11
'''
text1 = "I bought a B of As from T S."
text2 = "I bought a B of As W my Bgage from T S."
text3 = '_1111*11111_'
text4 = '000000A000000'
if __name__ == '__main__':
assert replace(text1, extractreplacements(grammar1)) \
== 'I bought a bag of apples from my brother.'
assert replace(text1, extractreplacements(grammar2)) \
== 'I bought a bag of apples from T shop.'
# Stretch goals
assert replace(text2, extractreplacements(grammar3)) \
== 'I bought a bag of apples with my money from T shop.'
assert replace(text3, extractreplacements(grammar4)) \
== '11111111111111111111'
assert replace(text4, extractreplacements(grammar5)) \
== '00011H1111000'
[edit] Ruby
raise "Please input an input code file, an input data file, and an output file." if ARGV.size < 3
rules = File.readlines(ARGV[0]).inject([]) do |rules, line|
if line =~ /^\s*#/
rules
elsif line =~ /^(.+)\s+->\s+(\.?)(.*)$/
rules << [$1, $3, $2 != ""]
else
raise "Syntax error: #{line}"
end
end
File.open(ARGV[2], "w") do |file|
file.write(File.read(ARGV[1]).tap { |input_data|
while (matched = rules.find { |match, replace, term|
input_data[match] and input_data.sub!(match, replace)
}) and !matched[2]
end
})
end
[edit] Scala
import scala.io.Source
object MarkovAlgorithm {
val RulePattern = """(.*?)\s+->\s+(\.?)(.*)""".r
val CommentPattern = """#.*|\s*""".r
def rule(line: String) = line match {
case CommentPattern() => None
case RulePattern(pattern, terminal, replacement) => Some(pattern, replacement, terminal == ".")
case _ => error("Syntax error on line "+line)
}
def main(args: Array[String]) {
if (args.size != 2 ) {
println("Syntax: MarkovAlgorithm inputFile inputPattern")
exit(1)
}
val rules = (Source fromPath args(0) getLines () map rule).toList.flatten
def algorithm(input: String): String = rules find (input contains _._1) match {
case Some((pattern, replacement, true)) => input replaceFirst ("\\Q"+pattern+"\\E", replacement)
case Some((pattern, replacement, false)) => algorithm(input replaceFirst ("\\Q"+pattern+"\\E", replacement))
case None => input
}
println(args(1))
println(algorithm(args(1)))
}
}
Script-style, and more concise:
import scala.io.Source
if (argv.size != 2 ) error("Syntax: MarkovAlgorithm inputFile inputPattern")
val rulePattern = """(.*?)\s+->\s+(\.?)(.*)""".r
val isComment = (_: String) matches "#.*|\\s*"
val rules = Source fromPath args(0) getLines () filterNot isComment map (rulePattern unapplySeq _ get) toList;
def algorithm(input: String): String = rules find (input contains _.head) match {
case Some(Seq(pattern, ".", replacement)) => input replaceFirst ("\\Q"+pattern+"\\E", replacement)
case Some(Seq(pattern, "", replacement)) => algorithm(input replaceFirst ("\\Q"+pattern+"\\E", replacement))
case None => input
}
println(argv(1))
println(algorithm(argv(1)))
Sample outputs:
C:\>scala MarkovAlgorithm ruleset1.txt "I bought a B of As from T S." I bought a B of As from T S. I bought a bag of apples from my brother. C:\>scala MarkovAlgorithm ruleset2.txt "I bought a B of As from T S." I bought a B of As from T S. I bought a bag of apples from T shop. C:\>scala MarkovAlgorithm ruleset3.txt "I bought a B of As W my Bgage from T S." I bought a B of As W my Bgage from T S. I bought a bag of apples with my money from T shop. C:\>scala MarkovAlgorithm ruleset4.txt "_1111*11111_" _1111*11111_ 11111111111111111111
The script is called much in the same way, but with the ".scala" extension added.
[edit] Scheme
The following implementation uses several string-related procedures provided by SRFI-13 [1].
(define split-into-lines
(lambda (str)
(let loop ((index 0)
(result '()))
(let ((next-index (string-index str #\newline index)))
(if next-index
(loop (+ next-index 1)
(cons (substring str index next-index) result))
(reverse (cons (substring str index) result)))))))
(define parse-rules
(lambda (str)
(let loop ((rules (split-into-lines str))
(result '()))
(if (null? rules)
(reverse result)
(let ((rule (car rules)))
(loop (cdr rules)
(if (or (string=? rule "")
(eq? (string-ref rule 0) #\#))
result
(cons
(let ((index (string-contains rule "->" 1)))
(list (string-trim-right (substring rule 0 index))
(string-trim (substring rule (+ index 2)))))
result))))))))
(define apply-rules
(lambda (str rules)
(let loop ((remaining rules)
(result str))
(if (null? remaining)
result
(let* ((rule (car remaining))
(pattern (car rule))
(replacement (cadr rule))
(start (string-contains result pattern)))
(if start
(if (eq? #\. (string-ref replacement 0))
(string-replace result replacement start
(+ start (string-length pattern)) 1)
(apply-rules
(string-replace result replacement start
(+ start (string-length pattern)))
rules))
(loop (cdr remaining) result)))))))
[edit] Tcl
package require Tcl 8.5
if {$argc < 3} {error "usage: $argv0 ruleFile inputFile outputFile"}
lassign $argv ruleFile inputFile outputFile
# Read the file of rules
set rules {}
set f [open $ruleFile]
foreach line [split [read $f] \n[close $f]] {
if {[string match "#*" $line] || $line eq ""} continue
if {[regexp {^(.+)\s+->\s+(\.?)(.*)$} $line -> from final to]} {
lappend rules $from $to [string compare "." $final] [string length $from]
} else {
error "Syntax error: \"$line\""
}
}
# Apply the rules
set f [open $inputFile]
set out [open $outputFile w]
foreach line [split [read $f] \n[close $f]] {
set any 1
while {$any} {
set any 0
foreach {from to more fl} $rules {
# If we match the 'from' pattern...
if {[set idx [string first $from $line]] >= 0} {
# Change for the 'to' replacement
set line [string replace $line $idx [expr {$idx+$fl-1}] $to]
# Stop if we terminate, otherwise note that we've more work to do
set any $more
break; # Restart search for rules to apply
}
}
#DEBUG# puts $line
}
# Output the processed line
puts $out $line
}
close $out
In the case where there are no terminating rules and no overlapping issues, the following is an alternative:
package require Tcl 8.5
if {$argc < 3} {error "usage: $argv0 ruleFile inputFile outputFile"}
lassign $argv ruleFile inputFile outputFile
# Read the file of rules
set rules {}
set f [open $ruleFile]
foreach line [split [read $f] \n[close $f]] {
if {[string match "#*" $line] || $line eq ""} continue
if {[regexp {^(.+)\s+->\s+(.*)$} $line -> from to]} {
dict set rules $from $to
} else {
error "Syntax error: \"$line\""
}
}
# Apply the rules in a simplistic manner
set in [open $inputFile]
set out [open $outputFile w]
set data [read $in]
close $in
while 1 {
set newData [string map $rules $data]
if {$newData eq $data} break
set data $newData
}
puts $out $data
close $out
[edit] VBScript
[edit] Implementation
class markovparser
dim aRules
public property let ruleset( sBlock )
dim i
aRules = split( sBlock, vbNewLine )
'~ remove blank lines from end of array
do while aRules( ubound( aRules ) ) = vbnullstring
redim preserve aRules( ubound( aRules ) - 1 )
loop
'~ parse array
for i = lbound( aRules ) to ubound( aRules )
if left( aRules( i ), 1 ) = "#" then
aRules( i ) = Array( vbnullstring, aRules(i))
else
aRules( i ) = Split( aRules( i ), " -> ", 2 )
end if
next
end property
public function apply( sArg )
dim ruleapplied
dim terminator
dim was
dim i
dim repl
dim changes
ruleapplied = true
terminator = false
do while ruleapplied and (not terminator)
changes = 0
was = sArg
for i = lbound( aRules ) to ubound( aRules )
repl = aRules(i)(1)
if left( repl, 1 ) = "." then
terminator = true
repl = mid( repl, 2 )
end if
sArg = replace( sArg, aRules(i)(0), repl)
if was <> sArg then
changes = changes + 1
if changes = 1 then
exit for
end if
end if
if terminator then
exit for
end if
next
if changes = 0 then
ruleapplied = false
end if
loop
apply = sArg
end function
sub dump
dim i
for i = lbound( aRules ) to ubound( aRules )
wscript.echo eef(aRules(i)(0)=vbnullstring,aRules(i)(1),aRules(i)(0)& " -> " & aRules(i)(1)) & eef( left( aRules(i)(1), 1 ) = ".", " #terminator", "" )
next
end sub
private function eef( bCond, sExp1, sExp2 )
if bCond then
eef = sExp1
else
eef = sExp2
end if
end function
end class
[edit] Invocation
dim m1
set m1 = new markovparser
m1.ruleset = "# This rules file is extracted from Wikipedia:" & vbNewLine & _
"# http://en.wikipedia.org/wiki/Markov_Algorithm" & vbNewLine & _
"A -> apple" & vbNewLine & _
"B -> bag" & vbNewLine & _
"S -> shop" & vbNewLine & _
"T -> the" & vbNewLine & _
"the shop -> my brother" & vbNewLine & _
"a never used -> .terminating rule"
wscript.echo m1.apply( "I bought a B of As from T S.")
dim m2
set m2 = new markovparser
m2.ruleset = replace( "# Slightly modified from the rules on Wikipedia\nA -> apple\nB -> bag\nS -> .shop\nT -> the\nthe shop -> my brother\na never used -> .terminating rule", "\n", vbNewLine )
'~ m1.dump
wscript.echo m2.apply( "I bought a B of As from T S.")
dim m3
set m3 = new markovparser
m3.ruleset = replace("# BNF Syntax testing rules\nA -> apple\nWWWW -> with\nBgage -> ->.*\nB -> bag" & vbNewLine & _
"->.* -> money\nW -> WW\nS -> .shop\nT -> the\nthe shop -> my brother\na never used -> .terminating rule", "\n", vbNewLine )
wscript.echo m3.apply("I bought a B of As W my Bgage from T S.")
set m4 = new markovparser
m4.ruleset = "### Unary Multiplication Engine, for testing Markov Algorithm implementations" & vbNewLine & _
"### By Donal Fellows." & vbNewLine & _
"# Unary addition engine" & vbNewLine & _
"_+1 -> _1+" & vbNewLine & _
"1+1 -> 11+" & vbNewLine & _
"# Pass for converting from the splitting of multiplication into ordinary" & vbNewLine & _
"# addition" & vbNewLine & _
"1! -> !1" & vbNewLine & _
",! -> !+" & vbNewLine & _
"_! -> _" & vbNewLine & _
"# Unary multiplication by duplicating left side, right side times" & vbNewLine & _
"1*1 -> x,@y" & vbNewLine & _
"1x -> xX" & vbNewLine & _
"X, -> 1,1" & vbNewLine & _
"X1 -> 1X" & vbNewLine & _
"_x -> _X" & vbNewLine & _
",x -> ,X" & vbNewLine & _
"y1 -> 1y" & vbNewLine & _
"y_ -> _" & vbNewLine & _
"# Next phase of applying" & vbNewLine & _
"1@1 -> x,@y" & vbNewLine & _
"1@_ -> @_" & vbNewLine & _
",@_ -> !_" & vbNewLine & _
"++ -> +" & vbNewLine & _
"# Termination cleanup for addition" & vbNewLine & _
"_1 -> 1" & vbNewLine & _
"1+_ -> 1" & vbNewLine & _
"_+_ -> "
'~ m4.dump
wscript.echo m4.apply( "_1111*11111_")
set fso = createobject("scripting.filesystemobject")
set m5 = new markovparser
m5.ruleset = fso.opentextfile("busybeaver.tur").readall
wscript.echo m5.apply("000000A000000")
[edit] Output
I bought a bag of apples from my brother.
I bought a bag of apples from T shop.
I bought a bag of apples with my money from T shop.
11111111111111111111
00011H1111000