Execute a Markov algorithm: Difference between revisions
(Added PicoLisp) |
(→{{header|PicoLisp}}: Added PureBasic) |
||
Line 923: | Line 923: | ||
: (markov "r5" "000000A000000") |
: (markov "r5" "000000A000000") |
||
-> "00011H1111000"</pre> |
-> "00011H1111000"</pre> |
||
=={{header|PureBasic}}== |
|||
The GUI used here allows rules to be loaded from a text file or manually added one at a time. Symbol input can be tested anytime by selecting 'Test Rules'. |
|||
<lang PureBasic>Structure mRule |
|||
pattern.s |
|||
replacement.s |
|||
isTerminal.i |
|||
EndStructure |
|||
Procedure parseRule(text.s, List rules.mRule()) |
|||
#tab = 9: #space = 32: #whiteSpace$ = Chr(#space) + Chr(#tab) |
|||
Protected tLen, cPtr, nChar.c, pEnd, pLast, pattern.s |
|||
cPtr = 1 |
|||
If FindString(#whiteSpace$, Left(text, cPtr), 1): ProcedureReturn 0: EndIf ;parse error |
|||
If Left(text, cPtr) = "#": ProcedureReturn 2: EndIf ;comment skipped |
|||
tLen = Len(text) |
|||
Repeat |
|||
cPtr + 1 |
|||
If cPtr > tLen: ProcedureReturn 0: EndIf ;parse error |
|||
nChar = Asc(Mid(text, cPtr, 1)) |
|||
Select nChar |
|||
Case #space, #tab |
|||
Select pEnd |
|||
Case 0 To 2 |
|||
pEnd = 1 |
|||
pLast = cPtr - 1 |
|||
Case 3 |
|||
pattern = Left(text, pLast) |
|||
EndSelect |
|||
Case '-' |
|||
If pEnd = 1: pEnd = 2: EndIf |
|||
Case '>' |
|||
If pEnd = 2: pEnd = 3: EndIf |
|||
EndSelect |
|||
Until pattern <> "" |
|||
Repeat |
|||
cPtr + 1 |
|||
Until Not FindString(#whiteSpace$, Mid(text, cPtr, 1), 1) |
|||
Protected isTerminal |
|||
If Mid(text, cPtr, 1) = "." |
|||
isTerminal = #True: cPtr + 1 |
|||
EndIf |
|||
LastElement(rules()): AddElement(rules()) |
|||
rules()\pattern = pattern |
|||
rules()\replacement = Right(text, tLen - cPtr + 1) |
|||
rules()\isTerminal = isTerminal |
|||
ProcedureReturn 1 ;processed rule |
|||
EndProcedure |
|||
Procedure.s execMarkov(text.s, List rules.mRule()) |
|||
Repeat |
|||
madeReplacement = #False |
|||
ForEach rules() |
|||
If FindString(text, rules()\pattern, 1) |
|||
text = ReplaceString(text, rules()\pattern, rules()\replacement) |
|||
madeReplacement = #True: isFinished = rules()\isTerminal |
|||
Break |
|||
EndIf |
|||
Next |
|||
Until Not madeReplacement Or isFinished |
|||
ProcedureReturn text |
|||
EndProcedure |
|||
Procedure addRule(text.s, List rules.mRule()) |
|||
Protected result = parseRule(text, rules()) |
|||
Select result |
|||
Case 0: AddGadgetItem(7, -1, "Invalid rule: " + #DQUOTE$ + text + #DQUOTE$) |
|||
Case 1: AddGadgetItem(7, -1, "Added: " + #DQUOTE$ + text + #DQUOTE$) |
|||
Case 2: AddGadgetItem(7, -1, "Comment: " + #DQUOTE$ + text + #DQUOTE$) |
|||
EndSelect |
|||
EndProcedure |
|||
OpenWindow(0, 0, 0, 350,300, "Markov Rule Interpreter", #PB_Window_SystemMenu) |
|||
ButtonGadget(0, 45, 10, 65, 20, "Load Rules") |
|||
ButtonGadget(1, 163, 10, 65, 20, "Add Rule") |
|||
ButtonGadget(2, 280, 10, 65, 20, "Interpret") |
|||
TextGadget(3, 5, 40, 30, 20, "Input:") |
|||
StringGadget(4, 45, 40, 300, 20,"") |
|||
TextGadget(5, 5, 100, 35, 20, "Output:") |
|||
ButtonGadget(6, 160, 70, 70, 20, "Clear Output") |
|||
EditorGadget(7, 45, 100, 300, 195, #PB_Editor_ReadOnly) |
|||
NewList rules.mRule() |
|||
Define event, isDone, text.s, result, file.s |
|||
Repeat |
|||
event = WaitWindowEvent() |
|||
Select event |
|||
Case #PB_Event_Gadget |
|||
Select EventGadget() |
|||
Case 0 |
|||
Define file.s, rule.s |
|||
file = OpenFileRequester("Select Markov Rule set", "*.txt", "Text (*.txt)|*.txt", 0) |
|||
If file |
|||
ClearList(rules()) |
|||
ReadFile(0, file) |
|||
While Not(Eof(0)) |
|||
addRule(ReadString(0), rules()) |
|||
Wend |
|||
AddGadgetItem(7, -1, "Loaded " + Str(ListSize(rules())) + " rules."): AddGadgetItem(7, -1, "") |
|||
EndIf |
|||
Case 1 |
|||
addRule(GetGadgetText(4), rules()) |
|||
Case 2 |
|||
text = GetGadgetText(4): AddGadgetItem(7, -1, "Interpret: " + #DQUOTE$ + text + #DQUOTE$) |
|||
AddGadgetItem(7, -1, "Result: " + #DQUOTE$ + execMarkov(text, rules()) + #DQUOTE$): AddGadgetItem(7, -1, "") |
|||
Case 6 |
|||
ClearGadgetItems(7) |
|||
EndSelect |
|||
Case #PB_Event_CloseWindow |
|||
isDone = #True |
|||
EndSelect |
|||
Until isDone</lang> |
|||
Sample output from loading Ruleset 1 and interpreting a symbol: |
|||
<pre>Comment: "# This rules file is extracted from Wikipedia:" |
|||
Comment: "# http://en.wikipedia.org/wiki/Markov_Algorithm" |
|||
Added: "A -> apple" |
|||
Added: "B -> bag" |
|||
Added: "S -> shop" |
|||
Added: "T -> the" |
|||
Added: "the shop -> my brother" |
|||
Added: "a never used -> .terminating rule" |
|||
Loaded 6 rules. |
|||
Interpret: "I bought a B of As from T S." |
|||
Result: "I bought a bag of apples from my brother."</pre> |
|||
=={{header|Python}}== |
=={{header|Python}}== |
Revision as of 21:29, 23 July 2010
This page uses content from Wikipedia. The original article was at Markov_algorithm. The list of authors can be seen in the page history. As with Rosetta Code, the text of Wikipedia is available under the GNU FDL. (See links for details on variance) |
You are encouraged to solve this task according to the task description, using any language you may know.
Create an interpreter for a Markov Algorithm. Rules have the syntax:
<ruleset> ::= ((<comment> | <rule>) <newline>+)* <comment> ::= # {<any character>} <rule> ::= <pattern> <whitespace> -> <whitespace> [.] <replacement> <whitespace> ::= (<tab> | <space>) [<whitespace>]
There is one rule per line. If there is a . present before the <replacement>, then this is a terminating rule in which case the interpreter must halt execution. A ruleset consists of a sequence of rules, with optional comments.
Rulesets
Use the following tests on entries:
Ruleset 1
# This rules file is extracted from Wikipedia: # http://en.wikipedia.org/wiki/Markov_Algorithm A -> apple B -> bag S -> shop T -> the the shop -> my brother a never used -> .terminating rule
Sample text of:
I bought a B of As from T S.
Should generate the output:
I bought a bag of apples from my brother.
Ruleset 2
A test of the terminating rule
# Slightly modified from the rules on Wikipedia A -> apple B -> bag S -> .shop T -> the the shop -> my brother a never used -> .terminating rule
Sample text of:
I bought a B of As from T S.
Should generate:
I bought a bag of apples from T shop.
Ruleset 3
This tests for correct substitution order and may trap simple regexp based replacement routines if special regexp characters are not escaped.
# BNF Syntax testing rules A -> apple WWWW -> with Bgage -> ->.* B -> bag ->.* -> money W -> WW S -> .shop T -> the the shop -> my brother a never used -> .terminating rule
Sample text of:
I bought a B of As W my Bgage from T S.
Should generate:
I bought a bag of apples with my money from T shop.
Ruleset 4
This tests for correct order of scanning of rules, and may trap replacement routines that scan in the wrong order. It implements a general unary multiplication engine. (Note that the input expression must be placed within underscores in this implementation.)
### Unary Multiplication Engine, for testing Markov Algorithm implementations ### By Donal Fellows. # Unary addition engine _+1 -> _1+ 1+1 -> 11+ # Pass for converting from the splitting of multiplication into ordinary # addition 1! -> !1 ,! -> !+ _! -> _ # Unary multiplication by duplicating left side, right side times 1*1 -> x,@y 1x -> xX X, -> 1,1 X1 -> 1X _x -> _X ,x -> ,X y1 -> 1y y_ -> _ # Next phase of applying 1@1 -> x,@y 1@_ -> @_ ,@_ -> !_ ++ -> + # Termination cleanup for addition _1 -> 1 1+_ -> 1 _+_ ->
Sample text of:
_1111*11111_
should generate the output:
11111111111111111111
Ruleset 5
A simple Turing machine, implementing a three-state busy beaver. The tape consists of 0s and 1s, the states are A, B, C and H (for Halt), and the head position is indicated by writing the state letter before the character where the head is. All parts of the initial tape the machine operates on have to be given in the input.
Besides demonstrating that the Markov algorithm is Turing-complete, it also made me catch a bug in the C++ implementation which wasn't caught by the first four rulesets.
# Turing machine: three-state busy beaver # # state A, symbol 0 => write 1, move right, new state B A0 -> 1B # state A, symbol 1 => write 1, move left, new state C 0A1 -> C01 1A1 -> C11 # state B, symbol 0 => write 1, move left, new state A 0B0 -> A01 1B0 -> A11 # state B, symbol 1 => write 1, move right, new state B B1 -> 1B # state C, symbol 0 => write 1, move left, new state B 0C0 -> B01 1C0 -> B11 # state C, symbol 1 => write 1, move left, halt 0C1 -> H01 1C1 -> H11
This ruleset should turn
000000A000000
into
00011H1111000
Examples
AutoHotkey
<lang autohotkey>;---------------------------------------------------------------------------
- Markov Algorithm.ahk
- by wolf_II
- ---------------------------------------------------------------------------
- interpreter for a Markov Algorithm
- ---------------------------------------------------------------------------
- ---------------------------------------------------------------------------
AutoExecute: ; auto-execute section of the script
- ---------------------------------------------------------------------------
#SingleInstance, Force ; only one instance allowed #NoEnv ; don't check empty variables StartupDir := A_WorkingDir ; remember startup directory SetWorkingDir, %A_ScriptDir% ; change directoy StringCaseSense, On ; case sensitive comparisons ;----------------------------------------------------------------------- AppName := "Markov Algorithm" Gosub, GuiCreate Gui, Show,, %AppName%
Return
- ---------------------------------------------------------------------------
GuiCreate: ; create the GUI
- ---------------------------------------------------------------------------
; GUI options Gui, -MinimizeBox Gui, Add, Edit, y0 h0 ; catch the focus
; Ruleset Gui, Add, GroupBox, w445 h145 Section, Ruleset Gui, Add, Edit, xs+15 ys+20 w300 r8 vRuleset Gui, Add, Button, x+15 w100, Load Ruleset Gui, Add, Button, wp, Save Ruleset Gui, Add, Button, w30, 1 Gui, Add, Button, x+5 wp, 2 Gui, Add, Button, x+5 wp, 3 Gui, Add, Button, xs+330 y+6 wp, 4 Gui, Add, Button, x+5 wp, 5
; String Gui, Add, GroupBox, xs w445 h75 Section, String Gui, Add, Edit, xs+15 ys+20 w300 vString Gui, Add, Button, x+15 w100, Apply Ruleset Gui, Add, Button, xp wp Hidden, Stop Gui, Add, CheckBox, xs+15 yp+30 vSingleStepping, Single Stepping?
; Output Gui, Add, GroupBox, xs w445 h235 Section, Output Gui, Add, Edit, xs+15 ys+20 w415 r15 ReadOnly vOutput HwndhOut
Return
- ---------------------------------------------------------------------------
GuiClose:
- ---------------------------------------------------------------------------
ExitApp
Return
- ---------------------------------------------------------------------------
ButtonLoadRuleset: ; load ruleset from file
- ---------------------------------------------------------------------------
Gui, +OwnDialogs FileSelectFile, RulesetFile,,, Load Ruleset, *.markov If Not SubStr(RulesetFile, -6) = ".markov" RulesetFile .= ".markov" If FileExist(RulesetFile) { FileRead, Ruleset, %RulesetFile% GuiControl,, Ruleset, %Ruleset% } Else MsgBox, 16, Error - %AppName%, File not found:`n`n"%RulesetFile%"
Return
- ---------------------------------------------------------------------------
ButtonSaveRuleset: ; save ruleset to file
- ---------------------------------------------------------------------------
Gui, +OwnDialogs Gui, Submit, NoHide FileSelectFile, RulesetFile, S16,, Save Ruleset, *.markov If Not SubStr(RulesetFile, -6) = ".markov" RulesetFile .= ".markov" FileDelete, %RulesetFile% FileAppend, %Ruleset%, %RulesetFile% Gui, Show
Return
- ---------------------------------------------------------------------------
Button1: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm#Ruleset_1
- ---------------------------------------------------------------------------
GuiControl,, Output ; clear output GuiControl,, String, I bought a B of As from T S. GuiControl,, Ruleset, (LTrim # This rules file is extracted from Wikipedia: # http://en.wikipedia.org/wiki/Markov_Algorithm A -> apple B -> bag S -> shop T -> the the shop -> my brother a never used -> .terminating rule )
Return
- ---------------------------------------------------------------------------
Button2: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm#Ruleset_2
- ---------------------------------------------------------------------------
GuiControl,, Output ; clear output GuiControl,, String, I bought a B of As from T S. GuiControl,, Ruleset, (LTrim # Slightly modified from the rules on Wikipedia A -> apple B -> bag S -> .shop T -> the the shop -> my brother a never used -> .terminating rule )
Return
- ---------------------------------------------------------------------------
Button3: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm#Ruleset_3
- ---------------------------------------------------------------------------
GuiControl,, Output ; clear output GuiControl,, String, I bought a B of As W my Bgage from T S. GuiControl,, Ruleset, (LTrim # BNF Syntax testing rules A -> apple WWWW -> with Bgage -> ->.* B -> bag ->.* -> money W -> WW S -> .shop T -> the the shop -> my brother a never used -> .terminating rule )
Return
- ---------------------------------------------------------------------------
Button4: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm#Ruleset_4
- ---------------------------------------------------------------------------
GuiControl,, Output ; clear output GuiControl,, String, _1111*11111_ GuiControl,, Ruleset, (LTrim ### Unary Multiplication Engine, for testing Markov Algorithm implementations ### By Donal Fellows. # Unary addition engine _+1 -> _1+ 1+1 -> 11+ # Pass for converting from the splitting of multiplication into ordinary # addition 1! -> !1 ,! -> !+ _! -> _ # Unary multiplication by duplicating left side, right side times 1*1 -> x,@y 1x -> xX X, -> 1,1 X1 -> 1X _x -> _X ,x -> ,X y1 -> 1y y_ -> _ # Next phase of applying 1@1 -> x,@y 1@_ -> @_ ,@_ -> !_ ++ -> + # Termination cleanup for addition _1 -> 1 1+_ -> 1 _+_ -> )
Return
- ---------------------------------------------------------------------------
Button5: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm#Ruleset_5
- ---------------------------------------------------------------------------
GuiControl,, Output ; clear output GuiControl,, String, 000000A000000 GuiControl,, Ruleset, (LTrim # Turing machine: three-state busy beaver # # state A, symbol 0 => write 1, move right, new state B A0 -> 1B # state A, symbol 1 => write 1, move left, new state C 0A1 -> C01 1A1 -> C11 # state B, symbol 0 => write 1, move left, new state A 0B0 -> A01 1B0 -> A11 # state B, symbol 1 => write 1, move right, new state B B1 -> 1B # state C, symbol 0 => write 1, move left, new state B 0C0 -> B01 1C0 -> B11 # state C, symbol 1 => write 1, move left, halt 0C1 -> H01 1C1 -> H11 )
Return
- ---------------------------------------------------------------------------
ButtonApplyRuleset: ; flow control for Algorithm
- ---------------------------------------------------------------------------
; prepare Gui, Submit, NoHide GuiControl,, Output ; clear Controls(False) ; disable Count := 0 Subst := True Stop := False
; keep substituting for as long as necessary While, Subst { Subst := False ; reset control variable IfEqual, Stop, 1, Break Gosub, Algorithm }
; clean up Output("Substitution count: " Count) Controls(True) ; re-enable
Return
- ---------------------------------------------------------------------------
ButtonStop: ; this button is initially hidden
- ---------------------------------------------------------------------------
Stop := True
Return
- ---------------------------------------------------------------------------
Algorithm: ; http://rosettacode.org/wiki/Execute_a_Markov_algorithm
- ---------------------------------------------------------------------------
; Parse the ruleset and apply each rule to the string. Whenever a rule ; has changed the string goto first rule. Continue until a encountering ; a terminating rule, or until no further changes to the strings are ; made. ;----------------------------------------------------------------------- Loop, Parse, Ruleset, `n, `r ; always start from the beginning { ; check for comment If SubStr(A_LoopField, 1, 1) = "#" Continue ; get next line
; split a rule into $Search, $Terminator and $Replace LookFor := "(?P<Search>.+) -> (?P<Terminator>\.?)(?P<Replace>.+)" RegExMatch(A_LoopField, LookFor, $)
; single stepping through possible substitutions If SingleStepping MsgBox,, %AppName%, % "" . "Rule = """ A_LoopField """`n`n" . "Search`t= """ $Search """`n" . "Replace`t= """ $Replace """`n" . "Termintor`t= """ ($Terminator ? "True" : "False") """`n"
; try to substitute StringReplace, String, String, %$Search%, %$Replace%, UseErrorLevel
; any success? If ErrorLevel { ; yes, substitution done Count++ ; keep count Subst := True ; set control variable Output(String) ; write new string to output }
; terminate? If $Terminator { ; yes, terminate Stop := True ; set control variable Break ; back to flow control }
; we are not yet terminated ... If Subst ; but we just did a substitution Break ; back to flow control }
Return
- ---------------------------------------------------------------------------
Controls(Bool) { ; [en|dis]able controls
- ---------------------------------------------------------------------------
Enable := Bool ? "+" : "-" Disable := Bool ? "-" : "+" Loop, 2 GuiControl, %Disable%ReadOnly, % "Edit" A_Index + 1 Loop, 7 GuiControl, %Disable%Disabled, % "Button" A_Index + 1 GuiControl, %Disable%Disabled, Edit4 GuiControl, %Disable%Hidden, Button10 GuiControl, %Enable%Hidden, Button11 GuiControl, %Disable%Disabled, Button12
}
- ---------------------------------------------------------------------------
Output(Text) { ; append text to output
- ---------------------------------------------------------------------------
static EM_REPLACESEL = 0xC2 global hOut Sleep, 100 Text .= "`r`n" SendMessage, EM_REPLACESEL,, &Text,, ahk_id %hOut%
}
- ---------- end of file ----------------------------------------------------</lang>
C
<lang c>#include <stdio.h>
- include <stdlib.h>
- include <stdbool.h> // requires C99
- include <string.h>
- include <assert.h>
- define MAX_RULE_LEN 1024
- define MAX_STR_LEN 1024
typedef struct rulestruct {
const char *trigger; const char *replacement; bool terminal; struct rulestruct *next;
} rule_t;
rule_t *free_rule(rule_t *r)
{
if ( r == NULL ) return NULL; if ( r->trigger != NULL ) free(r->trigger); if ( r->replacement != NULL ) free(r->replacement); rule_t *next = r->next; free(r); return next;
}
void free_rulelist(rule_t *head) {
rule_t *n = head; while( n != NULL ) n = free_rule(n);
}
void readrules(FILE *f, rule_t **ruleset) {
char buffer[MAX_RULE_LEN]; rule_t *t, *prev; int i, j; size_t l; *ruleset = prev = NULL; for(l=1; fgets(buffer, MAX_RULE_LEN, f) != NULL; l++ ) { if ( buffer[0] == '#' ) continue; // not a rule but a comment t = malloc(sizeof(rule_t)); assert( t != NULL ); memset(t, 0, sizeof(rule_t)); // just to be sure, in case of failure, to avoid // freeing unallocated memory // skip blank lines (there cannot be leading spaces...!) if ( (buffer[0] == '\n') || (buffer[0] == '\r') ) continue; // it's a rule: let's move until the first " -> " char *map = strstr(buffer, " -> "); if ( map == NULL ) { fprintf(stderr, "rule set syntax error line %d\n", l); free_rule(t); return; } i = map - buffer + 4; // skip " -> " j = map - buffer - 1; while( (buffer[j] == ' ') || (buffer[j] == '\t') ) j--; buffer[j+1] = 0; t->trigger = strdup(buffer); assert( t->trigger != NULL ); //skip whitespaces after -> for( ; (buffer[i] == '\t') || (buffer[i] == ' '); i++) ; if ( buffer[i] == '.' ) { t->terminal = true; i++; // terminal rule } else { t->terminal = false; // or not } j = i; // store this position and let's find the end i += strlen(buffer+j); for( i--; (buffer[i] == '\n') || (buffer[i] == '\r') ; i--) ; buffer[i+1] = 0; t->replacement = strdup(buffer+j); assert(t->replacement != NULL); if ( prev == NULL ) { *ruleset = t; } else { prev->next = t; } prev = t; }
}
// each line of the file is a "string" void markov(FILE *f, rule_t *rule) {
char buffer[2][MAX_STR_LEN]; // double to allow state changing and no overlapping int bi; rule_t *r; char *d; const char *p, *bp; bool repldone; size_t s;
while( ( fgets(buffer[0], MAX_STR_LEN, f) != NULL ) ) { bi = 0;
do { repldone = false; for( r = rule; r != NULL; r = r->next, bi++) {
bp = buffer[bi%2]; d = buffer[(bi+1)%2]; if ( (p = strstr(bp, r->trigger)) != NULL ) { s = p - bp; memcpy(d, bp, s);
d += s;
strcpy(d, r->replacement);
d += strlen(r->replacement);
strcpy(d, bp + strlen(r->trigger) + s); if ( r->terminal ) { repldone = false; bi++; // let be bi the current (last) buffer break; } repldone = true; // a repl. was done r = rule; // since a repl. was done, let's "reset" r } else { bi--; // stay on the same buffer }
} } while( repldone ); } puts(buffer[(bi)%2]);
}
int main() {
FILE *rulefile_h = NULL; FILE *stringfile_h = NULL; rule_t *rulelist;
if ( argc < 3 ) { printf("Usage: %s rulefile stringfile\n", argv[0]); exit(EXIT_FAILURE); } rulefile_h = fopen(argv[1], "r"); assert( rulefile_h != NULL ); stringfile_h = fopen(argv[2], "r"); assert( stringfile_h != NULL );
readrules(rulefile_h, &rulelist); assert( rulelist != NULL ); markov(stringfile_h, rulelist);
// dump rules
/*
rule_t *h = rulelist; while( h != NULL ) { printf("%s -> %s%s\n", h->trigger, h->replacement, h->terminal ? " [TERMINATING RULE]" : ""); h = h->next; }
- /
free_rulelist(rulelist);
fclose(rulefile_h); fclose(stringfile_h);
return EXIT_SUCCESS;
}</lang>
C++
Note: Non-use of iswhite
is intentional, since depending on the locale, other chars besides space and tab might be detected by that function.
<lang cpp>
- include <cstdlib>
- include <iostream>
- include <fstream>
- include <vector>
- include <string>
struct rule {
std::string pattern; std::string replacement; bool terminal; rule(std::string pat, std::string rep, bool term): pattern(pat), replacement(rep), terminal(term) { }
};
std::string const whitespace = " \t"; std::string::size_type const npos = std::string::npos;
bool is_whitespace(char c) {
return whitespace.find(c) != npos;
}
std::vector<rule> read_rules(std::ifstream& rulefile) {
std::vector<rule> rules; std::string line; while (std::getline(rulefile, line)) { std::string::size_type pos; // remove comments pos = line.find('#'); if (pos != npos) line.resize(pos); // ignore lines consisting only of whitespace if (line.find_first_not_of(whitespace) == npos) continue; // find "->" surrounded by whitespace pos = line.find("->"); while (pos != npos && (pos == 0 || !is_whitespace(line[pos-1]))) pos = line.find("->", pos+1); if (pos == npos || line.length() < pos+3 || !is_whitespace(line[pos+2])) { std::cerr << "invalid rule: " << line << "\n"; std::exit(EXIT_FAILURE); } std::string pattern = line.substr(0, pos-1); std::string replacement = line.substr(pos+3); // remove additional separating whitespace pattern.erase(pattern.find_last_not_of(whitespace)+1); replacement.erase(0, replacement.find_first_not_of(whitespace)); // test for terminal rule bool terminal = !replacement.empty() && replacement[0] == '.'; if (terminal) replacement.erase(0,1); rules.push_back(rule(pattern, replacement, terminal)); }
return rules;
}
std::string markov(std::vector<rule> rules, std::string input) {
std::string& output = input; std::vector<rule>::iterator iter = rules.begin();
// Loop through each rule, transforming our current version // with each rule. while (iter != rules.end()) { std::string::size_type pos = output.find(iter->pattern); if (pos != npos) { output.replace(pos, iter->pattern.length(), iter->replacement); if (iter->terminal) break; iter = rules.begin(); } else ++iter; }
return output;
}
int main(int argc, char* argv[]) {
if (argc != 3) { std::cout << "usage:\n " << argv[0] << " rulefile text\n"; return EXIT_FAILURE; } std::ifstream rulefile(argv[1]); std::vector<rule> rules = read_rules(rulefile);
std::string input(argv[2]); std::string output = markov(rules, input);
std::cout << output << "\n";
} </lang>
Haskell
This program expects a source file as an argument and uses the standard input and output devices for the algorithm's I/O.
<lang haskell>import Data.List (isPrefixOf) import Data.Maybe (catMaybes) import Control.Monad import Text.ParserCombinators.Parsec import System.IO import System.Environment (getArgs)
main = do
args <- getArgs unless (length args == 1) $ fail "Please provide exactly one source file as an argument." let sourcePath = head args source <- readFile sourcePath input <- getContents case parse markovParser sourcePath source of Right rules -> putStr $ runMarkov rules input Left err -> hPutStrLn stderr $ "Parse error at " ++ show err
data Rule = Rule
{from :: String, terminating :: Bool, to :: String}
markovParser :: Parser [Rule] markovParser = liftM catMaybes $
(comment <|> rule) `sepEndBy` many1 newline where comment = char '#' >> skipMany nonnl >> return Nothing rule = liftM Just $ liftM3 Rule (manyTill (nonnl <?> "pattern character") $ try arrow) (succeeds $ char '.') (many nonnl) arrow = ws >> string "->" >> ws <?> "whitespace-delimited arrow" nonnl = noneOf "\n" ws = many1 $ oneOf " \t" succeeds p = option False $ p >> return True
runMarkov :: [Rule] -> String -> String runMarkov rules s = f rules s
where f [] s = s f (Rule from terminating to : rs) s = g "" s where g _ "" = f rs s g before ahead@(a : as) = if from `isPrefixOf` ahead then let new = reverse before ++ to ++ drop (length from) ahead in if terminating then new else f rules new else g (a : before) as</lang>
J
Solution:<lang j>require'strings regex'
markovLexer =: verb define
rules =. LF cut TAB&=`(,:&' ')}y rules =. a: -.~ (dltb@:{.~ i:&'#')&.> rules rules =. 0 _1 {"1 '\s+->\s+' (rxmatch rxcut ])S:0 rules (,. ] (}.&.>~ ,. ]) ('.'={.)&.>)/ |: rules
)
replace =: dyad define
'index patternLength replacement'=. x 'head tail' =. index split y head, replacement, patternLength }. tail
)
matches =: E. i. 1:
markov =: dyad define
ruleIdx =. 0 [ rules =. markovLexer x while. ruleIdx < #rules do. 'pattern replacement terminating' =. ruleIdx { rules ruleIdx =. 1 + ruleIdx if. (#y) > index =. pattern matches y do. y =. (index ; (#pattern) ; replacement) replace y ruleIdx =. _ * terminating end. end. y
)</lang>
Example:<lang j> m1 =. noun define # This rules file is extracted from Wikipedia: # http://en.wikipedia.org/wiki/Markov_Algorithm A -> apple B -> bag S -> shop T -> the the shop -> my brother a never used -> .terminating rule )
m1 markov 'I bought a B of As from T S.'
I bought a bag of apples from my brother. </lang> Discussion: The J implementation correctly processes all the rulesets. More details are available on the the talk page.
Perl
This program expects a source file as an argument and uses the standard input and output devices for the algorithm's I/O.
<lang perl>@ARGV == 1 or die "Please provide exactly one source file as an argument.\n"; open my $source, '<', $ARGV[0] or die "I couldn't open \"$ARGV[0]\" for reading. ($!.)\n"; my @rules; while (<$source>)
{/\A#/ and next; my @a = /(.*?)\s+->\s+(\.?)(.*)/ or die "Syntax error: $_"; push @rules, \@a;}
close $source;
my $input = do {local $/; <STDIN>;};
OUTER:
{foreach (@rules) {my ($from, $terminating, $to) = @$_; $input =~ s/\Q$from\E/$to/ and ($terminating ? last OUTER : redo OUTER);}}
print $input;</lang>
PicoLisp
<lang PicoLisp>(de markov (File Text)
(use (@A @Z R) (let Rules (make (in File (while (skip "#") (when (match '(@A " " "-" ">" " " @Z) (replace (line) "@" "#")) (link (cons (clip @A) (clip @Z))) ) ) ) ) (setq Text (chop Text)) (pack (loop (NIL (find '((R) (match (append '(@A) (car R) '(@Z)) Text)) Rules ) Text ) (T (= "." (cadr (setq R @))) (append @A (cddr R) @Z) ) (setq Text (append @A (cdr R) @Z)) ) ) ) ) )</lang>
Output:
: (markov "r1" "I bought a B of As from T S.") -> "I bought a bag of apples from my brother." : (markov "r2" "I bought a B of As from T S.") -> "I bought a bag of apples from T shop." : (markov "r3" "I bought a B of As W my Bgage from T S.") -> "I bought a bag of apples with my money from T shop." : (markov "r4" "_1111*11111_") -> "11111111111111111111" : (markov "r5" "000000A000000") -> "00011H1111000"
PureBasic
The GUI used here allows rules to be loaded from a text file or manually added one at a time. Symbol input can be tested anytime by selecting 'Test Rules'. <lang PureBasic>Structure mRule
pattern.s replacement.s isTerminal.i
EndStructure
Procedure parseRule(text.s, List rules.mRule())
#tab = 9: #space = 32: #whiteSpace$ = Chr(#space) + Chr(#tab) Protected tLen, cPtr, nChar.c, pEnd, pLast, pattern.s cPtr = 1 If FindString(#whiteSpace$, Left(text, cPtr), 1): ProcedureReturn 0: EndIf ;parse error If Left(text, cPtr) = "#": ProcedureReturn 2: EndIf ;comment skipped tLen = Len(text) Repeat cPtr + 1 If cPtr > tLen: ProcedureReturn 0: EndIf ;parse error nChar = Asc(Mid(text, cPtr, 1)) Select nChar Case #space, #tab Select pEnd Case 0 To 2 pEnd = 1 pLast = cPtr - 1 Case 3 pattern = Left(text, pLast) EndSelect Case '-' If pEnd = 1: pEnd = 2: EndIf Case '>' If pEnd = 2: pEnd = 3: EndIf EndSelect Until pattern <> "" Repeat cPtr + 1 Until Not FindString(#whiteSpace$, Mid(text, cPtr, 1), 1) Protected isTerminal If Mid(text, cPtr, 1) = "." isTerminal = #True: cPtr + 1 EndIf LastElement(rules()): AddElement(rules()) rules()\pattern = pattern rules()\replacement = Right(text, tLen - cPtr + 1) rules()\isTerminal = isTerminal ProcedureReturn 1 ;processed rule
EndProcedure
Procedure.s execMarkov(text.s, List rules.mRule())
Repeat madeReplacement = #False ForEach rules() If FindString(text, rules()\pattern, 1) text = ReplaceString(text, rules()\pattern, rules()\replacement) madeReplacement = #True: isFinished = rules()\isTerminal Break EndIf Next Until Not madeReplacement Or isFinished ProcedureReturn text
EndProcedure
Procedure addRule(text.s, List rules.mRule())
Protected result = parseRule(text, rules()) Select result Case 0: AddGadgetItem(7, -1, "Invalid rule: " + #DQUOTE$ + text + #DQUOTE$) Case 1: AddGadgetItem(7, -1, "Added: " + #DQUOTE$ + text + #DQUOTE$) Case 2: AddGadgetItem(7, -1, "Comment: " + #DQUOTE$ + text + #DQUOTE$) EndSelect
EndProcedure
OpenWindow(0, 0, 0, 350,300, "Markov Rule Interpreter", #PB_Window_SystemMenu) ButtonGadget(0, 45, 10, 65, 20, "Load Rules") ButtonGadget(1, 163, 10, 65, 20, "Add Rule") ButtonGadget(2, 280, 10, 65, 20, "Interpret") TextGadget(3, 5, 40, 30, 20, "Input:") StringGadget(4, 45, 40, 300, 20,"") TextGadget(5, 5, 100, 35, 20, "Output:") ButtonGadget(6, 160, 70, 70, 20, "Clear Output") EditorGadget(7, 45, 100, 300, 195, #PB_Editor_ReadOnly)
NewList rules.mRule() Define event, isDone, text.s, result, file.s Repeat
event = WaitWindowEvent() Select event Case #PB_Event_Gadget Select EventGadget() Case 0 Define file.s, rule.s file = OpenFileRequester("Select Markov Rule set", "*.txt", "Text (*.txt)|*.txt", 0) If file ClearList(rules()) ReadFile(0, file) While Not(Eof(0)) addRule(ReadString(0), rules()) Wend AddGadgetItem(7, -1, "Loaded " + Str(ListSize(rules())) + " rules."): AddGadgetItem(7, -1, "") EndIf Case 1 addRule(GetGadgetText(4), rules()) Case 2 text = GetGadgetText(4): AddGadgetItem(7, -1, "Interpret: " + #DQUOTE$ + text + #DQUOTE$) AddGadgetItem(7, -1, "Result: " + #DQUOTE$ + execMarkov(text, rules()) + #DQUOTE$): AddGadgetItem(7, -1, "") Case 6 ClearGadgetItems(7) EndSelect Case #PB_Event_CloseWindow isDone = #True EndSelect
Until isDone</lang> Sample output from loading Ruleset 1 and interpreting a symbol:
Comment: "# This rules file is extracted from Wikipedia:" Comment: "# http://en.wikipedia.org/wiki/Markov_Algorithm" Added: "A -> apple" Added: "B -> bag" Added: "S -> shop" Added: "T -> the" Added: "the shop -> my brother" Added: "a never used -> .terminating rule" Loaded 6 rules. Interpret: "I bought a B of As from T S." Result: "I bought a bag of apples from my brother."
Python
The example uses a regexp to parse the syntax of the grammar. This regexp is multi-line and verbose, and uses named groups to aid in understanding the regexp and to allow more meaningful group names to be used when extracting the replacement data from the grammars in function extractreplacements
.
The example gains flexibility by not being tied to specific files. The functions may be imported into other programs which then can provide textual input from their sources without the need to pass 'file handles' around. <lang python>import re
def extractreplacements(grammar):
return [ (matchobj.group('pat'), matchobj.group('repl'), bool(matchobj.group('term'))) for matchobj in re.finditer(syntaxre, grammar) if matchobj.group('rule')]
def replace(text, replacements):
while True: for pat, repl, term in replacements: if pat in text: text = text.replace(pat, repl, 1) if term: return text break else: return text
syntaxre = r"""(?mx) ^(?:
(?: (?P<comment> \# .* ) ) | (?: (?P<blank> \s* ) (?: \n | $ ) ) | (?: (?P<rule> (?P<pat> .+? ) \s+ -> \s+ (?P<term> \.)? (?P<repl> .+) ) )
)$ """
grammar1 = """\
- This rules file is extracted from Wikipedia:
- http://en.wikipedia.org/wiki/Markov_Algorithm
A -> apple B -> bag S -> shop T -> the the shop -> my brother a never used -> .terminating rule """
grammar2 = \
- Slightly modified from the rules on Wikipedia
A -> apple B -> bag S -> .shop T -> the the shop -> my brother a never used -> .terminating rule
grammar3 = \
- BNF Syntax testing rules
A -> apple WWWW -> with Bgage -> ->.* B -> bag ->.* -> money W -> WW S -> .shop T -> the the shop -> my brother a never used -> .terminating rule
grammar4 = \
- Unary Multiplication Engine, for testing Markov Algorithm implementations
- By Donal Fellows.
- Unary addition engine
_+1 -> _1+ 1+1 -> 11+
- Pass for converting from the splitting of multiplication into ordinary
- addition
1! -> !1 ,! -> !+ _! -> _
- Unary multiplication by duplicating left side, right side times
1*1 -> x,@y 1x -> xX X, -> 1,1 X1 -> 1X _x -> _X ,x -> ,X y1 -> 1y y_ -> _
- Next phase of applying
1@1 -> x,@y 1@_ -> @_ ,@_ -> !_ ++ -> +
- Termination cleanup for addition
_1 -> 1 1+_ -> 1 _+_ ->
grammar5 = \
- Turing machine: three-state busy beaver
- state A, symbol 0 => write 1, move right, new state B
A0 -> 1B
- state A, symbol 1 => write 1, move left, new state C
0A1 -> C01 1A1 -> C11
- state B, symbol 0 => write 1, move left, new state A
0B0 -> A01 1B0 -> A11
- state B, symbol 1 => write 1, move right, new state B
B1 -> 1B
- state C, symbol 0 => write 1, move left, new state B
0C0 -> B01 1C0 -> B11
- state C, symbol 1 => write 1, move left, halt
0C1 -> H01 1C1 -> H11
text1 = "I bought a B of As from T S."
text2 = "I bought a B of As W my Bgage from T S."
text3 = '_1111*11111_'
text4 = '000000A000000'
if __name__ == '__main__':
assert replace(text1, extractreplacements(grammar1)) \ == 'I bought a bag of apples from my brother.' assert replace(text1, extractreplacements(grammar2)) \ == 'I bought a bag of apples from T shop.' # Stretch goals assert replace(text2, extractreplacements(grammar3)) \ == 'I bought a bag of apples with my money from T shop.' assert replace(text3, extractreplacements(grammar4)) \ == '11111111111111111111' assert replace(text4, extractreplacements(grammar5)) \ == '00011H1111000'</lang>
Ruby
<lang Ruby>raise "Please input an input code file, an input data file, and an output file." if ARGV.size < 3
rules = File.readlines(ARGV[0]).inject([]) do |rules, line|
if line =~ /^\s*#/ rules elsif line =~ /^(.+)\s+->\s+(\.?)(.*)$/ rules << [$1, $3, $2 != ""] else raise "Syntax error: #{line}" end
end
File.open(ARGV[2], "w") do |file|
file.write(File.read(ARGV[1]).tap { |input_data| while (matched = rules.find { |match, replace, term| input_data[match] and input_data.sub!(match, replace) }) and !matched[2] end })
end</lang>
Scala
<lang scala>import scala.io.Source
object MarkovAlgorithm {
val RulePattern = """(.*?)\s+->\s+(\.?)(.*)""".r val CommentPattern = """#.*|\s*""".r def rule(line: String) = line match { case CommentPattern() => None case RulePattern(pattern, terminal, replacement) => Some(pattern, replacement, terminal == ".") case _ => error("Syntax error on line "+line) } def main(args: Array[String]) { if (args.size != 2 ) { println("Syntax: MarkovAlgorithm inputFile inputPattern") exit(1) } val rules = (Source fromPath args(0) getLines () map rule).toList.flatten def algorithm(input: String): String = rules find (input contains _._1) match { case Some((pattern, replacement, true)) => input replaceFirst ("\\Q"+pattern+"\\E", replacement) case Some((pattern, replacement, false)) => algorithm(input replaceFirst ("\\Q"+pattern+"\\E", replacement)) case None => input }
println(args(1)) println(algorithm(args(1))) }
}</lang>
Script-style, and more concise:
<lang scala>import scala.io.Source
if (argv.size != 2 ) error("Syntax: MarkovAlgorithm inputFile inputPattern")
val rulePattern = """(.*?)\s+->\s+(\.?)(.*)""".r val isComment = (_: String) matches "#.*|\\s*" val rules = Source fromPath args(0) getLines () filterNot isComment map (rulePattern unapplySeq _ get) toList;
def algorithm(input: String): String = rules find (input contains _.head) match {
case Some(Seq(pattern, ".", replacement)) => input replaceFirst ("\\Q"+pattern+"\\E", replacement) case Some(Seq(pattern, "", replacement)) => algorithm(input replaceFirst ("\\Q"+pattern+"\\E", replacement)) case None => input
}
println(argv(1)) println(algorithm(argv(1)))</lang>
Sample outputs:
C:\>scala MarkovAlgorithm ruleset1.txt "I bought a B of As from T S." I bought a B of As from T S. I bought a bag of apples from my brother. C:\>scala MarkovAlgorithm ruleset2.txt "I bought a B of As from T S." I bought a B of As from T S. I bought a bag of apples from T shop. C:\>scala MarkovAlgorithm ruleset3.txt "I bought a B of As W my Bgage from T S." I bought a B of As W my Bgage from T S. I bought a bag of apples with my money from T shop. C:\>scala MarkovAlgorithm ruleset4.txt "_1111*11111_" _1111*11111_ 11111111111111111111
The script is called much in the same way, but with the ".scala" extension added.
Tcl
<lang tcl>package require Tcl 8.5 if {$argc < 3} {error "usage: $argv0 ruleFile inputFile outputFile"} lassign $argv ruleFile inputFile outputFile
- Read the file of rules
set rules {} set f [open $ruleFile] foreach line [split [read $f] \n[close $f]] {
if {[string match "#*" $line] || $line eq ""} continue if {[regexp {^(.+)\s+->\s+(\.?)(.*)$} $line -> from final to]} {
lappend rules $from $to [string compare "." $final] [string length $from]
} else {
error "Syntax error: \"$line\""
}
}
- Apply the rules
set f [open $inputFile] set out [open $outputFile w] foreach line [split [read $f] \n[close $f]] {
set any 1 while {$any} {
set any 0 foreach {from to more fl} $rules { # If we match the 'from' pattern... if {[set idx [string first $from $line]] >= 0} { # Change for the 'to' replacement set line [string replace $line $idx [expr {$idx+$fl-1}] $to]
# Stop if we terminate, otherwise note that we've more work to do
set any $more
break; # Restart search for rules to apply } }
#DEBUG# puts $line }
# Output the processed line puts $out $line
} close $out</lang> In the case where there are no terminating rules and no overlapping issues, the following is an alternative: <lang tcl>package require Tcl 8.5 if {$argc < 3} {error "usage: $argv0 ruleFile inputFile outputFile"} lassign $argv ruleFile inputFile outputFile
- Read the file of rules
set rules {} set f [open $ruleFile] foreach line [split [read $f] \n[close $f]] {
if {[string match "#*" $line] || $line eq ""} continue if {[regexp {^(.+)\s+->\s+(.*)$} $line -> from to]} { dict set rules $from $to } else {
error "Syntax error: \"$line\""
}
}
- Apply the rules in a simplistic manner
set in [open $inputFile] set out [open $outputFile w] set data [read $in] close $in while 1 {
set newData [string map $rules $data] if {$newData eq $data} break set data $newData
} puts $out $data close $out</lang>
VBScript
Implementation
<lang vb> class markovparser
dim aRules public property let ruleset( sBlock ) dim i aRules = split( sBlock, vbNewLine ) '~ remove blank lines from end of array do while aRules( ubound( aRules ) ) = vbnullstring redim preserve aRules( ubound( aRules ) - 1 ) loop '~ parse array for i = lbound( aRules ) to ubound( aRules ) if left( aRules( i ), 1 ) = "#" then aRules( i ) = Array( vbnullstring, aRules(i)) else aRules( i ) = Split( aRules( i ), " -> ", 2 ) end if next end property
public function apply( sArg ) dim ruleapplied dim terminator dim was dim i dim repl dim changes
ruleapplied = true terminator = false
do while ruleapplied and (not terminator) changes = 0 was = sArg for i = lbound( aRules ) to ubound( aRules ) repl = aRules(i)(1) if left( repl, 1 ) = "." then terminator = true repl = mid( repl, 2 ) end if sArg = replace( sArg, aRules(i)(0), repl) if was <> sArg then changes = changes + 1 if changes = 1 then exit for end if end if if terminator then exit for end if next if changes = 0 then ruleapplied = false end if loop apply = sArg end function
sub dump dim i for i = lbound( aRules ) to ubound( aRules ) wscript.echo eef(aRules(i)(0)=vbnullstring,aRules(i)(1),aRules(i)(0)& " -> " & aRules(i)(1)) & eef( left( aRules(i)(1), 1 ) = ".", " #terminator", "" ) next end sub
private function eef( bCond, sExp1, sExp2 ) if bCond then eef = sExp1 else eef = sExp2 end if end function end class </lang>
Invocation
<lang vb> dim m1 set m1 = new markovparser m1.ruleset = "# This rules file is extracted from Wikipedia:" & vbNewLine & _ "# http://en.wikipedia.org/wiki/Markov_Algorithm" & vbNewLine & _ "A -> apple" & vbNewLine & _ "B -> bag" & vbNewLine & _ "S -> shop" & vbNewLine & _ "T -> the" & vbNewLine & _ "the shop -> my brother" & vbNewLine & _ "a never used -> .terminating rule" wscript.echo m1.apply( "I bought a B of As from T S.")
dim m2 set m2 = new markovparser m2.ruleset = replace( "# Slightly modified from the rules on Wikipedia\nA -> apple\nB -> bag\nS -> .shop\nT -> the\nthe shop -> my brother\na never used -> .terminating rule", "\n", vbNewLine ) '~ m1.dump wscript.echo m2.apply( "I bought a B of As from T S.")
dim m3 set m3 = new markovparser m3.ruleset = replace("# BNF Syntax testing rules\nA -> apple\nWWWW -> with\nBgage -> ->.*\nB -> bag" & vbNewLine & _ "->.* -> money\nW -> WW\nS -> .shop\nT -> the\nthe shop -> my brother\na never used -> .terminating rule", "\n", vbNewLine ) wscript.echo m3.apply("I bought a B of As W my Bgage from T S.")
set m4 = new markovparser m4.ruleset = "### Unary Multiplication Engine, for testing Markov Algorithm implementations" & vbNewLine & _ "### By Donal Fellows." & vbNewLine & _ "# Unary addition engine" & vbNewLine & _ "_+1 -> _1+" & vbNewLine & _ "1+1 -> 11+" & vbNewLine & _ "# Pass for converting from the splitting of multiplication into ordinary" & vbNewLine & _ "# addition" & vbNewLine & _ "1! -> !1" & vbNewLine & _ ",! -> !+" & vbNewLine & _ "_! -> _" & vbNewLine & _ "# Unary multiplication by duplicating left side, right side times" & vbNewLine & _ "1*1 -> x,@y" & vbNewLine & _ "1x -> xX" & vbNewLine & _ "X, -> 1,1" & vbNewLine & _ "X1 -> 1X" & vbNewLine & _ "_x -> _X" & vbNewLine & _ ",x -> ,X" & vbNewLine & _ "y1 -> 1y" & vbNewLine & _ "y_ -> _" & vbNewLine & _ "# Next phase of applying" & vbNewLine & _ "1@1 -> x,@y" & vbNewLine & _ "1@_ -> @_" & vbNewLine & _ ",@_ -> !_" & vbNewLine & _ "++ -> +" & vbNewLine & _ "# Termination cleanup for addition" & vbNewLine & _ "_1 -> 1" & vbNewLine & _ "1+_ -> 1" & vbNewLine & _ "_+_ -> " '~ m4.dump wscript.echo m4.apply( "_1111*11111_")
set fso = createobject("scripting.filesystemobject") set m5 = new markovparser m5.ruleset = fso.opentextfile("busybeaver.tur").readall wscript.echo m5.apply("000000A000000") </lang>
Output
<lang vb> I bought a bag of apples from my brother. I bought a bag of apples from T shop. I bought a bag of apples with my money from T shop. 11111111111111111111 00011H1111000 </lang>