Execute a Markov algorithm: Difference between revisions

Line 973:

typedef struct {

char *pat, *repl;

int terminate;

} rule_t;

typedef struct {

int n;

rule_t *rules;

char *buf;

} ruleset_t;

void ruleset_del(ruleset_t *r)

{

if (r->rules) free(r->rules);

if (r->buf) free(r->buf);

free(r);

}

string * str_new(const char *s)

{

int l = strlen(s);

string *str = malloc(sizeof(string));

str->s = malloc(l + 1);

strcpy(str->s, s);

str->alloc_len = l + 1;

return str;

}

void str_append(string *str, const char *s, int len)

{

int l = strlen(str->s);

if (len == -1) len = strlen(s);

if (str->alloc_len < l + len + 1) {

str->alloc_len = l + len + 1;

str->s = realloc(str->s, str->alloc_len);

}

memcpy(str->s + l, s, len);

str->s[l + len] = '\0';

}

Line 1,016:

void str_transfer(string *dest, string *src)

{

size_t tlen = dest->alloc_len;

dest->alloc_len = src->alloc_len;

src->alloc_len = tlen;

char *ts = dest->s;

dest->s = src->s;

src->s = ts;

src->s[0] = '\0';

}

void str_del(string *s)

{

if (s->s) free(s->s);

free(s);

}

void str_markov(string *str, ruleset_t *r)

{

int i, j, sl, pl;

int changed = 0, done = 0;

string *tmp = str_new("");

while (!done) {

changed = 0;

for (i = 0; !done && !changed && i < r->n; i++) {

pl = strlen(r->rules[i].pat);

sl = strlen(str->s);

for (j = 0; j < sl; j++) {

if (strncmp(str->s + j, r->rules[i].pat, pl))

continue;

str_append(tmp, str->s, j);

str_append(tmp, r->rules[i].repl, -1);

str_append(tmp, str->s + j + pl, -1);

str_transfer(str, tmp);

changed = 1;

if (r->rules[i].terminate)

done = 1;

break;

}

if (!changed) break;

}

str_del(tmp);

return;

}

ruleset_t* read_rules(const char *name)

{

struct stat s;

char *buf;

size_t i, j, k, tmp;

rule_t *rules = 0;

int n = 0; /* number of rules */

int fd = open(name, O_RDONLY);

if (fd == -1) return 0;

fstat(fd, &s);

buf = malloc(s.st_size + 2);

read(fd, buf, s.st_size);

buf[s.st_size] = '\n';

buf[s.st_size + 1] = '\0';

close(fd);

for (i = j = 0; buf[i] != '\0'; i++) {

if (buf[i] != '\n') continue;

/* skip comments */

if (buf[j] == '#' || i == j) {

j = i + 1;

continue;

}

/* find the '->' */

for (k = j + 1; k < i - 3; k++)

if (isspace(buf[k]) && !strncmp(buf + k + 1, "->", 2))

break;

if (k >= i - 3) {

printf("parse error: no -> in %.*s\n", i - j, buf + j);

break;

}

/* left side: backtrack through whitespaces */

for (tmp = k; tmp > j && isspace(buf[--tmp]); );

if (tmp < j) {

printf("left side blank? %.*s\n", i - j, buf + j);

break;

}

buf[++tmp] = '\0';

/* right side */

for (k += 3; k < i && isspace(buf[++k]););

buf[i] = '\0';

rules = realloc(rules, sizeof(rule_t) * (1 + n));

rules[n].pat = buf + j;

if (buf[k] == '.') {

rules[n].terminate = 1;

rules[n].repl = buf + k + 1;

} else {

rules[n].terminate = 0;

rules[n].repl = buf + k;

}

n++;

j = i + 1;

}

ruleset_t *r = malloc(sizeof(ruleset_t));

r->buf = buf;

r->rules = rules;

r->n = n;

return r;

}

int test_rules(const char *s, const char *file)

{

ruleset_t * r = read_rules(file);

if (!r) return 0;

printf("Rules from '%s' ok\n", file);

string *ss = str_new(s);

printf("text: %s\n", ss->s);

str_markov(ss, r);

printf("markoved: %s\n", ss->s);

str_del(ss);

ruleset_del(r);

return printf("\n");

}

int main()

{

/* rule 1-5 are files containing rules from page top */

test_rules("I bought a B of As from T S.", "rule1");

test_rules("I bought a B of As from T S.", "rule2");

test_rules("I bought a B of As W my Bgage from T S.", "rule3");

test_rules("_1111*11111_", "rule4");

test_rules("000000A000000", "rule5");

return 0;

}</lang>output<lang>Rules from 'rule1' ok

text: I bought a B of As from T S.

Line 1,455:

This implementation expect the initial text on the command line and the ruleset on STDIN.

<lang dejavu>(remove-comments) text:

]

for line in text:

if and line not starts-with line "#":

line

[

(markov-parse) text:

]

for line in text:

local :index find line " -> "

local :pat slice line 0 index

local :rep slice line + index 4 len line

local :term starts-with rep "."

if term:

set :rep slice rep 1 len rep

& pat & term rep

[

markov-parse:

(markov-parse) (remove-comments) split !decode!utf-8 !read!stdin "\n"

(markov-tick) rules start:

for rule in copy rules:

local :pat &< rule

local :rep &> dup &> rule

local :term &<

local :index find start pat

if < -1 index:

)

slice start + index len pat len start

rep

slice start 0 index

concat(

return term

true start

markov rules:

true

while:

not (markov-tick) rules

!. markov markov-parse get-from !args 1</lang>

Line 2,016:

'''Example''':<lang j> m1 =. noun define

# This rules file is extracted from Wikipedia:

# http://en.wikipedia.org/wiki/Markov_Algorithm

A -> apple

B -> bag

S -> shop

T -> the

the shop -> my brother

a never used -> .terminating rule

)

Line 2,453:

<lang lua>-- utility method to escape punctuation

function normalize(str)

local result = str:gsub("(%p)", "%%%1")

-- print(result)

return result

end

-- utility method to split string into lines

function get_lines(str)

local t = {}

for line in str:gmatch("([^\n\r]*)[\n\r]*") do

table.insert(t, line)

end

return t

end

Line 2,471:

function markov.rule(pattern,replacement,terminating)

return {

pattern = pattern,

replacement = replacement,

terminating = (terminating == ".")

}, normalize(pattern)

end

function markov.make_rules(sample)

local lines = get_lines(sample)

local rules = {}

local finders = {}

for i,line in ipairs(lines) do

if not line:find("^#") then

s,e,pat,term,rep = line:find(MARKOV_RULE_PATTERN)

if s then

r, p = markov.rule(pat,rep,term)

rules[p] = r

table.insert(finders, p)

end

return {

rules = rules,

finders = finders

}

end

Line 2,508:

for i,v in ipairs(finders) do

local found_now = false -- did we find this rule?

if sample_input:find(v) then

found = true

found_now = true

end

sample_input = sample_input:gsub(v, rules[v].replacement, 1)

-- handle terminating rules

if found_now then

if rules[v].terminating then terminate = true end

break

end

Line 2,624:

function do_markov(rules, input, output)

local m = markov.make_rules(rules)

input = markov.execute(m, input)

assert(input == output)

print(input)

end

Line 2,673:

=={{header|МК-61/52}}==

<lang> 9 П4

КИП4 [x] П7 Вx {x} П8

ИП8 ИПE * П8 {x} x=0 08

П5 ИП9 П1 lg [x] 10^x П3

ИП1 П2

Сx П6

ИП2 ИП7 - x=0 70

ИП9 ^ lg [x] 1 + ИП5 - 10^x / [x]

ИП6 ИП8 x#0 50 lg [x] 1 + + 10^x *

ИП9 ИП6 10^x П7 / {x} ИП7 * +

ИП8 ИП7 * + П9

С/П БП 00

x>=0 80

КИП6

ИП2 ИПE / [x] П2

x=0 26

КИП5

ИП1 ИП3 / {x} ИП3 * П1

ИП3 ИПE / [x] П3

x=0 22

ИП4 ИП0 - 9 - x=0 02 С/П</lang>

Under the rules of left 4 registers, under the word has 8 character cells, the alphabet of the digits from 1 to 8. Rules are placed in "123,456", where "123" is a fragment, and "456" is to be replaced, in the registers of the РA to РD. The number of rules is stored in Р0, the initial word is in Р9. Number triggered rule is the last digit registration Р4 (0 to 3), if no rule did not work, the indicator 0, otherwise the current word to be processed. In РE is stored 10.

Line 3,133:

apply_markov(Rules, Sentence, Replacement) :-

maplist(\X^Y^(atom_chars(X, Ch), phrase(markov(Y), Ch, [])), Rules, TmpRules),

% comments produce empty rules

exclude(=([]), TmpRules, LstRules),

atom_chars(Sentence, L),

apply_rules(L, LstRules, R),

atom_chars(Replacement, R).

apply_rules(In, Rules, Out ) :-

apply_one_rule(In, Rules, Out1, Keep_On),

( Keep_On = false

-> Out = Out1

; apply_rules(Out1, Rules, Out)).

apply_one_rule(In, [Rule | Rules], Out, Keep_On) :-

extract(Rule, In, Out1, KeepOn),

( KeepOn = false

-> Out = Out1, Keep_On = KeepOn

; (KeepOn = stop

-> Out = Out1,

Keep_On = true

; apply_one_rule(Out1, Rules, Out, Keep_On))).

apply_one_rule(In, [], In, false) .

Line 3,161:

extract([Pattern, Replace], In, Out, Keep_On) :-

( Replace = [.|Rest]

-> R = Rest

; R = Replace),

( (append(Pattern, End, T), append(Deb, T, In))

-> extract([Pattern, Replace], End, NewEnd, _Keep_On),

append_3(Deb, R, NewEnd, Out),

Keep_On = stop

; Out = In,

( R = Replace

-> Keep_On = true

; Keep_On = false)).

append_3(A, B, C, D) :-

append(A, B, T),

append(T, C, D).

% creation of the rules

Line 3,197:

rule([A,B]) -->

pattern(A), whitespaces, ['-', '>'], whitespaces, end_rule(B).

pattern([X | R]) --> [X], {X \= '\n'}, pattern(R).

Line 3,222:

markov :-

maplist(\X^(call(X), nl,nl), [markov_1, markov_2, markov_3, markov_4, markov_5]).

markov_1 :-

A = ['# This rules file is extracted from Wikipedia:',

'# http://en.wikipedia.org/wiki/Markov_Algorithm',

'A -> apple',

'B -> bag',

'S -> shop',

'T -> the',

'the shop -> my brother',

'a never used -> .terminating rule'],

B = 'I bought a B of As from T S.',

apply_markov(A, B, R),

writeln(B),

writeln(R).

markov_2 :-

A = ['# Slightly modified from the rules on Wikipedia',

'A -> apple',

'B -> bag',

'S -> .shop',

'T -> the',

'the shop -> my brother',

'a never used -> .terminating rule'],

B = 'I bought a B of As from T S.',

apply_markov(A, B, R),

writeln(B),

writeln(R).

markov_3 :-

A = ['# BNF Syntax testing rules',

'A -> apple',

'WWWW -> with',

'Bgage -> ->.*',

'B -> bag',

'->.* -> money',

'W -> WW',

'S -> .shop',

'T -> the',

'the shop -> my brother',

'a never used -> .terminating rule'],

B = 'I bought a B of As W my Bgage from T S.',

apply_markov(A, B, R),

writeln(B),

writeln(R).

markov_4 :-

A = ['### Unary Multiplication Engine, for testing Markov Algorithm implementations',

'### By Donal Fellows.',

'# Unary addition engine',

'_+1 -> _1+',

'1+1 -> 11+',

'# Pass for converting from the splitting of multiplication into ordinary',

'# addition',

'1! -> !1',

',! -> !+',

'_! -> _',

'# Unary multiplication by duplicating left side, right side times',

'1*1 -> x,@y',

'1x -> xX',

'X, -> 1,1',

'X1 -> 1X',

'_x -> _X',

',x -> ,X',

'y1 -> 1y',

'y_ -> _',

'# Next phase of applying',

'1@1 -> x,@y',

'1@_ -> @_',

',@_ -> !_',

'++ -> +',

'# Termination cleanup for addition',

'_1 -> 1',

'1+_ -> 1',

'_+_ -> '],

B = '_1111*11111_',

apply_markov(A, B, R),

writeln(B),

writeln(R).

markov_5 :-

A = ['# Turing machine: three-state busy beaver',

'#',

'# state A, symbol 0 => write 1, move right, new state B',

'A0 -> 1B',

'# state A, symbol 1 => write 1, move left, new state C',

'0A1 -> C01',

'1A1 -> C11',

'# state B, symbol 0 => write 1, move left, new state A',

'0B0 -> A01',

'1B0 -> A11',

'# state B, symbol 1 => write 1, move right, new state B',

'B1 -> 1B',

'# state C, symbol 0 => write 1, move left, new state B',

'0C0 -> B01',

'1C0 -> B11',

'# state C, symbol 1 => write 1, move left, halt',

'0C1 -> H01',

'1C1 -> H11'],

B = '000000A000000',

apply_markov(A, B, R),

writeln(B),

writeln(R).

</lang>

Output :

Line 4,061:

00011H1111000

</pre>

=={{header|Rust}}==

<lang rust>use std::str::FromStr;

#[derive(Clone, Debug)]

pub struct Rule {

pub pat: String,

pub rep: String,

pub terminal: bool,

}

impl Rule {

pub fn new(pat: String, rep: String, terminal: bool) -> Self {

Self { pat, rep, terminal }

}

pub fn applicable_range(&self, input: impl AsRef<str>) -> Option<std::ops::Range<usize>> {

input

.as_ref()

.match_indices(&self.pat)

.next()

.map(|(start, slice)| start..start + slice.len())

}

pub fn apply(&self, s: &mut String) -> bool {

self.applicable_range(s.as_str()).map_or(false, |range| {

s.replace_range(range, &self.rep);

true

})

}

impl FromStr for Rule {

type Err = String;

fn from_str(s: &str) -> Result<Self, Self::Err> {

let mut split = s.splitn(2, " -> ");

let pat = split.next().ok_or_else(|| s.to_string())?;

let rep = split.next().ok_or_else(|| s.to_string())?;

let pat = pat.to_string();

if rep.starts_with('.') {

Ok(Self::new(pat, rep[1..].to_string(), true))

} else {

Ok(Self::new(pat, rep.to_string(), false))

}

#[derive(Clone, Debug)]

pub struct Rules {

rules: Vec<Rule>,

}

impl Rules {

pub fn new(rules: Vec<Rule>) -> Self {

Self { rules }

}

pub fn apply(&self, s: &mut String) -> Option<&Rule> {

self.rules

.iter()

.find(|rule| rule.apply(s))

}

pub fn execute(&self, mut buffer: String) -> String {

while let Some(rule) = self.apply(&mut buffer) {

if rule.terminal {

break;

}

buffer

}

impl FromStr for Rules {

type Err = String;

fn from_str(s: &str) -> Result<Self, Self::Err> {

let mut rules = Vec::new();

for line in s.lines().filter(|line| !line.starts_with('#')) {

rules.push(line.parse::<Rule>()?);

}

Ok(Rules::new(rules))

}

#[cfg(test)]

mod tests {

use super::Rules;

#[test]

fn case_01() -> Result<(), String> {

let input = "I bought a B of As from T S.";

let rules = "\

# This rules file is extracted from Wikipedia:

# http://en.wikipedia.org/wiki/Markov_Algorithm

A -> apple

B -> bag

S -> shop

T -> the

the shop -> my brother

a never used -> .terminating rule";

assert_eq!(

rules.parse::<Rules>()?.execute(input.to_string()),

"I bought a bag of apples from my brother."

);

Ok(())

}

#[test]

fn case_02() -> Result<(), String> {

let input = "I bought a B of As from T S.";

let rules = "\

# Slightly modified from the rules on Wikipedia

A -> apple

B -> bag

S -> .shop

T -> the

the shop -> my brother

a never used -> .terminating rule";

assert_eq!(

rules.parse::<Rules>()?.execute(input.to_string()),

"I bought a bag of apples from T shop."

);

Ok(())

}

#[test]

fn case_03() -> Result<(), String> {

let input = "I bought a B of As W my Bgage from T S.";

let rules = "\

# BNF Syntax testing rules

A -> apple

WWWW -> with

Bgage -> ->.*

B -> bag

->.* -> money

W -> WW

S -> .shop

T -> the

the shop -> my brother

a never used -> .terminating rule";

assert_eq!(

rules.parse::<Rules>()?.execute(input.to_string()),

"I bought a bag of apples with my money from T shop."

);

Ok(())

}

#[test]

fn case_04() -> Result<(), String> {

let input = "_1111*11111_";

let rules = "\

### Unary Multiplication Engine, for testing Markov Algorithm implementations

### By Donal Fellows.

# Unary addition engine

_+1 -> _1+

1+1 -> 11+

# Pass for converting from the splitting of multiplication into ordinary

# addition

1! -> !1

,! -> !+

_! -> _

# Unary multiplication by duplicating left side, right side times

1*1 -> x,@y

1x -> xX

X, -> 1,1

X1 -> 1X

_x -> _X

,x -> ,X

y1 -> 1y

y_ -> _

# Next phase of applying

1@1 -> x,@y

1@_ -> @_

,@_ -> !_

++ -> +

# Termination cleanup for addition

_1 -> 1

1+_ -> 1

_+_ -> ";

assert_eq!(

rules.parse::<Rules>()?.execute(input.to_string()),

"11111111111111111111"

);

Ok(())

}

#[test]

fn case_05() -> Result<(), String> {

let input = "000000A000000";

let rules = "\

# Turing machine: three-state busy beaver

#

# state A, symbol 0 => write 1, move right, new state B

A0 -> 1B

# state A, symbol 1 => write 1, move left, new state C

0A1 -> C01

1A1 -> C11

# state B, symbol 0 => write 1, move left, new state A

0B0 -> A01

1B0 -> A11

# state B, symbol 1 => write 1, move right, new state B

B1 -> 1B

# state C, symbol 0 => write 1, move left, new state B

0C0 -> B01

1C0 -> B11

# state C, symbol 1 => write 1, move left, halt

0C1 -> H01

1C1 -> H11";

assert_eq!(

rules.parse::<Rules>()?.execute(input.to_string()),

"00011H1111000"

);

Ok(())

}

</lang>

=={{header|Scala}}==

Line 4,196:

Line 4,431:

Rule ::= ( pattern : char(1),

replacement : char(1),

terminal : bool);

ReplaceResult ::= (newString : char(1), wasReplaced : bool);

Line 4,204:

Line 4,439:

createRule(line(1)) :=

let

containsComments := firstIndexOf(line, '#');

removedComments := line when containsComments = 0 else

line[1 ... containsComments - 1];

arrowLocation := startOfArrow(removedComments, 1);

lhs := removedComments[1 ... arrowLocation-1];

rhs := removedComments[arrowLocation + 4 ... size(removedComments)];

isTerminal := size(rhs) > 0 and rhs[1] = '.';

in

(pattern : lhs,

replacement : rhs[2 ... size(rhs)] when isTerminal else rhs,

terminal : isTerminal) when size(removedComments) > 0 and arrowLocation /= -1;

startOfArrow(line(1), n) :=

-1 when n > size(line) - 3 else

n when (line[n]=' ' or line[n]='\t') and

line[n+1] = '-' and line[n+2] = '>' and

(line[n+3]=' ' or line[n+3]='\t') else

startOfArrow(line, n+1);

markov(rules(1), n, input(1)) :=

let

replaced := replaceSubString(input, rules[n].pattern, rules[n].replacement, 1);

in

input when n > size(rules) else

replaced.newString when replaced.wasReplaced and rules[n].terminal else

markov(rules, 1, replaced.newString) when replaced.wasReplaced else

markov(rules, n+1, input);

replaceSubString(str(1), original(1), new(1), n) :=

(newString : str, wasReplaced : false)

when n > size(str) - size(original) + 1 else

(newString : str[1 ... n - 1] ++ new ++ str[n + size(original) ... size(str)], wasReplaced : true)

when equalList(str[n ... n + size(original) - 1], original) else

replaceSubString(str, original, new, n + 1);

</lang>

Line 4,446:

Line 4,681:

if {[string match "#*" $line] || $line eq ""} continue

if {[regexp {^(.+)\s+->\s+(\.?)(.*)$} $line -> from final to]} {

lappend rules $from $to [string compare "." $final] [string length $from]

} else {

error "Syntax error: \"$line\""

}

Line 4,458:

Line 4,693:

set any 1

while {$any} {

set any 0

foreach {from to more fl} $rules {

# If we match the 'from' pattern...

if {[set idx [string first $from $line]] >= 0} {

# Change for the 'to' replacement

set line [string replace $line $idx [expr {$idx+$fl-1}] $to]

# Stop if we terminate, otherwise note that we've more work to do

set any $more

break; # Restart search for rules to apply

}

#DEBUG# puts $line

}

Line 4,490:

Line 4,725:

dict set rules $from $to

} else {

error "Syntax error: \"$line\""

}

Line 4,512:

Line 4,747:

class markovparser

dim aRules

public property let ruleset( sBlock )

dim i

aRules = split( sBlock, vbNewLine )

'~ remove blank lines from end of array

do while aRules( ubound( aRules ) ) = vbnullstring

redim preserve aRules( ubound( aRules ) - 1 )

loop

'~ parse array

for i = lbound( aRules ) to ubound( aRules )

if left( aRules( i ), 1 ) = "#" then

aRules( i ) = Array( vbnullstring, aRules(i))

else

aRules( i ) = Split( aRules( i ), " -> ", 2 )

end if

end property

public function apply( sArg )

dim ruleapplied

dim terminator

dim was

dim i

dim repl

dim changes

ruleapplied = true

terminator = false

do while ruleapplied and (not terminator)

changes = 0

was = sArg

for i = lbound( aRules ) to ubound( aRules )

repl = aRules(i)(1)

if left( repl, 1 ) = "." then

terminator = true

repl = mid( repl, 2 )

end if

sArg = replace( sArg, aRules(i)(0), repl)

if was <> sArg then

changes = changes + 1

if changes = 1 then

exit for

end if

if terminator then

exit for

end if

if changes = 0 then

ruleapplied = false

end if

loop

apply = sArg

end function

sub dump

dim i

for i = lbound( aRules ) to ubound( aRules )

wscript.echo eef(aRules(i)(0)=vbnullstring,aRules(i)(1),aRules(i)(0)& " -> " & aRules(i)(1)) & eef( left( aRules(i)(1), 1 ) = ".", " #terminator", "" )

end sub

private function eef( bCond, sExp1, sExp2 )

if bCond then

eef = sExp1

else

eef = sExp2

end if

end function

end class

</lang>

Line 4,663:

Line 4,898:

ks:=L(); vs:=L();

foreach line in (lines){

if(line[0]=="#") continue; // nuke <comment>

pattern,replacement:=line.replace("\t"," ")

.split(" -> ",1).apply("strip");

Line 4,675:

Line 4,910:

do{ go:=False;

foreach n,k in (eks){

if (Void!=text.find(k)){

if (Void==(v:=vs[n])) return(text);

if (v[0,1]==".") v=v[1,*] else go=True;

text=text.replace(k,v,1);

break; // restart after every rule application, unless terminating

}

}while(go);