Execute a Markov algorithm: Difference between revisions

Add source for Rust
m (→‎{{header|Raku}}: .perl not needed)
(Add source for Rust)
Line 973:
 
typedef struct {
char *pat, *repl;
int terminate;
} rule_t;
 
typedef struct {
int n;
rule_t *rules;
char *buf;
} ruleset_t;
 
void ruleset_del(ruleset_t *r)
{
if (r->rules) free(r->rules);
if (r->buf) free(r->buf);
free(r);
}
 
string * str_new(const char *s)
{
int l = strlen(s);
string *str = malloc(sizeof(string));
str->s = malloc(l + 1);
strcpy(str->s, s);
str->alloc_len = l + 1;
return str;
}
 
void str_append(string *str, const char *s, int len)
{
int l = strlen(str->s);
if (len == -1) len = strlen(s);
 
if (str->alloc_len < l + len + 1) {
str->alloc_len = l + len + 1;
str->s = realloc(str->s, str->alloc_len);
}
}
memcpy(str->s + l, s, len);
str->s[l + len] = '\0';
}
 
Line 1,016:
void str_transfer(string *dest, string *src)
{
size_t tlen = dest->alloc_len;
dest->alloc_len = src->alloc_len;
src->alloc_len = tlen;
 
char *ts = dest->s;
dest->s = src->s;
src->s = ts;
src->s[0] = '\0';
}
 
void str_del(string *s)
{
if (s->s) free(s->s);
free(s);
}
 
void str_markov(string *str, ruleset_t *r)
{
int i, j, sl, pl;
int changed = 0, done = 0;
string *tmp = str_new("");
 
while (!done) {
changed = 0;
for (i = 0; !done && !changed && i < r->n; i++) {
pl = strlen(r->rules[i].pat);
sl = strlen(str->s);
for (j = 0; j < sl; j++) {
if (strncmp(str->s + j, r->rules[i].pat, pl))
continue;
str_append(tmp, str->s, j);
str_append(tmp, r->rules[i].repl, -1);
str_append(tmp, str->s + j + pl, -1);
 
str_transfer(str, tmp);
changed = 1;
 
if (r->rules[i].terminate)
done = 1;
break;
}
}
}
}
if (!changed) break;
}
}
str_del(tmp);
return;
}
 
ruleset_t* read_rules(const char *name)
{
struct stat s;
char *buf;
size_t i, j, k, tmp;
rule_t *rules = 0;
int n = 0; /* number of rules */
 
int fd = open(name, O_RDONLY);
if (fd == -1) return 0;
 
fstat(fd, &s);
buf = malloc(s.st_size + 2);
read(fd, buf, s.st_size);
buf[s.st_size] = '\n';
buf[s.st_size + 1] = '\0';
close(fd);
 
for (i = j = 0; buf[i] != '\0'; i++) {
if (buf[i] != '\n') continue;
 
/* skip comments */
if (buf[j] == '#' || i == j) {
j = i + 1;
continue;
}
}
 
/* find the '->' */
for (k = j + 1; k < i - 3; k++)
if (isspace(buf[k]) && !strncmp(buf + k + 1, "->", 2))
break;
 
if (k >= i - 3) {
printf("parse error: no -> in %.*s\n", i - j, buf + j);
break;
}
}
 
/* left side: backtrack through whitespaces */
for (tmp = k; tmp > j && isspace(buf[--tmp]); );
if (tmp < j) {
printf("left side blank? %.*s\n", i - j, buf + j);
break;
}
}
buf[++tmp] = '\0';
 
/* right side */
for (k += 3; k < i && isspace(buf[++k]););
buf[i] = '\0';
 
rules = realloc(rules, sizeof(rule_t) * (1 + n));
rules[n].pat = buf + j;
 
if (buf[k] == '.') {
rules[n].terminate = 1;
rules[n].repl = buf + k + 1;
} else {
rules[n].terminate = 0;
rules[n].repl = buf + k;
}
}
n++;
 
j = i + 1;
}
}
 
ruleset_t *r = malloc(sizeof(ruleset_t));
r->buf = buf;
r->rules = rules;
r->n = n;
return r;
}
 
int test_rules(const char *s, const char *file)
{
ruleset_t * r = read_rules(file);
if (!r) return 0;
printf("Rules from '%s' ok\n", file);
 
string *ss = str_new(s);
printf("text: %s\n", ss->s);
 
str_markov(ss, r);
printf("markoved: %s\n", ss->s);
 
str_del(ss);
ruleset_del(r);
 
return printf("\n");
}
 
int main()
{
/* rule 1-5 are files containing rules from page top */
test_rules("I bought a B of As from T S.", "rule1");
test_rules("I bought a B of As from T S.", "rule2");
test_rules("I bought a B of As W my Bgage from T S.", "rule3");
test_rules("_1111*11111_", "rule4");
test_rules("000000A000000", "rule5");
 
return 0;
}</lang>output<lang>Rules from 'rule1' ok
text: I bought a B of As from T S.
Line 1,455:
This implementation expect the initial text on the command line and the ruleset on STDIN.
<lang dejavu>(remove-comments) text:
]
]
for line in text:
if and line not starts-with line "#":
line
[
[
 
(markov-parse) text:
]
]
for line in text:
local :index find line " -> "
local :pat slice line 0 index
local :rep slice line + index 4 len line
local :term starts-with rep "."
if term:
set :rep slice rep 1 len rep
& pat & term rep
[
[
 
markov-parse:
(markov-parse) (remove-comments) split !decode!utf-8 !read!stdin "\n"
 
(markov-tick) rules start:
for rule in copy rules:
local :pat &< rule
local :rep &> dup &> rule
local :term &<
local :index find start pat
if < -1 index:
)
)
slice start + index len pat len start
rep
rep
slice start 0 index
concat(
return term
true start
 
markov rules:
true
while:
not (markov-tick) rules
 
!. markov markov-parse get-from !args 1</lang>
Line 2,016:
 
'''Example''':<lang j> m1 =. noun define
# This rules file is extracted from Wikipedia:
# http://en.wikipedia.org/wiki/Markov_Algorithm
A -> apple
B -> bag
S -> shop
T -> the
the shop -> my brother
a never used -> .terminating rule
)
 
Line 2,453:
<lang lua>-- utility method to escape punctuation
function normalize(str)
local result = str:gsub("(%p)", "%%%1")
-- print(result)
return result
end
 
-- utility method to split string into lines
function get_lines(str)
local t = {}
for line in str:gmatch("([^\n\r]*)[\n\r]*") do
table.insert(t, line)
end
return t
end
 
Line 2,471:
 
function markov.rule(pattern,replacement,terminating)
return {
pattern = pattern,
replacement = replacement,
terminating = (terminating == ".")
}, normalize(pattern)
end
 
function markov.make_rules(sample)
local lines = get_lines(sample)
local rules = {}
local finders = {}
for i,line in ipairs(lines) do
if not line:find("^#") then
s,e,pat,term,rep = line:find(MARKOV_RULE_PATTERN)
if s then
r, p = markov.rule(pat,rep,term)
rules[p] = r
table.insert(finders, p)
end
end
end
return {
rules = rules,
finders = finders
}
}
end
 
Line 2,508:
 
for i,v in ipairs(finders) do
local found_now = false -- did we find this rule?
if sample_input:find(v) then
found = true
found_now = true
end
sample_input = sample_input:gsub(v, rules[v].replacement, 1)
-- handle terminating rules
if found_now then
if rules[v].terminating then terminate = true end
break
end
end
 
Line 2,624:
 
function do_markov(rules, input, output)
local m = markov.make_rules(rules)
input = markov.execute(m, input)
assert(input == output)
print(input)
end
 
Line 2,673:
 
=={{header|МК-61/52}}==
<lang> 9 П4
КИП4 [x] П7 Вx {x} П8
ИП8 ИПE * П8 {x} x=0 08
П5 ИП9 П1 lg [x] 10^x П3
ИП1 П2
Сx П6
ИП2 ИП7 - x=0 70
ИП9 ^ lg [x] 1 + ИП5 - 10^x / [x]
ИП6 ИП8 x#0 50 lg [x] 1 + + 10^x *
ИП9 ИП6 10^x П7 / {x} ИП7 * +
ИП8 ИП7 * + П9
С/П БП 00
x>=0 80
КИП6
ИП2 ИПE / [x] П2
x=0 26
КИП5
ИП1 ИП3 / {x} ИП3 * П1
ИП3 ИПE / [x] П3
x=0 22
ИП4 ИП0 - 9 - x=0 02 С/П</lang>
 
Under the rules of left 4 registers, under the word has 8 character cells, the alphabet of the digits from 1 to 8. Rules are placed in "123,456", where "123" is a fragment, and "456" is to be replaced, in the registers of the РA to РD. The number of rules is stored in Р0, the initial word is in Р9. Number triggered rule is the last digit registration Р4 (0 to 3), if no rule did not work, the indicator 0, otherwise the current word to be processed. In РE is stored 10.
Line 3,133:
 
apply_markov(Rules, Sentence, Replacement) :-
maplist(\X^Y^(atom_chars(X, Ch), phrase(markov(Y), Ch, [])), Rules, TmpRules),
% comments produce empty rules
exclude(=([]), TmpRules, LstRules),
 
atom_chars(Sentence, L),
apply_rules(L, LstRules, R),
atom_chars(Replacement, R).
 
apply_rules(In, Rules, Out ) :-
apply_one_rule(In, Rules, Out1, Keep_On),
( Keep_On = false
-> Out = Out1
; apply_rules(Out1, Rules, Out)).
 
 
apply_one_rule(In, [Rule | Rules], Out, Keep_On) :-
extract(Rule, In, Out1, KeepOn),
( KeepOn = false
-> Out = Out1, Keep_On = KeepOn
; (KeepOn = stop
-> Out = Out1,
Keep_On = true
; apply_one_rule(Out1, Rules, Out, Keep_On))).
 
apply_one_rule(In, [], In, false) .
Line 3,161:
 
extract([Pattern, Replace], In, Out, Keep_On) :-
( Replace = [.|Rest]
-> R = Rest
; R = Replace),
( (append(Pattern, End, T), append(Deb, T, In))
-> extract([Pattern, Replace], End, NewEnd, _Keep_On),
append_3(Deb, R, NewEnd, Out),
Keep_On = stop
; Out = In,
( R = Replace
-> Keep_On = true
; Keep_On = false)).
 
 
append_3(A, B, C, D) :-
append(A, B, T),
append(T, C, D).
 
% creation of the rules
Line 3,197:
 
rule([A,B]) -->
pattern(A), whitespaces, ['-', '>'], whitespaces, end_rule(B).
 
pattern([X | R]) --> [X], {X \= '\n'}, pattern(R).
Line 3,222:
 
markov :-
maplist(\X^(call(X), nl,nl), [markov_1, markov_2, markov_3, markov_4, markov_5]).
 
markov_1 :-
A = ['# This rules file is extracted from Wikipedia:',
'# http://en.wikipedia.org/wiki/Markov_Algorithm',
'A -> apple',
'B -> bag',
'S -> shop',
'T -> the',
'the shop -> my brother',
'a never used -> .terminating rule'],
B = 'I bought a B of As from T S.',
apply_markov(A, B, R),
writeln(B),
writeln(R).
 
 
markov_2 :-
A = ['# Slightly modified from the rules on Wikipedia',
'A -> apple',
'B -> bag',
'S -> .shop',
'T -> the',
'the shop -> my brother',
'a never used -> .terminating rule'],
 
B = 'I bought a B of As from T S.',
 
apply_markov(A, B, R),
writeln(B),
writeln(R).
 
 
markov_3 :-
A = ['# BNF Syntax testing rules',
'A -> apple',
'WWWW -> with',
'Bgage -> ->.*',
'B -> bag',
'->.* -> money',
'W -> WW',
'S -> .shop',
'T -> the',
'the shop -> my brother',
'a never used -> .terminating rule'],
 
B = 'I bought a B of As W my Bgage from T S.',
 
apply_markov(A, B, R),
writeln(B),
writeln(R).
 
 
markov_4 :-
A = ['### Unary Multiplication Engine, for testing Markov Algorithm implementations',
'### By Donal Fellows.',
'# Unary addition engine',
'_+1 -> _1+',
'1+1 -> 11+',
'# Pass for converting from the splitting of multiplication into ordinary',
'# addition',
'1! -> !1',
',! -> !+',
'_! -> _',
'# Unary multiplication by duplicating left side, right side times',
'1*1 -> x,@y',
'1x -> xX',
'X, -> 1,1',
'X1 -> 1X',
'_x -> _X',
',x -> ,X',
'y1 -> 1y',
'y_ -> _',
'# Next phase of applying',
'1@1 -> x,@y',
'1@_ -> @_',
',@_ -> !_',
'++ -> +',
'# Termination cleanup for addition',
'_1 -> 1',
'1+_ -> 1',
'_+_ -> '],
 
B = '_1111*11111_',
 
apply_markov(A, B, R),
writeln(B),
writeln(R).
 
markov_5 :-
A = ['# Turing machine: three-state busy beaver',
'#',
'# state A, symbol 0 => write 1, move right, new state B',
'A0 -> 1B',
'# state A, symbol 1 => write 1, move left, new state C',
'0A1 -> C01',
'1A1 -> C11',
'# state B, symbol 0 => write 1, move left, new state A',
'0B0 -> A01',
'1B0 -> A11',
'# state B, symbol 1 => write 1, move right, new state B',
'B1 -> 1B',
'# state C, symbol 0 => write 1, move left, new state B',
'0C0 -> B01',
'1C0 -> B11',
'# state C, symbol 1 => write 1, move left, halt',
'0C1 -> H01',
'1C1 -> H11'],
 
B = '000000A000000',
apply_markov(A, B, R),
writeln(B),
writeln(R).
</lang>
Output :
Line 4,061:
00011H1111000
</pre>
 
 
=={{header|Rust}}==
 
<lang rust>use std::str::FromStr;
 
#[derive(Clone, Debug)]
pub struct Rule {
pub pat: String,
pub rep: String,
pub terminal: bool,
}
 
impl Rule {
pub fn new(pat: String, rep: String, terminal: bool) -> Self {
Self { pat, rep, terminal }
}
 
pub fn applicable_range(&self, input: impl AsRef<str>) -> Option<std::ops::Range<usize>> {
input
.as_ref()
.match_indices(&self.pat)
.next()
.map(|(start, slice)| start..start + slice.len())
}
 
pub fn apply(&self, s: &mut String) -> bool {
self.applicable_range(s.as_str()).map_or(false, |range| {
s.replace_range(range, &self.rep);
true
})
}
}
 
impl FromStr for Rule {
type Err = String;
 
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut split = s.splitn(2, " -> ");
let pat = split.next().ok_or_else(|| s.to_string())?;
let rep = split.next().ok_or_else(|| s.to_string())?;
 
let pat = pat.to_string();
if rep.starts_with('.') {
Ok(Self::new(pat, rep[1..].to_string(), true))
} else {
Ok(Self::new(pat, rep.to_string(), false))
}
}
}
 
#[derive(Clone, Debug)]
pub struct Rules {
rules: Vec<Rule>,
}
 
impl Rules {
pub fn new(rules: Vec<Rule>) -> Self {
Self { rules }
}
 
pub fn apply(&self, s: &mut String) -> Option<&Rule> {
self.rules
.iter()
.find(|rule| rule.apply(s))
}
 
pub fn execute(&self, mut buffer: String) -> String {
while let Some(rule) = self.apply(&mut buffer) {
if rule.terminal {
break;
}
}
 
buffer
}
}
 
impl FromStr for Rules {
type Err = String;
 
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut rules = Vec::new();
 
for line in s.lines().filter(|line| !line.starts_with('#')) {
rules.push(line.parse::<Rule>()?);
}
 
Ok(Rules::new(rules))
}
}
 
#[cfg(test)]
mod tests {
 
use super::Rules;
 
#[test]
fn case_01() -> Result<(), String> {
let input = "I bought a B of As from T S.";
let rules = "\
# This rules file is extracted from Wikipedia:
# http://en.wikipedia.org/wiki/Markov_Algorithm
A -> apple
B -> bag
S -> shop
T -> the
the shop -> my brother
a never used -> .terminating rule";
 
assert_eq!(
rules.parse::<Rules>()?.execute(input.to_string()),
"I bought a bag of apples from my brother."
);
 
Ok(())
}
 
#[test]
fn case_02() -> Result<(), String> {
let input = "I bought a B of As from T S.";
let rules = "\
# Slightly modified from the rules on Wikipedia
A -> apple
B -> bag
S -> .shop
T -> the
the shop -> my brother
a never used -> .terminating rule";
 
assert_eq!(
rules.parse::<Rules>()?.execute(input.to_string()),
"I bought a bag of apples from T shop."
);
 
Ok(())
}
 
#[test]
fn case_03() -> Result<(), String> {
let input = "I bought a B of As W my Bgage from T S.";
let rules = "\
# BNF Syntax testing rules
A -> apple
WWWW -> with
Bgage -> ->.*
B -> bag
->.* -> money
W -> WW
S -> .shop
T -> the
the shop -> my brother
a never used -> .terminating rule";
 
assert_eq!(
rules.parse::<Rules>()?.execute(input.to_string()),
"I bought a bag of apples with my money from T shop."
);
 
Ok(())
}
 
#[test]
fn case_04() -> Result<(), String> {
let input = "_1111*11111_";
let rules = "\
### Unary Multiplication Engine, for testing Markov Algorithm implementations
### By Donal Fellows.
# Unary addition engine
_+1 -> _1+
1+1 -> 11+
# Pass for converting from the splitting of multiplication into ordinary
# addition
1! -> !1
,! -> !+
_! -> _
# Unary multiplication by duplicating left side, right side times
1*1 -> x,@y
1x -> xX
X, -> 1,1
X1 -> 1X
_x -> _X
,x -> ,X
y1 -> 1y
y_ -> _
# Next phase of applying
1@1 -> x,@y
1@_ -> @_
,@_ -> !_
++ -> +
# Termination cleanup for addition
_1 -> 1
1+_ -> 1
_+_ -> ";
 
assert_eq!(
rules.parse::<Rules>()?.execute(input.to_string()),
"11111111111111111111"
);
 
Ok(())
}
 
#[test]
fn case_05() -> Result<(), String> {
let input = "000000A000000";
let rules = "\
# Turing machine: three-state busy beaver
#
# state A, symbol 0 => write 1, move right, new state B
A0 -> 1B
# state A, symbol 1 => write 1, move left, new state C
0A1 -> C01
1A1 -> C11
# state B, symbol 0 => write 1, move left, new state A
0B0 -> A01
1B0 -> A11
# state B, symbol 1 => write 1, move right, new state B
B1 -> 1B
# state C, symbol 0 => write 1, move left, new state B
0C0 -> B01
1C0 -> B11
# state C, symbol 1 => write 1, move left, halt
0C1 -> H01
1C1 -> H11";
 
assert_eq!(
rules.parse::<Rules>()?.execute(input.to_string()),
"00011H1111000"
);
 
Ok(())
}
}
</lang>
 
=={{header|Scala}}==
Line 4,196 ⟶ 4,431:
 
Rule ::= ( pattern : char(1),
replacement : char(1),
terminal : bool);
 
ReplaceResult ::= (newString : char(1), wasReplaced : bool);
Line 4,204 ⟶ 4,439:
 
createRule(line(1)) :=
let
containsComments := firstIndexOf(line, '#');
removedComments := line when containsComments = 0 else
line[1 ... containsComments - 1];
 
arrowLocation := startOfArrow(removedComments, 1);
lhs := removedComments[1 ... arrowLocation-1];
rhs := removedComments[arrowLocation + 4 ... size(removedComments)];
isTerminal := size(rhs) > 0 and rhs[1] = '.';
in
(pattern : lhs,
replacement : rhs[2 ... size(rhs)] when isTerminal else rhs,
terminal : isTerminal) when size(removedComments) > 0 and arrowLocation /= -1;
 
startOfArrow(line(1), n) :=
-1 when n > size(line) - 3 else
n when (line[n]=' ' or line[n]='\t') and
line[n+1] = '-' and line[n+2] = '>' and
(line[n+3]=' ' or line[n+3]='\t') else
startOfArrow(line, n+1);
 
markov(rules(1), n, input(1)) :=
let
replaced := replaceSubString(input, rules[n].pattern, rules[n].replacement, 1);
in
input when n > size(rules) else
replaced.newString when replaced.wasReplaced and rules[n].terminal else
markov(rules, 1, replaced.newString) when replaced.wasReplaced else
markov(rules, n+1, input);
 
replaceSubString(str(1), original(1), new(1), n) :=
(newString : str, wasReplaced : false)
when n > size(str) - size(original) + 1 else
(newString : str[1 ... n - 1] ++ new ++ str[n + size(original) ... size(str)], wasReplaced : true)
when equalList(str[n ... n + size(original) - 1], original) else
replaceSubString(str, original, new, n + 1);
 
</lang>
Line 4,446 ⟶ 4,681:
if {[string match "#*" $line] || $line eq ""} continue
if {[regexp {^(.+)\s+->\s+(\.?)(.*)$} $line -> from final to]} {
lappend rules $from $to [string compare "." $final] [string length $from]
} else {
error "Syntax error: \"$line\""
}
}
Line 4,458 ⟶ 4,693:
set any 1
while {$any} {
set any 0
foreach {from to more fl} $rules {
# If we match the 'from' pattern...
if {[set idx [string first $from $line]] >= 0} {
# Change for the 'to' replacement
set line [string replace $line $idx [expr {$idx+$fl-1}] $to]
 
# Stop if we terminate, otherwise note that we've more work to do
set any $more
break; # Restart search for rules to apply
}
}
}
#DEBUG# puts $line
}
Line 4,490 ⟶ 4,725:
dict set rules $from $to
} else {
error "Syntax error: \"$line\""
}
}
Line 4,512 ⟶ 4,747:
class markovparser
 
dim aRules
public property let ruleset( sBlock )
dim i
aRules = split( sBlock, vbNewLine )
'~ remove blank lines from end of array
do while aRules( ubound( aRules ) ) = vbnullstring
redim preserve aRules( ubound( aRules ) - 1 )
loop
'~ parse array
for i = lbound( aRules ) to ubound( aRules )
if left( aRules( i ), 1 ) = "#" then
aRules( i ) = Array( vbnullstring, aRules(i))
else
aRules( i ) = Split( aRules( i ), " -> ", 2 )
end if
next
end property
public function apply( sArg )
dim ruleapplied
dim terminator
dim was
dim i
dim repl
dim changes
ruleapplied = true
terminator = false
 
do while ruleapplied and (not terminator)
changes = 0
was = sArg
for i = lbound( aRules ) to ubound( aRules )
repl = aRules(i)(1)
if left( repl, 1 ) = "." then
terminator = true
repl = mid( repl, 2 )
end if
sArg = replace( sArg, aRules(i)(0), repl)
if was <> sArg then
changes = changes + 1
if changes = 1 then
exit for
end if
end if
if terminator then
exit for
end if
next
if changes = 0 then
ruleapplied = false
end if
loop
apply = sArg
end function
sub dump
dim i
for i = lbound( aRules ) to ubound( aRules )
wscript.echo eef(aRules(i)(0)=vbnullstring,aRules(i)(1),aRules(i)(0)& " -> " & aRules(i)(1)) & eef( left( aRules(i)(1), 1 ) = ".", " #terminator", "" )
next
end sub
private function eef( bCond, sExp1, sExp2 )
if bCond then
eef = sExp1
else
eef = sExp2
end if
end function
end class
</lang>
Line 4,663 ⟶ 4,898:
ks:=L(); vs:=L();
foreach line in (lines){
if(line[0]=="#") continue; // nuke <comment>
pattern,replacement:=line.replace("\t"," ")
.split(" -> ",1).apply("strip");
Line 4,675 ⟶ 4,910:
do{ go:=False;
foreach n,k in (eks){
if (Void!=text.find(k)){
if (Void==(v:=vs[n])) return(text);
if (v[0,1]==".") v=v[1,*] else go=True;
text=text.replace(k,v,1);
break; // restart after every rule application, unless terminating
}
}
}while(go);
Anonymous user