Run-length encoding
| This page uses content from Wikipedia. The original article was at Run-length_encoding. The list of authors can be seen in the page history. As with Rosetta Code, the text of Wikipedia is available under the GNU FDL. (See links for details on variance) |
You are encouraged to solve this task according to the task description, using any language you may know.
Given a string containing uppercase characters (A-Z), compress repeated 'runs' of the same character by storing the length of that run, and provide a function to reverse the compression. The output can be anything, as long as you can recreate the input with it.
Example:
- Input:
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW - Output:
12W1B12W3B24W1B14W
Note: the encoding step in the above example is the same as a step of the Look-and-say sequence.
[edit] Ada
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Strings.Fixed; use Ada.Strings.Fixed;
procedure Test_Run_Length_Encoding is
function Encode (Data : String) return String is
begin
if Data'Length = 0 then
return "";
else
declare
Code : constant Character := Data (Data'First);
Index : Integer := Data'First + 1;
begin
while Index <= Data'Last and then Code = Data (Index) loop
Index := Index + 1;
end loop;
declare
Prefix : constant String := Integer'Image (Index - Data'First);
begin
return Prefix (2..Prefix'Last) & Code & Encode (Data (Index..Data'Last));
end;
end;
end if;
end Encode;
function Decode (Data : String) return String is
begin
if Data'Length = 0 then
return "";
else
declare
Index : Integer := Data'First;
Count : Natural := 0;
begin
while Index < Data'Last and then Data (Index) in '0'..'9' loop
Count := Count * 10 + Character'Pos (Data (Index)) - Character'Pos ('0');
Index := Index + 1;
end loop;
if Index > Data'First then
return Count * Data (Index) & Decode (Data (Index + 1..Data'Last));
else
return Data;
end if;
end;
end if;
end Decode;
begin
Put_Line (Encode ("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"));
Put_Line (Decode ("12W1B12W3B24W1B14W"));
end Test_Run_Length_Encoding;
Sample output:
12W1B12W3B24W1B14W WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] AWK
It works with "textual" input. Lines containing numbers are skipped, since they can't be represented in a not ambiguous way in this implementation (e.g. "11AA" would be encoded as "212A", which would be decoded as A repeated 212 times!)
Encoding
BEGIN {
FS=""
}
/^[^0-9]+$/ {
cp = $1; j = 0
for(i=1; i <= NF; i++) {
if ( $i == cp ) {
j++;
} else {
printf("%d%c", j, cp)
j = 1
}
cp = $i
}
printf("%d%c", j, cp)
}
Decoding
BEGIN {
RS="[0-9]+[^0-9]"
final = "";
}
{
match(RT, /([0-9]+)([^0-9])/, r)
for(i=0; i < int(r[1]); i++) {
final = final r[2]
}
}
END {
print final
}
[edit] ALGOL 68
Note: The following uses iterators, eliminating the need of declaring arbitrarily large CHAR arrays for caching.
STRING input := "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
STRING output := "12W1B12W3B24W1B14W";
MODE YIELDCHAR = PROC(CHAR)VOID;
MODE GENCHAR = PROC(YIELDCHAR)VOID;
PROC gen char string = (REF STRING s, YIELDCHAR yield)VOID:
FOR i FROM LWB s TO UPB s DO yield(s[i]) OD;
CO
# Note: The following 2 lines use currying. This not supported by ELLA ALGOL 68RS #
GENCHAR input seq = gen char string(input,),
output seq = gen char string(output,);
END CO
GENCHAR
input seq = (YIELDCHAR yield)VOID: gen char string(input, yield),
output seq = (YIELDCHAR yield)VOID: gen char string(output, yield);
PROC gen encode = (GENCHAR gen char, YIELDCHAR yield)VOID: (
INT count := 0;
CHAR prev;
# FOR CHAR c IN # gen char( # ) DO ( #
## (CHAR c)VOID: (
IF count = 0 THEN
count := 1;
prev := c
ELIF c NE prev THEN
STRING str count := whole(count,0);
gen char string(str count, yield); count := 1;
yield(prev); prev := c
ELSE
count +:=1
FI
# OD # ));
IF count NE 0 THEN
STRING str count := whole(count,0);
gen char string(str count,yield);
yield(prev)
FI
);
STRING zero2nine = "0123456789";
PROC gen decode = (GENCHAR gen char, YIELDCHAR yield)VOID: (
INT repeat := 0;
# FOR CHAR c IN # gen char( # ) DO ( #
## (CHAR c)VOID: (
IF char in string(c, LOC INT, zero2nine) THEN
repeat := repeat*10 + ABS c - ABS "0"
ELSE
FOR i TO repeat DO yield(c) OD;
repeat := 0
FI
# OD # ))
);
# iterate through input string #
print("Encode input: ");
# FOR CHAR c IN # gen encode(input seq, # ) DO ( #
## (CHAR c)VOID:
print(c)
# OD # );
print(new line);
# iterate through output string #
print("Decode output: ");
# FOR CHAR c IN # gen decode(output seq, # ) DO ( #
## (CHAR c)VOID:
print(c)
# OD # );
print(new line)
Output:
Encode input: 12W1B12W3B24W1B14W Decode output: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] APL
∇ ret←RLL rll;count
[1] count←∣2-/((1,(2≠/rll),1)×⍳1+⍴rll)~0
[2] ret←(⍕count,¨(1,2≠/rll)/rll)~' '
∇
Sample Output:
RLL 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
12W1B12W3B24W1B14W
[edit] AutoHotkey
MsgBox % key := rle_encode("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
MsgBox % rle_decode(key)
rle_encode(message)
{
StringLeft, previous, message, 1
StringRight, last, message, 1
message .= Asc(Chr(last)+1)
count = 0
Loop, Parse, message
{
If (previous == A_LoopField)
count +=1
Else
{
output .= previous . count
previous := A_LoopField
count = 1
}
}
Return output
}
rle_decode(message)
{
pos = 1
While, item := RegExMatch(message, "\D", char, pos)
{
digpos := RegExMatch(message, "\d+", dig, item)
Loop, % dig
output .= char
pos := digpos
}
Return output
}
[edit] BASIC
DECLARE FUNCTION RLDecode$ (i AS STRING)
DECLARE FUNCTION RLEncode$ (i AS STRING)
DIM initial AS STRING, encoded AS STRING, decoded AS STRING
INPUT "Type something: ", initial
encoded = RLEncode(initial)
decoded = RLDecode(encoded)
PRINT initial
PRINT encoded
PRINT decoded
FUNCTION RLDecode$ (i AS STRING)
DIM Loop0 AS LONG, rCount AS STRING, outP AS STRING, m AS STRING
FOR Loop0 = 1 TO LEN(i)
m = MID$(i, Loop0, 1)
SELECT CASE m
CASE "0" TO "9"
rCount = rCount + m
CASE ELSE
IF LEN(rCount) THEN
outP = outP + STRING$(VAL(rCount), m)
rCount = ""
ELSE
outP = outP + m
END IF
END SELECT
NEXT
RLDecode$ = outP
END FUNCTION
FUNCTION RLEncode$ (i AS STRING)
DIM tmp1 AS STRING, tmp2 AS STRING, outP AS STRING
DIM Loop0 AS LONG, rCount AS LONG
tmp1 = MID$(i, 1, 1)
tmp2 = tmp1
rCount = 1
FOR Loop0 = 2 TO LEN(i)
tmp1 = MID$(i, Loop0, 1)
IF tmp1 <> tmp2 THEN
outP = outP + LTRIM$(RTRIM$(STR$(rCount))) + tmp2
tmp2 = tmp1
rCount = 1
ELSE
rCount = rCount + 1
END IF
NEXT
outP = outP + LTRIM$(RTRIM$(STR$(rCount)))
outP = outP + tmp2
RLEncode$ = outP
END FUNCTION
Sample output (last one shows errors from using numbers in input string):
Type something: aaaaeeeeeeiiiioooouuy aaaaeeeeeeiiiioooouuy 4a6e4i4o2u1y aaaaeeeeeeiiiioooouuy Type something: My dog has fleas. My dog has fleas. 1M1y1 1d1o1g1 1h1a1s1 1f1l1e1a1s1. My dog has fleas. Type something: 1r 1r 111r rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr
[edit] BBC BASIC
The run counts are indicated by means of character codes in the range 131 to 255.
input$ = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
PRINT "Input: " input$
rle$ = FNencodeRLE(input$)
output$ = FNdecodeRLE(rle$)
PRINT "Output: " output$
END
DEF FNencodeRLE(text$)
LOCAL n%, r%, c$, o$
n% = 1
WHILE n% <= LEN(text$)
c$ = MID$(text$, n%, 1)
n% += 1
r% = 1
WHILE c$ = MID$(text$, n%, 1) AND r% < 127
r% += 1
n% += 1
ENDWHILE
IF r% < 3 o$ += STRING$(r%, c$) ELSE o$ += CHR$(128+r%) + c$
ENDWHILE
= o$
DEF FNdecodeRLE(rle$)
LOCAL n%, c$, o$
n% = 1
WHILE n% <= LEN(rle$)
c$ = MID$(rle$, n%, 1)
n% += 1
IF ASC(c$) > 128 THEN
o$ += STRING$(ASC(c$)-128, MID$(rle$, n%, 1))
n% += 1
ELSE
o$ += c$
ENDIF
ENDWHILE
= o$
[edit] Bracmat
( run-length
= character otherCharacter acc begin end
. :?acc
& 0:?begin
& @( !arg
: ?
[!begin
%@?character
?
[?end
( (%@:~!character:?otherCharacter) ?
& !acc !end+-1*!begin !character:?acc
& !otherCharacter:?character
& !end:?begin
& ~`
| &!acc !end+-1*!begin !character:?acc
)
)
& str$!acc
)
& run-length$WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
12W1B12W3B24W1B14W
[edit] Burlesque
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
=[{^^[~\/L[Sh}\m
[edit] C
Encoder that can deal with byte streams. Can encode/decode any byte values and any length with reasonable efficiency. Also showing OO and polymophism with structs.
#include <stdio.h>
#include <stdlib.h>
typedef struct stream_t stream_t, *stream;
struct stream_t {
/* get funciton is supposed to return a byte value (0-255),
or -1 to signify end of input */
int (*get)(stream);
/* put function does output, one byte at a time */
int (*put)(stream, int);
};
/* next two structs inherit from stream_t */
typedef struct {
int (*get)(stream);
int (*put)(stream, int);
char *string;
int pos;
} string_stream;
typedef struct {
int (*get)(stream);
int (*put)(stream, int);
FILE *fp;
} file_stream;
/* methods for above streams */
int sget(stream in)
{
int c;
string_stream* s = (string_stream*) in;
c = (unsigned char)(s->string[s->pos]);
if (c == '\0') return -1;
s->pos++;
return c;
}
int sput(stream out, int c)
{
string_stream* s = (string_stream*) out;
s->string[s->pos++] = (c == -1) ? '\0' : c;
if (c == -1) s->pos = 0;
return 0;
}
int file_put(stream out, int c)
{
file_stream *f = (file_stream*) out;
return fputc(c, f->fp);
}
/* helper function */
void output(stream out, unsigned char* buf, int len)
{
int i;
out->put(out, 128 + len);
for (i = 0; i < len; i++)
out->put(out, buf[i]);
}
/* Specification: encoded stream are unsigned bytes consisting of sequences.
* First byte of each sequence is the length, followed by a number of bytes.
* If length <=128, the next byte is to be repeated length times;
* If length > 128, the next (length - 128) bytes are not repeated.
* this is to improve efficiency for long non-repeating sequences.
* This scheme can encode arbitrary byte values efficiently.
* c.f. Adobe PDF spec RLE stream encoding (not exactly the same)
*/
void encode(stream in, stream out)
{
unsigned char buf[256];
int len = 0, repeat = 0, end = 0, c;
int (*get)(stream) = in->get;
int (*put)(stream, int) = out->put;
while (!end) {
end = ((c = get(in)) == -1);
if (!end) {
buf[len++] = c;
if (len <= 1) continue;
}
if (repeat) {
if (buf[len - 1] != buf[len - 2])
repeat = 0;
if (!repeat || len == 129 || end) {
/* write out repeating bytes */
put(out, end ? len : len - 1);
put(out, buf[0]);
buf[0] = buf[len - 1];
len = 1;
}
} else {
if (buf[len - 1] == buf[len - 2]) {
repeat = 1;
if (len > 2) {
output(out, buf, len - 2);
buf[0] = buf[1] = buf[len - 1];
len = 2;
}
continue;
}
if (len == 128 || end) {
output(out, buf, len);
len = 0;
repeat = 0;
}
}
}
put(out, -1);
}
void decode(stream in, stream out)
{
int c, i, cnt;
while (1) {
c = in->get(in);
if (c == -1) return;
if (c > 128) {
cnt = c - 128;
for (i = 0; i < cnt; i++)
out->put(out, in->get(in));
} else {
cnt = c;
c = in->get(in);
for (i = 0; i < cnt; i++)
out->put(out, c);
}
}
}
int main()
{
char buf[256];
string_stream str_in = { sget, 0,
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW", 0};
string_stream str_out = { sget, sput, buf, 0 };
file_stream file = { 0, file_put, stdout };
/* encode from str_in to str_out */
encode((stream)&str_in, (stream)&str_out);
/* decode from str_out to file (stdout) */
decode((stream)&str_out, (stream)&file);
return 0;
}
[edit] C++
#include <iostream>
#include <string>
#include <sstream>
#include <boost/regex.hpp>
#include <cstdlib>
std::string encode ( const std::string & ) ;
std::string decode ( const std::string & ) ;
int main( ) {
std::string to_encode ( "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW" ) ;
std::cout << to_encode << " encoded:" << std::endl ;
std::string encoded ( encode ( to_encode ) ) ;
std::cout << encoded << std::endl ;
std::string decoded ( decode( encoded ) ) ;
std::cout << "Decoded again:\n" ;
std::cout << decoded << std::endl ;
if ( to_encode == decoded )
std::cout << "It must have worked!\n" ;
return 0 ;
}
std::string encode( const std::string & to_encode ) {
std::string::size_type found = 0 , nextfound = 0 ;
std::ostringstream oss ;
nextfound = to_encode.find_first_not_of( to_encode[ found ] , found ) ;
while ( nextfound != std::string::npos ) {
oss << nextfound - found ;
oss << to_encode[ found ] ;
found = nextfound ;
nextfound = to_encode.find_first_not_of( to_encode[ found ] , found ) ;
}
//since we must not discard the last characters we add them at the end of the string
std::string rest ( to_encode.substr( found ) ) ;//last run of characters starts at position found
oss << rest.length( ) << to_encode[ found ] ;
return oss.str( ) ;
}
std::string decode ( const std::string & to_decode ) {
boost::regex e ( "(\\d+)(\\w)" ) ;
boost::match_results<std::string::const_iterator> matches ;
std::ostringstream oss ;
std::string::const_iterator start = to_decode.begin( ) , end = to_decode.end( ) ;
while ( boost::regex_search ( start , end , matches , e ) ) {
std::string numberstring ( matches[ 1 ].first , matches[ 1 ].second ) ;
int number = atoi( numberstring.c_str( ) ) ;
std::string character ( matches[ 2 ].first , matches[ 2 ].second ) ;
for ( int i = 0 ; i < number ; i++ )
oss << character ;
start = matches[ 2 ].second ;
}
return oss.str( ) ;
}
[edit] C#
This example only works if there are no digits in the string to be encoded and then decoded.
public static void Main(string[] args)
{
string input = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
Console.WriteLine(Encode(input));//Outputs: 12W1B12W3B24W1B14W
Console.WriteLine(Decode(Encode(input)));//Outputs: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
Console.ReadLine();
}
public static string Encode(string s)
{
StringBuilder sb = new StringBuilder();
int count = 1;
char current =s[0];
for(int i = 1; i < s.Length;i++)
{
if (current == s[i])
{
count++;
}
else
{
sb.AppendFormat("{0}{1}", count, current);
count = 1;
current = s[i];
}
}
sb.AppendFormat("{0}{1}", count, current);
return sb.ToString();
}
public static string Decode(string s)
{
string a = "";
int count = 0;
StringBuilder sb = new StringBuilder();
char current = char.MinValue;
for(int i = 0; i < s.Length; i++)
{
current = s[i];
if (char.IsDigit(current))
a += current;
else
{
count = int.Parse(a);
a = "";
for (int j = 0; j < count; j++)
sb.Append(current);
}
}
return sb.ToString();
}
Somewhat shorter, using Regex.Replace with MatchEvaluator (using C#2 syntax only):
using System;
using System.Text.RegularExpressions;
public class Program
{
private delegate void fOk(bool ok, string message);
public static int Main(string[] args)
{
const string raw = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
const string code = "12W1B12W3B24W1B14W";
fOk Ok = delegate(bool ok, string message)
{
Console.WriteLine("{0}: {1}", ok ? "ok" : "not ok", message);
};
Ok(code.Equals(Encode(raw)), "Encode");
Ok(raw.Equals(Decode(code)), "Decode");
return 0;
}
public static string Encode(string input)
{
return Regex.Replace(input, @"(.)\1*", delegate(Match m)
{
return string.Concat(m.Value.Length, m.Groups[1].Value);
});
}
public static string Decode(string input)
{
return Regex.Replace(input, @"(\d+)(\D)", delegate(Match m)
{
return new string(m.Groups[2].Value[0], int.Parse(m.Groups[1].Value));
});
}
}
[edit] Clojure
(defn compress [s]
(->> (partition-by identity s) (mapcat (juxt count first)) (apply str)))
(defn extract [s]
(->> (re-seq #"(\d+)([A-Z])" s)
(mapcat (fn [[_ n ch]] (repeat (Integer/parseInt n) ch)))
(apply str)))
[edit] CoffeeScript
encode = (str) ->
str.replace /(.)\1*/g, (w) ->
w[0] + w.length
decode = (str) ->
str.replace /(.)(\d+)/g, (m,w,n) ->
new Array(+n+1).join(w)
console.log s = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
console.log encode s
console.log decode encode s
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW W12B1W12B3W24B1W14 WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
The following version encodes the number of ocurrences as an unicode character. You can change the way it looks by rotating the offset.
encode = (str, offset = 75) ->
str.replace /(.)\1*/g, (w) ->
w[0] + String.fromCharCode(offset+w.length)
decode = (str, offset = 75) ->
str.split('').map((w,i) ->
if not (i%2) then w else new Array(+w.charCodeAt(0)-offset).join(str[i-1])
).join('')
> encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW" WWBLWWBNWcBLWY > encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW", 1200 WҼBұWҼBҳWӈBұWҾ > encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW", 5200 WᑜBᑑWᑜBᑓWᑨBᑑWᑞ
[edit] Common Lisp
(defun group-similar (sequence &key (test 'eql))
(loop for x in (rest sequence)
with temp = (subseq sequence 0 1)
if (funcall test (first temp) x)
do (push x temp)
else
collect temp
and do (setf temp (list x))))
(defun run-length-encode (sequence)
(mapcar (lambda (group) (list (first group) (length group)))
(group-similar (coerce sequence 'list))))
(defun run-length-decode (sequence)
(reduce (lambda (s1 s2) (concatenate 'simple-string s1 s2))
(mapcar (lambda (elem)
(make-string (second elem)
:initial-element
(first elem)))
sequence)))
(run-length-encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
(run-length-decode '((#\W 12) (#\B 1) (#\W 12) (#\B 3) (#\W 24) (#\B 1)))
[edit] D
[edit] Short Functional Version
import std.algorithm, std.array;
alias encode = group;
auto decode(Group!("a == b", string) enc) {
return enc.map!(t => [t[0]].replicate(t[1])).join;
}
void main() {
immutable s = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWW" ~
"WWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
assert(s.encode.decode.equal(s));
}
[edit] Basic Imperative Version
import std.stdio, std.array, std.conv;
// Similar to the 'look and say' function.
string encode(in string input) {
if (input.empty) return input;
char last = input[$ - 1];
string output;
int count;
foreach_reverse (c; input) {
if (c == last) {
count++;
} else {
output = text(count) ~ last ~ output;
count = 1;
last = c;
}
}
return text(count) ~ last ~ output;
}
string decode(in string input) {
string i, result;
foreach (c; input)
switch (c) {
case '0': .. case '9':
i ~= c;
break;
case 'A': .. case 'Z':
if (i.empty)
throw new Exception("Can not repeat a letter " ~
"without a number of repetitions");
result ~= replicate([c], to!int(i));
i.length = 0;
break;
default:
throw new Exception("'" ~ c ~
"' is not alphanumeric");
}
return result;
}
void main() {
immutable txt = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWW" ~
"WWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
writeln("Input: ", txt);
immutable encoded = encode(txt);
writeln("Encoded: ", encoded);
assert(txt == decode(encoded));
}
- Output:
Input: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW Encoded: 12W1B12W3B24W1B14W
[edit] UTF String Version
D's native string is utf-encoded. This version works for utf string, and uses a Variable-length Quantity module.
import std.stdio, std.conv, std.utf, std.array;
import vlq;
struct RLE { // for utf string
ubyte[] encoded;
RLE encode(const string s) {
validate(s); // check if s is well-formed utf, throw if not
encoded.length = 0; // reset
if (s.length == 0) return this; // empty string
string last;
VLQ count;
for (int i = 0; i < s.length; ) {
auto k = s.stride(i);
auto ucode = cast(string)s[i .. i + k];
if (i == 0) last = ucode;
if (ucode == last)
count++;
else {
encoded ~= count.toVLQ ~ cast(ubyte[])last;
last = ucode;
count = 1;
}
i += k;
}
encoded ~= VLQ(count).toVLQ ~ cast(ubyte[])last;
return this;
}
int opApply(int delegate(ref ulong c, ref string u) dg) {
VLQ count;
string ucode;
for (int i = 0; i < encoded.length; ) {
auto k = count.extract(encoded[i .. $]);
i += k;
if (i >= encoded.length)
throw new Exception("not valid encoded string");
k = stride(cast(string) encoded[i .. $], 0);
if (k == 0xff) // not valid utf code point
throw new Exception("not valid encoded string");
ucode = cast(string)encoded[i .. i + k].dup;
dg(count.value, ucode);
i += k;
}
return 0;
}
string toString() {
string res;
foreach (ref i, s ; this)
if (indexOf("0123456789#", s) == -1)
res ~= text(i) ~ s;
else
res ~= text(i) ~ '#' ~ s;
return res;
}
string decode() {
string res;
foreach (ref i, s; this)
res ~= replicate(s, cast(uint)i);
return res;
}
}
void main() {
RLE r;
auto s = "尋尋覓覓冷冷清清淒淒慘慘戚戚\nWWWWWWWWWWWWBWWWWWWWWWWW" ~
"WBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW\n" ~
"11#222##333";
auto f = File("display.txt", "w");
f.writeln(s);
r.encode(s);
f.writefln("-----\n%s\n-----\n%s", r, r.decode());
auto sEncoded = RLE.init.encode(s).encoded ;
assert(s == RLE(sEncoded).decode(), "Not work");
}
output from "display.txt":
尋尋覓覓冷冷清清淒淒慘慘戚戚 WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW 11#222##333 ----- 2尋2覓2冷2清2淒2慘2戚1 12W1B12W3B24W1B14W1 2#11##3#22##3#3 ----- 尋尋覓覓冷冷清清淒淒慘慘戚戚 WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW 11#222##333
NOTE: some characters in this section use Chinese font.
[edit] UTF String Version with Regular Expression
The code looks more complex than the third Python version because this also handles digits by escaping them with #.
import std.stdio, std.conv, std.array, std.regex, std.utf,
std.algorithm;
string reEncode(string s) {
validate(s); // Throw if it's not a well-formed UTF string
static string rep(Captures!string m) {
auto c = canFind("0123456789#", m[1]) ? "#" ~ m[1] : m[1];
return text(m.hit.length / m[1].length) ~ c;
}
return std.regex.replace!rep(s, regex(`(.|[\n\r\f])\1*`, "g"));
}
string reDecode(string s) {
validate(s); // Throw if it's not a well-formed UTF string
static string rep(Captures!string m) {
string c = m[2];
if (c.length > 1 && c[0] == '#')
c = c[1 .. $];
return replicate(c, to!int(m[1]));
}
auto r=regex(`(\d+)(#[0123456789#]|[\n\r\f]|[^0123456789#\n\r\f]+)`
, "g");
return std.regex.replace!rep(s, r);
}
void main() {
auto s = "尋尋覓覓冷冷清清淒淒慘慘戚戚\nWWWWWWWWWWWWBWWWWWWWWWWW" ~
"WBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW\n" ~
"11#222##333";
assert(s == reDecode(reEncode(s)));
}
[edit] E
def rle(string) {
var seen := null
var count := 0
var result := []
def put() {
if (seen != null) {
result with= [count, seen]
}
}
for ch in string {
if (ch != seen) {
put()
seen := ch
count := 0
}
count += 1
}
put()
return result
}
def unrle(coded) {
var result := ""
for [count, ch] in coded {
result += E.toString(ch) * count
}
return result
}
? rle("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
# value: [[12, 'W'], [1, 'B'], [12, 'W'], [3, 'B'], [24, 'W'], [1, 'B'], [14, 'W']]
? unrle(rle("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"))
# value: "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
[edit] Erlang
A single-threaded/process version with a simple set of unit test.
-module(rle).
-export([encode/1,decode/1]).
-include_lib("eunit/include/eunit.hrl").
encode(S) ->
doEncode(string:substr(S, 2), string:substr(S, 1, 1), 1, []).
doEncode([], CurrChar, Count, R) ->
R ++ integer_to_list(Count) ++ CurrChar;
doEncode(S, CurrChar, Count, R) ->
NextChar = string:substr(S, 1, 1),
if
NextChar == CurrChar ->
doEncode(string:substr(S, 2), CurrChar, Count + 1, R);
true ->
doEncode(string:substr(S, 2), NextChar, 1,
R ++ integer_to_list(Count) ++ CurrChar)
end.
decode(S) ->
doDecode(string:substr(S, 2), string:substr(S, 1, 1), []).
doDecode([], _, R) ->
R;
doDecode(S, CurrString, R) ->
NextChar = string:substr(S, 1, 1),
IsInt = erlang:is_integer(catch(erlang:list_to_integer(NextChar))),
if
IsInt ->
doDecode(string:substr(S, 2), CurrString ++ NextChar, R);
true ->
doDecode(string:substr(S, 2), [],
R ++ string:copies(NextChar, list_to_integer(CurrString)))
end.
rle_test_() ->
PreEncoded =
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW",
Expected = "12W1B12W3B24W1B14W",
[
?_assert(encode(PreEncoded) =:= Expected),
?_assert(decode(Expected) =:= PreEncoded),
?_assert(decode(encode(PreEncoded)) =:= PreEncoded)
].
[edit] Euphoria
include misc.e
function encode(sequence s)
sequence out
integer prev_char,count
if length(s) = 0 then
return {}
end if
out = {}
prev_char = s[1]
count = 1
for i = 2 to length(s) do
if s[i] != prev_char then
out &= {count,prev_char}
prev_char = s[i]
count = 1
else
count += 1
end if
end for
out &= {count,prev_char}
return out
end function
function decode(sequence s)
sequence out
out = {}
for i = 1 to length(s) by 2 do
out &= repeat(s[i+1],s[i])
end for
return out
end function
sequence s
s = encode("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
pretty_print(1,s,{3})
puts(1,'\n')
puts(1,decode(s))
Output:
{12,'W',1,'B',12,'W',3,'B',24,'W',1,'B',14,'W'}
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] F#
open System
open System.Text.RegularExpressions
let encode data =
// encodeData : seq<'T> -> seq<int * 'T> i.e. Takes a sequence of 'T types and return a sequence of tuples containing the run length and an instance of 'T.
let rec encodeData input =
seq { if not (Seq.isEmpty input) then
let head = Seq.head input
let runLength = Seq.length (Seq.takeWhile ((=) head) input)
yield runLength, head
yield! encodeData (Seq.skip runLength input) }
encodeData data |> Seq.fold(fun acc (len, d) -> acc + len.ToString() + d.ToString()) ""
let decode str =
[ for m in Regex.Matches(str, "(\d+)(.)") -> m ]
|> List.map (fun m -> Int32.Parse(m.Groups.[1].Value), m.Groups.[2].Value)
|> List.fold (fun acc (len, s) -> acc + String.replicate len s) ""
[edit] FALSE
1^[^$~][$@$@=$[%%\1+\$0~]?~[@.,1\$]?%]#%\., {encode}
[0[^$$'9>'0@>|~]['0-\10*+]#]n:
[n;!$~][[\$][1-\$,]#%%]#%% {decode}
[edit] Fan
**
** Generates a run-length encoding for a string
**
class RLE
{
Run[] encode(Str s)
{
runs := Run[,]
s.size.times |i|
{
ch := s[i]
if (runs.size==0 || runs.last.char != ch)
runs.add(Run(ch))
runs.last.inc
}
return runs
}
Str decode(Run[] runs)
{
buf := StrBuf()
runs.each |run|
{
run.count.times { buf.add(run.char.toChar) }
}
return buf.toStr
}
Void main()
{
echo(decode(encode(
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
)))
}
}
internal class Run
{
Int char
Int count := 0
new make(Int ch) { char = ch }
Void inc() { ++count }
override Str toStr() { return "${count}${char.toChar}" }
}
[edit] Forth
variable a
: n>a (.) tuck a @ swap move a +! ;
: >a a @ c! 1 a +! ;
: encode ( c-addr +n a -- a n' )
dup a ! -rot over c@ 1 2swap 1 /string bounds ?do
over i c@ = if 1+
else n>a >a i c@ 1 then
loop n>a >a a @ over - ;
: digit? [char] 0 [ char 9 1+ literal ] within ;
: decode ( c-addr +n a -- a n' )
dup a ! 0 2swap bounds ?do
i c@ digit? if 10 * i c@ [char] 0 - + else
a @ over i c@ fill a +! 0 then
loop drop a @ over - ;
Example:
s" WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
here 1000 + encode here 2000 + decode cr 3 spaces type
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] Fortran
program RLE
implicit none
integer, parameter :: bufsize = 100 ! Sets maximum size of coded and decoded strings, adjust as necessary
character(bufsize) :: teststr = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
character(bufsize) :: codedstr = "" Encode (Data (Index..Data'Last));
end;
end;
end if;
end Encode;
function Decode (Data : String) return String is
begin
if Data'Length = 0 then
return "";
else
declare
Index : Integer := Data'First;
Count : Natural := 0;
begin
while Index , decodedstr = ""
call Encode(teststr, codedstr)
write(*,"(a)") trim(codedstr)
call Decode(codedstr, decodedstr)
write(*,"(a)") trim(decodedstr)
contains
subroutine Encode(instr, outstr)
character(*), intent(in) :: instr
character(*), intent(out) :: outstr
character(8) :: tempstr = ""
character(26) :: validchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
integer :: a, b, c, i
if(verify(trim(instr), validchars) /= 0) then
outstr = "Invalid input"
return
end if
outstr = ""
c = 1
a = iachar(instr(1:1))
do i = 2, len(trim(instr))
b = iachar(instr(i:i))
if(a == b) then
c = c + 1
else
write(tempstr, "(i0)") c
outstr = trim(outstr) // trim(tempstr) // achar(a)
a = b
c = 1
end if
end do
write(tempstr, "(i0)") c
outstr = trim(outstr) // trim(tempstr) // achar(b)
end subroutine
subroutine Decode(instr, outstr)
character(*), intent(in) :: instr
character(*), intent(out) :: outstr
character(26) :: validchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
integer :: startn, endn, n
outstr = ""
startn = 1
do while(startn < len(trim(instr)))
endn = scan(instr(startn:), validchars) + startn - 1
read(instr(startn:endn-1), "(i8)") n
outstr = trim(outstr) // repeat(instr(endn:endn), n)
startn = endn + 1
end do
end subroutine
end program
[edit] Go
Decoder kind of necessary to demonstrate task requirement that I can recreate the input.
package main
import "fmt"
// encoding scheme:
// encode to byte array
// byte value < 26 means single character: byte value + 'A'
// byte value 26..255 means (byte value - 24) copies of next byte
func rllEncode(s string) (r []byte) {
if s == "" {
return
}
c := s[0]
if c < 'A' || c > 'Z' {
panic("invalid")
}
nc := byte(1)
for i := 1; i < len(s); i++ {
d := s[i]
switch {
case d != c:
case nc < (255 - 24):
nc++
continue
}
if nc > 1 {
r = append(r, nc+24)
}
r = append(r, c-'A')
if d < 'A' || d > 'Z' {
panic("invalid")
}
c = d
nc = 1
}
if nc > 1 {
r = append(r, nc+24)
}
r = append(r, c-'A')
return
}
func main() {
s := "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
fmt.Println("source: ", len(s), "bytes:", s)
e := rllEncode(s)
fmt.Println("encoded:", len(e), "bytes:", e)
d := rllDecode(e)
fmt.Println("decoded:", len(d), "bytes:", d)
fmt.Println("decoded = source:", d == s)
}
func rllDecode(e []byte) string {
var c byte
var d []byte
for i := 0; i < len(e); i++ {
b := e[i]
if b < 26 {
c = 1
} else {
c = b - 24
i++
b = e[i]
}
for c > 0 {
d = append(d, b+'A')
c--
}
}
return string(d)
}
Output:
source: 67 bytes: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW encoded: 12 bytes: [36 22 1 36 22 27 1 48 22 1 38 22] decoded: 67 bytes: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW decoded = source: true
[edit] Groovy
def rleEncode(text) {
def encoded = new StringBuilder()
(text =~ /(([A-Z])\2*)/).each { matcher ->
encoded.append(matcher[1].size()).append(matcher[2])
}
encoded.toString()
}
def rleDecode(text) {
def decoded = new StringBuilder()
(text =~ /([0-9]+)([A-Z])/).each { matcher ->
decoded.append(matcher[2] * Integer.parseInt(matcher[1]))
}
decoded.toString()
}
Test code
def text = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
def rleEncoded = rleEncode(text)
assert rleEncoded == '12W1B12W3B24W1B14W'
assert text == rleDecode(rleEncoded)
println "Original Text: $text"
println "Encoded Text: $rleEncoded"
Output:
Original Text: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW Encoded Text: 12W1B12W3B24W1B14W
[edit] Haskell
import Data.List (group)
-- Datatypes
type Encoded = [(Int, Char)] -- An encoded String with form [(times, char), ...]
type Decoded = String
-- Takes a decoded string and returns an encoded list of tuples
rlencode :: Decoded -> Encoded
rlencode = map (\g -> (length g, head g)) . group
-- Takes an encoded list of tuples and returns the associated decoded String
rldecode :: Encoded -> Decoded
rldecode = concatMap decodeTuple
where decodeTuple (n,c) = replicate n c
main :: IO ()
main = do
-- Get input
putStr "String to encode: "
input <- getLine
-- Output encoded and decoded versions of input
let encoded = rlencode input
decoded = rldecode encoded
putStrLn $ "Encoded: " ++ show encoded ++ "\nDecoded: " ++ show decoded
[edit] Icon and Unicon
procedure main(arglist)
s := "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
write(" s=",image(s))
write("s1=",image(s1 := rle_encode(s)))
write("s2=",image(s2 := rle_decode(s1)))
if s ~== s2 then write("Encode/Decode problem.")
else write("Encode/Decode worked.")
end
procedure rle_encode(s)
es := ""
s ? while c := move(1) do es ||:= *(move(-1),tab(many(c))) || c
return es
end
procedure rle_decode(es)
s := ""
es ? while s ||:= Repl(tab(many(&digits)),move(1))
return s
end
procedure Repl(n, c)
return repl(c,n)
end
Sample output:
s="WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW" s1="12W1B12W3B24W1B14W" s2="WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW" Encode/Decode worked.
[edit] J
Solution:
rle=: ;@(<@(":@#,{.);.1~ 1, 2 ~:/\ ])
rld=: '0123456789'&(-.~ #~ i. ".@:{ ' ' ,~ [)
Example:
rle 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
12W1B12W3B24W1B14W
rld '12W1B12W3B24W1B14W'
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
Note that this implementation fails for the empty case. Here's a version that fixes that:
rle=: ;@(<@(":@#,{.);.1~ 2 ~:/\ (a.{.@-.{.),])
Other approaches include using rle ::(''"_) or rle^:(*@#) or equivalent variations on the original sentence.
[edit] Java
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class RunLengthEncoding {
public static String encode(String source) {
StringBuffer dest = new StringBuffer();
for (int i = 0; i < source.length(); i++) {
int runLength = 1;
while (i+1 < source.length() && source.charAt(i) == source.charAt(i+1)) {
runLength++;
i++;
}
dest.append(runLength);
dest.append(source.charAt(i));
}
return dest.toString();
}
public static String decode(String source) {
StringBuffer dest = new StringBuffer();
Pattern pattern = Pattern.compile("[0-9]+|[a-zA-Z]");
Matcher matcher = pattern.matcher(source);
while (matcher.find()) {
int number = Integer.parseInt(matcher.group());
matcher.find();
while (number-- != 0) {
dest.append(matcher.group());
}
}
return dest.toString();
}
public static void main(String[] args) {
String example = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
System.out.println(encode(example));
System.out.println(decode("1W1B1W1B1W1B1W1B1W1B1W1B1W1B"));
}
}
Tests:
import static org.junit.Assert.assertEquals;
import org.junit.Test;
public class RunLengthEncodingTest {
private RLE = new RunLengthEncoding();
@Test
public void encodingTest() {
assertEquals("1W", RLE.encode("W"));
assertEquals("4W", RLE.encode("WWWW"));
assertEquals("5w4i7k3i6p5e4d2i1a",
RLE.encode("wwwwwiiiikkkkkkkiiippppppeeeeeddddiia"));
assertEquals("12B1N12B3N24B1N14B",
RLE.encode("BBBBBBBBBBBBNBBBBBBBBBBBBNNNBBBBBBBBBBBBBBBBBBBBBBBBNBBBBBBBBBBBBBB"));
assertEquals("12W1B12W3B24W1B14W",
RLE.encode("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"));
assertEquals("1W1B1W1B1W1B1W1B1W1B1W1B1W1B", RLE.encode("WBWBWBWBWBWBWB"));
}
@Test
public void decodingTest() {
assertEquals("W", RLE.decode("1W"));
assertEquals("WWWW", RLE.decode("4W"));
assertEquals("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW",
RLE.decode("12W1B12W3B24W1B14W"));
assertEquals("WBWBWBWBWBWBWB", RLE.decode("1W1B1W1B1W1B1W1B1W1B1W1B1W1B"));
assertEquals("WBWBWBWBWBWBWB", RLE.decode("1W1B1W1B1W1B1W1B1W1B1W1B1W1B"));
}
}
[edit] JavaScript
Here's an encoding method that walks the input string character by character
function encode(input) {Here's an encoding method that uses a regular expression to grab the character runs ( for the
var encoding = [];
var prev, count, i;
for (count = 1, prev = input[0], i = 1; i < input.length; i++) {
if (input[i] != prev) {
encoding.push([count, prev]);
count = 1;
prev = input[i];
}
else
count ++;
}
encoding.push([count, prev]);
return encoding;
}
forEach method)
function encode_re(input) {
var encoding = [];
input.match(/(.)\1*/g).forEach(function(substr){ encoding.push([substr.length, substr[0]]) });
return encoding;
}
And to decode (see Repeating a string)
function decode(encoded) {
var output = "";
encoded.forEach(function(pair){ output += new Array(1+pair[0]).join(pair[1]) })
return output;
}
[edit] K
rle: {,/($-':i,#x),'x@i:&1,~=':x}
rld: {d:"0123456789"; ,/(.(d," ")@d?/:x)#'x _dvl d}
Example:
rle "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
"12W1B12W3B24W1B14W"
rld "12W1B12W3B24W1B14W"
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
[edit] Liberty BASIC
mainwin 100 20
'In$ ="aaaaaaaaaaaaaaaaaccbbbbbbbbbbbbbbba" ' testing...
In$ ="WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
' Out$= "12W1B12W3B24W1B14W"
Out$ =Encoded$( In$)
Inv$ =Decoded$( Out$)
print " Supplied string ="; In$
Print " RLE version ="; Out$
print " Decoded back to ="; Inv$
end
function Encoded$( k$)
r$ =""
r =1
for i =2 to len( k$)
prev$ =mid$( k$, i -1, 1)
c$ =mid$( k$, i, 1)
if c$ =prev$ then ' entering a run of this character
r =r +1
else ' it occurred only once
r$ =r$ +str$( r) +prev$
r =1
end if
next i
r$ =r$ +str$( r) +c$
Encoded$ =r$
end function
function Decoded$( k$)
r$ =""
v =0
for i =1 to len( k$)
i$ =mid$( k$, i, 1)
if instr( "0123456789", i$) then
v =v *10 +val( i$)
else
for m =1 to v
r$ =r$ +i$
next m
v =0
end if
next i
Decoded$ =r$
end function
[edit] Logo
to encode :str [:out "||] [:count 0] [:last first :str]
if empty? :str [output (word :out :count :last)]
if equal? first :str :last [output (encode bf :str :out :count+1 :last)]
output (encode bf :str (word :out :count :last) 1 first :str)
end
to reps :n :w
output ifelse :n = 0 ["||] [word :w reps :n-1 :w]
end
to decode :str [:out "||] [:count 0]
if empty? :str [output :out]
if number? first :str [output (decode bf :str :out 10*:count + first :str)]
output (decode bf :str word :out reps :count first :str)
end
make "foo "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
make "rle encode :foo
show equal? :foo decode :rle
[edit] Lua
local C, Ct, R, Cf, Cc = lpeg.C, lpeg.Ct, lpeg.R, lpeg.Cf, lpeg.Cc
astable = Ct(C(1)^0)
function compress(t)
local ret = {}
for i, v in ipairs(t) do
if t[i-1] and v == t[i-1] then
ret[#ret - 1] = ret[#ret - 1] + 1
else
ret[#ret + 1] = 1
ret[#ret + 1] = v
end
end
t = ret
return table.concat(ret)
end
q = io.read()
print(compress(astable:match(q)))
undo = Ct((Cf(Cc"0" * C(R"09")^1, function(a, b) return 10 * a + b end) * C(R"AZ"))^0)
function decompress(s)
t = undo:match(s)
local ret = ""
for i = 1, #t - 1, 2 do
for _ = 1, t[i] do
ret = ret .. t[i+1]
end
end
return ret
end
[edit] Mathematica
Custom functions using Map, Apply, pure functions, replacing using pattern matching, delayed rules and other functions:
RunLengthEncode[input_String]:=StringJoin@@Sequence@@@({ToString @Length[#],First[#]}&/@Split[Characters[input]])
RunLengthDecode[input_String]:=StringJoin@@ConstantArray@@@Reverse/@Partition[(Characters[input]/.(ToString[#]->#&/@Range[0,9]))//.{x___,i_Integer,j_Integer,y___}:>{x,10i+j,y},2]
Example:
mystring="WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
RunLengthEncode[mystring]
RunLengthDecode[%]
%==mystring
gives back:
12W1B12W3B24W1B14W
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
True
An alternate solution:
RunLengthEncode[s_String] := StringJoin[
{ToString[Length[#]] <> First[#]} & /@ Split[StringSplit[s, ""]]
]
RunLengthDecode[s_String] := StringJoin[
Table[#[[2]], {ToExpression[#[[1]]]}] & /@
Partition[StringSplit[s, x : _?LetterQ :> x], 2]
]
This second encode function is adapted from the MathWorld example.
[edit] Maxima
rle(a) := block(
[n: slength(a), b: "", c: charat(a, 1), k: 1],
for i from 2 thru n do
if cequal(c, charat(a, i)) then k: k + 1 else (b: sconcat(b, k, c), c: charat(a, i), k: 1),
sconcat(b, k, c)
)$
rle("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW");
"12W1B12W3B24W1B14W"
[edit] MMIX
LOC Data_Segment
GREG @
Buf OCTA 0,0,0,0 integer print buffer
Char BYTE 0,0 single char print buffer
task BYTE "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWW"
BYTE "WWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW",0
len GREG @-1-task
// task should become this
tEnc BYTE "12W1B12W3B24W1B14W",0
GREG @
// tuple array for encoding purposes
// each tuple is a tetra (4 bytes long or 2 wydes long)
// (c,l) in which c is a char and l = number of chars c
// high wyde of the tetra contains the char
// low wyde .. .. .. contains the length
RLE TETRA 0
LOC #100 locate program
GREG @
// print number to stdout
// destroys input arg $3 !
Prt64 LDA $255,Buf+23 points to LSD
// do
2H DIV $3,$3,10 (N,R) = divmod (N,10)
GET $13,rR get remainder
INCL $13,'0' convert to ascii
STBU $13,$255 store ascii digit
BZ $3,3F
SUB $255,$255,1 move pointer down
JMP 2B While N !=0
3H TRAP 0,Fputs,StdOut print number to standard out
GO $127,$127,0 return
GREG @
// print char to stdout
PChar LDA $255,Char
STBU $4,$255
TRAP 0,Fputs,StdOut
GO $127,$127,0
GREG @
// encode routine
// $0 string pointer
// $1 index var
// $2 pointer to tuple array
// $11 temp var tuple
Encode SET $1,0 initialize index = 0
SET $11,0 postion in string = 0
LDBU $3,$0,$1 get first char
ADDU $6,$3,0 remember it
do
1H INCL $1,1 repeat incr index
LDBU $3,$0,$1 get a char
BZ $3,2F if EOS then finish
CMP $7,$3,$6
PBZ $7,1B while new == old
XOR $4,$4,$4 new tuple
ADDU $4,$6,0
SLU $4,$4,16 old char to tuple -> (c,_)
SUB $7,$1,$11 length = index - previous position
ADDU $11,$1,0 incr position
OR $4,$4,$7 length l to tuple -> (c,l)
STT $4,$2 put tuple in array
ADDU $6,$3,0 remember new char
INCL $2,4 incr 'tetra' pointer
JMP 1B loop
2H XOR $4,$4,$4 put last tuple in array
ADDU $4,$6,0
SLU $4,$4,16
SUB $7,$1,$11
ADDU $11,$1,0
OR $4,$4,$7
STT $4,$2
GO $127,$127,0 return
GREG @
Main LDA $0,task pointer uncompressed string
LDA $2,RLE pointer tuple array
GO $127,Encode encode string
LDA $2,RLE points to start tuples
SET $5,#ffff mask for extracting length
1H LDTU $3,$2 while not End of Array
BZ $3,2F
SRU $4,$3,16 char = (c,_)
AND $3,$3,$5 length = (_,l)
GO $127,Prt64 print length
GO $127,PChar print char
INCL $2,4 incr tuple pointer
JMP 1B wend
2H SET $4,#a print NL
GO $127,PChar
// decode using the RLE tuples
LDA $2,RLE pointer tuple array
SET $5,#ffff mask
1H LDTU $3,$2 while not End of Array
BZ $3,2F
SRU $4,$3,16 char = (c,_)
AND $3,$3,$5 length = (_,l)
// for (i=0;i<length;i++) {
3H GO $127,PChar print a char
SUB $3,$3,1
PBNZ $3,3B
INCL $2,4
JMP 1B }
2H SET $4,#a print NL
GO $127,PChar
TRAP 0,Halt,0 EXIT
Example run encode --> decode:
~/MIX/MMIX/Rosetta> mmix rle 12W1B12W3B24W1B14W WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] Objective-C
See Run-length encoding/Objective-C
[edit] OCaml
let encode str =
let len = String.length str in
let rec aux i acc =
if i >= len then List.rev acc
else
let c1 = str.[i] in
let rec aux2 j =
if j >= len then (c1, j-i)
else
let c2 = str.[j] in
if c1 = c2
then aux2 (j+1)
else (c1, j-i)
in
let (c,n) as t = aux2 (i+1) in
aux (i+n) (t::acc)
in
aux 0 []
;;
let decode lst =
let l = List.map (fun (c,n) -> String.make n c) lst in
(String.concat "" l)
let () =
let e = encode "aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa" in
List.iter (fun (c,n) ->
Printf.printf " (%c, %d);\n" c n;
) e;
print_endline (decode [('a', 5); ('h', 6); ('m', 7); ('u', 1); ('i', 7); ('a', 6)]);
;;
- Using regular expressions
#load "str.cma";;
open Str
let encode =
global_substitute (Str.regexp "\\(.\\)\\1*")
(fun s -> string_of_int (String.length (matched_string s)) ^
matched_group 1 s)
let decode =
global_substitute (Str.regexp "\\([0-9]+\\)\\([^0-9]\\)")
(fun s -> String.make (int_of_string (matched_group 1 s))
(matched_group 2 s).[0])
let () =
print_endline (encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW");
print_endline (decode "12W1B12W3B24W1B14W");
[edit] Oz
declare
fun {RLEncode Xs}
for G in {Group Xs} collect:C do
{C {Length G}#G.1}
end
end
fun {RLDecode Xs}
for C#Y in Xs append:Ap do
{Ap {Replicate Y C}}
end
end
%% Helpers
%% e.g. "1122" -> ["11" "22"]
fun {Group Xs}
case Xs of nil then nil
[] X|Xr then
Ys Zs
{List.takeDropWhile Xr fun {$ W} W==X end ?Ys ?Zs}
in
(X|Ys) | {Group Zs}
end
end
%% e.g. 3,4 -> [3 3 3 3]
fun {Replicate X N}
case N of 0 then nil
else X|{Replicate X N-1}
end
end
Data = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
Enc = {RLEncode Data}
in
{System.showInfo Data}
{Show Enc}
{System.showInfo {RLDecode Enc}}
[edit] PARI/GP
rle(s)={
if(s=="", return(s));
my(v=Vec(s),cur=v[1],ct=1,out="");
v=concat(v,99); \\ sentinel
for(i=2,#v,
if(v[i]==cur,
ct++
,
out=Str(out,ct,cur);
cur=v[i];
ct=1
)
);
out
};
elr(s)={
if(s=="", return(s));
my(v=Vec(s),ct=eval(v[1]),out="");
v=concat(v,99); \\ sentinel
for(i=2,#v,
if(v[i]>="0" && v[i]<="9",
ct=10*ct+eval(v[i])
,
for(j=1,ct,out=Str(out,v[i]));
ct=0
)
);
out
};
rle("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
elr(%)
Output:
%1 = "12W1B12W3B24W1B14W" %2 = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
[edit] Pascal
Program RunLengthEncoding(output);
procedure encode(s: string; var counts: array of integer; var letters: string);
var
i, j: integer;
begin
j := 0;
letters := '';
if length(s) > 0 then
begin
j := 1;
letters := letters + s[1];
counts[1] := 1;
for i := 2 to length(s) do
if s[i] = letters[j] then
inc(counts[j])
else
begin
inc(j);
letters := letters + s[i];
counts[j] := 1;
end;
end;
end;
procedure decode(var s: string; counts: array of integer; letters: string);
var
i, j: integer;
begin
s := '';
for i := 1 to length(letters) do
for j := 1 to counts[i] do
s := s + letters[i];
end;
var
s: string;
counts: array of integer;
letters: string;
i: integer;
begin
s := 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWW';
writeln(s);
setlength(counts, length(s));
encode(s, counts, letters);
for i := 1 to length(letters) - 1 do
write(counts[i], ' * ', letters[i], ', ');
writeln(counts[length(letters)], ' * ', letters[length(letters)]);
decode(s, counts, letters);
writeln(s);
end.
Output:
:> ./RunLengthEncoding WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWW 12 * W, 1 * B, 12 * W, 3 * B, 24 * W, 1 * B, 13 * W WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWW
[edit] Perl
# functional approach (return the encoded or decoded string)
sub encode {
(my $str = shift) =~ s {(.)(\1*)} {length($&).$1}gse;
return $str; }
sub decode {
(my $str = shift) =~ s {(\d+)(.)} {$2 x $1}gse;
return $str;}
# procedural approach (modify the argument in place)
sub encode {
$_[0] =~ s {(.)(\1*)} {length($&).$1}gse; }
sub decode {
$_[0] =~ s {(\d+)(.)} {$2 x $1}gse; }
The following modified versions of the previous one, encode/decode a bytes sequence in a way compatible with the functions of the C version.
sub encode
{my $str = shift;
$str =~ s {(.)(\1{0,254})} {pack("C",(length($2) + 1)) . $1 }gse;
return $str;}
sub decode
{
my @str = split //, shift;
my $r = "";
foreach my $i (0 .. scalar(@str)/2-1) {
$r .= $str[2*$i + 1] x unpack("C", $str[2*$i]);
}
return $r;
}
[edit] Perl 6
This currently depend on a workaround to pass the match object into the replacement closure as an explicit argument. This is supposed to happen automatically.
Note also that Perl 6 regexes don't care about unquoted whitespace, and that backrefs count from 0, not from 1.
sub encode($str) { $str.subst(/(.) $0*/, -> $/ { $/.chars ~ $0 ~ ' ' }, :g); }
sub decode($str) { $str.subst(/(\d+) (.) ' '/, -> $/ {$1 x $0}, :g); }
my $e = encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW');
say $e;
say decode($e);
Output:
12W 1B 12W 3B 24W 1B 14W WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] PHP
<?php
function encode($str) {
return preg_replace('/(.)\1*/e', 'strlen($0) . $1', $str);
}
function decode($str) {
return preg_replace('/(\d+)(\D)/e', 'str_repeat($2, $1)', $str);
}
echo encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'), "\n";
echo decode('12W1B12W3B24W1B14W'), "\n";
?>
[edit] PicoLisp
(de encode (Str)
(pack
(make
(for (Lst (chop Str) Lst)
(let (N 1 C)
(while (= (setq C (pop 'Lst)) (car Lst))
(inc 'N) )
(link N C) ) ) ) ) )
(de decode (Str)
(pack
(make
(let N 0
(for C (chop Str)
(if (>= "9" C "0")
(setq N (+ (format C) (* 10 N)))
(do N (link C))
(zero N) ) ) ) ) ) )
(and
(prinl "Data: " "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
(prinl "Encoded: " (encode @))
(prinl "Decoded: " (decode @)) )
Output:
Data: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW Encoded: 12W1B12W3B24W1B14W Decoded: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] PL/I
declare (c1, c2) character (1);
declare run_length fixed binary;
declare input file;
open file (input) title ('/RLE.DAT,type(text),recsize(20000)');
on endfile (input) go to epilog;
get file (input) edit (c1) (a(1));
run_length = 1;
do forever;
get file (input) edit (c2) (a(1));
if c1 = c2 then
run_length = run_length + 1;
else
do; put edit (trim(run_length), c1) (a); run_length=1; end;
c1 = c2;
end;
epilog:
put edit (trim(run_length), c1) (a);
put skip;
/* The reverse of the above operation: */
declare c character (1);
declare i fixed binary;
declare new file;
open file (new) title ('/NEW.DAT,type(text),recsize(20000)');
on endfile (new) stop;
do forever;
run_length = 0;
do forever;
get file (new) edit (c) (a(1));
if index('0123456789', c) = 0 then leave;
run_length = run_length*10 + c;
end;
put edit ((c do i = 1 to run_length)) (a);
end;
[edit] PowerBASIC
This version can handle any arbitrary string that doesn't contain numbers (not just letters). (A flag value could be added which would allow the inclusion of any character, but such a flag isn't in this example.)
FUNCTION RLDecode (i AS STRING) AS STRING
DIM Loop0 AS LONG, rCount AS STRING, outP AS STRING, m AS STRING
FOR Loop0 = 1 TO LEN(i)
m = MID$(i, Loop0, 1)
SELECT CASE m
CASE "0" TO "9"
rCount = rCount & m
CASE ELSE
IF LEN(rCount) THEN
outP = outP & STRING$(VAL(rCount), m)
rCount=""
ELSE
outP = outP & m
END IF
END SELECT
NEXT
FUNCTION = outP
END FUNCTION
FUNCTION RLEncode (i AS STRING) AS STRING
DIM tmp1 AS STRING, tmp2 AS STRING, outP AS STRING
DIM Loop0 AS LONG, rCount AS LONG
tmp1 = MID$(i, 1, 1)
tmp2 = tmp1
rCount = 1
FOR Loop0 = 2 TO LEN(i)
tmp1 = MID$(i, Loop0, 1)
IF tmp1 <> tmp2 THEN
outP = outP & TRIM$(STR$(rCount)) & tmp2
tmp2 = tmp1
rCount = 1
ELSE
INCR rCount
END IF
NEXT
outP = outP & TRIM$(STR$(rCount))
outP = outP & tmp2
FUNCTION = outP
END FUNCTION
FUNCTION PBMAIN () AS LONG
DIM initial AS STRING, encoded AS STRING, decoded AS STRING
initial = INPUTBOX$("Type something.")
encoded = RLEncode(initial)
decoded = RLDecode(encoded)
'in PB/Win, "?" = MSGBOX; in PB/DOS & PB/CC. "?" = PRINT
? initial & $CRLF & encoded & $CRLF & decoded
END FUNCTION
Outputs are similar to those in BASIC, above.
[edit] PowerShell
function Compress-RLE ($s) {
$re = [regex] '(.)\1*'
$ret = ""
foreach ($m in $re.Matches($s)) {
$ret += $m.Length
$ret += $m.Value[0]
}
return $ret
}
function Expand-RLE ($s) {
$re = [regex] '(\d+)(.)'
$ret = ""
foreach ($m in $re.Matches($s)) {
$ret += [string] $m.Groups[2] * [int] [string] $m.Groups[1]
}
return $ret
}
Output:
PS> Compress-RLE "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW" 12W1B12W3B24W1B14W PS> Expand-RLE "12W1B12W3B24W1B14W" WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] Prolog
Works with SWI-Prolog.
This code is inspired from a code found here : http://groups.google.com/group/comp.lang.prolog/browse_thread/thread/b053ea2512e8b350 (author : Pascal J. Bourguignon).
% the test
run_length :-
L = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW",
writef('encode %s\n', [L]),
encode(L, R),
writeln(R), nl,
writef('decode %w\n', [R]),
decode(R, L1),
writeln(L1).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% encode
%
% translation
% from
% "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
% to
% "12W1B12W3B24W1B14W"
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
encode(In, Out) :-
% Because of the special management of the "strings" by Prolog
( is_list(In) -> I = In; string_to_list(In, I)),
packList(I, R1),
dcg_packList2List(R1,R2, []),
string_to_list(Out,R2).
dcg_packList2List([[N, V]|T]) -->
{ number_codes(N, LN)},
LN,
[V],
dcg_packList2List(T).
dcg_packList2List([]) --> [].
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% decode
%
% translation
% from
% "12W1B12W3B24W1B14W"
% to
% "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
decode(In, Out) :-
% Because of the special management of the "strings" by Prolog
( is_list(In) -> I = In; string_to_list(In, I)),
dcg_List2packList(I, R1, []),
packList(L1, R1),
string_to_list(Out, L1).
dcg_List2packList([H|T]) -->
{code_type(H, digit)},
parse_number([H|T], 0).
dcg_List2packList([]) --> [].
parse_number([H|T], N) -->
{code_type(H, digit), !,
N1 is N*10 + H - 48 },
parse_number(T, N1).
parse_number([H|T], N) -->
[[N, H]],
dcg_List2packList(T).
% use of library clpfd allows packList(?In, ?Out) to works
% in both ways In --> Out and In <-- Out.
:- use_module(library(clpfd)).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% ?- packList([a,a,a,b,c,c,c,d,d,e], L).
% L = [[3,a],[1,b],[3,c],[2,d],[1,e]] .
% ?- packList(R, [[3,a],[1,b],[3,c],[2,d],[1,e]]).
% R = [a,a,a,b,c,c,c,d,d,e] .
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
packList([],[]).
packList([X],[[1,X]]) :- !.
packList([X|Rest],[XRun|Packed]):-
run(X,Rest, XRun,RRest),
packList(RRest,Packed).
run(Var,[],[1,Var],[]).
run(Var,[Var|LRest],[N1, Var],RRest):-
N #> 0,
N1 #= N + 1,
run(Var,LRest,[N, Var],RRest).
run(Var,[Other|RRest], [1,Var],[Other|RRest]):-
dif(Var,Other).
Output :
?- run_length. encode WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW 12W1B12W3B24W1B14W decode 12W1B12W3B24W1B14W WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW true .
[edit] Pure
using system;
encode s = strcat $ map (sprintf "%d%s") $ encode $ chars s with
encode [] = [];
encode xs@(x:_) = (#takewhile (==x) xs,x) : encode (dropwhile (==x) xs);
end;
decode s = strcat [c | n,c = parse s; i = 1..n] with
parse s::string = regexg item "([0-9]+)(.)" REG_EXTENDED s 0;
item info = val (reg 1 info!1), reg 2 info!1;
end;
let s = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
let r = encode s; // "12W1B12W3B24W1B14W"
decode r;
[edit] PureBasic
with some optimations to use pointers instead of string functions. According to the task description it works with uppercase A - Z. In this implementation it also functions with all characters that are non-digits and whose value is non-zero.Procedure.s RLDecode(toDecode.s)
Protected.s repCount, output, currChar, tmp
Protected *c.Character = @toDecode
While *c\c <> #Null
currChar = Chr(*c\c)
Select *c\c
Case '0' To '9'
repCount + currChar
Default
If repCount
tmp = Space(Val(repCount))
ReplaceString(tmp, " ", currChar, #PB_String_InPlace)
output + tmp
repCount = ""
Else
output + currChar
EndIf
EndSelect
*c + SizeOf(Character)
Wend
ProcedureReturn output
EndProcedure
Procedure.s RLEncode(toEncode.s)
Protected.s currChar, prevChar, output
Protected repCount
Protected *c.Character = @toEncode
prevChar = Chr(*c\c)
repCount = 1
*c + SizeOf(Character)
While *c\c <> #Null
currChar = Chr(*c\c)
If currChar <> prevChar
output + Str(repCount) + prevChar
prevChar = currChar
repCount = 1
Else
repCount + 1
EndIf
*c + SizeOf(Character)
Wend
output + Str(repCount)
output + prevChar
ProcedureReturn output
EndProcedure
If OpenConsole()
Define initial.s, encoded.s, decoded.s
Print("Type something: ")
initial = Input()
encoded = RLEncode(initial)
decoded = RLDecode(encoded)
PrintN(initial)
PrintN(RLEncode(initial))
PrintN(RLDecode(encoded))
Print(#CRLF$ + #CRLF$ + "Press ENTER to exit")
Input()
CloseConsole()
EndIf
Sample output:
Type something: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWW WWW WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW 12W1B12W3B24W1B14W WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] Python
def encode(input_string):
count = 1
prev = ''
lst = []
for character in input_string:
if character != prev:
if prev:
entry = (prev,count)
lst.append(entry)
#print lst
count = 1
prev = character
else:
count += 1
else:
entry = (character,count)
lst.append(entry)
return lst
def decode(lst):
q = ""
for character, count in lst:
q += character * count
return q
#Method call
encode("aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa")
decode([('a', 5), ('h', 6), ('m', 7), ('u', 1), ('i', 7), ('a', 6)])
Functional
from itertools import groupby
def encode(input_string):
return [(len(list(g)), k) for k,g in groupby(input_string)]
def decode(lst):
return ''.join(c * n for n,c in lst)
encode("aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa")
decode([(5, 'a'), (6, 'h'), (7, 'm'), (1, 'u'), (7, 'i'), (6, 'a')])
By regular expression
The simplified input range of only uppercase characters allows a simple regular expression to be applied repeatedly for encoding, and another for decoding:
from re import sub
def encode(text):
'''
Doctest:
>>> encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW')
'12W1B12W3B24W1B14W'
'''
return sub(r'(.)\1*', lambda m: str(len(m.group(0))) + m.group(1),
text)
def decode(text):
'''
Doctest:
>>> decode('12W1B12W3B24W1B14W')
'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
'''
return sub(r'(\d+)(\D)', lambda m: m.group(2) * int(m.group(1)),
text)
textin = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
assert decode(encode(textin)) == textin
[edit] R
R has a built-in function, rle, for run length encoding. This modification allows input and output in the forms specified above.
runlengthencoding <- function(x)
{
splitx <- unlist(strsplit(input, ""))
rlex <- rle(splitx)
paste(with(rlex, as.vector(rbind(lengths, values))), collapse="")
}
input <- "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
runlengthencoding(input)
Similarly, inverse.rle provides decompression after a run length encoding.
inverserunlengthencoding <- function(x)
{
lengths <- as.numeric(unlist(strsplit(output, "[[:alpha:]]")))
values <- unlist(strsplit(output, "[[:digit:]]"))
values <- values[values != ""]
uncompressed <- inverse.rle(list(lengths=lengths, values=values))
paste(uncompressed, collapse="")
}
output <- "12W1B12W3B24W1B14W"
inverserunlengthencoding(output)
[edit] Racket
#lang racket
(define (encode str)
(regexp-replace* #px"(.)\\1*" str (λ (m c) (~a (string-length m) c))))
(define (decode str)
(regexp-replace* #px"([0-9]+)(.)" str (λ (m n c) (make-string (string->number n) (string-ref c 0)))))
[edit] REXX
The task (input) rule was relaxed a bit as this program accepts upper- and lowercase input.
[edit] encoding
/*REXX program encodes string by using a run-length scheme (min len=2).*/
parse arg x /*normally, input would be a file*/
def='WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
if x=='' then x=def /*No input? Then use the default*/
Lx=length(x) /*get the length of the X string.*/
y= /*Y is the output string (so far)*/
do j=1 to Lx /*warning! J is modified below.*/
c=substr(x,j,1) /*pick a character, check for err*/
if \datatype(c,'m') then do;say "error!: data isn't alphabetic";exit 13; end
r=0 /*R is NOT the number of chars. */
do k=j+1 to Lx while substr(x,k,1)==c
r=r+1 /*R is a replication count. */
end /*k*/
if r==0 then Y = Y || c /*C wan't repeated, just OUT it.*/
else Y = Y || r || c /*add it to the encoded string. */
j=j+r /*A bad thing to do, but simple. */
end /*j*/
say ' input=' x
say 'encoded=' y
/*stick a fork in it, we're done.*/
output when using default input:'
input= WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW output= 11WB11W2B23WB13W
[edit] decoding
/*REXX program decodes string by using a run-length scheme (min len=2).*/
parse arg x /*normally, input would be a file*/
if x=='' then x='11WB11W2B23WB13W' /*No input? Then use the default*/
Lx=length(x) /*get the length of the X string.*/
y= /*Y is the output string (so far)*/
do j=1 to Lx /*warning! J is modified below.*/
c=substr(x,j,1)
if \datatype(c,'W') then do /*a loner char, simply add to OUT*/
y=y || c
iterate
end
d=1
do k=j+1 to Lx while datatype(substr(x,k,1),'w') /*look for #end*/
d=d+1 /*d is the number of digs so far.*/
end /*k*/
n=substr(x,j,d)+1 /*D is length of encoded number.*/
y=y || copies(substr(x,k,1),n) /*N is now the number of chars. */
j=j+d /*A bad thing to do, but simple. */
end /*j*/
say ' input=' x
say 'decoded=' y
/*stick a fork in it, we're done.*/
output when using the default input:
input= 11WB11W2B23WB13W decoded= WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] Ruby
def encode(string)
string.scan(/(.)(\1*)/).collect do |char, repeat|
[char, 1 + repeat.length]
end
end
def decode(encoding)
encoding.collect { |char, length| char * length }.join
end
orig = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
enc = encode(orig) # => [["W", 12], ["B", 1], ["W", 12], ["B", 3], ["W", 24], ["B", 1], ["W", 14]]
dec = decode(enc)
puts "success!" if dec == orig
This usage also seems to be idiomatic, and perhaps less cryptic:
def encode(string)
encoding = []
for char, repeat in string.scan(/(.)(\1*)/)
encoding << [char, 1 + repeat.length]
end
encoding
end
def decode(encoding)
decoding = ""
for char, length in encoding
decoding << char * length
end
decoding
end
By regular expression
The simplified input range of only uppercase characters allows a simple regular expression to be applied repeatedly for encoding, and another for decoding:
def encode(str)
str.gsub(/(.)\1*/) {$&.length.to_s + $1}
end
def decode(str)
str.gsub(/(\d+)(\D)/) {$2 * $1.to_i}
end
encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW') #=> "12W1B12W3B24W1B14W"
decode('12W1B12W3B24W1B14W') #=> "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
[edit] Run BASIC
string$ = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"Output:
beg = 1
i = 1
[loop]
s$ = mid$(string$,beg,1)
while mid$(string$,i,1) = s$
i = i + 1
wend
press$ = press$ ; i-beg;s$
beg = i
if i < len(string$) then goto [loop]
print "Compressed:";press$
beg = 1
i = 1
[expand]
while mid$(press$,i,1) <= "9"
i = i + 1
wend
for j = 1 to val(mid$(press$,beg, i - beg))
expand$ = expand$ + mid$(press$,i,1)
next j
i = i + 1
beg = i
if i < len(press$) then goto [expand]
print " Expanded:";expand$
Compressed:12W1B12W3B24W1B14W Expanded:WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] Scala
Care is taken to use StringBuilder for performance reasons.
def encode(s: String) = (1 until s.size).foldLeft((1, s(0), new StringBuilder)) {
case ((len, c, sb), index) if c != s(index) => sb.append(len); sb.append(c); (1, s(index), sb)
case ((len, c, sb), _) => (len + 1, c, sb)
} match {
case (len, c, sb) => sb.append(len); sb.append(c); sb.toString
}
def decode(s: String) = {
val sb = new StringBuilder
val Code = """(\d+)([A-Z])""".r
for (Code(len, c) <- Code findAllIn s) sb.append(c * len.toInt)
sb.toString
}
A simpler (?) encoder:
def encode(s:String) = {
s.foldLeft((0,s(0),""))( (t,c) => t match {case (i,p,s) => if (p==c) (i+1,p,s) else (1,c,s+i+p)})
match {case (i,p,s) => s+i+p}
}
To make it faster (it's also faster than the longer implementation above) just replace "" with new StringBuilder and s+i+p with {s.append(i);s.append(p)}
[edit] Scheme
(define (run-length-decode v)
(apply string-append (map (lambda (p) (make-string (car p) (cdr p))) v)))
(define (run-length-encode s)
(let ((n (string-length s)))
(let loop ((i (- n 2)) (c (string-ref s (- n 1))) (k 1) (v '()))
(if (negative? i) (cons (cons k c) v)
(let ((x (string-ref s i)))
(if (char=? c x) (loop (- i 1) c (+ k 1) v)
(loop (- i 1) x 1 (cons (cons k c) v))))))))
(run-length-encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
; ((12 . #\W) (1 . #\B) (12 . #\W) (3 . #\B) (24 . #\W) (1 . #\B) (14 . #\W))
(run-length-decode '((12 . #\W) (1 . #\B) (12 . #\W) (3 . #\B) (24 . #\W) (1 . #\B) (14 . #\W)))
; "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
[edit] sed
The encode script:
/^$/ b
:start
/^[0-9]/ b
s/^/1/
:loop
h
/^9+([^0-9])\1+/ {
s/^(9+).*/0\1/
y/09/10/
G
s/^(.+)\n[0-9]+.(.*)/\1\2/
b loop }
/^[0-9]*[0-8]([^0-9])\1+/ {
s/^[0-9]*([0-8]).*/\1/
y/012345678/123456789/
G
s/^(.)\n([0-9]*)[0-8].(.*)/\2\1\3/
b loop }
/^[0-9]+9+([^0-9])\1+/ {
s/^[0-9]*([0-8]9+).*/\1/
y/0123456789/1234567890/
G
s/^(.+)\n([0-9]*)[0-8]9+.(.*)/\2\1\3/
b loop }
s/^([0-9]+.)(.*)/\2\1/
b start
The decode script:
/^$/ b
:start
/^[^0-9]/ b
:loop
/^1[^0-9]/ {
s/^1(.)(\1*)(.*)/\3\1\2/
b start }
h
/^[0-9]*[1-9][^0-9]/ {
s/^[0-9]*([1-9]).*/\1/
y/123456789/012345678/
G
s/^([0-8])\n([0-9]*)[1-9]([^0-9])(.*)/\2\1\3\3\4/
b loop }
/^[0-9]+0[^0-9]/ {
s/^[0-9]*([1-9]0+)[^0-9].*/\1/
y/0123456789/9012345678/
G
s/^([0-9]+)\n([0-9]*)[1-9]0+([^0-9])(.*)/\2\1\3\3\4/
s/^0+//
b loop }
Example (assuming the scripts reside in the files encode.sed and decode.sed):
sed -rf encode.sed <<< "foo oops"
# 1f2o1 2o1p1s
sed -rf decode.sed <<< "1f2o1 2o1p1s"
# foo oops
(sed -rf decode.sed | sed -rf encode.sed) <<< 1000.
# 1000.
[edit] Seed7
$ include "seed7_05.s7i";
include "scanstri.s7i";
const func string: letterRleEncode (in string: data) is func
result
var string: result is "";
local
var char: code is ' ';
var integer: index is 1;
begin
if length(data) <> 0 then
code := data[1];
repeat
incr(index);
until index > length(data) or code <> data[index];
result := str(pred(index)) & str(code) & letterRleEncode(data[index ..]);
end if;
end func;
const func string: letterRleDecode (in var string: data) is func
result
var string: result is "";
local
var integer: count is 0;
begin
if length(data) <> 0 then
count := integer parse getDigits(data);
result := data[1 len 1] mult count & letterRleDecode(data[2 ..]);
end if;
end func;
const proc: main is func
begin
writeln(letterRleEncode("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"));
writeln(letterRleDecode("12W1B12W3B24W1B14W"));
end func;
[edit] Smalltalk
See Run-length encoding/Smalltalk
[edit] SNOBOL4
* # Encode RLE
define('rle(str)c,n') :(rle_end)
rle str len(1) . c :f(return)
str span(c) @n =
rle = rle n c :(rle)
rle_end
* # Decode RLE
define('elr(str)c,n') :(elr_end)
elr str span('0123456789') . n len(1) . c = :f(return)
elr = elr dupl(c,n) :(elr)
elr_end
* # Test and display
str = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
output = str;
str = rle(str); output = str
str = elr(str); output = str
end
Output:
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW 12W1B12W3B24W1B14W WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] SQL
- RLE encoding
-- variable table
DROP TABLE IF EXISTS var;
CREATE temp TABLE var ( VALUE VARCHAR(1000) );
INSERT INTO var(VALUE) SELECT 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW';
-- select
WITH recursive
ints(num) AS
(
SELECT 1
UNION ALL
SELECT num+1
FROM ints
WHERE num+1 <= LENGTH((SELECT VALUE FROM var))
)
,
chars(num,chr,nextChr,isGroupEnd) AS
(
SELECT tmp.*, CASE WHEN tmp.nextChr <> tmp.chr THEN 1 ELSE 0 END groupEnds
FROM (
SELECT num,
SUBSTRING((SELECT VALUE FROM var), num, 1) chr,
(SELECT SUBSTRING((SELECT VALUE FROM var), num+1, 1)) nextChr
FROM ints
) tmp
)
SELECT (SELECT VALUE FROM var) plain_text, (
SELECT string_agg(concat(CAST(maxNoWithinGroup AS VARCHAR(10)) , chr), '' ORDER BY num)
FROM (
SELECT *, MAX(noWithinGroup) OVER (partition BY chr, groupNo) maxNoWithinGroup
FROM (
SELECT num,
chr,
groupNo,
ROW_NUMBER() OVER( partition BY chr, groupNo ORDER BY num) noWithinGroup
FROM (
SELECT *, (SELECT COUNT(*)
FROM chars chars2
WHERE chars2.isGroupEnd = 1 AND
chars2.chr = chars.chr AND
chars2.num < chars.num) groupNo
FROM chars
) tmp
) sub
) final
WHERE noWithinGroup = 1
) Rle_Compressed
- RLE decoding
-- variable table
DROP TABLE IF EXISTS var;
CREATE temp TABLE var ( VALUE VARCHAR(1000) );
INSERT INTO var(VALUE) SELECT '1A2B3C4D5E6F';
-- select
WITH recursive
ints(num) AS
(
SELECT 1
UNION ALL
SELECT num+1
FROM ints
WHERE num+1 <= LENGTH((SELECT VALUE FROM var))
)
,
chars(num,chr,nextChr) AS
(
SELECT tmp.*
FROM (
SELECT num,
SUBSTRING((SELECT VALUE FROM var), num, 1) chr,
(SELECT SUBSTRING((SELECT VALUE FROM var), num+1, 1)) nextChr
FROM ints
) tmp
)
,
charsWithGroup(num,chr,nextChr,group_no) AS
(
SELECT *,(SELECT COUNT(*)
FROM chars chars2
WHERE chars2.chr !~ '[0-9]' AND
chars2.num < chars.num) group_No
FROM chars
)
,
charsWithGroupAndLetter(num,chr,nextChr,group_no,group_letter) AS
(
SELECT *,(SELECT chr
FROM charsWithGroup g2
WHERE g2.group_no = charsWithGroup.group_no
ORDER BY num DESC
LIMIT 1)
FROM charsWithGroup
)
,
lettersWithCount(group_no,amount,group_letter) AS
(
SELECT group_no, string_agg(chr, '' ORDER BY num), group_letter
FROM charsWithGroupAndLetter
WHERE chr ~ '[0-9]'
GROUP BY group_no, group_letter
)
,
lettersReplicated(group_no,amount,group_letter, replicated_Letter) AS
(
SELECT *, rpad(group_letter, CAST(amount AS INT), group_letter)
FROM lettersWithCount
)
SELECT (SELECT VALUE FROM var) rle_encoded,
string_agg(replicated_Letter, '' ORDER BY group_no) decoded_string
FROM lettersReplicated
[edit] Standard ML
fun encode str =
let
fun aux (sub, acc) =
case Substring.getc sub
of NONE => rev acc
| SOME (x, sub') =>
let
val (y, z) = Substring.splitl (fn c => c = x) sub'
in
aux (z, (x, Substring.size y + 1) :: acc)
end
in
aux (Substring.full str, [])
end
fun decode lst =
concat (map (fn (c,n) => implode (List.tabulate (n, fn _ => c))) lst)
Example:
- encode "aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa"; val it = [(#"a",5),(#"h",6),(#"m",7),(#"u",1),(#"i",7),(#"a",6)] : (char * int) list - decode [(#"a",5),(#"h",6),(#"m",7),(#"u",1),(#"i",7),(#"a",6)]; val it = "aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa" : string
[edit] Tcl
The encoding is an even-length list with elements {count char ...}
proc encode {string} {
set encoding {}
# use a regular expression to match runs of one character
foreach {run -} [regexp -all -inline {(.)\1+|.} $string] {
lappend encoding [string length $run] [string index $run 0]
}
return $encoding
}
proc decode {encoding} {
foreach {count char} $encoding {
append decoded [string repeat $char $count]
}
return $decoded
}
set str "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
set enc [encode $str] ;# ==> {12 W 1 B 12 W 3 B 24 W 1 B 14 W}
set dec [decode $enc]
if {$str eq $dec} {
puts "success"
}
[edit] TUSCRIPT
$$ MODE TUSCRIPT,{}
input="WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW",output=""
string=strings(input," ? ")
letter=ACCUMULATE(string,freq)
freq=SPLIT(freq),letter=SPLIT(letter)
output=JOIN(freq,"",letter)
output=JOIN(output,"")
PRINT input
PRINT output
Output:
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW 12W1B12W3B24W1B14W
[edit] Ursala
A standard library function, rlc, does most of the work for this task, which is a second order function taking a binary predicate that decides when consecutive items of an input list belong to the same run.
#import std
#import nat
encode = (rlc ==); *= ^lhPrNCT\~&h %nP+ length
decode = (rlc ~&l-=digits); *=zyNCXS ^|DlS/~& iota+ %np
test_data = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
#show+
example =
<
encode test_data,
decode encode test_data>
The output shows an encoding of the test data, and a decoding of the encoding, which matches the original test data.
12W1B12W3B24W1B14W WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
[edit] Vedit macro language
The following example encodes/decodes an entire file. Each run is coded with two bytes. The first byte is the run length with high bit set, the second byte is the character code. ASCII characters with run length of 1 are left unchanged. Character codes above 127 are always coded with run length. Newlines are not converted (the regular expression does not count newlines). This methods supports any type of input.
:RL_ENCODE:
BOF
While (!At_EOF) {
if (At_EOL) { Line(1) Continue } // skip newlines
#1 = Cur_Char // #1 = character
Match("(.)\1*", REGEXP) // count run length
#2 = Chars_Matched // #2 = run length
if (#2 > 127) { #2 = 127 } // can be max 127
if (#2 > 1 || #1 > 127) {
Del_Char(#2)
Ins_Char(#2 | 128) // run length (high bit set)
Ins_Char(#1) // character
} else { // single ASCII char
Char // skip
}
}
Return
:RL_DECODE:
BOF
While (!At_EOF) {
#2 = Cur_Char
if (#2 > 127) { // is this run length?
#1 = Cur_Char(1) // #1 = character value
Del_Char(2)
Ins_Char(#1, COUNT, #2 & 127)
} else { // single ASCII char
Char
}
}
Return
[edit] XPL0
include c:\cxpl\codes; \intrinsic 'code' declarations
string 0; \use zero-terminated strings, instead of MSb terminated
proc Compress(S); \Compress string using run-length encoding, & display it
char S;
int I, C0, C, N;
[I:= 0;
C0:= S(I); I:= I+1;
repeat ChOut(0, C0);
N:= 0;
repeat C:= S(I); I:= I+1;
N:= N+1;
until C#C0;
if N>1 then IntOut(0, N-1);
C0:= C;
until C=0;
]; \Compress
proc Expand(S); \Expand compressed string, and display it
char S;
int I, C0, C, N;
[I:= 0;
C0:= S(I); I:= I+1;
repeat ChOut(0, C0);
C:= S(I); I:= I+1;
if C>=^1 & C<=^9 then
[N:= 0;
while C>=^0 & C<=^9 do
[N:= N*10 + C-^0;
C:= S(I); I:= I+1;
];
while N do [ChOut(0, C0); N:= N-1];
];
C0:= C;
until C=0;
]; \Expand
[Compress("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW");
CrLf(0);
Expand("W11BW11B2W23BW13"); CrLf(0);
]
Output (with slightly better compression than the example):
W11BW11B2W23BW13 WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
- WikipediaSourced
- Programming Tasks
- Compression
- Ada
- AWK
- ALGOL 68
- APL
- AutoHotkey
- BASIC
- BBC BASIC
- Bracmat
- Burlesque
- C
- C++
- Boost
- C sharp
- Clojure
- CoffeeScript
- Common Lisp
- D
- E
- Erlang
- Euphoria
- F Sharp
- FALSE
- Fan
- Forth
- Fortran
- Go
- Groovy
- Haskell
- Icon
- Unicon
- J
- Java
- JUnit
- JavaScript
- K
- Liberty BASIC
- Logo
- Lua
- Mathematica
- Maxima
- MMIX
- Objective-C
- OCaml
- Oz
- PARI/GP
- Pascal
- Perl
- Perl 6
- PHP
- PicoLisp
- PL/I
- PowerBASIC
- PowerShell
- Prolog
- Pure
- PureBasic
- Python
- R
- Racket
- REXX
- Ruby
- Run BASIC
- Scala
- Scheme
- Sed
- Seed7
- Smalltalk
- SNOBOL4
- SQL
- Standard ML
- Tcl
- TUSCRIPT
- Ursala
- Vedit macro language
- XPL0