Run-length encoding

From Rosetta Code
Jump to: navigation, search
This page uses content from Wikipedia. The original article was at Run-length_encoding. The list of authors can be seen in the page history. As with Rosetta Code, the text of Wikipedia is available under the GNU FDL. (See links for details on variance)
Task
Run-length encoding
You are encouraged to solve this task according to the task description, using any language you may know.

Given a string containing uppercase characters (A-Z), compress repeated 'runs' of the same character by storing the length of that run, and provide a function to reverse the compression. The output can be anything, as long as you can recreate the input with it.

Example:

Input: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
Output: 12W1B12W3B24W1B14W

Note: the encoding step in the above example is the same as a step of the Look-and-say sequence.

Contents

[edit] Ada

with Ada.Text_IO;        use Ada.Text_IO;
with Ada.Strings.Fixed; use Ada.Strings.Fixed;
procedure Test_Run_Length_Encoding is
function Encode (Data : String) return String is
begin
if Data'Length = 0 then
return "";
else
declare
Code  : constant Character := Data (Data'First);
Index : Integer := Data'First + 1;
begin
while Index <= Data'Last and then Code = Data (Index) loop
Index := Index + 1;
end loop;
declare
Prefix : constant String := Integer'Image (Index - Data'First);
begin
return Prefix (2..Prefix'Last) & Code & Encode (Data (Index..Data'Last));
end;
end;
end if;
end Encode;
function Decode (Data : String) return String is
begin
if Data'Length = 0 then
return "";
else
declare
Index : Integer := Data'First;
Count : Natural := 0;
begin
while Index < Data'Last and then Data (Index) in '0'..'9' loop
Count := Count * 10 + Character'Pos (Data (Index)) - Character'Pos ('0');
Index := Index + 1;
end loop;
if Index > Data'First then
return Count * Data (Index) & Decode (Data (Index + 1..Data'Last));
else
return Data;
end if;
end;
end if;
end Decode;
begin
Put_Line (Encode ("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"));
Put_Line (Decode ("12W1B12W3B24W1B14W"));
end Test_Run_Length_Encoding;

Sample output:

12W1B12W3B24W1B14W
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] ALGOL 68

Works with: ALGOL 68 version Revision 1 - no extensions to language used
Works with: ALGOL 68G version Any - tested with release 1.18.0-9h.tiny
Works with: ELLA ALGOL 68 version Any (with appropriate job cards) - tested with release 1.8-8d

Note: The following uses iterators, eliminating the need of declaring arbitrarily large CHAR arrays for caching.

STRING input := "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
STRING output := "12W1B12W3B24W1B14W";
 
MODE YIELDCHAR = PROC(CHAR)VOID;
MODE GENCHAR = PROC(YIELDCHAR)VOID;
 
PROC gen char string = (REF STRING s, YIELDCHAR yield)VOID:
FOR i FROM LWB s TO UPB s DO yield(s[i]) OD;
 
CO
# Note: The following 2 lines use currying. This not supported by ELLA ALGOL 68RS #
GENCHAR input seq = gen char string(input,),
output seq = gen char string(output,);
END CO
 
GENCHAR
input seq = (YIELDCHAR yield)VOID: gen char string(input, yield),
output seq = (YIELDCHAR yield)VOID: gen char string(output, yield);
 
PROC gen encode = (GENCHAR gen char, YIELDCHAR yield)VOID: (
INT count := 0;
CHAR prev;
# FOR CHAR c IN # gen char( # ) DO ( #
## (CHAR c)VOID: (
IF count = 0 THEN
count := 1;
prev := c
ELIF c NE prev THEN
STRING str count := whole(count,0);
gen char string(str count, yield); count := 1;
yield(prev); prev := c
ELSE
count +:=1
FI
# OD # ));
IF count NE 0 THEN
STRING str count := whole(count,0);
gen char string(str count,yield);
yield(prev)
FI
);
 
STRING zero2nine = "0123456789";
 
PROC gen decode = (GENCHAR gen char, YIELDCHAR yield)VOID: (
INT repeat := 0;
# FOR CHAR c IN # gen char( # ) DO ( #
## (CHAR c)VOID: (
IF char in string(c, LOC INT, zero2nine) THEN
repeat := repeat*10 + ABS c - ABS "0"
ELSE
FOR i TO repeat DO yield(c) OD;
repeat := 0
FI
# OD # ))
);
 
# iterate through input string #
print("Encode input: ");
# FOR CHAR c IN # gen encode(input seq, # ) DO ( #
## (CHAR c)VOID:
print(c)
# OD # );
print(new line);
 
# iterate through output string #
print("Decode output: ");
# FOR CHAR c IN # gen decode(output seq, # ) DO ( #
## (CHAR c)VOID:
print(c)
# OD # );
print(new line)

Output:

Encode input: 12W1B12W3B24W1B14W
Decode output: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] APL

    ∇ ret←RLL rll;count
[1] count←∣2-/((1,(2≠/rll),1)×⍳1+⍴rll)~0
[2] ret←(⍕count,¨(1,2≠/rll)/rll)~' '

 

Sample Output:

      RLL 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
12W1B12W3B24W1B14W

[edit] AutoHotkey

MsgBox % key := rle_encode("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
MsgBox % rle_decode(key)
 
rle_encode(message)
{
StringLeft, previous, message, 1
StringRight, last, message, 1
message .= Asc(Chr(last)+1)
count = 0
Loop, Parse, message
{
If (previous == A_LoopField)
count +=1
Else
{
output .= previous . count
previous := A_LoopField
count = 1
}
}
Return output
}
 
rle_decode(message)
{
pos = 1
While, item := RegExMatch(message, "\D", char, pos)
{
digpos := RegExMatch(message, "\d+", dig, item)
Loop, % dig
output .= char
pos := digpos
}
Return output
}

[edit] AWK

Works with: gawk

It works with "textual" input. Lines containing numbers are skipped, since they can't be represented in a not ambiguous way in this implementation (e.g. "11AA" would be encoded as "212A", which would be decoded as A repeated 212 times!)

Encoding

BEGIN {
FS=""
}
/^[^0-9]+$/ {
cp = $1; j = 0
for(i=1; i <= NF; i++) {
if ( $i == cp ) {
j++;
} else {
printf("%d%c", j, cp)
j = 1
}
cp = $i
}
printf("%d%c", j, cp)
}

Decoding

BEGIN {
RS="[0-9]+[^0-9]"
final = "";
}
{
match(RT, /([0-9]+)([^0-9])/, r)
for(i=0; i < int(r[1]); i++) {
final = final r[2]
}
}
END {
print final
}

[edit] BASIC

Works with: QBasic
Translation of: PowerBASIC
DECLARE FUNCTION RLDecode$ (i AS STRING)
DECLARE FUNCTION RLEncode$ (i AS STRING)
 
DIM initial AS STRING, encoded AS STRING, decoded AS STRING
 
INPUT "Type something: ", initial
encoded = RLEncode(initial)
decoded = RLDecode(encoded)
PRINT initial
PRINT encoded
PRINT decoded
 
FUNCTION RLDecode$ (i AS STRING)
DIM Loop0 AS LONG, rCount AS STRING, outP AS STRING, m AS STRING
 
FOR Loop0 = 1 TO LEN(i)
m = MID$(i, Loop0, 1)
SELECT CASE m
CASE "0" TO "9"
rCount = rCount + m
CASE ELSE
IF LEN(rCount) THEN
outP = outP + STRING$(VAL(rCount), m)
rCount = ""
ELSE
outP = outP + m
END IF
END SELECT
NEXT
RLDecode$ = outP
END FUNCTION
 
FUNCTION RLEncode$ (i AS STRING)
DIM tmp1 AS STRING, tmp2 AS STRING, outP AS STRING
DIM Loop0 AS LONG, rCount AS LONG
 
tmp1 = MID$(i, 1, 1)
tmp2 = tmp1
rCount = 1
 
FOR Loop0 = 2 TO LEN(i)
tmp1 = MID$(i, Loop0, 1)
IF tmp1 <> tmp2 THEN
outP = outP + LTRIM$(RTRIM$(STR$(rCount))) + tmp2
tmp2 = tmp1
rCount = 1
ELSE
rCount = rCount + 1
END IF
NEXT
 
outP = outP + LTRIM$(RTRIM$(STR$(rCount)))
outP = outP + tmp2
RLEncode$ = outP
END FUNCTION

Sample output (last one shows errors from using numbers in input string):

Type something: aaaaeeeeeeiiiioooouuy
aaaaeeeeeeiiiioooouuy
4a6e4i4o2u1y
aaaaeeeeeeiiiioooouuy

Type something: My dog has fleas.
My dog has fleas.
1M1y1 1d1o1g1 1h1a1s1 1f1l1e1a1s1.
My dog has fleas.

Type something: 1r
1r
111r
rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr

[edit] BBC BASIC

The run counts are indicated by means of character codes in the range 131 to 255.

      input$ = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
PRINT "Input: " input$
rle$ = FNencodeRLE(input$)
output$ = FNdecodeRLE(rle$)
PRINT "Output: " output$
END
 
DEF FNencodeRLE(text$)
LOCAL n%, r%, c$, o$
n% = 1
WHILE n% <= LEN(text$)
c$ = MID$(text$, n%, 1)
n% += 1
r% = 1
WHILE c$ = MID$(text$, n%, 1) AND r% < 127
r% += 1
n% += 1
ENDWHILE
IF r% < 3 o$ += STRING$(r%, c$) ELSE o$ += CHR$(128+r%) + c$
ENDWHILE
= o$
 
DEF FNdecodeRLE(rle$)
LOCAL n%, c$, o$
n% = 1
WHILE n% <= LEN(rle$)
c$ = MID$(rle$, n%, 1)
n% += 1
IF ASC(c$) > 128 THEN
o$ += STRING$(ASC(c$)-128, MID$(rle$, n%, 1))
n% += 1
ELSE
o$ += c$
ENDIF
ENDWHILE
= o$

[edit] Befunge

Not the same format as in the example,it puts "n\n" at the beginning so you can pipe the output back in and receive the input. Pipe the output of the program-it's more reliable.

Works with: CCBI version 2.1
                    ~"y"- ~$         v
<temp var for when char changes
format:
first,'n' and a newline.  :
a char then a v _"n",v
number then a space continuously 9
example: 1
n > v ,+<
a5 b2
decoded:aaaaabb
the program is ended using decoder
Ctrl-C on linux,or alt-f4
on windows.copy the output >\v encoder
of the program somewhere ^_ $ v
to encode press y  : > $11g:, v
to decode pipe file in >1-^ ~ v +1\<
the output of the encoder \ v< $ ^ .\_^
starts with n,this is so ^,:<\&~< _~:,>1>\:v>^
you can pipe it straight in ^ <
~
the spaces seem to be a annoying thing  :
thanks to CCBI...if a interpreter dosen't 1
create them it's non-conforming and thus 1
the validity of this program is NOT affected p-
>^
--written by Gamemanj,for Rosettacode

[edit] Bracmat

  ( run-length
= character otherCharacter acc begin end
.  :?acc
& 0:?begin
& @( !arg
 :  ?
[!begin
 %@?character
 ?
[?end
( (%@:~!character:?otherCharacter) ?
& !acc !end+-1*!begin !character:?acc
& !otherCharacter:?character
& !end:?begin
& ~`
| &!acc !end+-1*!begin !character:?acc
)
)
& str$!acc
)
& run-length$WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
 
  12W1B12W3B24W1B14W

[edit] Burlesque

 
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
=[{^^[~\/L[Sh}\m
 

[edit] C

Encoder that can deal with byte streams. Can encode/decode any byte values and any length with reasonable efficiency. Also showing OO and polymophism with structs.

#include <stdio.h>
#include <stdlib.h>
 
typedef struct stream_t stream_t, *stream;
struct stream_t {
/* get funciton is supposed to return a byte value (0-255),
or -1 to signify end of input */

int (*get)(stream);
/* put function does output, one byte at a time */
int (*put)(stream, int);
};
 
/* next two structs inherit from stream_t */
typedef struct {
int (*get)(stream);
int (*put)(stream, int);
char *string;
int pos;
} string_stream;
 
typedef struct {
int (*get)(stream);
int (*put)(stream, int);
FILE *fp;
} file_stream;
 
/* methods for above streams */
int sget(stream in)
{
int c;
string_stream* s = (string_stream*) in;
c = (unsigned char)(s->string[s->pos]);
if (c == '\0') return -1;
s->pos++;
return c;
}
 
int sput(stream out, int c)
{
string_stream* s = (string_stream*) out;
s->string[s->pos++] = (c == -1) ? '\0' : c;
if (c == -1) s->pos = 0;
return 0;
}
 
int file_put(stream out, int c)
{
file_stream *f = (file_stream*) out;
return fputc(c, f->fp);
}
 
/* helper function */
void output(stream out, unsigned char* buf, int len)
{
int i;
out->put(out, 128 + len);
for (i = 0; i < len; i++)
out->put(out, buf[i]);
}
 
/* Specification: encoded stream are unsigned bytes consisting of sequences.
* First byte of each sequence is the length, followed by a number of bytes.
* If length <=128, the next byte is to be repeated length times;
* If length > 128, the next (length - 128) bytes are not repeated.
* this is to improve efficiency for long non-repeating sequences.
* This scheme can encode arbitrary byte values efficiently.
* c.f. Adobe PDF spec RLE stream encoding (not exactly the same)
*/

void encode(stream in, stream out)
{
unsigned char buf[256];
int len = 0, repeat = 0, end = 0, c;
int (*get)(stream) = in->get;
int (*put)(stream, int) = out->put;
 
while (!end) {
end = ((c = get(in)) == -1);
if (!end) {
buf[len++] = c;
if (len <= 1) continue;
}
 
if (repeat) {
if (buf[len - 1] != buf[len - 2])
repeat = 0;
if (!repeat || len == 129 || end) {
/* write out repeating bytes */
put(out, end ? len : len - 1);
put(out, buf[0]);
buf[0] = buf[len - 1];
len = 1;
}
} else {
if (buf[len - 1] == buf[len - 2]) {
repeat = 1;
if (len > 2) {
output(out, buf, len - 2);
buf[0] = buf[1] = buf[len - 1];
len = 2;
}
continue;
}
if (len == 128 || end) {
output(out, buf, len);
len = 0;
repeat = 0;
}
}
}
put(out, -1);
}
 
void decode(stream in, stream out)
{
int c, i, cnt;
while (1) {
c = in->get(in);
if (c == -1) return;
if (c > 128) {
cnt = c - 128;
for (i = 0; i < cnt; i++)
out->put(out, in->get(in));
} else {
cnt = c;
c = in->get(in);
for (i = 0; i < cnt; i++)
out->put(out, c);
}
}
}
 
int main()
{
char buf[256];
string_stream str_in = { sget, 0,
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW", 0};
string_stream str_out = { sget, sput, buf, 0 };
file_stream file = { 0, file_put, stdout };
 
/* encode from str_in to str_out */
encode((stream)&str_in, (stream)&str_out);
 
/* decode from str_out to file (stdout) */
decode((stream)&str_out, (stream)&file);
 
return 0;
}

See Run-length encoding/C

[edit] C++

Library: boost
#include <iostream>
#include <string>
#include <sstream>
#include <boost/regex.hpp>
#include <cstdlib>
 
std::string encode ( const std::string & ) ;
std::string decode ( const std::string & ) ;
 
int main( ) {
std::string to_encode ( "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW" ) ;
std::cout << to_encode << " encoded:" << std::endl ;
std::string encoded ( encode ( to_encode ) ) ;
std::cout << encoded << std::endl ;
std::string decoded ( decode( encoded ) ) ;
std::cout << "Decoded again:\n" ;
std::cout << decoded << std::endl ;
if ( to_encode == decoded )
std::cout << "It must have worked!\n" ;
return 0 ;
}
 
std::string encode( const std::string & to_encode ) {
std::string::size_type found = 0 , nextfound = 0 ;
std::ostringstream oss ;
nextfound = to_encode.find_first_not_of( to_encode[ found ] , found ) ;
while ( nextfound != std::string::npos ) {
oss << nextfound - found ;
oss << to_encode[ found ] ;
found = nextfound ;
nextfound = to_encode.find_first_not_of( to_encode[ found ] , found ) ;
}
//since we must not discard the last characters we add them at the end of the string
std::string rest ( to_encode.substr( found ) ) ;//last run of characters starts at position found
oss << rest.length( ) << to_encode[ found ] ;
return oss.str( ) ;
}
 
std::string decode ( const std::string & to_decode ) {
boost::regex e ( "(\\d+)(\\w)" ) ;
boost::match_results<std::string::const_iterator> matches ;
std::ostringstream oss ;
std::string::const_iterator start = to_decode.begin( ) , end = to_decode.end( ) ;
while ( boost::regex_search ( start , end , matches , e ) ) {
std::string numberstring ( matches[ 1 ].first , matches[ 1 ].second ) ;
int number = atoi( numberstring.c_str( ) ) ;
std::string character ( matches[ 2 ].first , matches[ 2 ].second ) ;
for ( int i = 0 ; i < number ; i++ )
oss << character ;
start = matches[ 2 ].second ;
}
return oss.str( ) ;
}

[edit] C#

This example only works if there are no digits in the string to be encoded and then decoded.

       public static void Main(string[] args)
{
string input = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
Console.WriteLine(Encode(input));//Outputs: 12W1B12W3B24W1B14W
Console.WriteLine(Decode(Encode(input)));//Outputs: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
Console.ReadLine();
}
public static string Encode(string s)
{
StringBuilder sb = new StringBuilder();
int count = 1;
char current =s[0];
for(int i = 1; i < s.Length;i++)
{
if (current == s[i])
{
count++;
}
else
{
sb.AppendFormat("{0}{1}", count, current);
count = 1;
current = s[i];
}
}
sb.AppendFormat("{0}{1}", count, current);
return sb.ToString();
}
public static string Decode(string s)
{
string a = "";
int count = 0;
StringBuilder sb = new StringBuilder();
char current = char.MinValue;
for(int i = 0; i < s.Length; i++)
{
current = s[i];
if (char.IsDigit(current))
a += current;
else
{
count = int.Parse(a);
a = "";
for (int j = 0; j < count; j++)
sb.Append(current);
}
}
return sb.ToString();
}

Somewhat shorter, using Regex.Replace with MatchEvaluator (using C#2 syntax only):

using System;
using System.Text.RegularExpressions;
 
public class Program
{
private delegate void fOk(bool ok, string message);
 
public static int Main(string[] args)
{
const string raw = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
const string code = "12W1B12W3B24W1B14W";
 
fOk Ok = delegate(bool ok, string message)
{
Console.WriteLine("{0}: {1}", ok ? "ok" : "not ok", message);
};
Ok(code.Equals(Encode(raw)), "Encode");
Ok(raw.Equals(Decode(code)), "Decode");
return 0;
}
 
public static string Encode(string input)
{
return Regex.Replace(input, @"(.)\1*", delegate(Match m)
{
return string.Concat(m.Value.Length, m.Groups[1].Value);
});
}
 
public static string Decode(string input)
{
return Regex.Replace(input, @"(\d+)(\D)", delegate(Match m)
{
return new string(m.Groups[2].Value[0], int.Parse(m.Groups[1].Value));
});
}
}

[edit] Clojure

(defn compress [s]
(->> (partition-by identity s) (mapcat (juxt count first)) (apply str)))
 
(defn extract [s]
(->> (re-seq #"(\d+)([A-Z])" s)
(mapcat (fn [[_ n ch]] (repeat (Integer/parseInt n) ch)))
(apply str)))

[edit] COBOL

Works with: GNU Cobol version 2.0
       >>SOURCE FREE
IDENTIFICATION DIVISION.
PROGRAM-ID. run-length-encoding.
 
ENVIRONMENT DIVISION.
CONFIGURATION SECTION.
REPOSITORY.
FUNCTION encode
FUNCTION decode
.
DATA DIVISION.
WORKING-STORAGE SECTION.
01 input-str PIC A(100).
01 encoded PIC X(200).
01 decoded PIC X(200).
 
PROCEDURE DIVISION.
ACCEPT input-str
MOVE encode(FUNCTION TRIM(input-str)) TO encoded
DISPLAY "Encoded: " FUNCTION TRIM(encoded)
DISPLAY "Decoded: " FUNCTION TRIM(decode(encoded))
.
END PROGRAM run-length-encoding.
 
 
IDENTIFICATION DIVISION.
FUNCTION-ID. encode.
 
DATA DIVISION.
LOCAL-STORAGE SECTION.
01 str-len PIC 9(3) COMP.
 
01 i PIC 9(3) COMP.
 
01 current-char PIC A.
 
01 num-chars PIC 9(3) COMP.
01 num-chars-disp PIC Z(3).
 
01 encoded-pos PIC 9(3) COMP VALUE 1.
 
LINKAGE SECTION.
01 str PIC X ANY LENGTH.
 
01 encoded PIC X(200).
 
PROCEDURE DIVISION USING str RETURNING encoded.
MOVE FUNCTION LENGTH(str) TO str-len
MOVE str (1:1) TO current-char
MOVE 1 TO num-chars
PERFORM VARYING i FROM 2 BY 1 UNTIL i > str-len
IF str (i:1) <> current-char
CALL "add-num-chars" USING encoded, encoded-pos,
CONTENT current-char, num-chars
 
MOVE str (i:1) TO current-char
MOVE 1 TO num-chars
ELSE
ADD 1 TO num-chars
END-IF
END-PERFORM
 
CALL "add-num-chars" USING encoded, encoded-pos, CONTENT current-char,
num-chars
.
END FUNCTION encode.
 
IDENTIFICATION DIVISION.
PROGRAM-ID. add-num-chars.
 
DATA DIVISION.
WORKING-STORAGE SECTION.
01 num-chars-disp PIC Z(3).
 
LINKAGE SECTION.
01 str PIC X(200).
 
01 current-pos PIC 9(3) COMP.
 
01 char-to-encode PIC X.
 
01 num-chars PIC 9(3) COMP.
 
PROCEDURE DIVISION USING str, current-pos, char-to-encode, num-chars.
MOVE num-chars TO num-chars-disp
MOVE FUNCTION TRIM(num-chars-disp) TO str (current-pos:3)
ADD FUNCTION LENGTH(FUNCTION TRIM(num-chars-disp)) TO current-pos
MOVE char-to-encode TO str (current-pos:1)
ADD 1 TO current-pos
.
END PROGRAM add-num-chars.
 
 
IDENTIFICATION DIVISION.
FUNCTION-ID. decode.
 
DATA DIVISION.
LOCAL-STORAGE SECTION.
01 encoded-pos PIC 9(3) COMP VALUE 1.
01 decoded-pos PIC 9(3) COMP VALUE 1.
 
01 num-of-char PIC 9(3) COMP VALUE 0.
 
LINKAGE SECTION.
01 encoded PIC X(200).
 
01 decoded PIC X(100).
 
PROCEDURE DIVISION USING encoded RETURNING decoded.
PERFORM VARYING encoded-pos FROM 1 BY 1
UNTIL encoded (encoded-pos:2) = SPACES OR encoded-pos > 200
IF encoded (encoded-pos:1) IS NUMERIC
COMPUTE num-of-char = num-of-char * 10
+ FUNCTION NUMVAL(encoded (encoded-pos:1))
ELSE
PERFORM UNTIL num-of-char = 0
MOVE encoded (encoded-pos:1) TO decoded (decoded-pos:1)
ADD 1 TO decoded-pos
SUBTRACT 1 FROM num-of-char
END-PERFORM
END-IF
END-PERFORM
.
END FUNCTION decode.
Output:
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
Encoded: 12W1B12W3B24W1B14W
Decoded: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] CoffeeScript

encode = (str) ->
str.replace /(.)\1*/g, (w) ->
w[0] + w.length
 
decode = (str) ->
str.replace /(.)(\d+)/g, (m,w,n) ->
new Array(+n+1).join(w)
 
console.log s = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
console.log encode s
console.log decode encode s
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
W12B1W12B3W24B1W14
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

The following version encodes the number of ocurrences as an unicode character. You can change the way it looks by rotating the offset.

encode = (str, offset = 75) ->
str.replace /(.)\1*/g, (w) ->
w[0] + String.fromCharCode(offset+w.length)
 
decode = (str, offset = 75) ->
str.split('').map((w,i) ->
if not (i%2) then w else new Array(+w.charCodeAt(0)-offset).join(str[i-1])
).join('')
> encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
WWBLWWBNWcBLWY
> encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW", 1200
WҼBұWҼBҳWӈBұWҾ
> encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW", 5200
WᑜBᑑWᑜBᑓWᑨBᑑWᑞ

[edit] Common Lisp

(defun group-similar (sequence &key (test 'eql))
(loop for x in (rest sequence)
with temp = (subseq sequence 0 1)
if (funcall test (first temp) x)
do (push x temp)
else
collect temp
and do (setf temp (list x))))
 
(defun run-length-encode (sequence)
(mapcar (lambda (group) (list (first group) (length group)))
(group-similar (coerce sequence 'list))))
 
(defun run-length-decode (sequence)
(reduce (lambda (s1 s2) (concatenate 'simple-string s1 s2))
(mapcar (lambda (elem)
(make-string (second elem)
:initial-element
(first elem)))
sequence)))
 
(run-length-encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
(run-length-decode '((#\W 12) (#\B 1) (#\W 12) (#\B 3) (#\W 24) (#\B 1)))

[edit] D

[edit] Short Functional Version

import std.algorithm, std.array;
 
alias encode = group;
 
auto decode(Group!("a == b", string) enc) {
return enc.map!(t => [t[0]].replicate(t[1])).join;
}
 
void main() {
immutable s = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWW" ~
"WWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
assert(s.encode.decode.equal(s));
}

[edit] Basic Imperative Version

import std.stdio, std.array, std.conv;
 
// Similar to the 'look and say' function.
string encode(in string input) pure /*nothrow*/ {
if (input.empty)
return input;
char last = input[$ - 1];
string output;
int count;
 
foreach_reverse (immutable c; input) {
if (c == last) {
count++;
} else {
output = count.text ~ last ~ output;
count = 1;
last = c;
}
}
 
return count.text ~ last ~ output;
}
 
string decode(in string input) pure {
string i, result;
 
foreach (immutable c; input)
switch (c) {
case '0': .. case '9':
i ~= c;
break;
case 'A': .. case 'Z':
if (i.empty)
throw new Exception("Can not repeat a letter " ~
"without a number of repetitions");
result ~= [c].replicate(i.to!int);
i.length = 0;
break;
default:
throw new Exception("'" ~ c ~ "' is not alphanumeric");
}
 
return result;
}
 
void main() {
immutable txt = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWW" ~
"WWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
writeln("Input: ", txt);
immutable encoded = txt.encode;
writeln("Encoded: ", encoded);
assert(txt == encoded.decode);
}
Output:
Input: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
Encoded: 12W1B12W3B24W1B14W

[edit] UTF String Version

D's native string is utf-encoded. This version works for utf string, and uses a Variable-length Quantity module.

import std.stdio, std.conv, std.utf, std.array;
import vlq;
 
struct RLE { // for utf string
ubyte[] encoded;
 
RLE encode(const string s) {
validate(s); // check if s is well-formed utf, throw if not
encoded.length = 0; // reset
if (s.length == 0) return this; // empty string
string last;
VLQ count;
for (int i = 0; i < s.length; ) {
auto k = s.stride(i);
auto ucode = cast(string)s[i .. i + k];
if (i == 0) last = ucode;
if (ucode == last)
count++;
else {
encoded ~= count.toVLQ ~ cast(ubyte[])last;
last = ucode;
count = 1;
}
i += k;
}
encoded ~= VLQ(count).toVLQ ~ cast(ubyte[])last;
return this;
}
 
int opApply(int delegate(ref ulong c, ref string u) dg) {
VLQ count;
string ucode;
 
for (int i = 0; i < encoded.length; ) {
auto k = count.extract(encoded[i .. $]);
i += k;
if (i >= encoded.length)
throw new Exception("not valid encoded string");
k = stride(cast(string) encoded[i .. $], 0);
if (k == 0xff) // not valid utf code point
throw new Exception("not valid encoded string");
ucode = cast(string)encoded[i .. i + k].dup;
dg(count.value, ucode);
i += k;
}
 
return 0;
}
 
string toString() {
string res;
foreach (ref i, s ; this)
if (indexOf("0123456789#", s) == -1)
res ~= text(i) ~ s;
else
res ~= text(i) ~ '#' ~ s;
return res;
}
 
string decode() {
string res;
foreach (ref i, s; this)
res ~= replicate(s, cast(uint)i);
return res;
}
}
 
void main() {
RLE r;
auto s = "尋尋覓覓冷冷清清淒淒慘慘戚戚\nWWWWWWWWWWWWBWWWWWWWWWWW" ~
"WBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW\n" ~
"11#222##333";
auto f = File("display.txt", "w");
f.writeln(s);
r.encode(s);
f.writefln("-----\n%s\n-----\n%s", r, r.decode());
auto sEncoded = RLE.init.encode(s).encoded ;
assert(s == RLE(sEncoded).decode(), "Not work");
}

output from "display.txt":

尋尋覓覓冷冷清清淒淒慘慘戚戚
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
11#222##333
-----
2尋2覓2冷2清2淒2慘2戚1
12W1B12W3B24W1B14W1
2#11##3#22##3#3
-----
尋尋覓覓冷冷清清淒淒慘慘戚戚
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
11#222##333

NOTE: some characters in this section use Chinese font.

[edit] UTF String Version with Regular Expression

Translation of: Python

The code looks more complex than the third Python version because this also handles digits by escaping them with #.

import std.stdio, std.conv, std.array, std.regex, std.utf,
std.algorithm;
 
string reEncode(string s) {
validate(s); // Throw if it's not a well-formed UTF string
static string rep(Captures!string m) {
auto c = canFind("0123456789#", m[1]) ? "#" ~ m[1] : m[1];
return text(m.hit.length / m[1].length) ~ c;
}
return std.regex.replace!rep(s, regex(`(.|[\n\r\f])\1*`, "g"));
}
 
 
string reDecode(string s) {
validate(s); // Throw if it's not a well-formed UTF string
static string rep(Captures!string m) {
string c = m[2];
if (c.length > 1 && c[0] == '#')
c = c[1 .. $];
return replicate(c, to!int(m[1]));
}
auto r=regex(`(\d+)(#[0123456789#]|[\n\r\f]|[^0123456789#\n\r\f]+)`
, "g");
return std.regex.replace!rep(s, r);
}
 
void main() {
auto s = "尋尋覓覓冷冷清清淒淒慘慘戚戚\nWWWWWWWWWWWWBWWWWWWWWWWW" ~
"WBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW\n" ~
"11#222##333";
assert(s == reDecode(reEncode(s)));
}

[edit] Déjà Vu

rle:
if not dup:
drop
return []
 
swap ]
 
local :source chars
pop-from source
1
for c in source:
if = c over:
++
else:
1 c &
&
return [
 
rld:
)
for pair in swap:
repeat &< pair:
&> pair
concat(
 
 
rle "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
!. dup
!. rld
Output:
[ & 12 "W" & 1 "B" & 12 "W" & 3 "B" & 24 "W" & 1 "B" & 14 "W" ]
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"

[edit] E

def rle(string) {
var seen := null
var count := 0
var result := []
def put() {
if (seen != null) {
result with= [count, seen]
}
}
for ch in string {
if (ch != seen) {
put()
seen := ch
count := 0
}
count += 1
}
put()
return result
}
 
def unrle(coded) {
var result := ""
for [count, ch] in coded {
result += E.toString(ch) * count
}
return result
}
? rle("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
# value: [[12, 'W'], [1, 'B'], [12, 'W'], [3, 'B'], [24, 'W'], [1, 'B'], [14, 'W']]
 
? unrle(rle("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"))
# value: "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"

[edit] Erlang

A single-threaded/process version with a simple set of unit test.

-module(rle).
 
-export([encode/1,decode/1]).
 
-include_lib("eunit/include/eunit.hrl").
 
encode(S) ->
doEncode(string:substr(S, 2), string:substr(S, 1, 1), 1, []).
 
doEncode([], CurrChar, Count, R) ->
R ++ integer_to_list(Count) ++ CurrChar;
doEncode(S, CurrChar, Count, R) ->
NextChar = string:substr(S, 1, 1),
if
NextChar == CurrChar ->
doEncode(string:substr(S, 2), CurrChar, Count + 1, R);
true ->
doEncode(string:substr(S, 2), NextChar, 1,
R ++ integer_to_list(Count) ++ CurrChar)
end.
 
decode(S) ->
doDecode(string:substr(S, 2), string:substr(S, 1, 1), []).
 
doDecode([], _, R) ->
R;
doDecode(S, CurrString, R) ->
NextChar = string:substr(S, 1, 1),
IsInt = erlang:is_integer(catch(erlang:list_to_integer(NextChar))),
if
IsInt ->
doDecode(string:substr(S, 2), CurrString ++ NextChar, R);
true ->
doDecode(string:substr(S, 2), [],
R ++ string:copies(NextChar, list_to_integer(CurrString)))
end.
 
rle_test_() ->
PreEncoded =
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW",
Expected = "12W1B12W3B24W1B14W",
[
 ?_assert(encode(PreEncoded) =:= Expected),
 ?_assert(decode(Expected) =:= PreEncoded),
 ?_assert(decode(encode(PreEncoded)) =:= PreEncoded)
].

A version that works on character lists:

 
-module(rle).
 
-export([encode/1, decode/1]).
 
encode(L) -> encode(L, []).
encode([], Acc) -> {rle, lists:reverse(Acc)};
encode([H|T], []) ->
encode(T, [{1, H}]);
encode([H|T], [{Count, Char}|AT]) ->
if
H =:= Char ->
encode(T, [{Count + 1, Char}|AT]);
true ->
encode(T, [{1, H}|[{Count, Char}|AT]])
end.
 
decode({rle, L}) -> lists:append(lists:reverse(decode(L, []))).
decode([], Acc) -> Acc;
decode([{Count, Char}|T], Acc) ->
decode(T, [[Char || _ <- lists:seq(1, Count)]|Acc]).
 

[edit] Euphoria

include misc.e
 
function encode(sequence s)
sequence out
integer prev_char,count
if length(s) = 0 then
return {}
end if
out = {}
prev_char = s[1]
count = 1
for i = 2 to length(s) do
if s[i] != prev_char then
out &= {count,prev_char}
prev_char = s[i]
count = 1
else
count += 1
end if
end for
out &= {count,prev_char}
return out
end function
 
function decode(sequence s)
sequence out
out = {}
for i = 1 to length(s) by 2 do
out &= repeat(s[i+1],s[i])
end for
return out
end function
 
sequence s
s = encode("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
pretty_print(1,s,{3})
puts(1,'\n')
puts(1,decode(s))

Output:

{12,'W',1,'B',12,'W',3,'B',24,'W',1,'B',14,'W'}
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] F#

 
open System
open System.Text.RegularExpressions
 
let encode data =
// encodeData : seq<'T> -> seq<int * 'T> i.e. Takes a sequence of 'T types and return a sequence of tuples containing the run length and an instance of 'T.
let rec encodeData input =
seq { if not (Seq.isEmpty input) then
let head = Seq.head input
let runLength = Seq.length (Seq.takeWhile ((=) head) input)
yield runLength, head
yield! encodeData (Seq.skip runLength input) }
 
encodeData data |> Seq.fold(fun acc (len, d) -> acc + len.ToString() + d.ToString()) ""
 
let decode str =
[ for m in Regex.Matches(str, "(\d+)(.)") -> m ]
|> List.map (fun m -> Int32.Parse(m.Groups.[1].Value), m.Groups.[2].Value)
|> List.fold (fun acc (len, s) -> acc + String.replicate len s) ""
 

[edit] FALSE

1^[^$~][$@$@=$[%%\1+\$0~]?~[@.,1\$]?%]#%\.,  {encode}
[0[^$$'9>'0@>|~]['0-\10*+]#]n:
[n;!$~][[\$][1-\$,]#%%]#%% {decode}

[edit] Fan

**
** Generates a run-length encoding for a string
**
class RLE
{
Run[] encode(Str s)
{
runs := Run[,]
 
s.size.times |i|
{
ch := s[i]
if (runs.size==0 || runs.last.char != ch)
runs.add(Run(ch))
runs.last.inc
}
return runs
}
 
Str decode(Run[] runs)
{
buf := StrBuf()
runs.each |run|
{
run.count.times { buf.add(run.char.toChar) }
}
return buf.toStr
}
 
Void main()
{
echo(decode(encode(
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
)))
}
 
}
 
internal class Run
{
Int char
Int count := 0
new make(Int ch) { char = ch }
Void inc() { ++count }
 
override Str toStr() { return "${count}${char.toChar}" }
}

[edit] Forth

variable a
: n>a (.) tuck a @ swap move a +! ;
: >a a @ c! 1 a +! ;
: encode ( c-addr +n a -- a n' )
dup a ! -rot over c@ 1 2swap 1 /string bounds ?do
over i c@ = if 1+
else n>a >a i c@ 1 then
loop n>a >a a @ over - ;
 
: digit? [char] 0 [ char 9 1+ literal ] within ;
: decode ( c-addr +n a -- a n' )
dup a ! 0 2swap bounds ?do
i c@ digit? if 10 * i c@ [char] 0 - + else
a @ over i c@ fill a +! 0 then
loop drop a @ over - ;

Example:

s" WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
here 1000 + encode here 2000 + decode cr 3 spaces type
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] Fortran

Works with: Fortran version 95 and later
program RLE
implicit none
 
integer, parameter :: bufsize = 100 ! Sets maximum size of coded and decoded strings, adjust as necessary
character(bufsize) :: teststr = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
character(bufsize) :: codedstr = "" Encode (Data (Index..Data'Last));
end;
end;
end if;
end Encode;
function Decode (Data : String) return String is
begin
if Data'
Length = 0 then
return "";
else
declare
Index : Integer := Data'First;
Count : Natural := 0;
begin
while Index , decodedstr = ""
 
call Encode(teststr, codedstr)
write(*,"(a)") trim(codedstr)
call Decode(codedstr, decodedstr)
write(*,"(a)") trim(decodedstr)
 
contains
 
subroutine Encode(instr, outstr)
character(*), intent(in)  :: instr
character(*), intent(out) :: outstr
character(8) :: tempstr = ""
character(26) :: validchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
integer :: a, b, c, i
 
if(verify(trim(instr), validchars) /= 0) then
outstr = "Invalid input"
return
end if
outstr = ""
c = 1
a = iachar(instr(1:1))
do i = 2, len(trim(instr))
b = iachar(instr(i:i))
if(a == b) then
c = c + 1
else
write(tempstr, "(i0)") c
outstr = trim(outstr) // trim(tempstr) // achar(a)
a = b
c = 1
end if
end do
write(tempstr, "(i0)") c
outstr = trim(outstr) // trim(tempstr) // achar(b)
end subroutine
 
subroutine Decode(instr, outstr)
character(*), intent(in)  :: instr
character(*), intent(out) :: outstr
character(26) :: validchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
integer :: startn, endn, n
 
outstr = ""
startn = 1
do while(startn < len(trim(instr)))
endn = scan(instr(startn:), validchars) + startn - 1
read(instr(startn:endn-1), "(i8)") n
outstr = trim(outstr) // repeat(instr(endn:endn), n)
startn = endn + 1
end do
end subroutine
end program

[edit] Go

Decoder kind of necessary to demonstrate task requirement that I can recreate the input.

package main
 
import "fmt"
 
// encoding scheme:
// encode to byte array
// byte value < 26 means single character: byte value + 'A'
// byte value 26..255 means (byte value - 24) copies of next byte
func rllEncode(s string) (r []byte) {
if s == "" {
return
}
c := s[0]
if c < 'A' || c > 'Z' {
panic("invalid")
}
nc := byte(1)
for i := 1; i < len(s); i++ {
d := s[i]
switch {
case d != c:
case nc < (255 - 24):
nc++
continue
}
if nc > 1 {
r = append(r, nc+24)
}
r = append(r, c-'A')
if d < 'A' || d > 'Z' {
panic("invalid")
}
c = d
nc = 1
}
if nc > 1 {
r = append(r, nc+24)
}
r = append(r, c-'A')
return
}
 
func main() {
s := "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
fmt.Println("source: ", len(s), "bytes:", s)
e := rllEncode(s)
fmt.Println("encoded:", len(e), "bytes:", e)
d := rllDecode(e)
fmt.Println("decoded:", len(d), "bytes:", d)
fmt.Println("decoded = source:", d == s)
}
 
func rllDecode(e []byte) string {
var c byte
var d []byte
for i := 0; i < len(e); i++ {
b := e[i]
if b < 26 {
c = 1
} else {
c = b - 24
i++
b = e[i]
}
for c > 0 {
d = append(d, b+'A')
c--
}
}
return string(d)
}

Output:

source:  67 bytes: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
encoded: 12 bytes: [36 22 1 36 22 27 1 48 22 1 38 22]
decoded: 67 bytes: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
decoded = source: true

[edit] Groovy

def rleEncode(text) {
def encoded = new StringBuilder()
(text =~ /(([A-Z])\2*)/).each { matcher ->
encoded.append(matcher[1].size()).append(matcher[2])
}
encoded.toString()
}
 
def rleDecode(text) {
def decoded = new StringBuilder()
(text =~ /([0-9]+)([A-Z])/).each { matcher ->
decoded.append(matcher[2] * Integer.parseInt(matcher[1]))
}
decoded.toString()
}

Test code

def text = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
def rleEncoded = rleEncode(text)
assert rleEncoded == '12W1B12W3B24W1B14W'
assert text == rleDecode(rleEncoded)
 
println "Original Text: $text"
println "Encoded Text: $rleEncoded"

Output:

Original Text: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
Encoded Text: 12W1B12W3B24W1B14W

[edit] Haskell

import Data.List (group)
 
-- Datatypes
type Encoded = [(Int, Char)] -- An encoded String with form [(times, char), ...]
type Decoded = String
 
-- Takes a decoded string and returns an encoded list of tuples
rlencode :: Decoded -> Encoded
rlencode = map (\g -> (length g, head g)) . group
 
-- Takes an encoded list of tuples and returns the associated decoded String
rldecode :: Encoded -> Decoded
rldecode = concatMap decodeTuple
where decodeTuple (n,c) = replicate n c
 
main :: IO ()
main = do
-- Get input
putStr "String to encode: "
input <- getLine
-- Output encoded and decoded versions of input
let encoded = rlencode input
decoded = rldecode encoded
putStrLn $ "Encoded: " ++ show encoded ++ "\nDecoded: " ++ show decoded

[edit] Icon and Unicon

procedure main(arglist)
 
s := "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
 
write(" s=",image(s))
write("s1=",image(s1 := rle_encode(s)))
write("s2=",image(s2 := rle_decode(s1)))
 
if s ~== s2 then write("Encode/Decode problem.")
else write("Encode/Decode worked.")
end
 
procedure rle_encode(s)
es := ""
s ? while c := move(1) do es ||:= *(move(-1),tab(many(c))) || c
return es
end
 
procedure rle_decode(es)
s := ""
es ? while s ||:= Repl(tab(many(&digits)),move(1))
return s
end
 
procedure Repl(n, c)
return repl(c,n)
end

Sample output:

 s="WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
s1="12W1B12W3B24W1B14W"
s2="WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
Encode/Decode worked.

[edit] J

Solution:

rle=: ;@(<@(":@#,{.);.1~ 1, 2 ~:/\ ])
rld=: '0123456789'&(-.~ #~ i. ".@:{ ' ' ,~ [)

Example:

   rle 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
12W1B12W3B24W1B14W
 
rld '12W1B12W3B24W1B14W'
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

Note that this implementation fails for the empty case. Here's a version that fixes that:

rle=: ;@(<@(":@#,{.);.1~ 2 ~:/\ (a.{.@-.{.),])

Other approaches include using rle ::(''"_) or rle^:(*@#) or equivalent variations on the original sentence.

[edit] Java

import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class RunLengthEncoding {
 
public static String encode(String source) {
StringBuffer dest = new StringBuffer();
for (int i = 0; i < source.length(); i++) {
int runLength = 1;
while (i+1 < source.length() && source.charAt(i) == source.charAt(i+1)) {
runLength++;
i++;
}
dest.append(runLength);
dest.append(source.charAt(i));
}
return dest.toString();
}
 
public static String decode(String source) {
StringBuffer dest = new StringBuffer();
Pattern pattern = Pattern.compile("[0-9]+|[a-zA-Z]");
Matcher matcher = pattern.matcher(source);
while (matcher.find()) {
int number = Integer.parseInt(matcher.group());
matcher.find();
while (number-- != 0) {
dest.append(matcher.group());
}
}
return dest.toString();
}
 
public static void main(String[] args) {
String example = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
System.out.println(encode(example));
System.out.println(decode("1W1B1W1B1W1B1W1B1W1B1W1B1W1B"));
}
}

Tests:

Library: JUnit
import static org.junit.Assert.assertEquals;
 
import org.junit.Test;
 
public class RunLengthEncodingTest {
private RLE = new RunLengthEncoding();
 
@Test
public void encodingTest() {
assertEquals("1W", RLE.encode("W"));
assertEquals("4W", RLE.encode("WWWW"));
assertEquals("5w4i7k3i6p5e4d2i1a",
RLE.encode("wwwwwiiiikkkkkkkiiippppppeeeeeddddiia"));
assertEquals("12B1N12B3N24B1N14B",
RLE.encode("BBBBBBBBBBBBNBBBBBBBBBBBBNNNBBBBBBBBBBBBBBBBBBBBBBBBNBBBBBBBBBBBBBB"));
assertEquals("12W1B12W3B24W1B14W",
RLE.encode("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"));
assertEquals("1W1B1W1B1W1B1W1B1W1B1W1B1W1B", RLE.encode("WBWBWBWBWBWBWB"));
 
}
 
@Test
public void decodingTest() {
assertEquals("W", RLE.decode("1W"));
assertEquals("WWWW", RLE.decode("4W"));
assertEquals("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW",
RLE.decode("12W1B12W3B24W1B14W"));
assertEquals("WBWBWBWBWBWBWB", RLE.decode("1W1B1W1B1W1B1W1B1W1B1W1B1W1B"));
assertEquals("WBWBWBWBWBWBWB", RLE.decode("1W1B1W1B1W1B1W1B1W1B1W1B1W1B"));
 
}
}

[edit] JavaScript

Here's an encoding method that walks the input string character by character

function encode(input) {
var encoding = [];
var prev, count, i;
for (count = 1, prev = input[0], i = 1; i < input.length; i++) {
if (input[i] != prev) {
encoding.push([count, prev]);
count = 1;
prev = input[i];
}
else
count ++;
}
encoding.push([count, prev]);
return encoding;
}
Here's an encoding method that uses a regular expression to grab the character runs (
Works with: JavaScript version 1.6
for the forEach method)
function encode_re(input) {
var encoding = [];
input.match(/(.)\1*/g).forEach(function(substr){ encoding.push([substr.length, substr[0]]) });
return encoding;
}

And to decode (see Repeating a string)

function decode(encoded) {
var output = "";
encoded.forEach(function(pair){ output += new Array(1+pair[0]).join(pair[1]) })
return output;
}

[edit] K

rle: {,/($-':i,#x),'x@i:&1,~=':x}
Translation of: J
rld: {d:"0123456789"; ,/(.(d," ")@d?/:x)#'x _dvl d}

Example:

  rle "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
"12W1B12W3B24W1B14W"
rld "12W1B12W3B24W1B14W"
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"


[edit] Lasso

define rle(str::string)::string => {
local(orig = #str->values->asCopy,newi=array, newc=array, compiled=string)
while(#orig->size) => {
if(not #newi->size) => {
#newi->insert(1)
#newc->insert(#orig->first)
#orig->remove(1)
else
if(#orig->first == #newc->last) => {
#newi->get(#newi->size) += 1
else
#newi->insert(1)
#newc->insert(#orig->first)
}
#orig->remove(1)
}
}
loop(#newi->size) => {
#compiled->append(#newi->get(loop_count)+#newc->get(loop_count))
}
return #compiled
}
define rlde(str::string)::string => {
local(o = string)
while(#str->size) => {
loop(#str->size) => {
if(#str->isualphabetic(loop_count)) => {
if(loop_count == 1) => {
#o->append(#str->get(loop_count))
#str->removeLeading(#str->get(loop_count))
loop_abort
}
local(num = integer(#str->substring(1,loop_count)))
#o->append(#str->get(loop_count)*#num)
#str->removeLeading(#num+#str->get(loop_count))
loop_abort
}
}
}
return #o
}
//Tests:
rle('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW')
rle('dsfkjhhkdsjfhdskhshdjjfhhdlsllw')
 
rlde('12W1B12W3B24W1B14W')
rlde('1d1s1f1k1j2h1k1d1s1j1f1h1d1s1k1h1s1h1d2j1f2h1d1l1s2l1w')
Output:
12W1B12W3B24W1B14W
1d1s1f1k1j2h1k1d1s1j1f1h1d1s1k1h1s1h1d2j1f2h1d1l1s2l1w


WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
dsfkjhhkdsjfhdskhshdjjfhhdlsllw

[edit] Liberty BASIC

mainwin 100 20
 
'In$ ="aaaaaaaaaaaaaaaaaccbbbbbbbbbbbbbbba" ' testing...
In$ ="WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
' Out$= "12W1B12W3B24W1B14W"
 
Out$ =Encoded$( In$)
Inv$ =Decoded$( Out$)
 
print " Supplied string ="; In$
Print " RLE version ="; Out$
print " Decoded back to ="; Inv$
 
end
 
function Encoded$( k$)
r$ =""
r =1
for i =2 to len( k$)
prev$ =mid$( k$, i -1, 1)
c$ =mid$( k$, i, 1)
if c$ =prev$ then ' entering a run of this character
r =r +1
else ' it occurred only once
r$ =r$ +str$( r) +prev$
r =1
end if
next i
r$ =r$ +str$( r) +c$
Encoded$ =r$
end function
 
function Decoded$( k$)
r$ =""
v =0
for i =1 to len( k$)
i$ =mid$( k$, i, 1)
if instr( "0123456789", i$) then
v =v *10 +val( i$)
else
for m =1 to v
r$ =r$ +i$
next m
v =0
end if
next i
Decoded$ =r$
end function

[edit]

to encode :str [:out "||] [:count 0] [:last first :str]
if empty? :str [output (word :out :count :last)]
if equal? first :str :last [output (encode bf :str :out :count+1 :last)]
output (encode bf :str (word :out :count :last) 1 first :str)
end
 
to reps :n :w
output ifelse :n = 0 ["||] [word :w reps :n-1 :w]
end
to decode :str [:out "||] [:count 0]
if empty? :str [output :out]
if number? first :str [output (decode bf :str :out 10*:count + first :str)]
output (decode bf :str word :out reps :count first :str)
end
 
make "foo "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
make "rle encode :foo
show equal? :foo decode :rle

[edit] Lua

local C, Ct, R, Cf, Cc = lpeg.C, lpeg.Ct, lpeg.R, lpeg.Cf, lpeg.Cc
astable = Ct(C(1)^0)
 
function compress(t)
local ret = {}
for i, v in ipairs(t) do
if t[i-1] and v == t[i-1] then
ret[#ret - 1] = ret[#ret - 1] + 1
else
ret[#ret + 1] = 1
ret[#ret + 1] = v
end
end
t = ret
return table.concat(ret)
end
q = io.read()
print(compress(astable:match(q)))
 
undo = Ct((Cf(Cc"0" * C(R"09")^1, function(a, b) return 10 * a + b end) * C(R"AZ"))^0)
 
function decompress(s)
t = undo:match(s)
local ret = ""
for i = 1, #t - 1, 2 do
for _ = 1, t[i] do
ret = ret .. t[i+1]
end
end
return ret
end

[edit] Mathematica

Custom functions using Map, Apply, pure functions, replacing using pattern matching, delayed rules and other functions:

RunLengthEncode[input_String]:=StringJoin@@Sequence@@@({ToString @Length[#],First[#]}&/@Split[Characters[input]])
RunLengthDecode[input_String]:=StringJoin@@ConstantArray@@@Reverse/@Partition[(Characters[input]/.(ToString[#]->#&/@Range[0,9]))//.{x___,i_Integer,j_Integer,y___}:>{x,10i+j,y},2]

Example:

mystring="WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
RunLengthEncode[mystring]
RunLengthDecode[%]
%==mystring

gives back:

12W1B12W3B24W1B14W
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
True

An alternate solution:

RunLengthEncode[s_String] := StringJoin[
{ToString[Length[#]] <> First[#]} & /@ Split[StringSplit[s, ""]]
]
 
RunLengthDecode[s_String] := StringJoin[
Table[#[[2]], {ToExpression[#[[1]]]}] & /@
Partition[StringSplit[s, x : _?LetterQ :> x], 2]
]

This second encode function is adapted from the MathWorld example.

[edit] Maxima

rle(a) := block(
[n: slength(a), b: "", c: charat(a, 1), k: 1],
for i from 2 thru n do
if cequal(c, charat(a, i)) then k: k + 1 else (b: sconcat(b, k, c), c: charat(a, i), k: 1),
sconcat(b, k, c)
)$
 
rle("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW");
"12W1B12W3B24W1B14W"

[edit] MMIX

	LOC	Data_Segment
GREG @
Buf OCTA 0,0,0,0 integer print buffer
Char BYTE 0,0 single char print buffer
task BYTE "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWW"
BYTE "WWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW",0
len GREG @-1-task

// task should become this

tEnc BYTE "12W1B12W3B24W1B14W",0
 
GREG @
// tuple array for encoding purposes
// each tuple is a tetra (4 bytes long or 2 wydes long)
// (c,l) in which c is a char and l = number of chars c
// high wyde of the tetra contains the char
// low wyde .. .. .. contains the length
RLE TETRA 0
 
LOC #100 locate program
GREG @
// print number to stdout
// destroys input arg $3 !
Prt64 LDA $255,Buf+23 points to LSD
// do
2H DIV $3,$3,10 (N,R) = divmod (N,10)
GET $13,rR get remainder
INCL $13,'0' convert to ascii
STBU $13,$255 store ascii digit
BZ $3,3F
SUB $255,$255,1 move pointer down
JMP 2B While N !=0
3H TRAP 0,Fputs,StdOut print number to standard out
GO $127,$127,0 return
 
GREG @
// print char to stdout
PChar LDA $255,Char
STBU $4,$255
TRAP 0,Fputs,StdOut
GO $127,$127,0
 
GREG @
// encode routine
// $0 string pointer
// $1 index var
// $2 pointer to tuple array
// $11 temp var tuple
Encode SET $1,0 initialize index = 0
SET $11,0 postion in string = 0
LDBU $3,$0,$1 get first char
ADDU $6,$3,0 remember it
do
1H INCL $1,1 repeat incr index
LDBU $3,$0,$1 get a char
BZ $3,2F if EOS then finish
CMP $7,$3,$6
PBZ $7,1B while new == old
XOR $4,$4,$4 new tuple
ADDU $4,$6,0
SLU $4,$4,16 old char to tuple -> (c,_)
SUB $7,$1,$11 length = index - previous position
ADDU $11,$1,0 incr position
OR $4,$4,$7 length l to tuple -> (c,l)
STT $4,$2 put tuple in array
ADDU $6,$3,0 remember new char
INCL $2,4 incr 'tetra' pointer
JMP 1B loop
2H XOR $4,$4,$4 put last tuple in array
ADDU $4,$6,0
SLU $4,$4,16
SUB $7,$1,$11
ADDU $11,$1,0
OR $4,$4,$7
STT $4,$2
GO $127,$127,0 return
 
GREG @
Main LDA $0,task pointer uncompressed string
LDA $2,RLE pointer tuple array
GO $127,Encode encode string
LDA $2,RLE points to start tuples
SET $5,#ffff mask for extracting length
1H LDTU $3,$2 while not End of Array
BZ $3,2F
SRU $4,$3,16 char = (c,_)
AND $3,$3,$5 length = (_,l)
GO $127,Prt64 print length
GO $127,PChar print char
INCL $2,4 incr tuple pointer
JMP 1B wend
2H SET $4,#a print NL
GO $127,PChar

// decode using the RLE tuples

LDA $2,RLE pointer tuple array
SET $5,#ffff mask
1H LDTU $3,$2 while not End of Array
BZ $3,2F
SRU $4,$3,16 char = (c,_)
AND $3,$3,$5 length = (_,l)
// for (i=0;i<length;i++) {
3H GO $127,PChar print a char
SUB $3,$3,1
PBNZ $3,3B
INCL $2,4
JMP 1B }
2H SET $4,#a print NL
GO $127,PChar
TRAP 0,Halt,0 EXIT

Example run encode --> decode:

~/MIX/MMIX/Rosetta> mmix rle
12W1B12W3B24W1B14W
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] Objeck

use RegEx;
 
class RunLengthEncoding {
function : Main(args : String[]) ~ Nil {
input := "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
 
encoded := Encode(input);
"encoding: {$encoded}"->PrintLine();
test := encoded->Equals("12W1B12W3B24W1B14W");
"encoding match: {$test}"->PrintLine();
 
decoded := Decode(encoded);
test := input->Equals(decoded);
"decoding match: {$test}"->PrintLine();
}
 
function : Encode(source : String) ~ String {
dest := "";
each(i : source) {
runLength := 1;
while(i+1 < source->Size() & source->Get(i) = source->Get(i+1)) {
runLength+= 1;
i+= 1;
};
dest->Append(runLength);
dest->Append(source->Get(i));
};
 
return dest;
}
 
function : Decode(source : String) ~ String {
output := "";
regex := RegEx->New("[0-9]+|([A-Z]|[a-z])");
found := regex->Find(source);
count : Int;
each(i : found) {
if(i % 2 = 0) {
count := found->Get(i)->As(String)->ToInt();
}
else {
letter := found->Get(i)->As(String);
while(count <> 0) {
output->Append(letter);
count -= 1;
};
};
};
 
return output;
}
}
encoding: 12W1B12W3B24W1B14W
encoding match: true
decoding match: true

[edit] Objective-C

See Run-length encoding/Objective-C

[edit] OCaml

let encode str =
let len = String.length str in
let rec aux i acc =
if i >= len then List.rev acc
else
let c1 = str.[i] in
let rec aux2 j =
if j >= len then (c1, j-i)
else
let c2 = str.[j] in
if c1 = c2
then aux2 (j+1)
else (c1, j-i)
in
let (c,n) as t = aux2 (i+1) in
aux (i+n) (t::acc)
in
aux 0 []
;;
 
let decode lst =
let l = List.map (fun (c,n) -> String.make n c) lst in
(String.concat "" l)
let () =
let e = encode "aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa" in
List.iter (fun (c,n) ->
Printf.printf " (%c, %d);\n" c n;
) e;
print_endline (decode [('a', 5); ('h', 6); ('m', 7); ('u', 1); ('i', 7); ('a', 6)]);
;;
Using regular expressions
#load "str.cma";;
 
open Str
 
let encode =
global_substitute (Str.regexp "\\(.\\)\\1*")
(fun s -> string_of_int (String.length (matched_string s)) ^
matched_group 1 s)
 
let decode =
global_substitute (Str.regexp "\\([0-9]+\\)\\([^0-9]\\)")
(fun s -> String.make (int_of_string (matched_group 1 s))
(matched_group 2 s).[0])
 
let () =
print_endline (encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW");
print_endline (decode "12W1B12W3B24W1B14W");

[edit] Oz

declare
fun {RLEncode Xs}
for G in {Group Xs} collect:C do
{C {Length G}#G.1}
end
end
 
fun {RLDecode Xs}
for C#Y in Xs append:Ap do
{Ap {Replicate Y C}}
end
end
 
%% Helpers
%% e.g. "1122" -> ["11" "22"]
fun {Group Xs}
case Xs of nil then nil
[] X|Xr then
Ys Zs
{List.takeDropWhile Xr fun {$ W} W==X end ?Ys ?Zs}
in
(X|Ys) | {Group Zs}
end
end
%% e.g. 3,4 -> [3 3 3 3]
fun {Replicate X N}
case N of 0 then nil
else X|{Replicate X N-1}
end
end
 
Data = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
Enc = {RLEncode Data}
in
{System.showInfo Data}
{Show Enc}
{System.showInfo {RLDecode Enc}}

[edit] PARI/GP

rle(s)={
if(s=="", return(s));
my(v=Vec(s),cur=v[1],ct=1,out="");
v=concat(v,99); \\ sentinel
for(i=2,#v,
if(v[i]==cur,
ct++
,
out=Str(out,ct,cur);
cur=v[i];
ct=1
)
);
out
};
elr(s)={
if(s=="", return(s));
my(v=Vec(s),ct=eval(v[1]),out="");
v=concat(v,99); \\ sentinel
for(i=2,#v,
if(v[i]>="0" && v[i]<="9",
ct=10*ct+eval(v[i])
,
for(j=1,ct,out=Str(out,v[i]));
ct=0
)
);
out
};
rle("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
elr(%)

Output:

%1 = "12W1B12W3B24W1B14W"

%2 = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"

[edit] Pascal

Program RunLengthEncoding(output);
 
procedure encode(s: string; var counts: array of integer; var letters: string);
var
i, j: integer;
begin
j := 0;
letters := '';
if length(s) > 0 then
begin
j := 1;
letters := letters + s[1];
counts[1] := 1;
for i := 2 to length(s) do
if s[i] = letters[j] then
inc(counts[j])
else
begin
inc(j);
letters := letters + s[i];
counts[j] := 1;
end;
end;
end;
 
procedure decode(var s: string; counts: array of integer; letters: string);
var
i, j: integer;
begin
s := '';
for i := 1 to length(letters) do
for j := 1 to counts[i] do
s := s + letters[i];
end;
 
var
s: string;
counts: array of integer;
letters: string;
i: integer;
begin
s := 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWW';
writeln(s);
setlength(counts, length(s));
encode(s, counts, letters);
for i := 1 to length(letters) - 1 do
write(counts[i], ' * ', letters[i], ', ');
writeln(counts[length(letters)], ' * ', letters[length(letters)]);
decode(s, counts, letters);
writeln(s);
end.

Output:

:> ./RunLengthEncoding
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWW
12 * W, 1 * B, 12 * W, 3 * B, 24 * W, 1 * B, 13 * W
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWW

[edit] Perl

 
# functional approach (return the encoded or decoded string)
sub encode {
(my $str = shift) =~ s {(.)(\1*)} {length($&).$1}gse;
return $str; }
sub decode {
(my $str = shift) =~ s {(\d+)(.)} {$2 x $1}gse;
return $str;}
 
# procedural approach (modify the argument in place)
sub encode {
$_[0] =~ s {(.)(\1*)} {length($&).$1}gse; }
sub decode {
$_[0] =~ s {(\d+)(.)} {$2 x $1}gse; }
 

The following modified versions of the previous one, encode/decode a bytes sequence in a way compatible with the functions of the C version.

sub encode
{my $str = shift;
$str =~ s {(.)(\1{0,254})} {pack("C",(length($2) + 1)) . $1 }gse;
return $str;}
 
sub decode
{
my @str = split //, shift;
my $r = "";
foreach my $i (0 .. scalar(@str)/2-1) {
$r .= $str[2*$i + 1] x unpack("C", $str[2*$i]);
}
return $r;
}

[edit] Perl 6

This currently depend on a workaround to pass the match object into the replacement closure as an explicit argument. This is supposed to happen automatically.

Note also that Perl 6 regexes don't care about unquoted whitespace, and that backrefs count from 0, not from 1.

sub encode($str) { $str.subst(/(.) $0*/, -> $/ { $/.chars ~ $0 ~ ' ' }, :g); }
 
sub decode($str) { $str.subst(/(\d+) (.) ' '/, -> $/ {$1 x $0}, :g); }
 
my $e = encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW');
say $e;
say decode($e);

Output:

12W 1B 12W 3B 24W 1B 14W 
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] PHP

<?php
function encode($str) {
return preg_replace('/(.)\1*/e', 'strlen($0) . $1', $str);
}
 
function decode($str) {
return preg_replace('/(\d+)(\D)/e', 'str_repeat($2, $1)', $str);
}
 
echo encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'), "\n";
echo decode('12W1B12W3B24W1B14W'), "\n";
?>

[edit] PicoLisp

(de encode (Str)
(pack
(make
(for (Lst (chop Str) Lst)
(let (N 1 C)
(while (= (setq C (pop 'Lst)) (car Lst))
(inc 'N) )
(link N C) ) ) ) ) )
 
(de decode (Str)
(pack
(make
(let N 0
(for C (chop Str)
(if (>= "9" C "0")
(setq N (+ (format C) (* 10 N)))
(do N (link C))
(zero N) ) ) ) ) ) )
 
(and
(prinl "Data: " "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
(prinl "Encoded: " (encode @))
(prinl "Decoded: " (decode @)) )

Output:

Data:    WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
Encoded: 12W1B12W3B24W1B14W
Decoded: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] PL/I

declare (c1, c2) character (1);
declare run_length fixed binary;
declare input file;
 
open file (input) title ('/RLE.DAT,type(text),recsize(20000)');
on endfile (input) go to epilog;
 
get file (input) edit (c1) (a(1));
run_length = 1;
do forever;
get file (input) edit (c2) (a(1));
if c1 = c2 then
run_length = run_length + 1;
else
do; put edit (trim(run_length), c1) (a); run_length=1; end;
c1 = c2;
end;
epilog:
put edit (trim(run_length), c1) (a);
put skip;
 
 
/* The reverse of the above operation: */
declare c character (1);
declare i fixed binary;
declare new file;
 
open file (new) title ('/NEW.DAT,type(text),recsize(20000)');
on endfile (new) stop;
do forever;
run_length = 0;
do forever;
get file (new) edit (c) (a(1));
if index('0123456789', c) = 0 then leave;
run_length = run_length*10 + c;
end;
put edit ((c do i = 1 to run_length)) (a);
end;

[edit] PowerBASIC

This version can handle any arbitrary string that doesn't contain numbers (not just letters). (A flag value could be added which would allow the inclusion of any character, but such a flag isn't in this example.)

FUNCTION RLDecode (i AS STRING) AS STRING
DIM Loop0 AS LONG, rCount AS STRING, outP AS STRING, m AS STRING
 
FOR Loop0 = 1 TO LEN(i)
m = MID$(i, Loop0, 1)
SELECT CASE m
CASE "0" TO "9"
rCount = rCount & m
CASE ELSE
IF LEN(rCount) THEN
outP = outP & STRING$(VAL(rCount), m)
rCount=""
ELSE
outP = outP & m
END IF
END SELECT
NEXT
FUNCTION = outP
END FUNCTION
 
FUNCTION RLEncode (i AS STRING) AS STRING
DIM tmp1 AS STRING, tmp2 AS STRING, outP AS STRING
DIM Loop0 AS LONG, rCount AS LONG
 
tmp1 = MID$(i, 1, 1)
tmp2 = tmp1
rCount = 1
 
FOR Loop0 = 2 TO LEN(i)
tmp1 = MID$(i, Loop0, 1)
IF tmp1 <> tmp2 THEN
outP = outP & TRIM$(STR$(rCount)) & tmp2
tmp2 = tmp1
rCount = 1
ELSE
INCR rCount
END IF
NEXT
 
outP = outP & TRIM$(STR$(rCount))
outP = outP & tmp2
FUNCTION = outP
END FUNCTION
 
FUNCTION PBMAIN () AS LONG
DIM initial AS STRING, encoded AS STRING, decoded AS STRING
initial = INPUTBOX$("Type something.")
encoded = RLEncode(initial)
decoded = RLDecode(encoded)
'in PB/Win, "?" = MSGBOX; in PB/DOS & PB/CC. "?" = PRINT
 ? initial & $CRLF & encoded & $CRLF & decoded
END FUNCTION

Outputs are similar to those in BASIC, above.

[edit] PowerShell

function Compress-RLE ($s) {
$re = [regex] '(.)\1*'
$ret = ""
foreach ($m in $re.Matches($s)) {
$ret += $m.Length
$ret += $m.Value[0]
}
return $ret
}
 
function Expand-RLE ($s) {
$re = [regex] '(\d+)(.)'
$ret = ""
foreach ($m in $re.Matches($s)) {
$ret += [string] $m.Groups[2] * [int] [string] $m.Groups[1]
}
return $ret
}

Output:

PS> Compress-RLE "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
12W1B12W3B24W1B14W
PS> Expand-RLE "12W1B12W3B24W1B14W"
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] Prolog

Works with SWI-Prolog.
This code is inspired from a code found here : http://groups.google.com/group/comp.lang.prolog/browse_thread/thread/b053ea2512e8b350 (author : Pascal J. Bourguignon).

% the test
run_length :-
L = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW",
writef('encode %s\n', [L]),
encode(L, R),
writeln(R), nl,
writef('decode %w\n', [R]),
decode(R, L1),
writeln(L1).
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% encode
%
% translation
% from
% "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
% to
% "12W1B12W3B24W1B14W"
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
encode(In, Out) :-
% Because of the special management of the "strings" by Prolog
( is_list(In) -> I = In; string_to_list(In, I)),
packList(I, R1),
dcg_packList2List(R1,R2, []),
string_to_list(Out,R2).
 
 
 
dcg_packList2List([[N, V]|T]) -->
{ number_codes(N, LN)},
LN,
[V],
dcg_packList2List(T).
 
dcg_packList2List([]) --> [].
 
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% decode
%
% translation
% from
% "12W1B12W3B24W1B14W"
% to
% "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
decode(In, Out) :-
% Because of the special management of the "strings" by Prolog
( is_list(In) -> I = In; string_to_list(In, I)),
dcg_List2packList(I, R1, []),
packList(L1, R1),
string_to_list(Out, L1).
 
 
dcg_List2packList([H|T]) -->
{code_type(H, digit)},
parse_number([H|T], 0).
 
dcg_List2packList([]) --> [].
 
 
parse_number([H|T], N) -->
{code_type(H, digit), !,
N1 is N*10 + H - 48 },
parse_number(T, N1).
 
parse_number([H|T], N) -->
[[N, H]],
dcg_List2packList(T).
 
 
% use of library clpfd allows packList(?In, ?Out) to works
% in both ways In --> Out and In <-- Out.
 
:- use_module(library(clpfd)).
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% ?- packList([a,a,a,b,c,c,c,d,d,e], L).
% L = [[3,a],[1,b],[3,c],[2,d],[1,e]] .
% ?- packList(R, [[3,a],[1,b],[3,c],[2,d],[1,e]]).
% R = [a,a,a,b,c,c,c,d,d,e] .
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
packList([],[]).
 
packList([X],[[1,X]]) :- !.
 
 
packList([X|Rest],[XRun|Packed]):-
run(X,Rest, XRun,RRest),
packList(RRest,Packed).
 
 
run(Var,[],[1,Var],[]).
 
run(Var,[Var|LRest],[N1, Var],RRest):-
N #> 0,
N1 #= N + 1,
run(Var,LRest,[N, Var],RRest).
 
 
run(Var,[Other|RRest], [1,Var],[Other|RRest]):-
dif(Var,Other).

Output :

 ?- run_length.
encode WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
12W1B12W3B24W1B14W

decode 12W1B12W3B24W1B14W
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
true .

[edit] Pure

using system;
 
encode s = strcat $ map (sprintf "%d%s") $ encode $ chars s with
encode [] = [];
encode xs@(x:_) = (#takewhile (==x) xs,x) : encode (dropwhile (==x) xs);
end;
 
decode s = strcat [c | n,c = parse s; i = 1..n] with
parse s::string = regexg item "([0-9]+)(.)" REG_EXTENDED s 0;
item info = val (reg 1 info!1), reg 2 info!1;
end;
 
let s = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW";
let r = encode s; // "12W1B12W3B24W1B14W"
decode r;

[edit] PureBasic

Translation of: PowerBasic
with some optimations to use pointers instead of string functions. According to the task description it works with uppercase A - Z. In this implementation it also functions with all characters that are non-digits and whose value is non-zero.
Procedure.s RLDecode(toDecode.s)
Protected.s repCount, output, currChar, tmp
Protected *c.Character = @toDecode
 
While *c\c <> #Null
currChar = Chr(*c\c)
Select *c\c
Case '0' To '9'
repCount + currChar
Default
If repCount
tmp = Space(Val(repCount))
ReplaceString(tmp, " ", currChar, #PB_String_InPlace)
output + tmp
repCount = ""
Else
output + currChar
EndIf
EndSelect
*c + SizeOf(Character)
Wend
 
ProcedureReturn output
EndProcedure
 
Procedure.s RLEncode(toEncode.s)
Protected.s currChar, prevChar, output
Protected repCount
Protected *c.Character = @toEncode
 
prevChar = Chr(*c\c)
repCount = 1
 
*c + SizeOf(Character)
While *c\c <> #Null
currChar = Chr(*c\c)
If currChar <> prevChar
output + Str(repCount) + prevChar
prevChar = currChar
repCount = 1
Else
repCount + 1
EndIf
*c + SizeOf(Character)
Wend
 
output + Str(repCount)
output + prevChar
ProcedureReturn output
EndProcedure
 
If OpenConsole()
Define initial.s, encoded.s, decoded.s
 
Print("Type something: ")
initial = Input()
encoded = RLEncode(initial)
decoded = RLDecode(encoded)
PrintN(initial)
PrintN(RLEncode(initial))
PrintN(RLDecode(encoded))
 
Print(#CRLF$ + #CRLF$ + "Press ENTER to exit")
Input()
CloseConsole()
EndIf

Sample output:

Type something: WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWW
WWW
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
12W1B12W3B24W1B14W
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] Python

def encode(input_string):
count = 1
prev = ''
lst = []
for character in input_string:
if character != prev:
if prev:
entry = (prev,count)
lst.append(entry)
#print lst
count = 1
prev = character
else:
count += 1
else:
entry = (character,count)
lst.append(entry)
return lst
 
 
def decode(lst):
q = ""
for character, count in lst:
q += character * count
return q
 
#Method call
encode("aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa")
decode([('a', 5), ('h', 6), ('m', 7), ('u', 1), ('i', 7), ('a', 6)])

Functional

Works with: Python version 2.4
from itertools import groupby
def encode(input_string):
return [(len(list(g)), k) for k,g in groupby(input_string)]
 
def decode(lst):
return ''.join(c * n for n,c in lst)
 
encode("aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa")
decode([(5, 'a'), (6, 'h'), (7, 'm'), (1, 'u'), (7, 'i'), (6, 'a')])


By regular expression
The simplified input range of only uppercase characters allows a simple regular expression to be applied repeatedly for encoding, and another for decoding:

from re import sub
 
def encode(text):
'''
Doctest:
>>> encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW')
'12W1B12W3B24W1B14W'
'''

return sub(r'(.)\1*', lambda m: str(len(m.group(0))) + m.group(1),
text)
 
def decode(text):
'''
Doctest:
>>> decode('12W1B12W3B24W1B14W')
'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
'''

return sub(r'(\d+)(\D)', lambda m: m.group(2) * int(m.group(1)),
text)
 
textin = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
assert decode(encode(textin)) == textin

[edit] R

R has a built-in function, rle, for run length encoding. This modification allows input and output in the forms specified above.

runlengthencoding <- function(x)
{
splitx <- unlist(strsplit(input, ""))
rlex <- rle(splitx)
paste(with(rlex, as.vector(rbind(lengths, values))), collapse="")
}
 
input <- "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
runlengthencoding(input)

Similarly, inverse.rle provides decompression after a run length encoding.

inverserunlengthencoding <- function(x)
{
lengths <- as.numeric(unlist(strsplit(output, "[[:alpha:]]")))
values <- unlist(strsplit(output, "[[:digit:]]"))
values <- values[values != ""]
uncompressed <- inverse.rle(list(lengths=lengths, values=values))
paste(uncompressed, collapse="")
}
 
output <- "12W1B12W3B24W1B14W"
inverserunlengthencoding(output)

[edit] Racket

 
#lang racket
(define (encode str)
(regexp-replace* #px"(.)\\1*" str (λ (m c) (~a (string-length m) c))))
(define (decode str)
(regexp-replace* #px"([0-9]+)(.)" str (λ (m n c) (make-string (string->number n) (string-ref c 0)))))
 

[edit] REXX

The task (input) rule was relaxed a bit as this program accepts upper- and lowercase input.

[edit] Encoding

/*REXX program  encodes  a string  by using a  run-length  scheme.      */
parse arg x . /*normally, input would be a file*/
/*═══ arg x . ═══*/ /*◄── use if X must be uppercase.*/
def= 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
if x='' then x=def /*No input? Then use the default.*/
Lx=length(x) /*get the length of the X string.*/
y= /*Y is the output string (so far)*/
do j=1 by 0 to Lx /*J is incremented (below). */
c=substr(x,j,1) /*pick a character, check for err*/
if \datatype(c,'M') then do; say "error!: data isn't alphabetic:" c; exit 13; end
r=0 /*R is NOT the number of chars. */
do k=j+1 to Lx while substr(x,k,1)==c
r=r+1 /*R is a replication count. */
end /*k*/
j=j+1+r /*modify (add to) the do index. */
if r==0 then r= /*don't use R if R is zero.*/
Y = Y || r || c /*add it to the encoded string.*/
end /*j*/
 
say ' input=' x
say 'encoded=' y
/*stick a fork in it, we're done.*/

Output when using default input:

 input= WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
output= 11WB11W2B23WB13W

[edit] Decoding

/*REXX program  decodes a string by using a  run-length  scheme.        */
parse arg x . /*normally, input would be a file*/
if x=='' then x='11WB11W2B23WB13W' /*No input? Then use the default*/
Lx=length(x) /*get the length of the X string.*/
y= /*Y is the output string (so far)*/
do j=1 by 0 to Lx /*warning! J is modified below.*/
c=substr(x,j,1)
if \datatype(c,'W') then do /*a loner char, simply add to OUT*/
y=y || c; j=j+1; iterate /*j*/
end
d=1
do k=j+1 to Lx while datatype(substr(x,k,1),'w') /*look for #end*/
d=d+1 /*D is the number of digs so far.*/
end /*k*/
 
n=substr(x,j,d)+1 /*D is length of encoded number.*/
y=y || copies(substr(x,k,1),n) /*N is now the number of chars. */
j=j+1+d /*increment the DO loop index. */
end /*j*/
 
say ' input=' x
say 'decoded=' y
/*stick a fork in it, we're done.*/

Output when using the default input:

  input= 11WB11W2B23WB13W
decoded= WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] Ruby

def encode(string)
string.scan(/(.)(\1*)/).collect do |char, repeat|
[1 + repeat.length, char]
end.join
end
 
def decode(string)
string.scan(/(\d+)(\D)/).collect {|length, char| char * length.to_i}.join
end

This usage also seems to be idiomatic, and perhaps less cryptic:

def encode(string)
string.scan(/(.)(\1*)/).inject("") do |encoding, (char, repeat)|
encoding << (1 + repeat.length).to_s << char
end
end
 
def decode(string)
string.scan(/(\d+)(\D)/).inject("") do |decoding, (length, char)|
decoding << char * length.to_i
end
end


By regular expression
The simplified input range of only uppercase characters allows a simple regular expression to be applied repeatedly for encoding, and another for decoding:

def encode(str)
str.gsub(/(.)\1*/) {$&.length.to_s + $1}
end
 
def decode(str)
str.gsub(/(\d+)(\D)/) {$2 * $1.to_i}
end

Test:

orig = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
p enc = encode(orig)
p dec = decode(enc)
puts "success!" if dec == orig
Output:
"12W1B12W3B24W1B14W"
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
success!

[edit] Run BASIC

string$ = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
beg = 1
i = 1
[loop]
s$ = mid$(string$,beg,1)
while mid$(string$,i,1) = s$
i = i + 1
wend
press$ = press$ ; i-beg;s$
beg = i
if i < len(string$) then goto [loop]
print "Compressed:";press$
 
beg = 1
i = 1
[expand]
while mid$(press$,i,1) <= "9"
i = i + 1
wend
for j = 1 to val(mid$(press$,beg, i - beg))
expand$ = expand$ + mid$(press$,i,1)
next j
i = i + 1
beg = i
if i < len(press$) then goto [expand]
print " Expanded:";expand$
Output:
Compressed:12W1B12W3B24W1B14W
  Expanded:WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] Scala

Care is taken to use StringBuilder for performance reasons.

def encode(s: String) = (1 until s.size).foldLeft((1, s(0), new StringBuilder)) {
case ((len, c, sb), index) if c != s(index) => sb.append(len); sb.append(c); (1, s(index), sb)
case ((len, c, sb), _) => (len + 1, c, sb)
} match {
case (len, c, sb) => sb.append(len); sb.append(c); sb.toString
}
 
def decode(s: String) = {
val sb = new StringBuilder
val Code = """(\d+)([A-Z])""".r
for (Code(len, c) <- Code findAllIn s) sb.append(c * len.toInt)
sb.toString
}

A simpler (?) encoder:

def encode(s:String) = {
s.foldLeft((0,s(0),""))( (t,c) => t match {case (i,p,s) => if (p==c) (i+1,p,s) else (1,c,s+i+p)})
match {case (i,p,s) => s+i+p}
}

To make it faster (it's also faster than the longer implementation above) just replace "" with new StringBuilder and s+i+p with {s.append(i);s.append(p)}

[edit] Scheme

(define (run-length-decode v)
(apply string-append (map (lambda (p) (make-string (car p) (cdr p))) v)))
 
(define (run-length-encode s)
(let ((n (string-length s)))
(let loop ((i (- n 2)) (c (string-ref s (- n 1))) (k 1) (v '()))
(if (negative? i) (cons (cons k c) v)
(let ((x (string-ref s i)))
(if (char=? c x) (loop (- i 1) c (+ k 1) v)
(loop (- i 1) x 1 (cons (cons k c) v))))))))
 
(run-length-encode "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW")
; ((12 . #\W) (1 . #\B) (12 . #\W) (3 . #\B) (24 . #\W) (1 . #\B) (14 . #\W))
(run-length-decode '((12 . #\W) (1 . #\B) (12 . #\W) (3 . #\B) (24 . #\W) (1 . #\B) (14 . #\W)))
; "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"

[edit] sed

The encode script:

 
/^$/ b
:start
/^[0-9]/ b
s/^/1/
:loop
h
/^9+([^0-9])\1+/ {
s/^(9+).*/0\1/
y/09/10/
G
s/^(.+)\n[0-9]+.(.*)/\1\2/
b loop }
/^[0-9]*[0-8]([^0-9])\1+/ {
s/^[0-9]*([0-8]).*/\1/
y/012345678/123456789/
G
s/^(.)\n([0-9]*)[0-8].(.*)/\2\1\3/
b loop }
/^[0-9]+9+([^0-9])\1+/ {
s/^[0-9]*([0-8]9+).*/\1/
y/0123456789/1234567890/
G
s/^(.+)\n([0-9]*)[0-8]9+.(.*)/\2\1\3/
b loop }
s/^([0-9]+.)(.*)/\2\1/
b start
 

The decode script:

 
/^$/ b
:start
/^[^0-9]/ b
:loop
/^1[^0-9]/ {
s/^1(.)(\1*)(.*)/\3\1\2/
b start }
h
/^[0-9]*[1-9][^0-9]/ {
s/^[0-9]*([1-9]).*/\1/
y/123456789/012345678/
G
s/^([0-8])\n([0-9]*)[1-9]([^0-9])(.*)/\2\1\3\3\4/
b loop }
/^[0-9]+0[^0-9]/ {
s/^[0-9]*([1-9]0+)[^0-9].*/\1/
y/0123456789/9012345678/
G
s/^([0-9]+)\n([0-9]*)[1-9]0+([^0-9])(.*)/\2\1\3\3\4/
s/^0+//
b loop }
 

Example (assuming the scripts reside in the files encode.sed and decode.sed):

 
sed -rf encode.sed <<< "foo oops"
# 1f2o1 2o1p1s
 
sed -rf decode.sed <<< "1f2o1 2o1p1s"
# foo oops
 
(sed -rf decode.sed | sed -rf encode.sed) <<< 1000.
# 1000.
 

[edit] Seed7

$ include "seed7_05.s7i";
include "scanstri.s7i";
 
const func string: letterRleEncode (in string: data) is func
result
var string: result is "";
local
var char: code is ' ';
var integer: index is 1;
begin
if length(data) <> 0 then
code := data[1];
repeat
incr(index);
until index > length(data) or code <> data[index];
result := str(pred(index)) & str(code) & letterRleEncode(data[index ..]);
end if;
end func;
 
const func string: letterRleDecode (in var string: data) is func
result
var string: result is "";
local
var integer: count is 0;
begin
if length(data) <> 0 then
count := integer parse getDigits(data);
result := data[1 len 1] mult count & letterRleDecode(data[2 ..]);
end if;
end func;
 
const proc: main is func
begin
writeln(letterRleEncode("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"));
writeln(letterRleDecode("12W1B12W3B24W1B14W"));
end func;

[edit] Smalltalk

See Run-length encoding/Smalltalk

[edit] SNOBOL4

Works with: Macro Spitbol
Works with: Snobol4+
Works with: CSnobol
*       # Encode RLE
define('rle(str)c,n') :(rle_end)
rle str len(1) . c :f(return)
str span(c) @n =
rle = rle n c :(rle)
rle_end
 
* # Decode RLE
define('elr(str)c,n') :(elr_end)
elr str span('0123456789') . n len(1) . c = :f(return)
elr = elr dupl(c,n) :(elr)
elr_end
 
* # Test and display
str = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
output = str;
str = rle(str); output = str
str = elr(str); output = str
end

Output:

WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
12W1B12W3B24W1B14W
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] SQL

Works with: PL/pgSQL


  • RLE encoding
 
-- variable table
DROP TABLE IF EXISTS var;
CREATE temp TABLE var ( VALUE VARCHAR(1000) );
INSERT INTO var(VALUE) SELECT 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW';
 
-- select
WITH recursive
ints(num) AS
(
SELECT 1
UNION ALL
SELECT num+1
FROM ints
WHERE num+1 <= LENGTH((SELECT VALUE FROM var))
)
,
chars(num,chr,nextChr,isGroupEnd) AS
(
SELECT tmp.*, CASE WHEN tmp.nextChr <> tmp.chr THEN 1 ELSE 0 END groupEnds
FROM (
SELECT num,
SUBSTRING((SELECT VALUE FROM var), num, 1) chr,
(SELECT SUBSTRING((SELECT VALUE FROM var), num+1, 1)) nextChr
FROM ints
) tmp
)
SELECT (SELECT VALUE FROM var) plain_text, (
SELECT string_agg(concat(CAST(maxNoWithinGroup AS VARCHAR(10)) , chr), '' ORDER BY num)
FROM (
SELECT *, MAX(noWithinGroup) OVER (partition BY chr, groupNo) maxNoWithinGroup
FROM (
SELECT num,
chr,
groupNo,
ROW_NUMBER() OVER( partition BY chr, groupNo ORDER BY num) noWithinGroup
FROM (
SELECT *, (SELECT COUNT(*)
FROM chars chars2
WHERE chars2.isGroupEnd = 1 AND
chars2.chr = chars.chr AND
chars2.num < chars.num) groupNo
FROM chars
) tmp
) sub
) final
WHERE noWithinGroup = 1
) Rle_Compressed
 
  • RLE decoding
 
-- variable table
DROP TABLE IF EXISTS var;
CREATE temp TABLE var ( VALUE VARCHAR(1000) );
INSERT INTO var(VALUE) SELECT '1A2B3C4D5E6F';
 
-- select
WITH recursive
ints(num) AS
(
SELECT 1
UNION ALL
SELECT num+1
FROM ints
WHERE num+1 <= LENGTH((SELECT VALUE FROM var))
)
,
chars(num,chr,nextChr) AS
(
SELECT tmp.*
FROM (
SELECT num,
SUBSTRING((SELECT VALUE FROM var), num, 1) chr,
(SELECT SUBSTRING((SELECT VALUE FROM var), num+1, 1)) nextChr
FROM ints
) tmp
)
,
charsWithGroup(num,chr,nextChr,group_no) AS
(
SELECT *,(SELECT COUNT(*)
FROM chars chars2
WHERE chars2.chr !~ '[0-9]' AND
chars2.num < chars.num) group_No
FROM chars
)
,
charsWithGroupAndLetter(num,chr,nextChr,group_no,group_letter) AS
(
SELECT *,(SELECT chr
FROM charsWithGroup g2
WHERE g2.group_no = charsWithGroup.group_no
ORDER BY num DESC
LIMIT 1)
FROM charsWithGroup
)
,
lettersWithCount(group_no,amount,group_letter) AS
(
SELECT group_no, string_agg(chr, '' ORDER BY num), group_letter
FROM charsWithGroupAndLetter
WHERE chr ~ '[0-9]'
GROUP BY group_no, group_letter
)
,
lettersReplicated(group_no,amount,group_letter, replicated_Letter) AS
(
SELECT *, rpad(group_letter, CAST(amount AS INT), group_letter)
FROM lettersWithCount
)
SELECT (SELECT VALUE FROM var) rle_encoded,
string_agg(replicated_Letter, '' ORDER BY group_no) decoded_string
FROM lettersReplicated
 

[edit] Standard ML

fun encode str =
let
fun aux (sub, acc) =
case Substring.getc sub
of NONE => rev acc
| SOME (x, sub') =>
let
val (y, z) = Substring.splitl (fn c => c = x) sub'
in
aux (z, (x, Substring.size y + 1) :: acc)
end
in
aux (Substring.full str, [])
end
 
fun decode lst =
concat (map (fn (c,n) => implode (List.tabulate (n, fn _ => c))) lst)

Example:

- encode "aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa";
val it = [(#"a",5),(#"h",6),(#"m",7),(#"u",1),(#"i",7),(#"a",6)]
  : (char * int) list
- decode [(#"a",5),(#"h",6),(#"m",7),(#"u",1),(#"i",7),(#"a",6)];
val it = "aaaaahhhhhhmmmmmmmuiiiiiiiaaaaaa" : string

[edit] Tcl

The encoding is an even-length list with elements {count char ...}

proc encode {string} {
set encoding {}
# use a regular expression to match runs of one character
foreach {run -} [regexp -all -inline {(.)\1+|.} $string] {
lappend encoding [string length $run] [string index $run 0]
}
return $encoding
}
 
proc decode {encoding} {
foreach {count char} $encoding {
append decoded [string repeat $char $count]
}
return $decoded
}
set str "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
set enc [encode $str] ;# ==> {12 W 1 B 12 W 3 B 24 W 1 B 14 W}
set dec [decode $enc]
if {$str eq $dec} {
puts "success"
}

[edit] TUSCRIPT

 
$$ MODE TUSCRIPT,{}
input="WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW",output=""
string=strings(input," ? ")
letter=ACCUMULATE(string,freq)
freq=SPLIT(freq),letter=SPLIT(letter)
output=JOIN(freq,"",letter)
output=JOIN(output,"")
PRINT input
PRINT output
 

Output:

WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
12W1B12W3B24W1B14W

[edit] Ursala

A standard library function, rlc, does most of the work for this task, which is a second order function taking a binary predicate that decides when consecutive items of an input list belong to the same run.

#import std
#import nat
 
encode = (rlc ==); *= ^lhPrNCT\~&h %nP+ length
 
decode = (rlc ~&l-=digits); *=zyNCXS ^|DlS/~& iota+ %np
 
test_data = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
 
#show+
 
example =
 
<
encode test_data,
decode encode test_data>

The output shows an encoding of the test data, and a decoding of the encoding, which matches the original test data.

12W1B12W3B24W1B14W
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW

[edit] Vedit macro language

The following example encodes/decodes an entire file. Each run is coded with two bytes. The first byte is the run length with high bit set, the second byte is the character code. ASCII characters with run length of 1 are left unchanged. Character codes above 127 are always coded with run length. Newlines are not converted (the regular expression does not count newlines). This methods supports any type of input.

:RL_ENCODE:
BOF
While (!At_EOF) {
if (At_EOL) { Line(1) Continue } // skip newlines
#1 = Cur_Char // #1 = character
Match("(.)\1*", REGEXP) // count run length
#2 = Chars_Matched // #2 = run length
if (#2 > 127) { #2 = 127 } // can be max 127
if (#2 > 1 || #1 > 127) {
Del_Char(#2)
Ins_Char(#2 | 128) // run length (high bit set)
Ins_Char(#1) // character
} else { // single ASCII char
Char // skip
}
}
Return
 
:RL_DECODE:
BOF
While (!At_EOF) {
#2 = Cur_Char
if (#2 > 127) { // is this run length?
#1 = Cur_Char(1) // #1 = character value
Del_Char(2)
Ins_Char(#1, COUNT, #2 & 127)
} else { // single ASCII char
Char
}
}
Return

[edit] XPL0

include c:\cxpl\codes;  \intrinsic 'code' declarations
string 0; \use zero-terminated strings, instead of MSb terminated
 
proc Compress(S); \Compress string using run-length encoding, & display it
char S;
int I, C0, C, N;
[I:= 0;
C0:= S(I); I:= I+1;
repeat ChOut(0, C0);
N:= 0;
repeat C:= S(I); I:= I+1;
N:= N+1;
until C#C0;
if N>1 then IntOut(0, N-1);
C0:= C;
until C=0;
]; \Compress
 
proc Expand(S); \Expand compressed string, and display it
char S;
int I, C0, C, N;
[I:= 0;
C0:= S(I); I:= I+1;
repeat ChOut(0, C0);
C:= S(I); I:= I+1;
if C>=^1 & C<=^9 then
[N:= 0;
while C>=^0 & C<=^9 do
[N:= N*10 + C-^0;
C:= S(I); I:= I+1;
];
while N do [ChOut(0, C0); N:= N-1];
];
C0:= C;
until C=0;
]; \Expand
 
[Compress("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW");
CrLf(0);
Expand("W11BW11B2W23BW13"); CrLf(0);
]

Output (with slightly better compression than the example):

W11BW11B2W23BW13
WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW
Personal tools
Namespaces

Variants
Actions
Community
Explore
Misc
Toolbox