User:EMBee/Binary Serialization

From Rosetta Code

this task is not even a draft yet, just some sketches to collect the necessary details.

A message passing system uses the following binary serialization format:

Message header:

bytes type description
0 byte start byte (value 255)
1..4 int length of the message in bytes, counted from byte 5 onward (the start byte and this length field are not included)
5..8 int transaction id
9 byte message type
10..13 int destination id
14..17 int destination type (may be 0)
18..end message body


The body contains the actual message data which is encoded as follows:

type bytes value
Integer 0 1
1..4 network byte order
Float 0 2
1..4 Binary IEEE representation (single precision)
String 0 3
1..4 int, length
5.. char array
Object 0 4
1..4 int, object id
5..8 int, class id
Array 0 5
1..2 short, number of elements in array
3.. sequence of array values which may be of any type and are encoded as described in this table
Mapping 0 6
1..2 short, number of elements in mapping
3.. elements are encoded as a sequence of key-value pairs: key1value1key2value2...
Class 0 8
1..4 int, length
5.. char array of the class name
Function 0 10
1..4 int, length
5.. char array of "(function name:object id)", e.g. "(greeting():34562)" in the sample message below

using the above format, write a function to serialize a message.

the function should return a string and take the following arguments:

<lang pike>string encode(int transaction_id, int message_type, int destination_id, int destination_type, mixed args)</lang>

the function will be called with the following code: <lang pike>class Hello(int object_id) {

   // Class id of Hello; the object record in the table above carries an
   // object id followed by a class id.
   int class_id = 4;
   // Member function passed as hello->greeting in the call below.
   // It only writes a greeting; there is no return statement even though
   // the declared return type is string.
   string greeting()
   {
       write("Hello Rosettacode");
   }

}

// Instance of Hello constructed with object id 34562.
object hello = Hello(34562);

// Arguments in prototype order: transaction id 12, message type 1,
// destination id 34563, destination type 4, then the payload: an array
// holding an int, a float, an object, a class and a mapping whose value
// is a member function.
string message = encode(12, 1, 34563, 4, ({ 12345, 678.9, hello, Hello, ([ "greeting":hello->greeting ])}));</lang>

the resulting message (encoded in base64) is:

/wAAAF0AAAAMAQAAhwMAAAAEBQAFAQAAMDkCRCm5mgQAAIcCAAAABAgAAAAOL21haW4oKS0+SGVs
bG8GAAEDAAAACGdyZWV0aW5nCgAAABIoZ3JlZXRpbmcoKTozNDU2Mik=

note that your result may be different depending on how class and function names are encoded.

write a function decode(string message) that is able to decode a binary message and return an array with the decoded data.

you are allowed to substitute the object with a structure suitable to represent the object's values.

Pike

<lang pike>string encode_message(int transaction_id, int message_type, int destination_id, int destination_type, mixed args) {

   // Builds a complete wire message: start byte 255, a 4 byte length,
   // the 13 byte routing header and the encoded body.
   //
   // transaction_id, destination_id and destination_type are written as
   // 4 byte integers, message_type as a single byte. args is handed to
   // encode_body() unchanged.
   //
   // The length field counts the routing header plus the body; the start
   // byte and the length field itself are not included.
   string payload = encode_body(args);
   string routing = sprintf("%4c%1c%4c%4c", transaction_id, message_type, destination_id, destination_type);
   int declared_length = sizeof(routing) + sizeof(payload);

   // Diagnostic output on stderr: header size + body size.
   werror("size: %d+%d\n", sizeof(routing), sizeof(payload));

   return sprintf("%1c%4c", 255, declared_length) + routing + payload;

}

// Encodes a single value as a type byte followed by its payload.
//
// Type bytes used: 1 int, 2 float, 3 string, 4 object, 5 array,
// 6 mapping, 8 class (program), 10 function.
//
// Arrays and mappings are delegated to encode_array() and
// encode_mapping(), which call back into this function for every
// contained value.
//
// Throws an error for values of any other type. Previously such values
// fell off the end of the function and returned 0, which the callers
// then appended to the message as the text "0".
string encode_body(mixed args) {

   if (arrayp(args))
       return encode_array(args);
   if (mappingp(args))
       return encode_mapping(args);
   if (intp(args))
       return sprintf("%1c%4c", 1, args);
   if (floatp(args))
       return sprintf("%1c%4F", 2, args);
   if (stringp(args))
       return sprintf("%1c%4c%s", 3, sizeof(args), args);
   if (objectp(args))
       return sprintf("%1c%4c%4c", 4, args->object_id, args->class_id);
   if (programp(args))
   {
       string class_name = master()->describe_program(args);
       return sprintf("%1c%4c%s", 8, sizeof(class_name), class_name);
   }
   if (functionp(args))
   {
       // Functions are sent as "(name():object id)" of the object they
       // belong to. The local is deliberately not called function_name
       // so that it cannot be confused with the builtin of that name.
       object owner = function_object(args);
       string reference = sprintf("(%s():%d)", function_name(args), owner->object_id);
       return sprintf("%1c%4c%s", 10, sizeof(reference), reference);
   }

   error("encode_body: cannot serialize value %O\n", args);

}

// Encodes an array: type byte 5, a 2 byte element count, then every
// element encoded with encode_body() in array order.
//
// Throws an error when the array has more elements than the 2 byte
// count can express, instead of writing a count that does not match
// the number of encoded elements.
string encode_array(mixed args) {

   int count = sizeof(args);
   if (count > 0xffff)
       error("encode_array: %d elements do not fit the 2 byte element count\n", count);

   string encoded_array = sprintf("%1c%2c", 5, count);
   foreach(args;; mixed arg)
       encoded_array += encode_body(arg);
   return encoded_array;

}

// Encodes a mapping: type byte 6, a 2 byte pair count, then each pair
// as encoded key followed by encoded value (both via encode_body()).
// Pairs are written in the order foreach yields them.
//
// Throws an error when the mapping has more pairs than the 2 byte
// count can express, instead of writing a count that does not match
// the number of encoded pairs.
string encode_mapping(mixed args) {

   int count = sizeof(args);
   if (count > 0xffff)
       error("encode_mapping: %d pairs do not fit the 2 byte element count\n", count);

   string encoded_mapping = sprintf("%1c%2c", 6, count);
   foreach(args; mixed key; mixed value)
       encoded_mapping += encode_body(key)+encode_body(value);
   return encoded_mapping;

}

// Decodes a complete wire message.
//
// Returns an array of seven entries: start byte, declared length,
// transaction id, message type, destination id, destination type and,
// last, the array of decoded body values.
//
// Throws an error when the message is too short to contain all header
// fields. A wrong start byte or a declared length that disagrees with
// the actual size is only reported on stderr; decoding continues.
array decode_message(string message) {

   array result = array_sscanf(message, "%1c%4c%4c%1c%4c%4c%s");
   // array_sscanf() stops at the first field it cannot read, so a
   // truncated message yields fewer than seven entries.
   if (sizeof(result) < 7)
       error("decode_message: truncated header (message is %d bytes)\n", sizeof(message));
   if (result[0] != 255)
       werror("unexpected start byte: %d\n", result[0]);
   // The declared length does not count the start byte and the length
   // field itself, hence the 5.
   if (result[1]+5 != sizeof(message))
       werror("size missmatch: %d != %d\n", result[1]+5, sizeof(message));
   result[-1] = decode_element(decode_body(result[-1]));
   return result;

}

// Turns the flat record list produced by decode_body() into values.
//
// Each record starts with its type byte. Scalar records yield one value
// and decoding continues with the remaining records; array and mapping
// headers are handed to decode_array() / decode_mapping(), which consume
// the following records themselves. An unknown type is reported on
// stderr and ends decoding with an empty result for the remainder.
array decode_element(array input) {

   if (sizeof(input) == 0)
       return ({});

   array head = input[0];
   mixed value;

   switch(head[0])
   {
       case  1:
       case  2:
           value = head[1];
           break;
       case  3:
           value = head[2];
           break;
       case  4:
           value = find_object(head[1]);
           break;
       case  5:
           return decode_array(input);
       case  6:
           return decode_mapping(input);
       case  8:
           value = find_program(head[2]);
           break;
       case 10:
           value = find_function(head[2]);
           break;
       default:
           werror("unknown type");
           return ({});
   }

   // The value is resolved before the rest of the list is decoded.
   return ({ value }) + decode_element(input[1..]);

}

// Decodes an array whose header record is input[0].
//
// Everything after the header is decoded first; the first `count`
// decoded values become the array, the remaining values are returned
// after it.
array decode_array(array input) {

  array decoded = decode_element(input[1..]);
  int count = input[0][1];
  array members = decoded[0..count-1];
  array remainder = decoded[count..];
  return ({ members }) + remainder;

}

// Decodes a mapping whose header record is input[0].
//
// Everything after the header is decoded first; the first 2*count
// decoded values are grouped into key/value pairs and cast to a
// mapping, the remaining values are returned after it.
array decode_mapping(array input) {

  array decoded = decode_element(input[1..]);
  int flat_size = input[0][1]*2;
  array pairs = decoded[0..flat_size-1]/2;
  return ({ (mapping)pairs }) + decoded[flat_size..];

}

// Resolves an object id to an object. This implementation simply
// creates a new Hello carrying the given id.
object find_object(int id) {

   object instance = Hello(id);
   return instance;

}

// Resolves a class name to a program. Only Hello is known: the name is
// compared with what master()->describe_program() reports for Hello.
// Any other name is reported on stderr and nothing is returned
// explicitly.
program find_program(string class_name) {

   string known_name = master()->describe_program(Hello);
   if (known_name != class_name)
       werror("class %O not found", class_name);
   else
       return Hello;

}

// Resolves a function reference of the form "(name():object id)", as
// written by the encoder, or "(name:object id)" to the member function
// of the object returned by find_object().
//
// Returns 0 and reports on stderr when the reference cannot be parsed.
//
// The reference is scanned up to the colon and a trailing "()" is then
// removed from the name. Scanning with "(%s):%d" instead stops %s at
// the first "):", which for "(greeting():34562)" yields "greeting("
// and makes the member lookup fail.
function find_function(string function_name) {

   array res = array_sscanf(function_name, "(%s:%d)");
   if (sizeof(res) < 2)
   {
       werror("malformed function reference %O\n", function_name);
       return 0;
   }

   string member = res[0];
   if (has_suffix(member, "()"))
       member = member[..sizeof(member)-3];

   object o = find_object(res[1]);
   return o[member];

}

// Splits an encoded body into a flat list of records, one per encoded
// item, in the order they appear.
//
// Record layout:
//   int, float         ({ type, value })
//   string, class,
//   function           ({ type, length, text })
//   object             ({ type, object id, class id })
//   array, mapping     ({ type, element count })  (elements follow as
//                                                  separate records)
//
// An unknown type byte is reported on stderr and ends the scan; the
// records collected so far are returned.
array decode_body(string body) {

   array records = ({});

   while (sizeof(body))
   {
       int type = array_sscanf(body, "%1c")[0];
       switch(type)
       {
           case  1:
               records += ({ array_sscanf(body, "%1c%4c") });
               body = body[5..];
               break;
           case  2:
               records += ({ array_sscanf(body, "%1c%4F") });
               body = body[5..];
               break;
           case  3:
           case  8:
           case 10:
               {
                   // Length-prefixed text: 4 byte length after the
                   // type byte, then that many characters.
                   int length = array_sscanf(body[1..], "%4c")[0];
                   records += ({ ({ type, length, body[5..5+length-1] }) });
                   body = body[5+length..];
               }
               break;
           case  4:
               records += ({ array_sscanf(body, "%1c%4c%4c") });
               body = body[9..];
               break;
           case  5:
           case  6:
               records += ({ array_sscanf(body, "%1c%2c") });
               body = body[3..];
               break;
           default:
               werror("unknown type: %d %s\n", type, body[1..]);
               return records;
       }
   }

   return records;

}


// Sample class used to exercise the encoder and decoder.
// object_id is the constructor argument; the encoder reads it together
// with class_id when it writes an object record, and find_object()
// creates instances of this class from a decoded object id.
class Hello(int object_id) {

   // Written as the class id field of an encoded object.
   int class_id = 4;

   // Writes a greeting to stdout. There is no return statement even
   // though the declared return type is string.
   string greeting()
   {
       write("Hello Rosettacode");
   }

}

// Demo: encodes a sample payload, prints the raw and base64 forms of
// the message, then decodes it again and prints the result.
void main() {

   object greeter = Hello(34562);

   // Payload covering every supported type, including nested arrays
   // and mappings.
   array payload = ({ 12345, 678.9, greeter, Hello, ([ "greeting":greeter->greeting, "a":"b" ]), ([ ({1}):2, 3:({4,7}), 5:6 ]), "a", "b"});

   string wire = encode_message(12, 1, 34563, 4, payload);
   write("%d:%q\n", sizeof(wire), wire);
   write("%s\n", MIME.encode_base64(wire));

   array decoded = decode_message(wire);
   write("%O\n", decoded);

}</lang>