Variable-length quantity: Difference between revisions

From Rosetta Code
Content added Content deleted
m (More English after reading it more carefully)
Line 102: Line 102:
2: 1fffff = (FF:FF:7F)
2: 1fffff = (FF:FF:7F)
3: 200000 = (81:80:80:00)</pre>
3: 200000 = (81:80:80:00)</pre>

=={{header|Python}}==
The vlq format is computed in a form for printing. This could easily be changed to a series of 8 bit ASCII chars whose integer value corresponds to the vlq for saving or transmission.

<lang python>def tobits(n, _group=8, _sep='_', _pad=False):
'Express n as binary bits with separator'
bits = '{0:b}'.format(n)[::-1]
if _pad:
bits = '{0:0{1}b}'.format(n,
((_group+len(bits)-1)//_group)*_group)[::-1]
answer = _sep.join(bits[i:i+_group]
for i in range(0, len(bits), _group))[::-1]
answer = '0'*(len(_sep)-1) + answer
else:
answer = _sep.join(bits[i:i+_group]
for i in range(0, len(bits), _group))[::-1]
return answer

def tovlq(n):
return tobits(n, _group=7, _sep='1_', _pad=True)

def toint(vlq):
return int(''.join(vlq.split('_1')), 2)</lang>

<br>'''Sample Output'''
The underscore separates groups of eight bits (octets), for readability
<lang python>>>> for n in (254, 255, 256, 257, -2+(1<<16), -1+(1<<16), 1<<16, 1+(1<<16), 0x200000, 0x1fffff ):
print('int: %7i bin: %26s vlq: %35s vlq->int: %7i' % (n, tobits(n,_pad=True), tovlq(n), toint(tovlq(n))))

int: 254 bin: 11111110 vlq: 00000001_11111110 vlq->int: 254
int: 255 bin: 11111111 vlq: 00000001_11111111 vlq->int: 255
int: 256 bin: 00000001_00000000 vlq: 00000010_10000000 vlq->int: 256
int: 257 bin: 00000001_00000001 vlq: 00000010_10000001 vlq->int: 257
int: 65534 bin: 11111111_11111110 vlq: 00000011_11111111_11111110 vlq->int: 65534
int: 65535 bin: 11111111_11111111 vlq: 00000011_11111111_11111111 vlq->int: 65535
int: 65536 bin: 00000001_00000000_00000000 vlq: 00000100_10000000_10000000 vlq->int: 65536
int: 65537 bin: 00000001_00000000_00000001 vlq: 00000100_10000000_10000001 vlq->int: 65537
int: 2097152 bin: 00100000_00000000_00000000 vlq: 00000001_10000000_10000000_10000000 vlq->int: 2097152
int: 2097151 bin: 00011111_11111111_11111111 vlq: 01111111_11111111_11111111 vlq->int: 2097151
>>> </lang>

Revision as of 21:30, 14 October 2010

This page uses content from Wikipedia. The original article was at Variable-length_quantity. The list of authors can be seen in the page history. As with Rosetta Code, the text of Wikipedia is available under the GNU FDL. (See links for details on variance)
Task
Variable-length quantity
You are encouraged to solve this task according to the task description, using any language you may know.

Implement some operations with respect to Variable-length Quantity, at least including conversions between a normal number in the language and the binary representation of the Variable-length Quantity for that number. Any variants are acceptable.

Task : With above operations,

  • convert these two numbers 0x200000 (2097152 in decimal) and 0x1fffff (2097151 in decimal) into sequences of octets (an eight-bit byte);
  • display these sequences of octets;
  • convert these sequences of octets back to numbers, and check that they are equal to original numbers.

D

This implements a Variable-length Quantity struct for an ulong integer.

<lang d>module vlq ; private import std.string ;

struct VLQ { // variable length quantity (unsiged long, max 63-bit)

   ulong value ;
   static ulong V2I(ubyte[] v) { return VLQ.init.from(v).value ; }
   uint extract(ubyte[] v) {
       ulong t = 0; 
       uint idx = 0, limit = v.length - 1 ;
       if(8 < limit) limit = 8 ;       
       while ((idx < limit) && ((v[idx] & 0x80) > 0))
           t = (t << 7) | (0x7f & v[idx++]) ;
       if(idx > limit)
           throw new Exception("too large for ulong or invalid format") ;
       else
           value = (t << 7) | v[idx] ;
       return idx + 1 ;
   }
   VLQ from(ubyte[] v) { extract(v) ; return this ; }    
   @property
   ubyte[] toVLQ() {
       ubyte[] v = [(0x7f & value)] ;
       ulong k = value >>> 7 ;
       while(k > 0) {
           v ~= (k & 0x7f) | 0x80 ;
           k >>>= 7 ;
       }
       if(v.length > 9)
           throw new Exception("value too large ") ;
       return v.reverse ;
   }
   @property
   string toStr() {
       string s ;
       foreach(e ; toVLQ)
           s ~= std.string.format("%02X:", e) ; 
       return "("~s[0..$-1]~")" ;
   }
   static ulong[] split(ubyte[] b) {
       ulong[] res ;
       VLQ v ;
       for(int i = 0, cnt = 1 ; i < b.length ; cnt++) { 
           auto k = v.extract(b[i..$]) ;
           res ~= v.value ;
           i += k ;
       } 
       return res ;    
   }
   alias value this ; // this allow VLQ works like ulong, eg add, multiply etc.

}</lang>

test code :

<lang d>import std.stdio ; import vlq ; void main() {

   VLQ a = VLQ(0x7f), b = VLQ(0x4000), c ;
   writefln("a:%8x = %s\nb:%8x = %s\nc:%8x = %s", 
       a.value, a.toStr, b.value, b.toStr, c.value, c.toStr) ;
   c = (a + 1) * b  ;
   a = c - 1 ;
   b = VLQ.init.from(a.toVLQ) ;
   a <<= 1 ;
   // convert ulong to octet sequence : VLQ(number).toVLQ
   writefln("a:%8x = %s\nb:%8x = %s\nc:%8x = %s", 
       a.value, a.toStr, b.value, b.toStr, c.value, c.toStr) ;
   //write them to a binary file 
   std.file.write("vlqtest.bin", a.toVLQ ~ b.toVLQ ~ c.toVLQ) ;
   //read them back
   auto buf = cast(ubyte[]) std.file.read("vlqtest.bin") ;     
   writefln("File length : %d", buf.length) ;
   auto cnt = 0 ;
   // convert octet sequence to ulong : (VLQ.init).from(octet sequence)
   foreach(v; VLQ.split(buf)) 
       writefln("%d:%8x = %s", 1 + cnt++, v, VLQ(v).toStr ) ; 

}</lang>

output:

a:      7f = (7F)
b:    4000 = (81:80:00)
c:       0 = (00)
a:  3ffffe = (81:FF:FF:7E)
b:  1fffff = (FF:FF:7F)
c:  200000 = (81:80:80:00)
File length : 11
1:  3ffffe = (81:FF:FF:7E)
2:  1fffff = (FF:FF:7F)
3:  200000 = (81:80:80:00)

Python

The vlq format is computed in a form for printing. This could easily be changed to a series of 8 bit ASCII chars whose integer value corresponds to the vlq for saving or transmission.

<lang python>def tobits(n, _group=8, _sep='_', _pad=False):

   'Express n as binary bits with separator'
   bits = '{0:b}'.format(n)[::-1]
   if _pad:
       bits = '{0:0{1}b}'.format(n,
                                 ((_group+len(bits)-1)//_group)*_group)[::-1]
       answer = _sep.join(bits[i:i+_group]
                                for i in range(0, len(bits), _group))[::-1]
       answer = '0'*(len(_sep)-1) + answer
   else:
       answer = _sep.join(bits[i:i+_group]
                          for i in range(0, len(bits), _group))[::-1]
   return answer

def tovlq(n):

   return tobits(n, _group=7, _sep='1_', _pad=True)

def toint(vlq):

   return int(.join(vlq.split('_1')), 2)</lang>


Sample Output The underscore separates groups of eight bits (octets), for readability <lang python>>>> for n in (254, 255, 256, 257, -2+(1<<16), -1+(1<<16), 1<<16, 1+(1<<16), 0x200000, 0x1fffff ):

   print('int: %7i bin: %26s vlq: %35s vlq->int: %7i' % (n, tobits(n,_pad=True), tovlq(n), toint(tovlq(n))))


int: 254 bin: 11111110 vlq: 00000001_11111110 vlq->int: 254 int: 255 bin: 11111111 vlq: 00000001_11111111 vlq->int: 255 int: 256 bin: 00000001_00000000 vlq: 00000010_10000000 vlq->int: 256 int: 257 bin: 00000001_00000001 vlq: 00000010_10000001 vlq->int: 257 int: 65534 bin: 11111111_11111110 vlq: 00000011_11111111_11111110 vlq->int: 65534 int: 65535 bin: 11111111_11111111 vlq: 00000011_11111111_11111111 vlq->int: 65535 int: 65536 bin: 00000001_00000000_00000000 vlq: 00000100_10000000_10000000 vlq->int: 65536 int: 65537 bin: 00000001_00000000_00000001 vlq: 00000100_10000000_10000001 vlq->int: 65537 int: 2097152 bin: 00100000_00000000_00000000 vlq: 00000001_10000000_10000000_10000000 vlq->int: 2097152 int: 2097151 bin: 00011111_11111111_11111111 vlq: 01111111_11111111_11111111 vlq->int: 2097151 >>> </lang>