Variable-length quantity: Difference between revisions
(Added PicoLisp) |
|||
Line 264: | Line 264: | ||
</pre> |
</pre> |
||
=={{header|PicoLisp}}== |
|||
<lang PicoLisp>(de numToVlq (Num) |
|||
(let Res (cons (& Num 127)) |
|||
(while (gt0 (setq Num (>> 7 Num))) |
|||
(push 'Res (| 128 (& Num 127))) ) |
|||
Res ) ) |
|||
(de vlqToNum (Vlq) |
|||
(let Res 0 |
|||
(for N Vlq |
|||
(setq Res (| (>> -7 Res) (& N 127))) ) ) ) |
|||
(for Num (0 15 16 127 128 255 2097151 2097152) |
|||
(let Vlq (numToVlq Num) |
|||
(tab (12 12 12) Num (glue ":" (mapcar hex Vlq)) (vlqToNum Vlq)) ) )</lang> |
|||
Output: |
|||
<pre> 0 0 0 |
|||
15 F 15 |
|||
16 10 16 |
|||
127 7F 127 |
|||
128 81:0 128 |
|||
255 81:7F 255 |
|||
2097151 FF:FF:7F 2097151 |
|||
2097152 81:80:80:0 2097152</pre> |
|||
=={{header|Python}}== |
=={{header|Python}}== |
Revision as of 10:58, 20 October 2010
This page uses content from Wikipedia. The original article was at Variable-length_quantity. The list of authors can be seen in the page history. As with Rosetta Code, the text of Wikipedia is available under the GNU FDL. (See links for details on variance) |
You are encouraged to solve this task according to the task description, using any language you may know.
Implement some operations with respect to Variable-length Quantity, at least including conversions between a normal number in the language and the binary representation of the Variable-length Quantity for that number. Any variants are acceptable.
Task : With above operations,
- convert these two numbers 0x200000 (2097152 in decimal) and 0x1fffff (2097151 in decimal) into sequences of octets (an eight-bit byte);
- display these sequences of octets;
- convert these sequences of octets back to numbers, and check that they are equal to original numbers.
C
<lang c>#include <stdio.h>
- include <stdint.h>
- include <stdlib.h>
- define BUFLEN 8
- define MAX_UINT32_AS_VLQ 8
size_t uint32_to_vlq(uint32_t number, uint8_t *buf, size_t buflen) {
uint8_t ibuf[MAX_UINT32_AS_VLQ]; size_t written = 0, i;
if (buflen == 0) return 0;
if ( number < 128 ) { *buf = number; return 1; }
ibuf[0] = number & 0x7f; written++; number >>= 7; while(number > 0) { ibuf[written] = (number & 0x7f) | 0x80; number >>= 7; written++; }
if (written > buflen) return 0;
for(i = 0; i < written; i++) buf[i] = ibuf[written-i-1];
return written;
}
uint32_t vlq_to_uint32(uint8_t *vlq) {
uint32_t res = 0;
if ( (*vlq & 0x80) == 0) return *vlq; for(; (*vlq & 0x80) != 0; vlq++) res = (res << 7) | (*vlq & 0x7f);
res = (res << 7) | *vlq; return res;
}
void print_hex8(uint8_t *buf, size_t buflen) {
size_t i;
for(i = 0; i < buflen; i++) { printf("%02X ", buf[i]); } putchar('\n');
}
- define TESTVLQ(N) do { \
howmany = uint32_to_vlq((N), buf, BUFLEN); \ printf("%08X is ", (N)); \ print_hex8(buf, howmany); \ n = vlq_to_uint32(buf); \ if ( n != (N) ) puts("WRONG!"); \ } while(0);
int main() {
uint8_t buf[BUFLEN]; size_t howmany; uint32_t n; TESTVLQ(0x7f); TESTVLQ(0x4000); TESTVLQ(0x00); TESTVLQ(0x3ffffe); TESTVLQ(0x1fffff); TESTVLQ(0x200000); TESTVLQ(0xffffffff);
return EXIT_SUCCESS;
}</lang>
D
This implements a Variable-length Quantity struct for an ulong integer.
<lang d>module vlq ; private import std.string ;
struct VLQ { // variable length quantity (unsiged long, max 63-bit)
ulong value ; static ulong V2I(ubyte[] v) { return VLQ.init.from(v).value ; } uint extract(ubyte[] v) { ulong t = 0; uint idx = 0, limit = v.length - 1 ; if(8 < limit) limit = 8 ; while ((idx < limit) && ((v[idx] & 0x80) > 0)) t = (t << 7) | (0x7f & v[idx++]) ; if(idx > limit) throw new Exception("too large for ulong or invalid format") ; else value = (t << 7) | v[idx] ; return idx + 1 ; } VLQ from(ubyte[] v) { extract(v) ; return this ; } @property ubyte[] toVLQ() { ubyte[] v = [(0x7f & value)] ; ulong k = value >>> 7 ; while(k > 0) { v ~= (k & 0x7f) | 0x80 ; k >>>= 7 ; } if(v.length > 9) throw new Exception("value too large ") ; return v.reverse ; } @property string toStr() { string s ; foreach(e ; toVLQ) s ~= std.string.format("%02X:", e) ; return "("~s[0..$-1]~")" ; } static ulong[] split(ubyte[] b) { ulong[] res ; VLQ v ; for(int i = 0, cnt = 1 ; i < b.length ; cnt++) { auto k = v.extract(b[i..$]) ; res ~= v.value ; i += k ; } return res ; } alias value this ; // this allow VLQ works like ulong, eg add, multiply etc.
}</lang>
test code :
<lang d>import std.stdio ; import vlq ; void main() {
VLQ a = VLQ(0x7f), b = VLQ(0x4000), c ; writefln("a:%8x = %s\nb:%8x = %s\nc:%8x = %s", a.value, a.toStr, b.value, b.toStr, c.value, c.toStr) ; c = (a + 1) * b ; a = c - 1 ; b = VLQ.init.from(a.toVLQ) ; a <<= 1 ; // convert ulong to octet sequence : VLQ(number).toVLQ writefln("a:%8x = %s\nb:%8x = %s\nc:%8x = %s", a.value, a.toStr, b.value, b.toStr, c.value, c.toStr) ; //write them to a binary file std.file.write("vlqtest.bin", a.toVLQ ~ b.toVLQ ~ c.toVLQ) ; //read them back auto buf = cast(ubyte[]) std.file.read("vlqtest.bin") ; writefln("File length : %d", buf.length) ; auto cnt = 0 ; // convert octet sequence to ulong : (VLQ.init).from(octet sequence) foreach(v; VLQ.split(buf)) writefln("%d:%8x = %s", 1 + cnt++, v, VLQ(v).toStr ) ;
}</lang>
output:
a: 7f = (7F) b: 4000 = (81:80:00) c: 0 = (00) a: 3ffffe = (81:FF:FF:7E) b: 1fffff = (FF:FF:7F) c: 200000 = (81:80:80:00) File length : 11 1: 3ffffe = (81:FF:FF:7E) 2: 1fffff = (FF:FF:7F) 3: 200000 = (81:80:80:00)
J
<lang j>N=: 128x v2i=: (N&| N&#./.~ [: +/\ _1 |. N&>)@i.~&a. i2v=: a. {~ [:;}.@(N+//.@,:N&#.inv)&.> ifv=: v2i :. i2v vfi=: i2v :. v2i</lang>
ifv
is an invertible function which gets an (unsigned, arbitrary precision) integer sequence from a variable-length quantity sequence. vfi
is an invertable function which gets a variable-length quantity sequence from an unsigned integer sequence.
Example use:
<lang j> require'convert'
numbers=: 16b7f 16b4000 0 16b3ffffe 16b1fffff 200000 av vlq=: vfi numbers
127 129 128 0 0 129 255 255 126 255 255 127 140 154 64
av (vfi 1 2 3 4 5 6) +&.ifv vlq
129 0 129 128 2 3 130 128 128 2 129 128 128 4 140 154 70</lang>
Perl
The vlg_encode sub returns an array of octets in most -> least significant order. Simply reverse the array to reverse the order.
<lang perl> use warnings; use strict;
for my $testcase (
0, 0xa, 123, 254, 255, 256, 257, 65534, 65535, 65536, 65537, 0x1fffff, 0x200000 )
{
my @vlq = vlq_encode($testcase); printf "%8s %12s %8s\n", $testcase, ( join ':', @vlq ), vlq_decode(@vlq);
}
sub vlq_encode {
my @vlq; my $binary = sprintf "%s%b", 0 x 7, shift; $binary =~ s/(.{7})$//; @vlq = ( unpack 'H2', ( pack 'B8', '0' . $1 ) ); while ( 0 + $binary ) { $binary =~ s/(.{7})$//; unshift @vlq, ( unpack 'H2', pack 'B8', '1' . $1 ); } return @vlq;
}
sub vlq_decode {
my $num; $num .= sprintf "%07b", hex(shift @_) & 0x7f while @_; return oct '0b' . $num;
} </lang>
Output:
0 00 0 10 0a 10 123 7b 123 254 81:7e 254 255 81:7f 255 256 82:00 256 257 82:01 257 65534 83:ff:7e 65534 65535 83:ff:7f 65535 65536 84:80:00 65536 65537 84:80:01 65537 2097151 ff:ff:7f 2097151 2097152 81:80:80:00 2097152
PicoLisp
<lang PicoLisp>(de numToVlq (Num)
(let Res (cons (& Num 127)) (while (gt0 (setq Num (>> 7 Num))) (push 'Res (| 128 (& Num 127))) ) Res ) )
(de vlqToNum (Vlq)
(let Res 0 (for N Vlq (setq Res (| (>> -7 Res) (& N 127))) ) ) )
(for Num (0 15 16 127 128 255 2097151 2097152)
(let Vlq (numToVlq Num) (tab (12 12 12) Num (glue ":" (mapcar hex Vlq)) (vlqToNum Vlq)) ) )</lang>
Output:
0 0 0 15 F 15 16 10 16 127 7F 127 128 81:0 128 255 81:7F 255 2097151 FF:FF:7F 2097151 2097152 81:80:80:0 2097152
Python
The vlq format is computed in a form for printing. This could easily be changed to a series of 8 bit ASCII chars whose integer value corresponds to the vlq for saving or transmission.
When transmitting the Vlq, octets are sent from the rightmost of the Vlq first.
<lang python>def tobits(n, _group=8, _sep='_', _pad=False):
'Express n as binary bits with separator' bits = '{0:b}'.format(n)[::-1] if _pad: bits = '{0:0{1}b}'.format(n, ((_group+len(bits)-1)//_group)*_group)[::-1] answer = _sep.join(bits[i:i+_group] for i in range(0, len(bits), _group))[::-1] answer = '0'*(len(_sep)-1) + answer else: answer = _sep.join(bits[i:i+_group] for i in range(0, len(bits), _group))[::-1] return answer
def tovlq(n):
return tobits(n, _group=7, _sep='1_', _pad=True)
def toint(vlq):
return int(.join(vlq.split('_1')), 2)
def vlqsend(vlq):
for i, byte in enumerate(vlq.split('_')[::-1]): print('Sent byte {0:3}: {1:#04x}'.format(i, int(byte,2)))</lang>
Sample Output
The underscore separates groups of eight bits (octets), for readability
<lang python>>>> for n in (254, 255, 256, 257, -2+(1<<16), -1+(1<<16), 1<<16, 1+(1<<16), 0x200000, 0x1fffff ):
print('int: %7i bin: %26s vlq: %35s vlq->int: %7i' % (n, tobits(n,_pad=True), tovlq(n), toint(tovlq(n))))
int: 254 bin: 11111110 vlq: 00000001_11111110 vlq->int: 254
int: 255 bin: 11111111 vlq: 00000001_11111111 vlq->int: 255
int: 256 bin: 00000001_00000000 vlq: 00000010_10000000 vlq->int: 256
int: 257 bin: 00000001_00000001 vlq: 00000010_10000001 vlq->int: 257
int: 65534 bin: 11111111_11111110 vlq: 00000011_11111111_11111110 vlq->int: 65534
int: 65535 bin: 11111111_11111111 vlq: 00000011_11111111_11111111 vlq->int: 65535
int: 65536 bin: 00000001_00000000_00000000 vlq: 00000100_10000000_10000000 vlq->int: 65536
int: 65537 bin: 00000001_00000000_00000001 vlq: 00000100_10000000_10000001 vlq->int: 65537
int: 2097152 bin: 00100000_00000000_00000000 vlq: 00000001_10000000_10000000_10000000 vlq->int: 2097152
int: 2097151 bin: 00011111_11111111_11111111 vlq: 01111111_11111111_11111111 vlq->int: 2097151
>>> vlqsend(tovlq(0x200000))
Sent byte 0: 0x80
Sent byte 1: 0x80
Sent byte 2: 0x80
Sent byte 3: 0x01
>>> vlqsend(tovlq(0x1fffff))
Sent byte 0: 0xff
Sent byte 1: 0xff
Sent byte 2: 0x7f
>>> </lang>
Tcl
<lang tcl>package require Tcl 8.5
proc vlqEncode number {
if {$number < 0} {error "negative not supported"} while 1 {
lappend digits [expr {$number & 0x7f}] if {[set number [expr {$number >> 7}]] == 0} break
} set out [format %c [lindex $digits 0]] foreach digit [lrange $digits 1 end] {
set out [format %c%s [expr {0x80+$digit}] $out]
} return $out
} proc vlqDecode chars {
set n 0 foreach c [split $chars ""] {
scan $c %c c set n [expr {($n<<7) | ($c&0x7f)}] if {!($c&0x80)} break
} return $n
}</lang> Demo code: <lang tcl>proc numtohex {num} {
binary scan [string trimleft [binary format W $num] \0] H* hexEncoded regsub -all "..(?=.)" $hexEncoded "&:"
} proc strtohex {string} {
binary scan $string H* hexEncoded regsub -all "..(?=.)" $hexEncoded "&:"
} foreach testcase {
123 254 255 256 257 65534 65535 65536 65537 2097152 2097151 12345678901234566789
} {
set encoded [vlqEncode $testcase] binary scan $encoded H* hexEncoded regsub -all {..(?=.)} $hexEncoded &: hexEncoded set decoded [vlqDecode $encoded] puts "$testcase ([numtohex $testcase]) ==>\
[strtohex $encoded] ([string length $encoded] bytes) ==>\ $decoded" }</lang> Output:
123 (7b) ==> 7b (1 bytes) ==> 123 254 (fe) ==> 81:7e (2 bytes) ==> 254 255 (ff) ==> 81:7f (2 bytes) ==> 255 256 (01:00) ==> 82:00 (2 bytes) ==> 256 257 (01:01) ==> 82:01 (2 bytes) ==> 257 65534 (ff:fe) ==> 83:ff:7e (3 bytes) ==> 65534 65535 (ff:ff) ==> 83:ff:7f (3 bytes) ==> 65535 65536 (01:00:00) ==> 84:80:00 (3 bytes) ==> 65536 65537 (01:00:01) ==> 84:80:01 (3 bytes) ==> 65537 2097152 (20:00:00) ==> 81:80:80:00 (4 bytes) ==> 2097152 2097151 (1f:ff:ff) ==> ff:ff:7f (3 bytes) ==> 2097151 12345678901234566789 (ab:54:a9:8c:eb:1f:06:85) ==> 81:ab:aa:aa:b1:ce:d8:fc:8d:05 (10 bytes) ==> 12345678901234566789