Find first and last set bit of a long integer: Difference between revisions
Make the task definition be meaningful |
|||
Line 245: | Line 245: | ||
upb=: (#: i: 1:)"0 |
upb=: (#: i: 1:)"0 |
||
rlwb=: #@#:"0 - 1: |
rlwb=: #@#:"0 - 1: |
||
rupb=: |
rupb=: rlwb - upb</lang> |
||
Notes: |
Notes: |
Revision as of 14:34, 9 December 2011
Define routines (or operators) lwb and upb that find the first and last set bit in a binary value. Implement using a binary search to find the location of the particular upper/lower bit.
Also: Define the reverse routines (or operators) rlwb and rupb that find host's positive integers least- and most-significant set bit in a binary value expressed in LSB 0 bit numbering, i.e. indexed from the extreme right bit.
Use primarily bit operations, such as and, or, and bit shifting. Avoid additions, multiplications and especially avoid divisions.
Two implementations:
- For the host word size on the host platform, implement the routine "efficiently" in without looping or recursion.
- For the extended precision/long word implement the algorithm more generally - maybe as a template, and maybe with looping - so that any bits width for a binary type can be accommodated.
Test cases:
- For the host machine word size: Use the powers of 42 up to host's the "natural" word size to calculate the index of the first and last set bit.
- For the extended precision: Use the powers of 1302 up to the host's next "natural" long host word size to calculate the index of the first and last set bit.
- Output bit indexes in LSB 0 bit numbering.
Additionally:
In a particular language, there maybe (at least) two alternative approaches of calculating the required values:
- Using an external library.
- Using a built-in library.
If any of these approaches are available, then also note the library or built-in name.
See also:
- Find the log base 2 of an N-bit integer in O(lg(N)) operations
- 80386 Instruction Set - BSF -- Bit Scan Forward
ALGOL 68
File: Template.Find_first_and_last_set_bit.a68<lang algol68>INT lbits width = UPB []BOOL(LBITS(2r0));
OP LWB = (BITS in x)INT: bits width - RUPB in x;
OP RUPB = (BITS in x)INT:
### 32 bit LWB Find Lower Set Bit using an unrolled loop ###
- Note: BITS ELEM 1 is actually numerically the Most Significant Bit!! #
IF in x = 2r0 THEN -1 # EXIT # ELSE BITS x := in x, out := 2r0; IF(x AND NOT 2r1111111111111111)/=2r0 THEN x := x SHR 16; out := out OR 2r10000 FI; IF(x AND NOT 2r11111111) /=2r0 THEN x := x SHR 8; out := out OR 2r1000 FI; IF(x AND NOT 2r1111) /=2r0 THEN x := x SHR 4; out := out OR 2r100 FI; IF(x AND NOT 2r11) /=2r0 THEN x := x SHR 2; out := out OR 2r10 FI; IF(x AND NOT 2r1) /=2r0 THEN out := out OR 2r1 FI; ABS out # EXIT # FI;
OP LWB = (LBITS in x)INT: lbits width - RUPB in x;
OP RUPB = (LBITS in x)INT:
### Generalised Find Lower Set Bit using a loop ###
- Note: BITS ELEM 32 is actually numerically the Least Significant Bit!! #
IF in x = 2r0 THEN -1 # EXIT # ELSE LBITS x := in x; BITS out bit := BIN 1 SHL (bits width - LWB BIN lbits width), out := BIN 0; WHILE LBITS mask := NOT BIN (ABS (LONG 2r1 SHL ABS out bit) - 1); IF(x AND mask) /= 2r0 THEN x := x SHR ABS out bit; out := out OR out bit FI; out bit := out bit SHR 1; # WHILE # out bit /= 2r0 DO SKIP OD; ABS out # EXIT # FI;
OP UPB = (BITS in x)INT: bits width - RLWB in x;
OP RLWB = (BITS in x)INT:
### 32 bit Find Upper Set Bit using an unrolled loop ###
- Note: BITS ELEM 1 is actually numerically the Most Significant Bit!! #
IF in x = 2r0 THEN 0 # EXIT # ELSE BITS x := in x, out := 2r0; IF(x AND 2r1111111111111111)=2r0 THEN x := x SHR 16; out := out OR 2r10000 FI; IF(x AND 2r11111111) =2r0 THEN x := x SHR 8; out := out OR 2r1000 FI; IF(x AND 2r1111) =2r0 THEN x := x SHR 4; out := out OR 2r100 FI; IF(x AND 2r11) =2r0 THEN x := x SHR 2; out := out OR 2r10 FI; IF(x AND 2r1) =2r0 THEN out := out OR 2r1 FI; ABS out # EXIT # FI;
OP UPB = (LBITS in x)INT: lbits width - RLWB in x;
OP RLWB = (LBITS in x)INT:
### Generalised Find Upper Set Bit using a loop ###
- Note: BITS ELEM 1 is actually numerically the Most Significant Bit!! #
IF in x = 2r0 THEN 0 # EXIT # ELSE LBITS x := in x; BITS out bit := BIN 1 SHL (bits width - LWB BIN lbits width), out := BIN 0; WHILE LBITS mask := BIN (ABS (LONG 2r1 SHL ABS out bit) - 1); IF(x AND mask) = 2r0 THEN x := x SHR ABS out bit; out := out OR out bit FI; out bit := out bit SHR 1; # WHILE # out bit /= 2r0 DO SKIP OD; ABS out # EXIT # FI;</lang>File: test.Find_first_and_last_set_bit.a68<lang algol68>#!/usr/local/bin/a68g --script #
MODE LBITS = LONG BITS; PR READ "Template.Find_first_and_last_set_bit.a68" PR
INT bits of prod; FORMAT header fmt = $g 36k"|RLWB|RUPB|Bits"l$; FORMAT row fmt0 = $g(-35)"|"2(g(-3)" |"),2rd l$; FORMAT row fmt = $g(-35)"|"2(g(-3)" |"),2rn(bits of prod+1)d l$;
test int:(
printf((header fmt, "INT: find first & last set bit")); INT prod := 0; # test case 0 # prod := 0; bits of prod := RUPB BIN prod; printf((row fmt0, prod, RLWB BIN prod, RUPB BIN prod, BIN prod)); prod := 1; # test case 1 etc ... # INT zoom := 2 * 3 * 7; WHILE bits of prod := RUPB BIN prod; printf((row fmt, prod, RLWB BIN prod, RUPB BIN prod, BIN prod));
- WHILE # prod <= max int / zoom DO
prod *:= zoom OD
);
test long int:(
printf(($l$,header fmt, "LONG INT:")); LONG INT prod := 0; # test case 0 # prod := 0; bits of prod := RUPB BIN prod; printf((row fmt0, prod, RLWB BIN prod, RUPB BIN prod, BIN prod)); prod := 1; # test case 1 etc ... # INT zoom := 2 * 3 * 7 * 31; WHILE bits of prod := RUPB BIN prod; printf((row fmt, prod, RLWB BIN prod, RUPB BIN prod, BIN prod));
- WHILE # prod <= long max int / zoom DO
prod *:= zoom OD
)</lang>Output:
INT: find first & last set bit |RLWB|RUPB|Bits 0| 0 | -1 |0 1| 0 | 0 |1 42| 1 | 5 |101010 1764| 2 | 10 |11011100100 74088| 3 | 16 |10010000101101000 3111696| 4 | 21 |1011110111101100010000 130691232| 5 | 26 |111110010100011000010100000 LONG INT: |RLWB|RUPB|Bits 0| 0 | -1 |0 1| 0 | 0 |1 1302| 1 | 10 |10100010110 1695204| 2 | 20 |110011101110111100100 2207155608| 3 | 31 |10000011100011101000010110011000 2873716601616| 4 | 41 |101001110100010110110110110111001100010000 3741579015304032| 5 | 51 |1101010010101111001001000000000110110011001101100000 4871535877925849664| 6 | 62 |100001110011011001011000001001000001010010101110100101001000000 6342739713059456262528| 7 | 72 |1010101111101011100110010001000111100000010010111111100111010000110000000 8258247106403412053811456| 8 | 82 |11011010100110000000111100100000001110101011000010011010001000101110110000100000000 10752237732537242494062515712| 9 | 93 |1000101011111000001010111001110110111101010011111100010111111101101100111001110101011000000000 13999413527763489727269395457024| 10 |103 |10110000101100101000101101110101000100000011010011101110001111100001001111100000100011110110010000000000 18227236413148063624904752885045248| 11 |113 |111000001010101100000100010100010101100000011011010011001110101111101110010001100000011001010001101001100000000000
C
<lang c>#include <stdio.h>
- include <stdint.h>
uint32_t msb32(uint32_t n) { uint32_t b = 1; if (!n) return 0;
- define step(x) if (n >= ((uint32_t)1) << x) b <<= x, n >>= x
step(16); step(8); step(4); step(2); step(1);
- undef step
return b; }
int msb32_idx(uint32_t n) { int b = 0; if (!n) return -1;
- define step(x) if (n >= ((uint32_t)1) << x) b += x, n >>= x
step(16); step(8); step(4); step(2); step(1);
- undef step
return b; }
- define lsb32(n) ( (uint32_t)(n) & -(int32_t)(n) )
/* finding the *position* of the least significant bit
rarely makes sense, so we don't put much effort in it*/
inline int lsb32_idx(uint32_t n) { return msb32_idx(lsb32(n)); }
int main() { int32_t n; int i;
for (i = 0, n = 1; ; i++, n *= 42) { printf("42**%d = %10d(x%08x): M x%08x(%2d) L x%03x(%2d)\n", i, n, n, msb32(n), msb32_idx(n), lsb32(n), lsb32_idx(n));
if (n >= ~(1U << 31) / 42) break; }
return 0; }</lang>output ("x###" are in base 16)
42**0 = 1(x00000001): M x00000001( 0) L x001( 0) 42**1 = 42(x0000002a): M x00000020( 5) L x002( 1) 42**2 = 1764(x000006e4): M x00000400(10) L x004( 2) 42**3 = 74088(x00012168): M x00010000(16) L x008( 3) 42**4 = 3111696(x002f7b10): M x00200000(21) L x010( 4) 42**5 = 130691232(x07ca30a0): M x04000000(26) L x020( 5)
J
Implementation:
<lang j>lwb=: 0: upb=: (#: i: 1:)"0 rlwb=: #@#:"0 - 1: rupb=: rlwb - upb</lang>
Notes:
J's #:
converts integers to bit lists.
This implementation is agnostic to numeric storage format.
Example use:
<lang j> (,.lwb,.upb,.rlwb,.rupb) <.i.@>.&.(42&^.) 2^64
1 0 0 0 0 42 0 4 5 1 1764 0 8 10 2 74088 0 13 16 3 3111696 0 17 21 4 130691232 0 21 26 5 5489031744 0 26 32 6 230539333248 0 30 37 7 9682651996416 0 35 43 8 406671383849472 0 39 48 9 17080198121677824 0 43 53 10
717368321110468608 0 48 59 11
(,.lwb,.upb,.rlwb,.rupb) i.@x:@>.&.(1302&^.) 2^128 1 0 0 0 0 1302 0 9 10 1 1695204 0 18 20 2 2207155608 0 28 31 3 2873716601616 0 37 41 4 3741579015304032 0 46 51 5 4871535877925849664 0 56 62 6 6342739713059456262528 0 65 72 7 8258247106403412053811456 0 74 82 8 10752237732537242494062515712 0 84 93 9 13999413527763489727269395457024 0 93 103 10 18227236413148063624904752885045248 0 102 113 11
23731861809918778839625988256328912896 0 112 124 12</lang>
Java
Library
Notes:
- least significant bit is bit 0 (such that bit i always has value 2i, and the result is independent of integer type width)
- when the integer 0 is given, mssb() and lssb() both return no bits set; mssb_idx() and lssb_idx() will return -1 and the integer type width, respectively
<lang java>public class FirstLastBits {
// most significant set bit public static int mssb(int x) { return Integer.highestOneBit(x); }
public static long mssb(long x) { return Long.highestOneBit(x); }
public static int mssb_idx(int x) { return 31 - Integer.numberOfLeadingZeros(x); }
public static int mssb_idx(long x) { return 63 - Long.numberOfLeadingZeros(x); }
// least significant set bit public static int lssb(int x) { return Integer.lowestOneBit(x); }
public static long lssb(long x) { return Long.lowestOneBit(x); }
public static int lssb_idx(int x) { return Integer.numberOfTrailingZeros(x); }
public static int lssb_idx(long x) { return Long.numberOfTrailingZeros(x); }
public static void main(String[] args) { System.out.println("int:"); int n1 = 1; for (int i = 0; ; i++, n1 *= 42) { System.out.printf("42**%d = %10d(x%08x): M x%08x(%2d) L x%03x(%2d)\n", i, n1, n1, mssb(n1), mssb_idx(n1), lssb(n1), lssb_idx(n1)); if (n1 >= Integer.MAX_VALUE / 42) break; } System.out.println(); System.out.println("long:"); long n2 = 1; for (int i = 0; ; i++, n2 *= 42) { System.out.printf("42**%02d = %20d(x%016x): M x%016x(%2d) L x%06x(%2d)\n", i, n2, n2, mssb(n2), mssb_idx(n2), lssb(n2), lssb_idx(n2)); if (n2 >= Long.MAX_VALUE / 42) break; } }
}</lang> output:
int: 42**0 = 1(x00000001): M x00000001( 0) L x001( 0) 42**1 = 42(x0000002a): M x00000020( 5) L x002( 1) 42**2 = 1764(x000006e4): M x00000400(10) L x004( 2) 42**3 = 74088(x00012168): M x00010000(16) L x008( 3) 42**4 = 3111696(x002f7b10): M x00200000(21) L x010( 4) 42**5 = 130691232(x07ca30a0): M x04000000(26) L x020( 5) long: 42**00 = 1(x0000000000000001): M x0000000000000001( 0) L x000001( 0) 42**01 = 42(x000000000000002a): M x0000000000000020( 5) L x000002( 1) 42**02 = 1764(x00000000000006e4): M x0000000000000400(10) L x000004( 2) 42**03 = 74088(x0000000000012168): M x0000000000010000(16) L x000008( 3) 42**04 = 3111696(x00000000002f7b10): M x0000000000200000(21) L x000010( 4) 42**05 = 130691232(x0000000007ca30a0): M x0000000004000000(26) L x000020( 5) 42**06 = 5489031744(x00000001472bfa40): M x0000000100000000(32) L x000040( 6) 42**07 = 230539333248(x00000035ad370e80): M x0000002000000000(37) L x000080( 7) 42**08 = 9682651996416(x000008ce6b086100): M x0000080000000000(43) L x000100( 8) 42**09 = 406671383849472(x000171dd8f5fea00): M x0001000000000000(48) L x000200( 9) 42**10 = 17080198121677824(x003cae5985bc6400): M x0020000000000000(53) L x000400(10) 42**11 = 717368321110468608(x09f49aaff0e86800): M x0800000000000000(59) L x000800(11)
Tcl
<lang tcl>proc lwb {x} {
if {$x == 0} {return -1} set n 0 while {($x&1) == 0} {
set x [expr {$x >> 1}] incr n
} return $n
} proc upb {x} {
if {$x == 0} {return -1} if {$x < 0} {error "no well-defined max bit for negative numbers"} set n 0 while {$x != 1} {
set x [expr {$x >> 1}] incr n
} return $n
}</lang> Code to use the above: <lang tcl>package require Tcl 8.6; # For convenient bit string printing
proc powsTo {pow bits} {
set result {} for {set n 1} {$n < 2**$bits} {set n [expr {$n * $pow}]} {
lappend result $n
} return $result
} proc printPows {pow pows} {
set len [string length [lindex $pows end]] puts [format "%8s | %*s | LWB | UPB | Bits" "What" $len "Number"] set n 0 foreach p $pows {
puts [format "%4d**%-2d = %*lld | %3d | %3d | %b" \ $pow $n $len $p [lwb $p] [upb $p] $p] incr n
}
}
puts "Powers of 42 up to machine word size:" printPows 42 [powsTo 42 [expr {$tcl_platform(wordSize) * 8}]] puts "Powers of 1302 up to 128 bits" printPows 1302 [powsTo 1302 128]</lang> Output:
Powers of 42 up to machine word size: What | Number | LWB | UPB | Bits 42**0 = 1 | 0 | 0 | 1 42**1 = 42 | 1 | 5 | 101010 42**2 = 1764 | 2 | 10 | 11011100100 42**3 = 74088 | 3 | 16 | 10010000101101000 42**4 = 3111696 | 4 | 21 | 1011110111101100010000 42**5 = 130691232 | 5 | 26 | 111110010100011000010100000 Powers of 1302 up to 128 bits What | Number | LWB | UPB | Bits 1302**0 = 1 | 0 | 0 | 1 1302**1 = 1302 | 1 | 10 | 10100010110 1302**2 = 1695204 | 2 | 20 | 110011101110111100100 1302**3 = 2207155608 | 3 | 31 | 10000011100011101000010110011000 1302**4 = 2873716601616 | 4 | 41 | 101001110100010110110110110111001100010000 1302**5 = 3741579015304032 | 5 | 51 | 1101010010101111001001000000000110110011001101100000 1302**6 = 4871535877925849664 | 6 | 62 | 100001110011011001011000001001000001010010101110100101001000000 1302**7 = 6342739713059456262528 | 7 | 72 | 1010101111101011100110010001000111100000010010111111100111010000110000000 1302**8 = 8258247106403412053811456 | 8 | 82 | 11011010100110000000111100100000001110101011000010011010001000101110110000100000000 1302**9 = 10752237732537242494062515712 | 9 | 93 | 1000101011111000001010111001110110111101010011111100010111111101101100111001110101011000000000 1302**10 = 13999413527763489727269395457024 | 10 | 103 | 10110000101100101000101101110101000100000011010011101110001111100001001111100000100011110110010000000000 1302**11 = 18227236413148063624904752885045248 | 11 | 113 | 111000001010101100000100010100010101100000011011010011001110101111101110010001100000011001010001101001100000000000 1302**12 = 23731861809918778839625988256328912896 | 12 | 124 | 10001110110101001011100011111110101101101100001101011011001001101111110110111011000001001000010001101000010010001000000000000