Posit numbers/decoding: Difference between revisions
Content added Content deleted
m (Grondilu moved page Posit numbers to Posit numbers/decoding) |
(→{{header|Wren}}: Made decode function more general.) |
||
Line 70: | Line 70: | ||
=={{header|Wren}}== |
=={{header|Wren}}== |
||
{{libheader|Wren-fmt}} |
{{libheader|Wren-fmt}} |
||
{{libheader|Wren- |
{{libheader|Wren-big}} |
||
<syntaxhighlight lang="ecmascript">import "./fmt" for Conv |
<syntaxhighlight lang="ecmascript">import "./fmt" for Conv |
||
import "./ |
import "./big" for BigRat |
||
var posit16_decode = Fn.new { |ps| |
var posit16_decode = Fn.new { |ps, maxExpSize| |
||
var p = ps.map { |c| c == "0" ? 0 : 1 }.toList |
var p = ps.map { |c| c == "0" ? 0 : 1 }.toList |
||
Line 94: | Line 94: | ||
} |
} |
||
} |
} |
||
var first = p[1] |
var first = p[1] |
||
var rs = |
var rs = 15 // regime size |
||
for (i in 2..15) { |
for (i in 2..15) { |
||
if (p[i] != first) { |
if (p[i] != first) { |
||
Line 104: | Line 103: | ||
} |
} |
||
var regime = p[1..rs] |
var regime = p[1..rs] |
||
var es = |
var es = (rs == 15) ? 0 : maxExpSize.min(14-rs) // actual exponent size |
||
var exponent = [0] |
var exponent = [0] |
||
if (es > 0) exponent = p[rs + 2...rs + 2 + es] |
if (es > 0) exponent = p[rs + 2...rs + 2 + es] |
||
var fs = 14 - rs - es // function size |
var fs = (es == 0) ? 0 : 14 - rs - es // function size |
||
var s = (p[0] == 0) ? 1 : -1 // sign |
var s = (p[0] == 0) ? 1 : -1 // sign |
||
var k = regime.all { |i| i == 0 } ? -rs : rs - 1 |
var k = regime.all { |i| i == 0 } ? -rs : rs - 1 |
||
var u = 2.pow(2.pow( |
var u = 2.pow(2.pow(maxExpSize)) |
||
var e = Conv.atoi(exponent.join(""), 2) |
var e = Conv.atoi(exponent.join(""), 2) |
||
var f = |
var f = BigRat.one |
||
if (fs > 0) { |
if (fs > 0) { |
||
var fraction = ps[-fs..-1] |
var fraction = ps[-fs..-1] |
||
f = Conv.atoi(fraction.join(""), 2) |
f = Conv.atoi(fraction.join(""), 2) |
||
f = |
f = BigRat.one + BigRat.new(f, 2.pow(fs)) |
||
} |
} |
||
return f * |
return f * BigRat.new(u, 1).pow(k) * s * 2.pow(e) |
||
} |
} |
||
var ps = "0000110111011101" |
var ps = "0000110111011101" |
||
System.print(posit16_decode.call(ps))</syntaxhighlight> |
System.print(posit16_decode.call(ps, 3))</syntaxhighlight> |
||
{{out}} |
{{out}} |
Revision as of 17:52, 18 September 2023
Posit numbers/decoding is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Posit is a quantization of the real projective line proposed by John Gustafson in 2015. It is claimed to be an improvement over IEEE 754.
The purpose of this task is to write a program capable of decoding a posit number. You will use the example provided by Gustafson in his paper : 0b0000110111011101, representing a 16-bit long real number with three bits for the exponent. Once decoded, you should obtain either the fraction 477/134217728 or the floating point value 3.55393E−6.
Jeff Johnson from Facebook research, described posit numbers as such:
- A more efficient representation for tapered floating points is the recent posit format by Gustafson. It has no explicit size field; the exponent is encoded using a Golomb-Rice prefix-free code, with the exponent encoded as a Golomb-Rice quotient and remainder with in unary and in binary (in posit terminology, is the regime). Remainder encoding size is defined by the exponent scale , where is the Golomb-Rice divisor. Any space not used by the exponent encoding is used by the significand, which unlike IEEE 754 always has a leading 1; gradual underflow (and overflow) is handled by tapering. A posit number system is characterized by , where is the word length in bits and is the exponent scale. The minimum and maximum positive finite numbers in are Failed to parse (syntax error): {\displaystyle f_\mathrm{min} = 2^{−(N−2)2^s}} and Failed to parse (SVG (MathML can be enabled via browser plugin): Invalid response ("Math extension cannot connect to Restbase.") from server "https://wikimedia.org/api/rest_v1/":): {\displaystyle f_\mathrm{max} = 2^{(N−2)2^s}} . The number line is represented much as the projective reals, with a single point at bounding Failed to parse (syntax error): {\displaystyle −f_\mathrm{max}} and . and 0 have special encodings; there is no NaN. The number system allows any choice of and Failed to parse (syntax error): {\displaystyle 0\le s\le N − 3} .
- controls the dynamic range achievable; e.g., 8-bit (8, 5)-posit is larger than in float32. (8, 0) and (8, 1) are more reasonable values to choose for 8-bit floating point representations, with of 64 and 4096 accordingly. Precision is maximized in the range Failed to parse (syntax error): {\displaystyle \pm\left[2^{−(s+1)}, 2^{s+1}\right)} with Failed to parse (SVG (MathML can be enabled via browser plugin): Invalid response ("Math extension cannot connect to Restbase.") from server "https://wikimedia.org/api/rest_v1/":): {\displaystyle N − 3 − s} significand fraction bits, tapering to no fraction bits at .
- — Jeff Johnson, Rethinking floating point for deep learning, Facebook research.
raku
unit role Posit[UInt $N, UInt $es];
has UInt $.UInt;
method sign { self.UInt > 2**($N - 1) ?? -1 !! +1 }
method FatRat {
return 0 if self.UInt == 0;
my UInt $mask = 2**($N - 1);
return Inf if self.UInt == $mask;
my UInt $n = self.UInt;
my $sign = $n +& $mask ?? -1 !! +1;
my $r = $sign;
$n = ((2**$n - 1) +^ $n) + 1 if self.sign < 0;
my int $count = 0;
$mask +>= 1;
my Bool $first-bit = ?($n +& $mask);
repeat { $count++; $mask +>= 1;
} while ?($n +& $mask) == $first-bit && $mask;
my $m = $count;
my $k = $first-bit ?? $m - 1 !! -$m;
$r *= 2**($k*2**$es);
return $r unless $mask > 1;
$mask +>= 1;
$count = 0;
my UInt $exponent = 0;
while $mask && $count++ < $es {
$exponent +<= 1;
$exponent +|= 1 if $n +& $mask;
$mask +>= 1;
}
$r *= 2**$exponent;
my $fraction = 1.FatRat;
while $mask {
(state $power-of-two = 1) +<= 1;
$fraction += 1/$power-of-two if $n +& $mask;
$mask +>= 1;
}
$r *= $fraction;
return $r;
}
CHECK {
use Test;
# example from L<http://www.johngustafson.net/pdfs/BeatingFloatingPoint.pdf>
is Posit[16, 3]
.new(UInt => 0b0000110111011101)
.FatRat, 477.FatRat/134217728;
}
- Output:
ok 1 -
Wren
import "./fmt" for Conv
import "./big" for BigRat
var posit16_decode = Fn.new { |ps, maxExpSize|
var p = ps.map { |c| c == "0" ? 0 : 1 }.toList
// Deal with exceptional values.
if (p[1..-1].all { |i| i == 0 }) {
return (p[0] == 0) ? 0 : Conv.infinity
}
// Convert bits after sign bit to two's complement if negative.
if (p[0] == 1) {
for (i in 1..15) p[i] = (p[i] == 0) ? 1 : 0
for (i in 15..1) {
if (p[i] == 1) {
p[i] = 0
} else {
p[i] = 1
break
}
}
}
var first = p[1]
var rs = 15 // regime size
for (i in 2..15) {
if (p[i] != first) {
rs = i - 1
break
}
}
var regime = p[1..rs]
var es = (rs == 15) ? 0 : maxExpSize.min(14-rs) // actual exponent size
var exponent = [0]
if (es > 0) exponent = p[rs + 2...rs + 2 + es]
var fs = (es == 0) ? 0 : 14 - rs - es // function size
var s = (p[0] == 0) ? 1 : -1 // sign
var k = regime.all { |i| i == 0 } ? -rs : rs - 1
var u = 2.pow(2.pow(maxExpSize))
var e = Conv.atoi(exponent.join(""), 2)
var f = BigRat.one
if (fs > 0) {
var fraction = ps[-fs..-1]
f = Conv.atoi(fraction.join(""), 2)
f = BigRat.one + BigRat.new(f, 2.pow(fs))
}
return f * BigRat.new(u, 1).pow(k) * s * 2.pow(e)
}
var ps = "0000110111011101"
System.print(posit16_decode.call(ps, 3))
- Output:
477/134217728