Posit numbers/decoding: Difference between revisions

Content added Content deleted
(→‎{{header|Wren}}: Made decode function more general.)
(julia example)
Line 10: Line 10:
:<math>s</math> controls the dynamic range achievable; e.g., 8-bit (8, 5)-posit <math>f_\mathrm{max} = 2^{192}</math> is larger than <math>f_\mathrm{max}</math> in <tt>float32</tt>. (8, 0) and (8, 1) are more reasonable values to choose for 8-bit floating point representations, with <math>f_\mathrm{max}</math> of 64 and 4096 accordingly. Precision is maximized in the range <math>\pm\left[2^{−(s+1)}, 2^{s+1}\right)</math> with <math>N − 3 − s</math> significand fraction bits, tapering to no fraction bits at <math>\pm f_\mathrm{max}</math>.
:<math>s</math> controls the dynamic range achievable; e.g., 8-bit (8, 5)-posit <math>f_\mathrm{max} = 2^{192}</math> is larger than <math>f_\mathrm{max}</math> in <tt>float32</tt>. (8, 0) and (8, 1) are more reasonable values to choose for 8-bit floating point representations, with <math>f_\mathrm{max}</math> of 64 and 4096 accordingly. Precision is maximized in the range <math>\pm\left[2^{−(s+1)}, 2^{s+1}\right)</math> with <math>N − 3 − s</math> significand fraction bits, tapering to no fraction bits at <math>\pm f_\mathrm{max}</math>.
:— Jeff Johnson, ''[https://arxiv.org/abs/1811.01721 Rethinking floating point for deep learning]'', Facebook research.
:— Jeff Johnson, ''[https://arxiv.org/abs/1811.01721 Rethinking floating point for deep learning]'', Facebook research.



=={{header|Julia}}==
<syntaxhighlight lang="julia">"""
    PositType3(numbits, es, bits)

Raw description of a posit: `numbits` is the width of the format in bits, `es` is the
size of its exponent field in bits, and `bits` is the integer holding the bit pattern.
The type parameter `T` is taken from the type of `bits`; the two sizes are stored as
`UInt16`.
"""
struct PositType3{T<:Integer}
    numbits::UInt16  # width of the posit format, in bits
    es::UInt16       # maximum number of exponent bits
    bits::T          # the bit pattern itself

    # Only constructor: converts both sizes to UInt16 and keeps `bits` as given.
    function PositType3(nb, ne, i::T) where {T}
        return new{T}(UInt16(nb), UInt16(ne), i)
    end
end

"""
    Rational(p::PositType3)

Decode the posit bit pattern in `p` into an exact `Rational{Int}`.

The pattern is taken from the low `p.numbits` bits of `p.bits` (bit `numbits - 1` is the
sign) and is read as sign | regime | exponent | fraction. The value follows the formula
from posithub.org/docs/Posits4.pdf

    x = ((1 - 3s) + f) * 2^((1 - 2s) * (2^es * r + e + s))

where `s` is the sign bit, `r` the regime value (`-k` for a run of `k` zeros, `k - 1`
for a run of `k` ones), `e` the exponent field (bits cut off by the end of the word
count as 0) and `f` the fraction in `[0, 1)`.

Special patterns: all bits zero gives `0 // 1`; sign bit set with every other bit zero
(NaR) gives `1 // 0`, because `Rational` has no not-a-number value.

# Throws
- `ArgumentError` if `p.numbits < 2`.
- `OverflowError` if a power of two needed for the result does not fit in `Int`.
"""
function Base.Rational(p::PositType3)
    n, es = Int(p.numbits), Int(p.es)
    n >= 2 || throw(ArgumentError("a posit needs at least 2 bits, got numbits = $n"))

    # Bit i of the pattern, i = 0 being the least significant bit. Reading single bits
    # (rather than using `signbit`, which is always false for unsigned integers, or `>>`
    # on the whole word, which sign-extends signed integers) works for any storage type.
    bit(i) = isodd(p.bits >> i)

    # 2^k as an Int; refuses shifts that would silently wrap around.
    function pow2(k)
        k <= 8 * sizeof(Int) - 2 || throw(OverflowError("2^$k does not fit in $Int"))
        return 1 << k
    end

    # Big-endian value of the bits at the given (descending) indices; 0 if there are none.
    field(idxs) = foldl((acc, i) -> 2acc + bit(i), idxs; init=0)

    s = bit(n - 1)
    if !any(bit, 0:(n - 2))
        return s ? 1 // 0 : 0 // 1          # NaR if the sign bit is set, otherwise zero
    end

    # Regime: a run of identical bits right below the sign bit, ended by the opposite
    # bit or by the end of the word.
    r0 = bit(n - 2)
    k = 1
    while k < n - 1 && bit(n - 2 - k) == r0
        k += 1
    end
    r = r0 ? k - 1 : -k

    # Whatever is left after sign, regime run and terminating bit is shared by the
    # exponent (first, at most `es` bits) and the fraction (the rest).
    remaining = max(n - 2 - k, 0)
    ebits = min(es, remaining)
    fbits = remaining - ebits

    regimeunit = pow2(es)                    # also validates `es` before it is used below
    e = field((remaining - 1):-1:fbits) * pow2(es - ebits)  # missing low bits are zeros
    f = field((fbits - 1):-1:0) // pow2(fbits)

    scale = Base.checked_add(Base.checked_mul(regimeunit, r), e + s)
    pw = s ? -scale : scale
    significand = (1 - 3s) + f
    return pw >= 0 ? significand * pow2(pw) : significand // pow2(-pw)
end

# Sample check: the 16-bit pattern 0x0ddd with es = 3 should decode to 477 / 2^27.
@show Rational(PositType3(16, 3, 0b0000110111011101)) == 477 // 134217728
</syntaxhighlight>{{out}} <pre> Rational(PositType3(16, 3, 0x0ddd)) == 477 // 134217728 = true</pre>