Legendre prime counting function: Difference between revisions

→‎{{header|Go}}: add F# non-memoized versions above
(→‎{{header|Wren}}: Added a third much faster version.)
(→‎{{header|Go}}: add F# non-memoized versions above)
Line 454:
10^9 50847534
</pre>
 
=={{header|F#}}==
 
There doesn't seem to be much point to contributing memoized (Hash table) versions as they seem to be the result of the task author not realizing that there is a simple way of keeping the computation complexity from having exponential growth with counting range by limiting the "splitting" of the "phi" calculation when the result must be just one. Accordingly, the following F# versions are [translated from the non-memoizing Nim versions](https://rosettacode.org/wiki/Legendre_prime_counting_function#Non-Memoized_Versions), which explanations can be referenced for an understanding of how they work:
 
'''The Basic Recursive Legendre Algorithm'''
{{trans|Nim}}
<syntaxhighlight lang="fsharp">let countPrimes (lmt: uint64) =
if lmt < 3UL then (if lmt < 2UL then 0L else 1L) else
let sqrtlmt = lmt |> float |> sqrt |> uint64
let mxndx = (sqrtlmt - 3UL) / 2UL |> int
let oprms =
let cb = Array.init (mxndx + 1) <| fun i -> uint32 (i + i + 3)
let rec loopi i =
let sqri = (i + i) * (i + 3) + 3
if sqri > mxndx then () else
if cb.[i] = 0u then loopi (i + 1) else
let bp = i + i + 3
let rec cull c = if c > mxndx then () else cb.[c] <- 0u; cull (c + bp)
cull sqri; loopi (i + 1)
loopi 0; cb |> Array.filter ((<>) 0u)
let rec phi x a =
if a <= 0 then x - (x >>> 1) |> int64 else
let na = a - 1 in let p = uint64 oprms.[na]
if x <= p then 1L else phi x na - phi (x / p) na
phi lmt oprms.Length + int64 oprms.Length
 
let strt = System.DateTime.Now.Ticks
 
{ 0 .. 9 } |> Seq.iter (fun i ->
printfn "π(10**%d) = %d" i (countPrimes (uint64(10. ** i))))
 
let elpsd = (System.DateTime.Now.Ticks - strt) / 10000L
printfn "This took %d milliseconds." elpsd</syntaxhighlight>
{{out}}
<pre>π(10**0) = 0
π(10**1) = 4
π(10**2) = 25
π(10**3) = 168
π(10**4) = 1229
π(10**5) = 9592
π(10**6) = 78498
π(10**7) = 664579
π(10**8) = 5761455
π(10**9) = 50847534
This took 525 milliseconds.</pre>
 
The above time as run on an Intel i5-6500 (3.6 GHz single-threaded boost) isn't particularly fast but includes some DotNet initialization and JIT compilation overhead, and is about as fast as a highly optimized page-segmented wheel-factorized Sieve of Eratosthenes; Although this is about ten times faster than if it were using memoization (and uses much less memory), it is only reasonably usable for trivial ranges such as this (trivial in terms of prime counting functions).
 
The Less Recursive "Bottom-Up" with a TinyPhi LUT Algorithm
 
Just substitute the following F# code for the `countPrimes` function above to enjoy the gain in speed:
{{trans|Nim}}
<syntaxhighlight lang="fsharp">let TinyPhiPrimes = [| 2; 3; 5; 7; 11; 13 |]
let TinyPhiDeg = TinyPhiPrimes.Length - 1
let TinyPhiOddCirc = (TinyPhiPrimes |> Seq.reduce (*)) / 2
let TinyPhiTot = TinyPhiPrimes |> Seq.fold (fun s p -> s * (p - 1)) 1
let TinyPhiLUT =
let cb = Array.init TinyPhiOddCirc (fun i -> 1u)
TinyPhiPrimes |> Seq.skip 1
|> Seq.iter (fun bp ->
cb.[(bp - 1) >>> 1] <- 0u
{ (bp * bp - 1) >>> 1 .. bp .. TinyPhiOddCirc - 1}
|> Seq.iter (fun c -> cb.[c] <- 0u) )
let rec loopi i acc =
if i >= TinyPhiOddCirc then () else
let nacc = acc + cb[i] in cb.[i] <- nacc; loopi (i + 1) nacc
loopi 0 0u; cb
let tinyPhi (x: uint64): int64 =
let ndx = (x - 1UL) >>> 1 |> int64
let numtots = ndx / int64 TinyPhiOddCirc
let li = ndx - numtots * int64 TinyPhiOddCirc |> int
numtots * int64 TinyPhiTot + int64 TinyPhiLUT.[li]
 
let countPrimes (lmt: uint64) =
if lmt < 169UL then // below 169 whose sqrt is 13 is where TinyPhi doesn't work...
( if lmt < 2UL then 0L else
if lmt < 3UL then 1L else
// adjust for the missing "degree" base primes
if lmt < 9UL then 1L + int64 (lmt - 1UL) / 2L else
if lmt <= 13UL then int64 (lmt - 1UL) / 2L else
5L + int64 TinyPhiLUT.[int (lmt - 1UL) / 2]) else
let sqrtlmt = lmt |> float |> sqrt |> uint64
let mxndx = (sqrtlmt - 3UL) / 2UL |> int
let oprms =
let cb = Array.init (mxndx + 1) <| fun i -> uint32 (i + i + 3)
let rec loopi i =
let sqri = (i + i) * (i + 3) + 3
if sqri > mxndx then () else
if cb.[i] = 0u then loopi (i + 1) else
let bp = i + i + 3
let rec cull c = if c > mxndx then () else cb.[c] <- 0u; cull (c + bp)
cull sqri; loopi (i + 1)
loopi 0; cb |> Array.filter ((<>) 0u)
let rec lvl pilmt m =
let rec looppi pi acc =
if pi >= pilmt then acc else
let p = uint64 oprms.[pi] in let nm = p * m
if lmt <= nm * p then acc + int64 (pilmt - pi) else
let nacc = if pi <= TinyPhiDeg then acc else acc - lvl pi nm
looppi (pi + 1) (nacc + tinyPhi (lmt / nm))
looppi TinyPhiDeg 0L
tinyPhi lmt - lvl oprms.Length 1UL + int64 oprms.Length</syntaxhighlight>
 
Use of the above function gets a gain in speed of about a further ten times for far larger ranges than this over the above version due to less recursion, the use of the "Tiny_Phi" Look Up Table (LUT) for smaller "degrees" of primes which greatly reduces the number of integer divisions. This version of prime counting function might be considered to be reasonably useful for counting primes up to a trillion (1e12) in a few tens of seconds.
 
'''The Non-Recursive Partial Sieving Algorithm'''
 
Just substitute the following F# code for the `countPrimes` function above to enjoy the further gain in speed; note that rather than using a recursive function loop for the partial sieving culling pass as per the above two version, this version uses a Seq iteration which is slower but more concise in expression, but as sieving is a negligible part of the total execution time, it doesn't matter and conciseness was considered to be more important:
{{trans|Nim}}
<syntaxhighlight lang="fsharp">let masks = Array.init 8 ((<<<) 1uy) // quick bit twiddling
 
let countPrimes (lmt: uint64): int64 =
if lmt < 3UL then (if lmt < 2UL then 0L else 1L) else
let inline half x = (x - 1) >>> 1
let inline divide nm d = (float nm) / (float d) |> int
let sqrtlmt = lmt |> float |> sqrt |> uint64
let mxndx = (sqrtlmt - 1UL) / 2UL |> int in let cbsz = (mxndx + 8) / 8
let cullbuf = Array.zeroCreate cbsz
let smalls = Array.init (mxndx + 1) uint32
let roughs = Array.init (mxndx + 1) <| fun i -> uint32 (i + i + 1)
let larges = Array.init (mxndx + 1) <| fun i ->
int64 (lmt / uint64 (i + i + 1) - 1UL) / 2L
 
let rec loopbp bp nobps rilmt =
let i = int (bp - 1UL) >>> 1 in let sqri = (i + i) * (i + 1)
if sqri > mxndx then nobps, rilmt else
if (cullbuf.[i >>> 3] &&& masks.[i &&& 7]) <> 0uy then
loopbp (bp + 2UL) nobps rilmt else
let w = i >>> 3 in cullbuf.[w] <- cullbuf.[w] ||| masks.[i &&& 7] // cull bp
{ sqri .. int bp .. mxndx } |> Seq.iter (fun c -> // cull multiples of bp...
let w = c >>> 3 in cullbuf.[w] <- cullbuf.[w] ||| masks.[c &&& 7] )
 
// adjust `larges for last partial loop pass;
// compress larges/roughs for current partial sieve pass...
let rec loopri iri ori =
if iri > rilmt then ori - 1 else
let r = uint64 roughs.[iri] in let sri = int (r >>> 1)
if (cullbuf.[sri >>> 3] &&& masks.[sri &&& 7]) <> 0uy then
loopri (iri + 1) ori else // skip for roughs culled this pass!
let d = bp * r
larges.[ori] <- larges.[iri] -
( if d <= sqrtlmt then
larges.[int smalls.[int (d >>> 1)] - nobps]
else let ndx = (half << divide lmt) d
int64 smalls.[ndx] ) + int64 nobps
roughs.[ori] <- uint32 r; loopri (iri + 1) (ori + 1)
 
// adjust `smalls` for last partial loop pass...
let rec loopbpm bpm mxsi =
if bpm < bp then () else
let c = smalls.[int (bpm >>> 1)] - uint32 nobps
let ei = (bpm * bp) >>> 1 |> int
let rec loopsi si =
if si < ei then si else smalls.[si] <- smalls.[si] - c; loopsi (si - 1)
loopbpm (bpm - 2UL) (loopsi mxsi)
 
let nrilmt = loopri 0 0
loopbpm ((sqrtlmt / bp - 1UL) ||| 1UL) mxndx
loopbp (bp + 2UL) (nobps + 1) nrilmt
 
// accumulate result so far; compensate for over subtraction...
let numobps, mxri = loopbp 3UL 0 mxndx
let rec smr i acc =
if i > mxri then // adjust accumulated answer!
acc + (int64 mxri + 1L + 2L * int64 (numobps - 1)) * int64 mxri / 2L
else smr (i + 1) (acc - larges.[i])
let ans0 = smr 1 larges.[0]
 
// finally, add result from pairs of rough primes up to cube root of range,
// where they are two different primes; compensating for over addition...
let rec loopri ri acc =
let p = uint64 roughs.[ri] in let q = lmt / p
let ei = int smalls.[(half << divide q) p] - numobps
if ei <= ri then acc else
let rec loopori ori oacc =
if ori > ei then oacc else
let ndx = (half << divide q) (uint64 roughs.[ori])
loopori (ori + 1) (oacc + int64 smalls.[ndx])
let nacc = loopori (ri + 1) acc // subtract over addition of base primes:
loopri (ri + 1) (nacc - int64 (ei - ri) * (int64 numobps + int64 ri - 1L))
 
loopri 1 ans0 + 1L // add one for only even prime of two!</syntaxhighlight>
 
The above function enjoys quite a large gain in speed of about a further ten times over the immediately preceding version (although one can't see that gain for these trivial ranges as it is buried in the constant overheads) due to not using recursion at all and the greatly reduced computational complexity of O(n**(3/4)/((log n)**2)) instead of the almost-linear-for-large-counting-ranges O(n/((log n)**2)) for the previous two versions, although the previous two versions differ in performance by a constant factor due to the overheads of recursion and division. This version of prime counting function might be considered to be reasonably useful for counting primes up to a 1e16 in about a hundred seconds as long as the computer used has the required free memory of about 800 Megabytes. This F# version is about twice as slow as the Nim version from which it was translated due to the benefits of native code compilation and more optimizations rather than JIT compilation for F#. At that, this version is about the same speed as when translated to Rust and Crystal once the range is increased where constant overheads aren't a factor, which both are native code compilers.
 
=={{header|Go}}==
474

edits