Legendre prime counting function: Difference between revisions

Line 454:

10^9 50847534

</pre>

=={{header|F#}}==

There doesn't seem to be much point to contributing memoized (Hash table) versions as they seem to be the result of the task author not realizing that there is a simple way of keeping the computation complexity from having exponential growth with counting range by limiting the "splitting" of the "phi" calculation when the result must be just one. Accordingly, the following F# versions are [translated from the non-memoizing Nim versions](https://rosettacode.org/wiki/Legendre_prime_counting_function#Non-Memoized_Versions), which explanations can be referenced for an understanding of how they work:

'''The Basic Recursive Legendre Algorithm'''

<syntaxhighlight lang="fsharp">let countPrimes (lmt: uint64) =

if lmt < 3UL then (if lmt < 2UL then 0L else 1L) else

let sqrtlmt = lmt |> float |> sqrt |> uint64

let mxndx = (sqrtlmt - 3UL) / 2UL |> int

let oprms =

let cb = Array.init (mxndx + 1) <| fun i -> uint32 (i + i + 3)

let rec loopi i =

let sqri = (i + i) * (i + 3) + 3

if sqri > mxndx then () else

if cb.[i] = 0u then loopi (i + 1) else

let bp = i + i + 3

let rec cull c = if c > mxndx then () else cb.[c] <- 0u; cull (c + bp)

cull sqri; loopi (i + 1)

loopi 0; cb |> Array.filter ((<>) 0u)

let rec phi x a =

if a <= 0 then x - (x >>> 1) |> int64 else

let na = a - 1 in let p = uint64 oprms.[na]

if x <= p then 1L else phi x na - phi (x / p) na

phi lmt oprms.Length + int64 oprms.Length

let strt = System.DateTime.Now.Ticks

{ 0 .. 9 } |> Seq.iter (fun i ->

printfn "π(10**%d) = %d" i (countPrimes (uint64(10. ** i))))

let elpsd = (System.DateTime.Now.Ticks - strt) / 10000L

printfn "This took %d milliseconds." elpsd</syntaxhighlight>

<pre>π(10**0) = 0

π(10**1) = 4

π(10**2) = 25

π(10**3) = 168

π(10**4) = 1229

π(10**5) = 9592

π(10**6) = 78498

π(10**7) = 664579

π(10**8) = 5761455

π(10**9) = 50847534

This took 525 milliseconds.</pre>

The above time as run on an Intel i5-6500 (3.6 GHz single-threaded boost) isn't particularly fast but includes some DotNet initialization and JIT compilation overhead, and is about as fast as a highly optimized page-segmented wheel-factorized Sieve of Eratosthenes; Although this is about ten times faster than if it were using memoization (and uses much less memory), it is only reasonably usable for trivial ranges such as this (trivial in terms of prime counting functions).

The Less Recursive "Bottom-Up" with a TinyPhi LUT Algorithm

Just substitute the following F# code for the `countPrimes` function above to enjoy the gain in speed:

<syntaxhighlight lang="fsharp">let TinyPhiPrimes = [| 2; 3; 5; 7; 11; 13 |]

let TinyPhiDeg = TinyPhiPrimes.Length - 1

let TinyPhiOddCirc = (TinyPhiPrimes |> Seq.reduce (*)) / 2

let TinyPhiTot = TinyPhiPrimes |> Seq.fold (fun s p -> s * (p - 1)) 1

let TinyPhiLUT =

let cb = Array.init TinyPhiOddCirc (fun i -> 1u)

TinyPhiPrimes |> Seq.skip 1

|> Seq.iter (fun bp ->

cb.[(bp - 1) >>> 1] <- 0u

{ (bp * bp - 1) >>> 1 .. bp .. TinyPhiOddCirc - 1}

|> Seq.iter (fun c -> cb.[c] <- 0u) )

let rec loopi i acc =

if i >= TinyPhiOddCirc then () else

let nacc = acc + cb[i] in cb.[i] <- nacc; loopi (i + 1) nacc

loopi 0 0u; cb

let tinyPhi (x: uint64): int64 =

let ndx = (x - 1UL) >>> 1 |> int64

let numtots = ndx / int64 TinyPhiOddCirc

let li = ndx - numtots * int64 TinyPhiOddCirc |> int

numtots * int64 TinyPhiTot + int64 TinyPhiLUT.[li]

let countPrimes (lmt: uint64) =

if lmt < 169UL then // below 169 whose sqrt is 13 is where TinyPhi doesn't work...

( if lmt < 2UL then 0L else

if lmt < 3UL then 1L else

// adjust for the missing "degree" base primes

if lmt < 9UL then 1L + int64 (lmt - 1UL) / 2L else

if lmt <= 13UL then int64 (lmt - 1UL) / 2L else

5L + int64 TinyPhiLUT.[int (lmt - 1UL) / 2]) else

let sqrtlmt = lmt |> float |> sqrt |> uint64

let mxndx = (sqrtlmt - 3UL) / 2UL |> int

let oprms =

let cb = Array.init (mxndx + 1) <| fun i -> uint32 (i + i + 3)

let rec loopi i =

let sqri = (i + i) * (i + 3) + 3

if sqri > mxndx then () else

if cb.[i] = 0u then loopi (i + 1) else

let bp = i + i + 3

let rec cull c = if c > mxndx then () else cb.[c] <- 0u; cull (c + bp)

cull sqri; loopi (i + 1)

loopi 0; cb |> Array.filter ((<>) 0u)

let rec lvl pilmt m =

let rec looppi pi acc =

if pi >= pilmt then acc else

let p = uint64 oprms.[pi] in let nm = p * m

if lmt <= nm * p then acc + int64 (pilmt - pi) else

let nacc = if pi <= TinyPhiDeg then acc else acc - lvl pi nm

looppi (pi + 1) (nacc + tinyPhi (lmt / nm))

looppi TinyPhiDeg 0L

tinyPhi lmt - lvl oprms.Length 1UL + int64 oprms.Length</syntaxhighlight>

Use of the above function gets a gain in speed of about a further ten times for far larger ranges than this over the above version due to less recursion, the use of the "Tiny_Phi" Look Up Table (LUT) for smaller "degrees" of primes which greatly reduces the number of integer divisions. This version of prime counting function might be considered to be reasonably useful for counting primes up to a trillion (1e12) in a few tens of seconds.

'''The Non-Recursive Partial Sieving Algorithm'''

Just substitute the following F# code for the `countPrimes` function above to enjoy the further gain in speed; note that rather than using a recursive function loop for the partial sieving culling pass as per the above two version, this version uses a Seq iteration which is slower but more concise in expression, but as sieving is a negligible part of the total execution time, it doesn't matter and conciseness was considered to be more important:

<syntaxhighlight lang="fsharp">let masks = Array.init 8 ((<<<) 1uy) // quick bit twiddling

let countPrimes (lmt: uint64): int64 =

if lmt < 3UL then (if lmt < 2UL then 0L else 1L) else

let inline half x = (x - 1) >>> 1

let inline divide nm d = (float nm) / (float d) |> int

let sqrtlmt = lmt |> float |> sqrt |> uint64

let mxndx = (sqrtlmt - 1UL) / 2UL |> int in let cbsz = (mxndx + 8) / 8

let cullbuf = Array.zeroCreate cbsz

let smalls = Array.init (mxndx + 1) uint32

let roughs = Array.init (mxndx + 1) <| fun i -> uint32 (i + i + 1)

let larges = Array.init (mxndx + 1) <| fun i ->

int64 (lmt / uint64 (i + i + 1) - 1UL) / 2L

let rec loopbp bp nobps rilmt =

let i = int (bp - 1UL) >>> 1 in let sqri = (i + i) * (i + 1)

if sqri > mxndx then nobps, rilmt else

if (cullbuf.[i >>> 3] &&& masks.[i &&& 7]) <> 0uy then

loopbp (bp + 2UL) nobps rilmt else

let w = i >>> 3 in cullbuf.[w] <- cullbuf.[w] ||| masks.[i &&& 7] // cull bp

{ sqri .. int bp .. mxndx } |> Seq.iter (fun c -> // cull multiples of bp...

let w = c >>> 3 in cullbuf.[w] <- cullbuf.[w] ||| masks.[c &&& 7] )

// adjust `larges for last partial loop pass;

// compress larges/roughs for current partial sieve pass...

let rec loopri iri ori =

if iri > rilmt then ori - 1 else

let r = uint64 roughs.[iri] in let sri = int (r >>> 1)

if (cullbuf.[sri >>> 3] &&& masks.[sri &&& 7]) <> 0uy then

loopri (iri + 1) ori else // skip for roughs culled this pass!

let d = bp * r

larges.[ori] <- larges.[iri] -

( if d <= sqrtlmt then

larges.[int smalls.[int (d >>> 1)] - nobps]

else let ndx = (half << divide lmt) d

int64 smalls.[ndx] ) + int64 nobps

roughs.[ori] <- uint32 r; loopri (iri + 1) (ori + 1)

// adjust `smalls` for last partial loop pass...

let rec loopbpm bpm mxsi =

if bpm < bp then () else

let c = smalls.[int (bpm >>> 1)] - uint32 nobps

let ei = (bpm * bp) >>> 1 |> int

let rec loopsi si =

if si < ei then si else smalls.[si] <- smalls.[si] - c; loopsi (si - 1)

loopbpm (bpm - 2UL) (loopsi mxsi)

let nrilmt = loopri 0 0

loopbpm ((sqrtlmt / bp - 1UL) ||| 1UL) mxndx

loopbp (bp + 2UL) (nobps + 1) nrilmt

// accumulate result so far; compensate for over subtraction...

let numobps, mxri = loopbp 3UL 0 mxndx

let rec smr i acc =

if i > mxri then // adjust accumulated answer!

acc + (int64 mxri + 1L + 2L * int64 (numobps - 1)) * int64 mxri / 2L

else smr (i + 1) (acc - larges.[i])

let ans0 = smr 1 larges.[0]

// finally, add result from pairs of rough primes up to cube root of range,

// where they are two different primes; compensating for over addition...

let rec loopri ri acc =

let p = uint64 roughs.[ri] in let q = lmt / p

let ei = int smalls.[(half << divide q) p] - numobps

if ei <= ri then acc else

let rec loopori ori oacc =

if ori > ei then oacc else

let ndx = (half << divide q) (uint64 roughs.[ori])

loopori (ori + 1) (oacc + int64 smalls.[ndx])

let nacc = loopori (ri + 1) acc // subtract over addition of base primes:

loopri (ri + 1) (nacc - int64 (ei - ri) * (int64 numobps + int64 ri - 1L))

loopri 1 ans0 + 1L // add one for only even prime of two!</syntaxhighlight>

The above function enjoys quite a large gain in speed of about a further ten times over the immediately preceding version (although one can't see that gain for these trivial ranges as it is buried in the constant overheads) due to not using recursion at all and the greatly reduced computational complexity of O(n**(3/4)/((log n)**2)) instead of the almost-linear-for-large-counting-ranges O(n/((log n)**2)) for the previous two versions, although the previous two versions differ in performance by a constant factor due to the overheads of recursion and division. This version of prime counting function might be considered to be reasonably useful for counting primes up to a 1e16 in about a hundred seconds as long as the computer used has the required free memory of about 800 Megabytes. This F# version is about twice as slow as the Nim version from which it was translated due to the benefits of native code compilation and more optimizations rather than JIT compilation for F#. At that, this version is about the same speed as when translated to Rust and Crystal once the range is increased where constant overheads aren't a factor, which both are native code compilers.

=={{header|Go}}==