Price list behind API
There is a list of around 100_000 prices in the range £0 to £100_000, expressed
in whole £, (no pence); and prices may be duplicated.
The API allows access to the maximum item price via
function get_max_price()
; and the number of items equal-to and between two
given price points via function get_prange_count(pricemin, pricemax)
.
Assume that for the purposes of testing, you have access to the actual number of priced items to split.
- Task
- Write functions to randomly generate around 100K prices and provide the
get_prange_count
andget_max_price
API calls. - Write functions to provide non-overlapping min and max price ranges that provide product counts where most are close to, but no more than, 5_000.
- Ensure that all priced items are covered by all the ranges of prices shown
- Show ascending price ranges and the number of items covered by each range.
- Show output from a sample run here.
11l
V price_list_size = random:(99'000 .< 101'000)
V price_list = (0 .< price_list_size).map(i -> random:(100'000))
V delta_price = 1
F get_prange_count(startp, endp)
R :price_list.filter(r -> Float(r) C @startp .. @endp).len
F get_max_price()
R max(:price_list)
F get_5k(Float mn, Float =mx; num = 5'000)
‘Binary search for num items between mn and mx, adjusting mx’
V count = get_prange_count(mn, mx)
V delta_mx = (mx - mn) / 2
L count != num & delta_mx >= :delta_price / 2
mx += I count > num {-delta_mx} E +delta_mx
mx = Float(mx I/ 1)
(count, delta_mx) = (get_prange_count(mn, mx), delta_mx / 2)
R (mx, count)
F get_all_5k(mn = 0.0, Float mx = get_max_price(); num = 5'000)
‘Get all non-overlapping ranges’
V (partmax, partcount) = get_5k(mn, mx, num)
V result = [(mn, partmax, partcount)]
L partmax < mx
V partmin = partmax + :delta_price
(partmax, partcount) = get_5k(partmin, mx, num)
assert(partcount > 0, ‘price_list from ’partmin‘ with too many of the same price’)
result.append((partmin, partmax, partcount))
R result
print(‘Using ’price_list_size‘ random prices from 0 to ’get_max_price())
V result = get_all_5k()
print(‘Splits into ’result.len‘ bins of approx 5000 elements’)
L(mn, mx, count) result
print(f:‘ From {mn:8.1} ... {mx:8.1} with {count} items.’)
I price_list.len != sum(result.map((mn, mx, count) -> count))
print("\nWhoops! Some items missing:")
- Output:
Using 99472 random prices from 0 to 99999 Splits into 20 bins of approx 5000 elements From 0.0 ... 5114.0 with 4998 items. From 5115.0 ... 10224.0 with 5000 items. From 10225.0 ... 15241.0 with 4998 items. From 15242.0 ... 20393.0 with 4999 items. From 20394.0 ... 25419.0 with 5000 items. From 25420.0 ... 30425.0 with 5000 items. From 30426.0 ... 35412.0 with 4999 items. From 35413.0 ... 40507.0 with 4999 items. From 40508.0 ... 45485.0 with 4999 items. From 45486.0 ... 50569.0 with 5000 items. From 50570.0 ... 55634.0 with 4998 items. From 55635.0 ... 60584.0 with 4999 items. From 60585.0 ... 65477.0 with 5000 items. From 65478.0 ... 70277.0 with 4999 items. From 70278.0 ... 75470.0 with 4999 items. From 75471.0 ... 80383.0 with 5000 items. From 80384.0 ... 85449.0 with 4996 items. From 85450.0 ... 90475.0 with 4999 items. From 90476.0 ... 95475.0 with 5000 items. From 95476.0 ... 104515.0 with 4490 items.
Go
package main
import (
"fmt"
"log"
"math"
"math/rand"
"time"
)
var minDelta = 1.0
func getMaxPrice(prices []float64) float64 {
max := prices[0]
for i := 1; i < len(prices); i++ {
if prices[i] > max {
max = prices[i]
}
}
return max
}
func getPRangeCount(prices []float64, min, max float64) int {
count := 0
for _, price := range prices {
if price >= min && price <= max {
count++
}
}
return count
}
func get5000(prices []float64, min, max float64, n int) (float64, int) {
count := getPRangeCount(prices, min, max)
delta := (max - min) / 2
for count != n && delta >= minDelta/2 {
if count > n {
max -= delta
} else {
max += delta
}
max = math.Floor(max)
count = getPRangeCount(prices, min, max)
delta /= 2
}
return max, count
}
func getAll5000(prices []float64, min, max float64, n int) [][3]float64 {
pmax, pcount := get5000(prices, min, max, n)
res := [][3]float64{{min, pmax, float64(pcount)}}
for pmax < max {
pmin := pmax + 1
pmax, pcount = get5000(prices, pmin, max, n)
if pcount == 0 {
log.Fatal("Price list from", pmin, "has too many with same price.")
}
res = append(res, [3]float64{pmin, pmax, float64(pcount)})
}
return res
}
func main() {
rand.Seed(time.Now().UnixNano())
numPrices := 99000 + rand.Intn(2001)
maxPrice := 1e5
prices := make([]float64, numPrices) // list of prices
for i := 0; i < numPrices; i++ {
prices[i] = float64(rand.Intn(int(maxPrice) + 1))
}
actualMax := getMaxPrice(prices)
fmt.Println("Using", numPrices, "items with prices from 0 to", actualMax, "\b:")
res := getAll5000(prices, 0, actualMax, 5000)
fmt.Println("Split into", len(res), "bins of approx 5000 elements:")
total := 0
for _, r := range res {
min := int(r[0])
tmx := r[1]
if tmx > actualMax {
tmx = actualMax
}
max := int(tmx)
cnt := int(r[2])
total += cnt
fmt.Printf(" From %6d to %6d with %4d items\n", min, max, cnt)
}
if total != numPrices {
fmt.Println("Something went wrong - grand total of", total, "doesn't equal", numPrices, "\b!")
}
}
- Output:
Using 99784 items with prices from 0 to 99999 : Split into 20 bins of approx 5000 elements: From 0 to 5061 with 4997 items From 5062 to 10031 with 5000 items From 10032 to 15091 with 5000 items From 15092 to 20114 with 5000 items From 20115 to 25141 with 5000 items From 25142 to 30206 with 4997 items From 30207 to 35291 with 5000 items From 35292 to 40333 with 4999 items From 40334 to 45451 with 4999 items From 45452 to 50422 with 4998 items From 50423 to 55355 with 4997 items From 55356 to 60268 with 4997 items From 60269 to 65240 with 5000 items From 65241 to 70193 with 4999 items From 70194 to 75272 with 4998 items From 75273 to 80154 with 5000 items From 80155 to 85218 with 5000 items From 85219 to 90120 with 4996 items From 90121 to 95102 with 4998 items From 95103 to 99999 with 4809 items
jq
Works with jq and gojq, the C and Go implementations of jq
In this entry, the API is envisioned as the interface between a server and a client, where the server is modeled by a database together with a program that reads the database and responds to client commands; and the client is modeled as an indefinitely long stream of API calls of the form specified in the task description.
Our server will respond to both well-formed and ill-formed API calls with JSON messages.
Since jq does not itself include a built-in PRNG, we will model the database as a file of prices generated by a jq program that uses /dev/urandom as a source of entropy, and that is separate from a second jq program (server.jq) that reads the data (once) and responds to API calls.
The driver program (server.jq) is loosely adapted from the Wren entry. It is left as an exercise to sort the prices for more efficient computation of the get_prange_count() API calls.
The Server (server.sh)
#!/bin/bash ApproxNumPrices=100000 maxPrice=100000 JQ=jq function prices { < /dev/urandom tr -cd '0-9' | fold -w 1 | $JQ -Rrnc --argjson ApproxNumPrices $ApproxNumPrices --argjson maxPrice $maxPrice ' # Output: a PRN in range(0;$n) where $n is `.` def prn: if . == 1 then 0 else . as $n | ([1, (($n-1)|tostring|length)]|max) as $w | [limit($w; inputs)] | join("") | tonumber | if . < $n then . else ($n | prn) end end; # Output: PRN in range($m;$n) if $n > $m def rand($m; $n): if $m > $n then "rand\($m);\($n))" | error else (($n-$m)|prn) + $m end ; range(0; $ApproxNumPrices - 50 + rand(0; 100)) | rand(0; 1+$maxPrice)' } # API commands: # get_max_price() # get_prange_count(pricemin, pricemax) jq -nrcR --slurpfile prices <(prices) -f server.jq
The Server Program (server.jq)
def count(s): reduce s as $_ (0; .+1);
def minDelta: 1;
def getMaxPrice: $prices | max;
def getPrangeCount($min; $max):
count( $prices[] | select($min <= . and . <= $max) ) ;
def get5000($prices; $min; $max; $n):
{ count: getPrangeCount($min; $max),
delta: (($max - $min) / 2),
$max }
| until(.count == $n or .delta < minDelta/2;
.max = (( if .count > $n then .max - .delta else .max + .delta end)|floor)
| .count = getPrangeCount($min; .max)
| .delta /= 2 )
| [.max, .count] ;
def getAll5000($min; $max; $n):
{ mc: get5000($prices; $min; $max; $n)}
| .pmax = .mc[0]
| .pcount = .mc[1]
| .res = [[$min, .pmax, .pcount]]
| until(.pmax >= $max;
.pmin = .pmax + 1
| .mc = get5000($prices; .pmin; $max; $n)
| .pmax = .mc[0]
| .pcount = .mc[1]
| if .pcount == 0 then "Price list from \(.pmin) has too many with same price." | error
else .res += [[.pmin, .pmax, .pcount]]
end )
| .res ;
def trim: sub("^ +";"") | sub(" +$";"");
def display($res; $numPrices; $actualMax; $approxBinSize):
"Using \($numPrices) items with prices from 0 to \($actualMax):",
"Split into \($res|length) bins of approx \($approxBinSize) elements:",
($res[] as [$min, $max, $n]
| (if $max > $actualMax then $actualMax else $max end) as $mx
| " From \($min) to \($mx) with \($n) items" ) ;
def repl(approxBinSize):
($prices|length) as $numPrices
| getMaxPrice as $actualMax
| getAll5000(0; $actualMax; approxBinSize) as $res
| def r:
trim
| . as $line
| if startswith("get_max_price") then {getMaxPrice: getMaxPrice}
else (scan(" *get_prange_count *[(]([0-9]+) *, *([0-9]+) *[)]")
| map(tonumber) as [$min, $max]
| {"getPrangeCount": {$min, $max, count: getPrangeCount($min; $max)}})
// {error: $line}
end;
display($res; $numPrices; $actualMax; approxBinSize),
(inputs | r);
# The approximate bin size
repl(5000)
Sample Client Calls (api.txt)
get_max_price() junk get_prange_count(1000, 2000)
- Output:
Invocation: server.sh < api.txt
Using 99989 items with prices from 0 to 99999: Split into 21 bins of approx 5000 elements: From 0 to 4996 with 5000 items From 4997 to 9993 with 4999 items From 9994 to 15031 with 4996 items From 15032 to 20063 with 4999 items From 20064 to 24992 with 5000 items From 24993 to 30001 with 5000 items From 30002 to 35032 with 4999 items From 35033 to 40131 with 5000 items From 40132 to 44960 with 4998 items From 44961 to 49951 with 4998 items From 49952 to 54930 with 4997 items From 54931 to 59996 with 4998 items From 59997 to 65055 with 5000 items From 65056 to 69923 with 5000 items From 69924 to 74834 with 4997 items From 74835 to 79779 with 4998 items From 79780 to 84865 with 5000 items From 84866 to 89811 with 5000 items From 89812 to 94818 with 5000 items From 94819 to 99985 with 4998 items From 99986 to 99999 with 12 items {"getMaxPrice":99999} {"getPrangeCount":{"min":0,"max":10,"count":17}} {"error":"junk"} {"getPrangeCount":{"min":1000,"max":2000,"count":1006}}
Julia
# Sample price generation
const price_list_size = rand(99000:100999)
const price_list = rand(0:99999, price_list_size)
const delta_price = 1 # Minimum difference between any two different prices.
""" The API provides these two """
get_prange_count(startp, endp) = sum([startp <= r <= endp for r in price_list])
get_max_price() = maximum(price_list)
""" Binary search for num items between mn and mx, adjusting mx """
function get_5k(mn=0, mx=get_max_price(), num=5_000)
count = get_prange_count(mn, mx)
delta_mx = (mx - mn) / 2
while count != num && delta_mx >= delta_price / 2
mx += (count > num ? -delta_mx : +delta_mx)
mx = floor(mx)
count, delta_mx = get_prange_count(mn, mx), delta_mx / 2
end
return mx, count
end
""" Get all non-overlapping ranges """
function get_all_5k(mn=0, mx=get_max_price(), num=5_000)
partmax, partcount = get_5k(mn, mx, num)
result = [(mn, partmax, partcount)]
while partmax < mx
partmin = partmax + delta_price
partmax, partcount = get_5k(partmin, mx, num)
@assert(partcount > 0, "pricelist from $partmin has too many same price")
push!(result, (partmin, partmax, partcount))
end
return result
end
function testpricelist()
println("Using $price_list_size random prices from 0 to $(get_max_price()).")
result = get_all_5k()
println("Splits into $(length(result)) bins of approximately 5000 elements.")
for (mn, mx, count) in result
println(" From $(Float32(mn)) ... $(Float32(mx)) with $count items.")
end
if length(price_list) != sum([x[3] for x in result])
print("\nWhoops! Some items missing.")
end
end
testpricelist()
- Output:
Using 100299 random prices from 0 to 99990. Splits into 21 bins of approximately 5000 elements. From 0.0 ... 4911.0 with 4998 items. From 4912.0 ... 9832.0 with 5000 items. From 9833.0 ... 14841.0 with 5000 items. From 14842.0 ... 19756.0 with 4999 items. From 19757.0 ... 24782.0 with 4994 items. From 24783.0 ... 29751.0 with 4999 items. From 29752.0 ... 34655.0 with 5000 items. From 34656.0 ... 39748.0 with 5000 items. From 39749.0 ... 44819.0 with 4999 items. From 44820.0 ... 49908.0 with 5000 items. From 49909.0 ... 54898.0 with 4999 items. From 54899.0 ... 59700.0 with 4999 items. From 59701.0 ... 64767.0 with 4999 items. From 64768.0 ... 69824.0 with 4999 items. From 69825.0 ... 74765.0 with 4999 items. From 74766.0 ... 79654.0 with 4999 items. From 79655.0 ... 84674.0 with 5000 items. From 84675.0 ... 89602.0 with 4999 items. From 89603.0 ... 94715.0 with 5000 items. From 94716.0 ... 99666.0 with 4997 items. From 99667.0 ... 100309.0 with 320 items.
Nim
import math, strformat
const MinDelta = 1.0
type Result = tuple[min, max: float; count: int]
func getPRangeCount(prices: seq[float]; min, max: float): int =
for price in prices:
if price in min..max:
inc result
func get5000(prices: seq[float]; min, max: float; n: int): (float, int) =
var
count = prices.getPRangeCount(min, max)
delta = (max - min) / 2
max = max
while count != n and delta >= MinDelta / 2:
if count > n: max -= delta
else: max += delta
max = floor(max)
count = getPRangeCount(prices, min, max)
delta /= 2
result = (max, count)
func getAll5000(prices: seq[float]; min, max: float; n: int): seq[Result] =
var (pmax, pcount) = prices.get5000(min, max, n)
result = @[(min, pmax, pcount)]
while pmax < max:
let pmin = pmax + 1
(pmax, pcount) = prices.get5000(pmin, max, n)
if pcount == 0:
raise newException(ValueError, &"Price list from {pmin} has too many with same price.")
result.add (pmin, pmax, pcount)
when isMainModule:
import random, sequtils
randomize()
let numPrices = rand(99_000..101_000)
const MaxPrice = 100_000
let prices = newSeqWith(numPrices, rand(1..MaxPrice).toFloat)
let actualMax = max(prices)
echo &"Using {numPrices} items with prices from 0 to {actualMax.int}:"
let res = prices.getAll5000(0, actualMax, 5000)
echo &"Split into {res.len} bins of approx 5000 elements:"
var total = 0
for (minf, maxf, count) in res:
let min = minf.toInt
let max = min(maxf, actualMax).toInt
inc total, count
echo &" From {min:6} to {max:6} with {count:4} items"
if total != numPrices:
echo &"Something went wrong: grand total of {total} doesn't equal {numPrices}!"
- Output:
Using 99244 items with prices from 0 to 100000: Split into 20 bins of approx 5000 elements: From 0 to 4933 with 5000 items From 4934 to 9949 with 4999 items From 9950 to 15074 with 4998 items From 15075 to 20081 with 5000 items From 20082 to 25011 with 5000 items From 25012 to 30106 with 5000 items From 30107 to 35208 with 5000 items From 35209 to 40210 with 4998 items From 40211 to 45208 with 4996 items From 45209 to 50271 with 5000 items From 50272 to 55318 with 4996 items From 55319 to 60384 with 5000 items From 60385 to 65323 with 5000 items From 65324 to 70425 with 5000 items From 70426 to 75509 with 4998 items From 75510 to 80536 with 4999 items From 80537 to 85603 with 4999 items From 85604 to 90642 with 5000 items From 90643 to 95681 with 4999 items From 95682 to 100000 with 4262 items
Perl
use strict;
use warnings;
use List::Util <min max shuffle>;
sub getPRangeCount {
my($min,$max,@prices) = @_;
grep { $min <= $_ <= $max } @prices;
}
sub get5000 {
my($min, $max, $n, @prices) = @_;
my $count = getPRangeCount($min, $max, @prices);
my $delta = ($max - $min) / 2;
while ($count != $n and $delta >= 1/2) {
$count > $n ? $max -= $delta : ($max += $delta);
$count = getPRangeCount($min, $max, @prices);
$delta /= 2;
}
$max, $count
}
sub getAll5000 {
my($min, $max, $n, @prices) = @_;
my ( $pmax, $pcount ) = get5000($min, $max, $n, @prices);
my @results = [ $min, $pmax, $pcount ];
while ($pmax < $max) {
my $pmin = $pmax + 1;
( $pmax, $pcount ) = get5000($pmin, $max, $n, @prices);
$pcount == 0 and print "Price list from $pmin has too many duplicates.\n";
push @results, [ $pmin, $pmax, $pcount ];
}
@results
}
my @prices;
push @prices, int rand 10_000+1 for 1 .. (my $numPrices = shuffle 99_990..100_050);
my $actualMax = max @prices;
print "Using $numPrices items with prices from 0 to $actualMax:\n";
my @results = getAll5000(0, $actualMax, 5000, @prices);
print "Split into " . @results . " bins of approx 5000 elements:\n";
for my $row (@results) {
my ($min,$max,$subtotal) = @$row;
$max = $actualMax if $max > $actualMax;
printf " From %6d to %6d with %4d items\n", $min, $max, $subtotal
}
- Output:
Using 100047 items with prices from 0 to 10000: Split into 20 bins of approx 5000 elements: From 0 to 496 with 5002 items From 497 to 991 with 4992 items From 992 to 1483 with 4996 items From 1484 to 1988 with 4998 items From 1989 to 2483 with 5002 items From 2484 to 2980 with 5009 items From 2981 to 3482 with 4996 items From 3483 to 3968 with 5000 items From 3969 to 4476 with 4998 items From 4477 to 4989 with 5009 items From 4990 to 5491 with 5005 items From 5492 to 5981 with 5005 items From 5982 to 6483 with 5002 items From 6484 to 6984 with 5005 items From 6985 to 7495 with 4992 items From 7496 to 8004 with 4991 items From 8005 to 8496 with 5000 items From 8497 to 9002 with 5000 items From 9003 to 9514 with 4995 items From 9515 to 10000 with 4850 items
Phix
Note that defaulted arguments of the form mx=get_max_price() are not currently supported, hence a slightly hacky workaround, of -1 then -1==>get_max_price().
Were you (or I) to define constant mp = get_max_price(), then mx=mp style parameter defaulting would be fine.
with javascript_semantics requires("0.8.3") -- [assert() now accepts a 3rd param] constant price_list_size = 99_000 + rand(2_001) - 1, price_list = sq_sub(sq_rand(repeat(100_000,price_list_size)),1), delta_price = 1 -- Minimum difference between any two different prices. function get_prange_count(integer startp, endp) return length(filter(price_list,"in",{startp,endp},"[]")) end function function get_max_price() return max(price_list) end function function get_5k(integer mn=0, mx=-1, num=5_000) if mx=-1 then mx = get_max_price() end if -- Binary search for num items between mn and mx, adjusting mx integer count = get_prange_count(mn, mx) atom delta_mx = (mx - mn) / 2 while count != num and delta_mx >= delta_price / 2 do mx = floor(mx + iff(count > num ? -delta_mx : +delta_mx)) {count, delta_mx} = {get_prange_count(mn, mx), delta_mx / 2} end while return {mx, count} end function function get_all_5k(integer mn=0, mx=-1, num=5_000) if mx=-1 then mx = get_max_price() end if -- Get all non-overlapping ranges integer {partmax, partcount} = get_5k(mn, mx, num) sequence result = {{mn, partmax, partcount}} while partmax < mx do integer partmin = partmax + delta_price {partmax, partcount} = get_5k(partmin, mx, num) assert(partcount>0,"Price list from %.2f has too many same price",{partmin}) result = append(result,{partmin, partmax, partcount}) end while return result end function printf(1,"Using %d random prices from 0 to %d\n",{price_list_size,get_max_price()}) sequence result = get_all_5k() printf(1,"Splits into %d bins of approx 5000 elements\n",{length(result)}) for i=1 to length(result) do printf(1," From %8.2f ... %8.2f with %d items.\n",result[i]) end for assert(length(price_list)==sum(vslice(result,3)),"Whoops! Some items missing!")
- Output:
Using 100957 random prices from 0 to 99999 Splits into 21 bins of approx 5000 elements From 0.00 ... 4838.00 with 4998 items. From 4839.00 ... 9765.00 with 4999 items. From 9766.00 ... 14602.00 with 4999 items. From 14603.00 ... 19575.00 with 5000 items. From 19576.00 ... 24515.00 with 4998 items. From 24516.00 ... 29476.00 with 5000 items. From 29477.00 ... 34386.00 with 5000 items. From 34387.00 ... 39289.00 with 4999 items. From 39290.00 ... 44349.00 with 5000 items. From 44350.00 ... 49265.00 with 4992 items. From 49266.00 ... 54262.00 with 4998 items. From 54263.00 ... 59289.00 with 4999 items. From 59290.00 ... 64191.00 with 5000 items. From 64192.00 ... 69119.00 with 4999 items. From 69120.00 ... 74095.00 with 4996 items. From 74096.00 ... 79144.00 with 4999 items. From 79145.00 ... 84093.00 with 4998 items. From 84094.00 ... 88961.00 with 4996 items. From 88962.00 ... 94051.00 with 4999 items. From 94052.00 ... 99038.00 with 5000 items. From 99039.00 ... 100955.00 with 988 items.
Python
import random
#%%Sample price generation
price_list_size = random.choice(range(99_000, 101_000))
price_list = random.choices(range(100_000), k=price_list_size)
delta_price = 1 # Minimum difference between any two different prices.
#%% API
def get_prange_count(startp, endp):
return len([r for r in price_list if startp <= r <= endp])
def get_max_price():
return max(price_list)
#%% Solution
def get_5k(mn=0, mx=get_max_price(), num=5_000):
"Binary search for num items between mn and mx, adjusting mx"
count = get_prange_count(mn, mx)
delta_mx = (mx - mn) / 2
while count != num and delta_mx >= delta_price / 2:
mx += -delta_mx if count > num else +delta_mx
mx = mx // 1 # Floor
count, delta_mx = get_prange_count(mn, mx), delta_mx / 2
return mx, count
def get_all_5k(mn=0, mx=get_max_price(), num=5_000):
"Get all non-overlapping ranges"
partmax, partcount = get_5k(mn, mx, num)
result = [(mn, partmax, partcount)]
while partmax < mx:
partmin = partmax + delta_price
partmax, partcount = get_5k(partmin, mx, num)
assert partcount > 0, \
f"price_list from {partmin} with too many of the same price"
result.append((partmin, partmax, partcount))
return result
if __name__ == '__main__':
print(f"Using {price_list_size} random prices from 0 to {get_max_price()}")
result = get_all_5k()
print(f"Splits into {len(result)} bins of approx 5000 elements")
for mn, mx, count in result:
print(f" From {mn:8.1f} ... {mx:8.1f} with {count} items.")
if len(price_list) != sum(count for mn, mx, count in result):
print("\nWhoops! Some items missing:")
- Output:
Using 99838 random prices from 0 to 99999 Splits into 20 bins of approx 5000 elements From 0.0 ... 4876.0 with 4999 items. From 4877.0 ... 9973.0 with 4997 items. From 9974.0 ... 14954.0 with 4999 items. From 14955.0 ... 20041.0 with 4997 items. From 20042.0 ... 25132.0 with 4999 items. From 25133.0 ... 30221.0 with 5000 items. From 30222.0 ... 35313.0 with 5000 items. From 35314.0 ... 40263.0 with 5000 items. From 40264.0 ... 45249.0 with 4997 items. From 45250.0 ... 50264.0 with 5000 items. From 50265.0 ... 55251.0 with 5000 items. From 55252.0 ... 60301.0 with 4997 items. From 60302.0 ... 65239.0 with 5000 items. From 65240.0 ... 70220.0 with 4998 items. From 70221.0 ... 75193.0 with 4999 items. From 75194.0 ... 80229.0 with 4996 items. From 80230.0 ... 85191.0 with 4997 items. From 85192.0 ... 90214.0 with 5000 items. From 90215.0 ... 95249.0 with 4999 items. From 95250.0 ... 104742.0 with 4864 items.
Raku
# 20210208 Raku programming solution
my \minDelta = 1;
sub getMaxPrice { @_.max }
sub getPRangeCount(@prices,\min,\max) { +@prices.grep: min ≤ * ≤ max }
sub get5000(@prices, $min, $max is copy, \n) {
my $count = getPRangeCount(@prices, $min, $max);
my $delta = ($max - $min) / 2;
while $count != n && $delta ≥ minDelta/2 {
$count > n ?? ($max -= $delta) !! ($max += $delta);
$count = getPRangeCount(@prices, $min, $max);
$delta /= 2;
}
$max, $count
}
sub getAll5000(@prices, \min, \max, \n) {
my ( $pmax, $pcount ) = get5000(@prices, min, max, n);
my @res = [ min, $pmax, $pcount ] , ;
while $pmax < max {
my $pmin = $pmax + 1;
( $pmax, $pcount ) = get5000(@prices, $pmin, max, n);
$pcount == 0 and note "Price list from $pmin has too many duplicates.";
@res.push: [ $pmin, $pmax, $pcount ];
}
@res
}
my $numPrices = (99000..101001).roll;
my \maxPrice = 1e5;
my @prices = (1..$numPrices+1).roll xx $numPrices ;
my $actualMax = getMaxPrice(@prices);
say "Using $numPrices items with prices from 0 to $actualMax:";
my @res = getAll5000(@prices, 0, $actualMax, 5000);
say "Split into ", +@res, " bins of approx 5000 elements:";
for @res -> @row {
my ($min,$max,$subtotal) = @row;
$max = $actualMax if $max > $actualMax ;
printf " From %6d to %6d with %4d items\n", $min, $max, $subtotal
}
- Output:
Using 99506 items with prices from 0 to 99507: Split into 20 bins of approx 5000 elements: From 0 to 4983 with 5000 items From 4984 to 10034 with 5003 items From 10035 to 15043 with 4998 items From 15044 to 20043 with 5001 items From 20044 to 24987 with 5001 items From 24988 to 30000 with 4998 items From 30001 to 34954 with 5000 items From 34955 to 40018 with 5000 items From 40019 to 45016 with 5000 items From 45017 to 49950 with 5000 items From 49951 to 54877 with 4999 items From 54878 to 59880 with 5002 items From 59881 to 64807 with 5001 items From 64808 to 69800 with 5000 items From 69801 to 74897 with 5000 items From 74898 to 79956 with 5002 items From 79957 to 85037 with 5000 items From 85038 to 90118 with 5001 items From 90119 to 95169 with 5001 items From 95170 to 99507 with 4470 items
Wren
import "random" for Random
import "./math" for Nums
import "./fmt" for Fmt
var rand = Random.new()
var minDelta = 1
var getMaxPrice = Fn.new { |prices| Nums.max(prices) }
var getPrangeCount = Fn.new { |prices, min, max| prices.count { |p| p >= min && p <= max } }
var get5000 = Fn.new { |prices, min, max, n|
var count = getPrangeCount.call(prices, min, max)
var delta = (max - min) / 2
while (count != n && delta >= minDelta/2) {
max = ((count > n) ? max-delta : max+delta).floor
count = getPrangeCount.call(prices, min, max)
delta = delta / 2
}
return [max, count]
}
var getAll5000 = Fn.new { |prices, min, max, n|
var mc = get5000.call(prices, min, max, n)
var pmax = mc[0]
var pcount = mc[1]
var res = [[min, pmax, pcount]]
while (pmax < max) {
var pmin = pmax + 1
mc = get5000.call(prices, pmin, max, n)
pmax = mc[0]
pcount = mc[1]
if (pcount == 0) Fiber.abort("Price list from %(pmin) has too many with same price.")
res.add([pmin, pmax, pcount])
}
return res
}
var numPrices = rand.int(99000, 101001)
var maxPrice = 1e5
var prices = List.filled(numPrices, 0) // list of prices
for (i in 1..numPrices) prices[i-1] = rand.int(maxPrice + 1)
var actualMax = getMaxPrice.call(prices)
System.print("Using %(numPrices) items with prices from 0 to %(actualMax):")
var res = getAll5000.call(prices, 0, actualMax, 5000)
System.print("Split into %(res.count) bins of approx 5000 elements:")
var total = 0
for (r in res) {
var min = r[0]
var max = r[1]
if (max > actualMax) max = actualMax
var cnt = r[2]
total = total + cnt
Fmt.print(" From $6d to $6d with $4d items", min, max, cnt)
}
if (total != numPrices) {
System.print("Something went wrong - grand total of %(total) doesn't equal %(numPrices)!")
}
- Output:
Sample run:
Using 99756 items with prices from 0 to 99998: Split into 20 bins of approx 5000 elements: From 0 to 4964 with 5000 items From 4965 to 9992 with 5000 items From 9993 to 15063 with 5000 items From 15064 to 20130 with 5000 items From 20131 to 25063 with 4998 items From 25064 to 30014 with 4998 items From 30015 to 35002 with 5000 items From 35003 to 40030 with 5000 items From 40031 to 45058 with 5000 items From 45059 to 50199 with 4999 items From 50200 to 55133 with 4999 items From 55134 to 60139 with 4997 items From 60140 to 65097 with 5000 items From 65098 to 69972 with 4999 items From 69973 to 74932 with 5000 items From 74933 to 80041 with 5000 items From 80042 to 85214 with 5000 items From 85215 to 90241 with 4999 items From 90242 to 95353 with 5000 items From 95354 to 99998 with 4767 items