Remove duplicate elements: Difference between revisions
Content added Content deleted
Line 655: | Line 655: | ||
<pre> a c b e d</pre> |
<pre> a c b e d</pre> |
||
=== ATS2 implementation |
=== ATS2 implementation using a radix sort === |
||
This method runs in O(nw) time, where n is the number of elements n and w is a factor that is constant for a fixed-size integer. The method also sorts the unique elements. It is restricted to element types that have a lexicographic ordering. |
|||
The implementation is for unsigned integers. |
|||
<syntaxhighlight lang="ats"> |
|||
(* Remove duplicate elements. |
|||
The best sorting algorithms, it is said, are O(n log n) and require |
|||
an order predicate. |
|||
But this is true only for a general sorting routine. A radix sort |
|||
for fixed-size integers is O(n), and requires no order predicate. |
|||
Here I use such a radix sort. *) |
|||
#include "share/atspre_staload.hats" |
|||
staload UN = "prelude/SATS/unsafe.sats" |
|||
(* How the remove_dups template function will be called. *) |
|||
extern fn {tk : tkind} |
|||
remove_dups |
|||
{n : int} |
|||
(src : arrayref (g0uint tk, n), |
|||
n : size_t n, |
|||
dst : arrayref (g0uint tk, n), |
|||
m : &size_t? >> size_t m) |
|||
: #[m : nat | m <= n] |
|||
void |
|||
(*------------------------------------------------------------------*) |
|||
(* A radix sort for unsigned integers, copied from my contribution to |
|||
the radix sort task. *) |
|||
extern fn {a : vt@ype} |
|||
{tk : tkind} |
|||
g0uint_radix_sort |
|||
{n : int} |
|||
(arr : &array (a, n) >> _, |
|||
n : size_t n) |
|||
:<!wrt> void |
|||
extern fn {a : vt@ype} |
|||
{tk : tkind} |
|||
g0uint_radix_sort$key |
|||
{n : int} |
|||
{i : nat | i < n} |
|||
(arr : &RD(array (a, n)), |
|||
i : size_t i) |
|||
:<> g0uint tk |
|||
fn {} |
|||
bin_sizes_to_indices |
|||
(bin_indices : &array (size_t, 256) >> _) |
|||
:<!wrt> void = |
|||
let |
|||
fun |
|||
loop {i : int | i <= 256} |
|||
{accum : int} |
|||
.<256 - i>. |
|||
(bin_indices : &array (size_t, 256) >> _, |
|||
i : size_t i, |
|||
accum : size_t accum) |
|||
:<!wrt> void = |
|||
if i <> i2sz 256 then |
|||
let |
|||
prval () = lemma_g1uint_param i |
|||
val elem = bin_indices[i] |
|||
in |
|||
if elem = i2sz 0 then |
|||
loop (bin_indices, succ i, accum) |
|||
else |
|||
begin |
|||
bin_indices[i] := accum; |
|||
loop (bin_indices, succ i, accum + g1ofg0 elem) |
|||
end |
|||
end |
|||
in |
|||
loop (bin_indices, i2sz 0, i2sz 0) |
|||
end |
|||
fn {a : vt@ype} |
|||
{tk : tkind} |
|||
count_entries |
|||
{n : int} |
|||
{shift : nat} |
|||
(arr : &RD(array (a, n)), |
|||
n : size_t n, |
|||
bin_indices : &array (size_t?, 256) |
|||
>> array (size_t, 256), |
|||
all_expended : &bool? >> bool, |
|||
shift : int shift) |
|||
:<!wrt> void = |
|||
let |
|||
fun |
|||
loop {i : int | i <= n} |
|||
.<n - i>. |
|||
(arr : &RD(array (a, n)), |
|||
bin_indices : &array (size_t, 256) >> _, |
|||
all_expended : &bool >> bool, |
|||
i : size_t i) |
|||
:<!wrt> void = |
|||
if i <> n then |
|||
let |
|||
prval () = lemma_g1uint_param i |
|||
val key : g0uint tk = g0uint_radix_sort$key<a><tk> (arr, i) |
|||
val key_shifted = key >> shift |
|||
val digit = ($UN.cast{uint} key_shifted) land 255U |
|||
val [digit : int] digit = g1ofg0 digit |
|||
extern praxi set_range : |
|||
() -<prf> [0 <= digit; digit <= 255] void |
|||
prval () = set_range () |
|||
val count = bin_indices[digit] |
|||
val () = bin_indices[digit] := succ count |
|||
in |
|||
all_expended := all_expended * iseqz key_shifted; |
|||
loop (arr, bin_indices, all_expended, succ i) |
|||
end |
|||
prval () = lemma_array_param arr |
|||
in |
|||
array_initize_elt<size_t> (bin_indices, i2sz 256, i2sz 0); |
|||
all_expended := true; |
|||
loop (arr, bin_indices, all_expended, i2sz 0) |
|||
end |
|||
fn {a : vt@ype} |
|||
{tk : tkind} |
|||
sort_by_digit |
|||
{n : int} |
|||
{shift : nat} |
|||
(arr1 : &RD(array (a, n)), |
|||
arr2 : &array (a, n) >> _, |
|||
n : size_t n, |
|||
all_expended : &bool? >> bool, |
|||
shift : int shift) |
|||
:<!wrt> void = |
|||
let |
|||
var bin_indices : array (size_t, 256) |
|||
in |
|||
count_entries<a><tk> (arr1, n, bin_indices, all_expended, shift); |
|||
if all_expended then |
|||
() |
|||
else |
|||
let |
|||
fun |
|||
rearrange {i : int | i <= n} |
|||
.<n - i>. |
|||
(arr1 : &RD(array (a, n)), |
|||
arr2 : &array (a, n) >> _, |
|||
bin_indices : &array (size_t, 256) >> _, |
|||
i : size_t i) |
|||
:<!wrt> void = |
|||
if i <> n then |
|||
let |
|||
prval () = lemma_g1uint_param i |
|||
val key = g0uint_radix_sort$key<a><tk> (arr1, i) |
|||
val key_shifted = key >> shift |
|||
val digit = ($UN.cast{uint} key_shifted) land 255U |
|||
val [digit : int] digit = g1ofg0 digit |
|||
extern praxi set_range : |
|||
() -<prf> [0 <= digit; digit <= 255] void |
|||
prval () = set_range () |
|||
val [j : int] j = g1ofg0 bin_indices[digit] |
|||
(* One might wish to get rid of this assertion somehow, |
|||
to eliminate the branch, should it prove a |
|||
problem. *) |
|||
val () = $effmask_exn assertloc (j < n) |
|||
val p_dst = ptr_add<a> (addr@ arr2, j) |
|||
and p_src = ptr_add<a> (addr@ arr1, i) |
|||
val _ = $extfcall (ptr, "memcpy", p_dst, p_src, |
|||
sizeof<a>) |
|||
val () = bin_indices[digit] := succ (g0ofg1 j) |
|||
in |
|||
rearrange (arr1, arr2, bin_indices, succ i) |
|||
end |
|||
prval () = lemma_array_param arr1 |
|||
in |
|||
bin_sizes_to_indices<> bin_indices; |
|||
rearrange (arr1, arr2, bin_indices, i2sz 0) |
|||
end |
|||
end |
|||
fn {a : vt@ype} |
|||
{tk : tkind} |
|||
g0uint_sort {n : pos} |
|||
(arr1 : &array (a, n) >> _, |
|||
arr2 : &array (a, n) >> _, |
|||
n : size_t n) |
|||
:<!wrt> void = |
|||
let |
|||
fun |
|||
loop {idigit_max, idigit : nat | idigit <= idigit_max} |
|||
.<idigit_max - idigit>. |
|||
(arr1 : &array (a, n) >> _, |
|||
arr2 : &array (a, n) >> _, |
|||
from1to2 : bool, |
|||
idigit_max : int idigit_max, |
|||
idigit : int idigit) |
|||
:<!wrt> void = |
|||
if idigit = idigit_max then |
|||
begin |
|||
if ~from1to2 then |
|||
let |
|||
val _ = |
|||
$extfcall (ptr, "memcpy", addr@ arr1, addr@ arr2, |
|||
sizeof<a> * n) |
|||
in |
|||
end |
|||
end |
|||
else if from1to2 then |
|||
let |
|||
var all_expended : bool |
|||
in |
|||
sort_by_digit<a><tk> (arr1, arr2, n, all_expended, |
|||
8 * idigit); |
|||
if all_expended then |
|||
() |
|||
else |
|||
loop (arr1, arr2, false, idigit_max, succ idigit) |
|||
end |
|||
else |
|||
let |
|||
var all_expended : bool |
|||
in |
|||
sort_by_digit<a><tk> (arr2, arr1, n, all_expended, |
|||
8 * idigit); |
|||
if all_expended then |
|||
let |
|||
val _ = |
|||
$extfcall (ptr, "memcpy", addr@ arr1, addr@ arr2, |
|||
sizeof<a> * n) |
|||
in |
|||
end |
|||
else |
|||
loop (arr1, arr2, true, idigit_max, succ idigit) |
|||
end |
|||
in |
|||
loop (arr1, arr2, true, sz2i sizeof<g1uint tk>, 0) |
|||
end |
|||
#define SIZE_THRESHOLD 256 |
|||
extern praxi |
|||
unsafe_cast_array |
|||
{a : vt@ype} |
|||
{b : vt@ype} |
|||
{n : int} |
|||
(arr : &array (b, n) >> array (a, n)) |
|||
:<prf> void |
|||
implement {a} {tk} |
|||
g0uint_radix_sort {n} (arr, n) = |
|||
if n <> 0 then |
|||
let |
|||
prval () = lemma_array_param arr |
|||
fn |
|||
sort {n : pos} |
|||
(arr1 : &array (a, n) >> _, |
|||
arr2 : &array (a, n) >> _, |
|||
n : size_t n) |
|||
:<!wrt> void = |
|||
g0uint_sort<a><tk> (arr1, arr2, n) |
|||
in |
|||
if n <= SIZE_THRESHOLD then |
|||
let |
|||
var arr2 : array (a, SIZE_THRESHOLD) |
|||
prval @(pf_left, pf_right) = |
|||
array_v_split {a?} {..} {SIZE_THRESHOLD} {n} (view@ arr2) |
|||
prval () = view@ arr2 := pf_left |
|||
prval () = unsafe_cast_array{a} arr2 |
|||
val () = sort (arr, arr2, n) |
|||
prval () = unsafe_cast_array{a?} arr2 |
|||
prval () = view@ arr2 := |
|||
array_v_unsplit (view@ arr2, pf_right) |
|||
in |
|||
end |
|||
else |
|||
let |
|||
val @(pf_arr2, pfgc_arr2 | p_arr2) = array_ptr_alloc<a> n |
|||
macdef arr2 = !p_arr2 |
|||
prval () = unsafe_cast_array{a} arr2 |
|||
val () = sort (arr, arr2, n) |
|||
prval () = unsafe_cast_array{a?} arr2 |
|||
val () = array_ptr_free (pf_arr2, pfgc_arr2 | p_arr2) |
|||
in |
|||
end |
|||
end |
|||
(*------------------------------------------------------------------*) |
|||
(* An implementation of the remove_dups template function, which also |
|||
sorts the elements. *) |
|||
implement {tk} |
|||
remove_dups {n} (src, n, dst, m) = |
|||
if n = i2sz 0 then |
|||
m := i2sz 0 |
|||
else |
|||
let |
|||
prval () = lemma_arrayref_param src (* Prove 0 <= n. *) |
|||
(* Sort a copy of src. *) |
|||
val arrptr = arrayref_copy (src, n) |
|||
val @(pf_arr | p_arr) = arrayptr_takeout_viewptr arrptr |
|||
val () = g0uint_radix_sort<g0uint tk><tk> (!p_arr, n) |
|||
prval () = arrayptr_addback (pf_arr | arrptr) |
|||
(* Copy only the first element of each run of equals. *) |
|||
val () = dst[0] := arrptr[0] |
|||
fun |
|||
loop {i : int | 1 <= i; i <= n} |
|||
{j : int | 1 <= j; j <= i} |
|||
.<n - i>. |
|||
(arrptr : !arrayptr (g0uint tk, n), |
|||
i : size_t i, |
|||
j : size_t j) |
|||
: [m : int | 1 <= m; m <= n] |
|||
size_t m = |
|||
if i = n then |
|||
j |
|||
else if arrptr[pred i] = arrptr[i] then |
|||
loop (arrptr, succ i, j) |
|||
else |
|||
begin |
|||
dst[j] := arrptr[i]; |
|||
loop (arrptr, succ i, succ j) |
|||
end |
|||
val () = m := loop (arrptr, i2sz 1, i2sz 1) |
|||
val () = arrayptr_free arrptr |
|||
in |
|||
end |
|||
(*------------------------------------------------------------------*) |
|||
(* A demonstration. *) |
|||
implement |
|||
main0 () = |
|||
let |
|||
implement |
|||
g0uint_radix_sort$key<uint><uintknd> (arr, i) = |
|||
arr[i] |
|||
val src = |
|||
arrayref_make_list<uint> |
|||
(10, $list (1U, 3U, 2U, 5U, 1U, 1U, 4U, 4U, 2U, 3U)) |
|||
val dst = arrayref_make_elt<uint> (i2sz 10, 123456789U) |
|||
var m : size_t |
|||
in |
|||
remove_dups<uintknd> (src, i2sz 10, dst, m); |
|||
let |
|||
prval [m : int] EQINT () = eqint_make_guint m |
|||
var i : natLte m |
|||
in |
|||
for (i := 0; i2sz i <> m; i := succ i) |
|||
print! (" ", dst[i]); |
|||
println! () |
|||
end |
|||
end |
|||
(*------------------------------------------------------------------*) |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> 1 2 3 4 5</pre> |
|||
=== ATS2 implementation for values containing a mutable ''seen'' flag === |
|||
This method runs O(n) in the number of elements. It can be useful when you are working with references to complicated data structures. It works only if the array contains references to structures, rather than the structures themselves. |
This method runs O(n) in the number of elements. It can be useful when you are working with references to complicated data structures. It works only if the array contains references to structures, rather than the structures themselves. |