External sort: Difference between revisions

m
m (→‎{{header|Wren}}: Minor tidy)
 
(8 intermediate revisions by 6 users not shown)
Line 1:
{{Sorting Algorithm}}
[[Category:Sorting]]
{{draft task|Sorting Algorithms}}
 
Line 20 ⟶ 21:
A "half chunk" of integers at a time is read to each of two buffer lists covering different sections of the file range being partitioned. Only those integers needing to be swapped are written back to the file and each list is replaced as it's used up. When the converging sections eventually overlap, a single list is used instead which is updated in parallel with the file to ensure that the partitioning repeat stops in the right place. Partitions less than a "chunk" in length are sorted in memory with a heap sort. (The Foundation framework has a built-in NSMutableArray sort which is faster than a vanilla heap sort — even with the necessary derivation of NSMutableArrays from the lists and lists from the sorted arrays — but I don't know how well this fits the task's "low memory" conceit.)
 
<langsyntaxhighlight lang="applescript">(*
Quicksort algorithm: S.A.R. (Tony) Hoare, 1960.
Optimisations by Robert Sedgewick and others at various times.
Line 344 ⟶ 345:
 
set theFile to (path to desktop as text) & "Test.dat"
set sortedFile to externalSort(theFile)</langsyntaxhighlight>
 
=={{header|C++}}==
Line 368 ⟶ 369:
All sorted streams are merged in this way out to an external output file ''merged.txt''.
<langsyntaxhighlight lang="cpp">
/* ExternalSort.cpp */
 
Line 625 ⟶ 626:
 
/* inputfile integers -- one per line for simplicity */
</syntaxhighlight>
</lang>
 
{{out}}
Line 649 ⟶ 650:
 
A small test file consisting of random integers has been generated and sorted to demonstrate that the approach works.
<langsyntaxhighlight lang="go">package main
 
import (
Line 854 ⟶ 855:
check(err)
}
}</langsyntaxhighlight>
 
{{out}}
Line 866 ⟶ 867:
</pre>
 
=={{header|jJ}}==
Untested on a memory mapped file.
<syntaxhighlight lang="j">
<lang J>
NB. Apply an in-place sorting algorithm to a memory mapped file
NB. in-place sort is translation of in-place python quicksort.
Line 906 ⟶ 907:
i. 0 0 NB. verbs return the final noun
)
</syntaxhighlight>
</lang>
 
Demonstration the sorting works:
Line 916 ⟶ 917:
 
=={{header|Julia}}==
<langsyntaxhighlight lang="julia">intfile = open("/tmp/mmap.bin", "r+")
 
arr = Mmap.mmap(intfile, Vector{Int64}, (div(stat(intfile).size, 8))) # Int64 is 8 bytes
 
sort!(arr)
</syntaxhighlight>
</lang>
 
=={{header|Nim}}==
{{trans|Phix}}
 
<syntaxhighlight lang="nim">import algorithm, heapqueue, os, random, sequtils, strformat, strutils
 
 
proc sortFiles(filenames: seq[string]) =
for filename in filenames:
var lines = filename.readFile()
lines.stripLineEnd() # Ignore last line feed, if any.
var sortedLines = sorted(lines.splitLines())
echo &"""{filename} => {sortedLines.join(", ")}"""
filename.writeFile(sortedLines.join("\n"))
 
 
proc mergeFiles(outfile: File; filenames: seq[string]) =
var queue: HeapQueue[(string, File)]
for filename in filenames:
let f = open(fileName)
queue.push (f.readLine(), f)
while queue.len > 0:
let (s, infile) = queue.pop()
outfile.write s & '\n'
if infile.endOfFile:
infile.close()
else:
queue.push((infile.readLine(), infile))
 
 
when isMainModule:
const WriteToFile = false # Compile time switch.
 
randomize()
let nf = rand(2..4) # Number of files.
let lp = 3 # Lines per file.
var filenames: seq[string]
var lines = toSeq(1..nf*lp)
lines.shuffle()
 
for i in 1..nf:
let filename = &"file{i}.txt"
filenames.add filename
let f = open(filename, fmWrite)
for l in 1..lp:
f.write &"Line {lines[^l]:2}\n"
lines.setLen(lines.len - lp)
f.close()
 
echo &"sorting {nf * lp} lines split over {nf} files"
sortFiles(filenames)
 
when WriteToFile:
let f = open("results.txt", fmWrite)
mergeFiles(f, filenames)
f.close()
else:
mergeFiles(stdout, filenames)
 
for filename in filenames:
removeFile(filename)</syntaxhighlight>
 
{{out}}
<pre>sorting 12 lines split over 4 files
file1.txt => Line 3, Line 4, Line 10
file2.txt => Line 1, Line 6, Line 12
file3.txt => Line 2, Line 5, Line 8
file4.txt => Line 7, Line 9, Line 11
Line 1
Line 2
Line 3
Line 4
Line 5
Line 6
Line 7
Line 8
Line 9
Line 10
Line 11
Line 12</pre>
 
=={{header|Perl}}==
Simulate task by reading from 'DATA' handle and using tiny record limit. As written, works for any numeric input, but could define any kind of customized sorting.
<langsyntaxhighlight lang="perl">use strict;
use warnings;
 
Line 970 ⟶ 1,051:
654
789
234</langsyntaxhighlight>
{{out}}
<pre>123
Line 989 ⟶ 1,070:
=={{header|Phix}}==
Slight variation on [[Stream_Merge#Phix|Stream_Merge]]
<!--<syntaxhighlight lang="phix">(notonline)-->
<lang Phix>include builtins/pqueue.e
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- file i/o</span>
include builtins/pfile.e -- write_lines() - not [yet] documented
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">/</span><span style="color: #000000;">pqueue</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">/</span><span style="color: #000000;">pfile</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span> <span style="color: #000080;font-style:italic;">-- write_lines() - not [yet] documented</span>
procedure add(integer fn, pq)
object line = gets(fn)
<span style="color: #008080;">procedure</span> <span style="color: #000000;">add</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">fn</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">pq</span><span style="color: #0000FF;">)</span>
if line=-1 then
<span style="color: #004080;">object</span> <span style="color: #000000;">line</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">gets</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fn</span><span style="color: #0000FF;">)</span>
close(fn)
<span style="color: #008080;">if</span> <span style="color: #000000;">line</span><span style="color: #0000FF;">=-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
else
<span style="color: #7060A8;">close</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fn</span><span style="color: #0000FF;">)</span>
pq_add({fn,line}, pq)
<span style="color: #008080;">else</span>
end if
<span style="color: #7060A8;">pq_add</span><span style="color: #0000FF;">({</span><span style="color: #000000;">fn</span><span style="color: #0000FF;">,</span><span style="color: #000000;">line</span><span style="color: #0000FF;">},</span> <span style="color: #000000;">pq</span><span style="color: #0000FF;">)</span>
end procedure
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
procedure sort_files(sequence filenames)
for i=1 to length(filenames) do
<span style="color: #008080;">procedure</span> <span style="color: #000000;">sort_files</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">filenames</span><span style="color: #0000FF;">)</span>
sequence lines = get_text(filenames[i],GT_LF_STRIPPED),
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filenames</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
sorted = sort(lines)
<span style="color: #004080;">sequence</span> <span style="color: #000000;">lines</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">get_text</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filenames</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #004600;">GT_LF_STRIPPED</span><span style="color: #0000FF;">),</span>
printf(1,"%s:%v => %v\n",{filenames[i],lines,sorted})
<span style="color: #000000;">sorted</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">lines</span><span style="color: #0000FF;">)</span>
if write_lines(filenames[i],sorted)!=1 then ?9/0 end if
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s:%v =&gt; %v\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">filenames</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">lines</span><span style="color: #0000FF;">,</span><span style="color: #000000;">sorted</span><span style="color: #0000FF;">})</span>
end for
<span style="color: #008080;">if</span> <span style="color: #7060A8;">write_lines</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filenames</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">sorted</span><span style="color: #0000FF;">)!=</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end procedure
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
procedure merge_files(integer outfn, sequence filenames)
integer pq = pq_new()
<span style="color: #008080;">procedure</span> <span style="color: #000000;">merge_files</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">outfn</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">filenames</span><span style="color: #0000FF;">)</span>
for i=1 to length(filenames) do
<span style="color: #004080;">integer</span> <span style="color: #000000;">pq</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">pq_new</span><span style="color: #0000FF;">()</span>
add(open(filenames[i], "r"),pq)
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filenames</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
end for
<span style="color: #000000;">add</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">open</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filenames</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span> <span style="color: #008000;">"r"</span><span style="color: #0000FF;">),</span><span style="color: #000000;">pq</span><span style="color: #0000FF;">)</span>
while not pq_empty(pq) do
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
{integer fn, string line} = pq_pop(pq)
<span style="color: #008080;">while</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">pq_empty</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pq</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
puts(outfn,line)
<span style="color: #0000FF;">{</span><span style="color: #004080;">integer</span> <span style="color: #000000;">fn</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">string</span> <span style="color: #000000;">line</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">pq_pop</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pq</span><span style="color: #0000FF;">)</span>
add(fn, pq)
<span style="color: #7060A8;">puts</span><span style="color: #0000FF;">(</span><span style="color: #000000;">outfn</span><span style="color: #0000FF;">,</span><span style="color: #000000;">line</span><span style="color: #0000FF;">)</span>
end while
<span style="color: #000000;">add</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fn</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">pq</span><span style="color: #0000FF;">)</span>
pq_destroy(pq)
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
end procedure
<span style="color: #7060A8;">pq_destroy</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pq</span><span style="color: #0000FF;">)</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
procedure test()
integer nf = rand(5), -- number of files
<span style="color: #008080;">procedure</span> <span style="color: #000000;">test</span><span style="color: #0000FF;">()</span>
lp = 3 -- lines per file
<span style="color: #004080;">integer</span> <span style="color: #000000;">nf</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">rand</span><span style="color: #0000FF;">(</span><span style="color: #000000;">5</span><span style="color: #0000FF;">),</span> <span style="color: #000080;font-style:italic;">-- number of files</span>
sequence filenames = {},
<span style="color: #000000;">lp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">3</span> <span style="color: #000080;font-style:italic;">-- lines per file</span>
lines = shuffle(tagset(nf*lp))
<span style="color: #004080;">sequence</span> <span style="color: #000000;">filenames</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
for i=1 to nf do
<span style="color: #000000;">lines</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">shuffle</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nf</span><span style="color: #0000FF;">*</span><span style="color: #000000;">lp</span><span style="color: #0000FF;">))</span>
string filename = sprintf("file%d.txt",i)
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">nf</span> <span style="color: #008080;">do</span>
filenames = append(filenames,filename)
<span style="color: #004080;">string</span> <span style="color: #000000;">filename</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"file%d.txt"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
integer fn = open(filename,"w")
<span style="color: #000000;">filenames</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filenames</span><span style="color: #0000FF;">,</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">)</span>
for l=1 to lp do
<span style="color: #004080;">integer</span> <span style="color: #000000;">fn</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">open</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"w"</span><span style="color: #0000FF;">)</span>
printf(fn,"Line %02d\n",lines[l])
<span style="color: #008080;">for</span> <span style="color: #000000;">l</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">lp</span> <span style="color: #008080;">do</span>
end for
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fn</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Line %02d\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lines</span><span style="color: #0000FF;">[</span><span style="color: #000000;">l</span><span style="color: #0000FF;">])</span>
lines = lines[lp+1..$]
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
close(fn)
<span style="color: #000000;">lines</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">lines</span><span style="color: #0000FF;">[</span><span style="color: #000000;">lp</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..$]</span>
end for
<span style="color: #7060A8;">close</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fn</span><span style="color: #0000FF;">)</span>
printf(1,"sorting %d lines split over %d files\n",{nf*lp,nf})
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
sort_files(filenames)
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"sorting %d lines split over %d files\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">nf</span><span style="color: #0000FF;">*</span><span style="color: #000000;">lp</span><span style="color: #0000FF;">,</span><span style="color: #000000;">nf</span><span style="color: #0000FF;">})</span>
integer outfn = 1 -- or open("results.txt","w")
<span style="color: #000000;">sort_files</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filenames</span><span style="color: #0000FF;">)</span>
merge_files(outfn,filenames)
<span style="color: #004080;">integer</span> <span style="color: #000000;">outfn</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> <span style="color: #000080;font-style:italic;">-- or open("results.txt","w")</span>
-- close(outfn)
<span style="color: #000000;">merge_files</span><span style="color: #0000FF;">(</span><span style="color: #000000;">outfn</span><span style="color: #0000FF;">,</span><span style="color: #000000;">filenames</span><span style="color: #0000FF;">)</span>
for i=1 to nf do
<span style="color: #000080;font-style:italic;">-- close(outfn)</span>
{} = delete_file(filenames[i])
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">nf</span> <span style="color: #008080;">do</span>
end for
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">delete_file</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filenames</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])</span>
end procedure
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
test()</lang>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #000000;">test</span><span style="color: #0000FF;">()</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
Line 1,067 ⟶ 1,151:
=={{header|Python}}==
A technique demonstrated with a short string character data.
<langsyntaxhighlight lang="python">
#! /usr/bin/python3
 
Line 1,151 ⟶ 1,235:
example = main
example()
</syntaxhighlight>
</lang>
 
=={{header|Raku}}==
(formerly Perl 6)
Borrowing from [http://rosettacode.org/wiki/Stream_Merge Stream_Merge] here. Temporary files are automatically deleted when program is done, so no explicit clean-up required.
<syntaxhighlight lang="raku" perl6line>use File::Temp;
 
sub merge_streams ( @streams ) {
Line 1,183 ⟶ 1,267:
@files.push: store(@chunk) if @chunk;
 
say join ' ', merge_streams @files».&open;</langsyntaxhighlight>
{{out}}
<pre>-11 -9 -2 0 2 3 4 15 32 34 42 43 45 45 55 64 66 76 78 87 92 123</pre>
Line 1,192 ⟶ 1,276:
<br>The sort work files are then sorted with an external sort, and then merged into one big file.
 
This particular example uses the DOS &nbsp; '''SORT''' &nbsp; and &nbsp; '''DELERASE''' &nbsp; commands.
<langsyntaxhighlight lang="rexx">/*REXX pgm reads a file, splits into smaller files, sorts 'em, combines into sorted file*/
parse arg FID n lim seed . /*obtain optional arguments from the CL*/
if FID=='' | FID=="," then FID= 'SORT_EXT.OUT' /*name of the output (sorted) file. */
if n=='' | n=="," then n= 500 500 /*number of records (rand #s) to gen. */
if lim=='' | lim=="," then lim= 10 10 /*number of records per SORTWORK file. */
if datatype(seed, 'W') then call random ,,seed /*Numeric? Then use it as a rand seed.*/
sWork = 'SORTWORK.' /*the filename of the SORTWORK files.*/
Line 1,203 ⟶ 1,287:
call srt # /*sort records in all SORTWORK files.*/
call mrg /*merge records in the SORTWORK files.*/
exit 0 /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
mrg: procedure expose FID sWork; parse arg # /*#: the number of SORTWORK files. */
@.= copies('ff'x, 1e5) /*no value should be larger than this. */
call lineout FID, , 1 /*position the output file at rec # 1. */
 
do j=1 until @.j==@.; call rdr j /*read any number of SORTWORK files, */
do j=1 until @.j==@.; call rdr j /*read any number of SORTWORK files, */
end /*j*/ /*but initially just 1 record per file.*/
 
j=j-1 /*adj. J; read from a non─existent file*/
j= j - do1 forever; y=@.; z=0 /*find the lowest value for N values /*adj. J; read from a non─existent file*/
 
do k=1 for j /*traipse through the stemmed @ array.*/
do forever; if @.k==@. then call rdr k y= @. /*Notfind defined?the lowest Thenvalue readfor a fileN record values.*/
z= 0
if @.k<<y then do; y=@.k; z=k; end /*Lowest so far? Then mark this as min.*/
end /*do k*/ =1 for j /*traipse [↑] notethrough usethe ofstemmed << exact@ comparisonarray.*/
if @.k==@. then call rdr k /*Not defined? Then read a file record*/
if @.k<<y then do; y= @.k /*Lowest so far? Then mark this as min.*/
z= k
end
end /*k*/ /* [↑] note use of << exact comparison*/
 
if z==0 then leave /*Any more records? No, close file. */
call lineout FID, @.z /*write the value to the output file. */
call rdr z /*re-populate a value from the # file. */
end /*forever*/ /*keep reading/merging until exhausted.*/
 
call lineout FID /*close the output file (just in case).*/
'DELERASE' sWORK"*" /*delete all the SORTWORK files. */
return
/*──────────────────────────────────────────────────────────────────────────────────────*/
gen: procedure expose #; parse arg m,siz; d= digits() /*used for justify. */
# = 0 /*number of SORTWORK.nnn files so far*/
do j=1 for m; #= 1 + j % siz /*create workfile#*/
call lineout 'SORTWORK.'#, right(random(, 1e5), d) /*write rand #. */
end /*j*/
do k=1 for #; call lineout 'SORTWORK.'#; end /*close a workfile*/
return end /*k*/
return
/*──────────────────────────────────────────────────────────────────────────────────────*/
rdr: parse arg a; @.a=@.; f= sWork || a; if lines(f)\==0 then @.a= linein(f); return
/*──────────────────────────────────────────────────────────────────────────────────────*/
srt: procedure expose sWork; parse arg #
do j=1 for #; fn= sWORK || j; 'SORT' fn "/O" fn; end /*j*/; return</langsyntaxhighlight><br><br>
 
<br><br>
=={{header|Wren}}==
{{trans|Go}}
{{libheader|Wren-dynamic}}
{{libheader|Wren-sort}}
{{libheader|Wren-str}}
A bit simpler than the Go version as we use fixed length integers which (together with a following space) can be sorted as strings.
<syntaxhighlight lang="wren">import "io" for File
import "random" for Random
import "./dynamic" for Struct
import "./sort" for Sort
import "./str" for Str
 
var MinHeapNode = Struct.create("MinHeapNode", ["element", "index"])
 
class MinHeap {
construct new(nodes) {
_nodes = nodes
var start = ((_nodes.count-1)/2).floor
for (i in start..0) minHeapify(i)
}
 
left(i) { 2*i + 1 }
right(i) { 2*i + 2 }
 
nodes { _nodes }
 
min { _nodes[0] }
 
replaceMin(x) {
_nodes[0] = x
minHeapify(0)
}
 
minHeapify(i) {
var l = left(i)
var r = right(i)
var smallest = i
var heapSize = _nodes.count
if (l < heapSize && Str.lt(_nodes[l].element, _nodes[i].element)) smallest = l
if (r < heapSize && Str.lt(_nodes[r].element, _nodes[smallest].element)) smallest = r
if (smallest != i) {
_nodes.swap(i, smallest)
minHeapify(smallest)
}
}
}
 
// Merge k sorted files: es0,es1 etc.
var mergeFiles = Fn.new { |outputFile, k, e|
var inp = List.filled(k, null)
var offset = List.filled(k, 0) // current offset for each input file
for (i in 0...k) {
var fileName = "es%(i)"
inp[i] = File.open(fileName)
}
var out = File.create(outputFile)
var nodes = List.filled(k, null)
for (i in 0...k) nodes[i] = MinHeapNode.new(0, 0)
var i = 0
while (i < k) {
var bytes = inp[i].readBytes(e)
if (bytes.count < e) break // end of file reached
nodes[i].element = bytes
nodes[i].index = i
offset[i] = offset[i] + e
i = i + 1
}
var hp = MinHeap.new(nodes[0...i])
var count = 0
while (count != i) {
var root = hp.min
out.writeBytes(root.element)
var bytes = inp[root.index].readBytes(e, offset[root.index])
if (bytes.count < e) { // end of file reached
root.element = "999999 "
count = count + 1
} else {
root.element = bytes
offset[root.index] = offset[root.index] + e
}
hp.replaceMin(root)
}
for (j in 0...k) inp[j].close()
out.close()
}
 
// Create initial runs, divide them evenly amongst the output files
// and then merge-sort them.
var createInitialRuns = Fn.new { |inputFile, numWays, runSize, elementSize|
var inp = File.open(inputFile)
var offset = 0
for (i in 0...numWays) {
var fileName = "es%(i)" // es0, es1 etc.
var bytes = inp.readBytes(runSize * elementSize, offset)
offset = offset + runSize * elementSize
var numbers = Str.chunks(bytes, elementSize)
numbers = Sort.merge(numbers)
File.create(fileName) { |f| f.writeBytes(numbers.join("")) }
}
inp.close()
}
 
var externalSort = Fn.new { |inputFile, outputFile, numWays, runSize, elementSize|
createInitialRuns.call(inputFile, numWays, runSize, elementSize)
mergeFiles.call(outputFile, numWays, elementSize)
}
 
// Create a small test file of 40 random 6 digit integers and split it into 4 files
// of 10 such integers each.
var numWays = 4
var runSize = 10
var elementSize = 7 // 6 digits + a following space
var inputFile = "external_sort_input.txt"
var outputFile = "external_sort_output.txt"
var inp = File.create(inputFile)
var rand = Random.new()
var min = 100000
var max = 999999
for (i in 0...numWays*runSize) inp.writeBytes("%(rand.int(min, max).toString) ")
inp.close()
externalSort.call(inputFile, outputFile, numWays, runSize, elementSize)
// remove temporary files
for (i in 0...numWays) {
var fileName = "es%(i)"
File.delete(fileName)
}</syntaxhighlight>
 
{{out}}
Sample run:
<pre>
Contents of external_sort_input.txt:
195387 279593 270645 457221 187563 459521 984067 443317 890630 986820 357072 302605 354825 295908 541221 273855 318978 913819 961359 776939 337617 640070 100140 266938 597987 305187 731698 449166 388165 121283 516001 256453 197931 660491 785453 544828 346520 532447 688793 194774
 
Contents of external_sort_output.txt:
100140 121283 187563 194774 195387 197931 256453 266938 270645 273855 279593 295908 302605 305187 318978 337617 346520 354825 357072 388165 443317 449166 457221 459521 516001 532447 541221 544828 597987 640070 660491 688793 731698 776939 785453 890630 913819 961359 984067 986820
</pre>
9,482

edits