Compare sorting algorithms' performance: Difference between revisions
Content added Content deleted
(added task) |
(added Python example) |
||
Line 10: | Line 10: | ||
Consider at least two different sorting function (different algorithms or/and different implementation of the same algorithm). |
Consider at least two different sorting function (different algorithms or/and different implementation of the same algorithm). |
||
For example, consider [[Bubble Sort]], [[Insertion |
For example, consider [[Bubble Sort]], [[Insertion sort]], [[Quick sort]] or/and implementations of [[Quick sort]] with different pivot selection mechanisms. |
||
Where possible, use existing implementations. |
Where possible, use existing implementations. |
||
Preliminary subtask: |
Preliminary subtask: |
||
* [[Bubble Sort]], [[Insertion |
* [[Bubble Sort]], [[Insertion sort]], [[Quick sort]], [[Radix sort]], [[Shell sort]] |
||
* [[Query Performance]] |
* [[Query Performance]] |
||
* [[Write float arrays to a text file]] |
* [[Write float arrays to a text file]] |
||
Line 25: | Line 25: | ||
# Plot timings. |
# Plot timings. |
||
# What conclusions about relative performance of the sorting routines could be made based on the plots? |
# What conclusions about relative performance of the sorting routines could be made based on the plots? |
||
=={{header|Python}}== |
|||
'''Interpreter:''' [[Python]] 2.5 |
|||
[[Category:Python]] |
|||
===Sequence generators=== |
|||
def ones(n): |
|||
return [1]*n |
|||
def reversedrange(n): |
|||
x = range(n) |
|||
x.reverse() |
|||
return x |
|||
def shuffledrange(n): |
|||
x = range(n) |
|||
random.shuffle(x) |
|||
return x |
|||
===Write timings=== |
|||
<code> |
|||
def write_timings(npoints=10, maxN=10**4, sort_functions=(builtinsort,insertion_sort, qsort), sequence_creators = (ones, range, shuffledrange)): |
|||
"""`npoints' and `maxN' are recomendations that may be ignored by implementation""" |
|||
Ns = range(2, maxN, maxN//npoints) |
|||
for sort in sort_functions: |
|||
for make_seq in sequence_creators: |
|||
Ts = map(lambda n: usec(sort, (make_seq(n),)), Ns) |
|||
writedat('%s-%s-%d-%d.xy' % (sort.__name__, make_seq.__name__, len(Ns), max(Ns)), |
|||
Ns, Ts) |
|||
</code> |
|||
Where ''writedat()'' is defined in the [[Write float arrays to a text file]] subtask, ''usec()'' is defined in the [[Query Performance]] subtask. |
|||
===Plot timings=== |
|||
{{library|matplotlib}} |
|||
<code> |
|||
import pylab |
|||
def plotdd(dictplotdict): |
|||
"""See ``plot_timings()`` below.""" |
|||
symbols = ('o', '^', 'v', '<', '>', 's', '+', 'x', 'D', 'd', |
|||
'1', '2', '3', '4', 'h', 'H', 'p', '|', '_') |
|||
colors = map(None, 'bgrcmyk') |
|||
for npoints, plotdict in dictplotdict.iteritems(): |
|||
for ttle, lst in plotdict.iteritems(): |
|||
pylab.hold(False) |
|||
for i, (label, polynom, x, y) in enumerate(sorted(lst,key=operator.itemgetter(0))): |
|||
pylab.plot(x, y, colors[i % len(colors)] + symbols[i % len(symbols)], |
|||
label='%s %s' % (polynom, label)) |
|||
pylab.hold(True) |
|||
y = numpy.polyval(polynom, x) |
|||
pylab.plot(x, y, colors[i % len(colors)], label= '_nolegend_') |
|||
pylab.legend(loc='upper left') |
|||
pylab.xlabel(p.variable) |
|||
pylab.ylabel('log2( time in microseconds )') |
|||
pylab.title(ttle, verticalalignment='bottom') |
|||
figname = '_%(npoints)03d%(ttle)s' % vars() |
|||
pylab.savefig(figname+'.png') |
|||
pylab.savefig(figname+'.pdf') |
|||
print figname |
|||
</code> |
|||
See [[Plot x, y arrays]] and [[Polynomial Fitting]] subtask for basic usage of ''pylab.plot()'' and ''numpy.polyfit()''. |
|||
{{library|numpy}} |
|||
<code> |
|||
import numpy |
|||
def plot_timings(): |
|||
makedict = lambda: collections.defaultdict(lambda: collections.defaultdict(list)) |
|||
df = makedict() |
|||
ds = makedict() |
|||
# populate plot dictionaries |
|||
for filename in glob.glob('*.xy'): |
|||
m = re.match(r'([^-]+)-([^-]+)-(\d+)-(\d+)\.xy', filename) |
|||
print filename |
|||
assert m, filename |
|||
funcname, seqname, npoints, maxN = m.groups() |
|||
npoints, maxN = int(npoints), int(maxN) |
|||
a = numpy.fromiter(itertools.imap(float, open(filename).read().split()), dtype='f') |
|||
Ns = a[::2] # sequences lengths |
|||
Ts = a[1::2] # corresponding times |
|||
assert len(Ns) == len(Ts) == npoints |
|||
assert max(Ns) <= maxN |
|||
# |
|||
logsafe = logical_and(Ns>0, Ts>0) |
|||
Ts = numpy.log2(Ts[logsafe]) |
|||
Ns = numpy.log2(Ns[logsafe]) |
|||
coeffs = numpy.polyfit(Ns, Ts, deg=1) |
|||
poly = numpy.poly1d(coeffs, variable='log2(N)') |
|||
# |
|||
df[npoints][funcname].append((seqname, poly, Ns, Ts)) |
|||
ds[npoints][seqname].append((funcname, poly, Ns, Ts)) |
|||
# actual plotting |
|||
plotdd(df) |
|||
plotdd(ds) # see ``plotdd()`` above |
|||
</code> |