aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--LICENSE.txt21
-rw-r--r--README.md49
-rw-r--r--bubble_sort.py33
-rw-r--r--heapsort.py54
-rw-r--r--insertion_sort.py15
-rw-r--r--merge_sort.py29
-rw-r--r--plot.bat40
-rw-r--r--plot.py145
-rw-r--r--quicksort.py65
-rw-r--r--selection_sort.py17
11 files changed, 471 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c0da6bc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__/
+*.png
+!plots/*.png
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..fbbdd68
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Egor Tensin <Egor.Tensin@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f765c8f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,49 @@
+# Sorting algorithms
+
+Gettting the hang out of sorting algorithms.
+Hosted on [GitHub Pages](https://pages.github.com) at
+https://egor-tensin.github.io/sorting_algorithms/.
+
+## Algorithm implementations
+
+Sorting algorithms are implemented in separate Python scripts.
+You can test each of the implemented algorithms by passing a sequence of
+integer numbers to the corresponding script.
+
+Currently the following sorting algorithms are implemented:
+* bubble sort (`bubble_sort.py`),
+* heapsort (`heapsort.py`),
+* insertion sort (`insertion_sort.py`),
+* merge sort (`merge_sort.py`),
+* selection sort (`selection_sort.py`),
+* quicksort (`quicksort.py`).
+
+Some scripts actually implement more than one version of a sorting algorithm.
+For example, a quicksort is implemented in multiple versions depending on the
+choice of the pivot element.
+
+## Plots
+
+Running time of the implemented sorting algorithms is measured and plotted.
+The plots are stored in the `plots/` directory.
+
+Each algorithm is provided with three lists:
+* a list of sorted numbers,
+* a reversed list of sorted numbers,
+* and a list of numbers in random order.
+
+## Usage
+
+### Prerequisites
+
+To use this software, you need to be able to run Python 3 scripts.
+
+To plot a sorting algorithm, use `plot.py` (on Windows, you can also use
+`plot.bat`, which simply calls `plot.py` three times, providing the sorting
+algorithm with sorted, reversed, and randomized inputs).
+
+## Licensing
+
+This project, including all of the files and their contents, is licensed under
+the terms of the MIT License.
+See LICENSE.txt for details.
diff --git a/bubble_sort.py b/bubble_sort.py
new file mode 100644
index 0000000..8309201
--- /dev/null
+++ b/bubble_sort.py
@@ -0,0 +1,33 @@
+# Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+# This file is licensed under the terms of the MIT License.
+# See LICENSE.txt for details.
+
+def bubble_sort(xs):
+ while True:
+ swapped = False
+ for i in range(1, len(xs)):
+ if xs[i - 1] > xs[i]:
+ xs[i], xs[i - 1] = xs[i - 1], xs[i]
+ swapped = True
+ if not swapped:
+ break
+ return xs
+
+def bubble_sort_optimized(xs):
+ n = len(xs)
+ while True:
+ new_n = 0
+ for i in range(1, n):
+ if xs[i - 1] > xs[i]:
+ xs[i], xs[i - 1] = xs[i - 1], xs[i]
+ new_n = i
+ n = new_n
+ if not n:
+ break
+ return xs
+
+if __name__ == '__main__':
+ import sys
+ xs = list(map(int, sys.argv[1:]))
+ print(bubble_sort(list(xs)))
+ print(bubble_sort_optimized(list(xs)))
diff --git a/heapsort.py b/heapsort.py
new file mode 100644
index 0000000..3f44e51
--- /dev/null
+++ b/heapsort.py
@@ -0,0 +1,54 @@
+# Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+# This file is licensed under the terms of the MIT License.
+# See LICENSE.txt for details.
+
+# Disclaimer: implemented in the most literate way.
+
+def heapsort(xs):
+ heapify(xs)
+ first, last = 0, len(xs) - 1
+ for end in range(last, first, -1):
+ xs[end], xs[first] = xs[first], xs[end]
+ siftdown(xs, first, end - 1)
+ return xs
+
+# In a heap stored in a zero-based array,
+# left_child = node * 2 + 1
+# right_child = node * 2 + 2
+# parent = (node - 1) // 2
+
+def _get_parent(node):
+ return (node - 1) // 2
+
+def _get_left_child(node):
+ return node * 2 + 1
+
+def _get_right_child(node):
+ return node * 2 + 2
+
+def heapify(xs):
+ last = len(xs) - 1
+ first_parent, last_parent = 0, _get_parent(last)
+ for parent in range(last_parent, first_parent - 1, -1):
+ siftdown(xs, parent, last)
+
+def siftdown(xs, start, end):
+ root = start
+ while True:
+ # We swap if there is at least one child
+ child = _get_left_child(root)
+ if child > end:
+ break
+ # If there are two children, select the minimum
+ right_child = _get_right_child(root)
+ if right_child <= end and xs[child] < xs[right_child]:
+ child = right_child
+ if xs[root] < xs[child]:
+ xs[root], xs[child] = xs[child], xs[root]
+ root = child
+ else:
+ break
+
+if __name__ == '__main__':
+ import sys
+ print(heapsort(list(map(int, sys.argv[1:]))))
diff --git a/insertion_sort.py b/insertion_sort.py
new file mode 100644
index 0000000..9e0912d
--- /dev/null
+++ b/insertion_sort.py
@@ -0,0 +1,15 @@
+# Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+# This file is licensed under the terms of the MIT License.
+# See LICENSE.txt for details.
+
+def insertion_sort(xs):
+ for i in range(1, len(xs)):
+ j = i
+ while j > 0 and xs[j - 1] > xs[j]:
+ xs[j], xs[j - 1] = xs[j - 1], xs[j]
+ j -= 1
+ return xs
+
+if __name__ == '__main__':
+ import sys
+ print(insertion_sort(list(map(int, sys.argv[1:]))))
diff --git a/merge_sort.py b/merge_sort.py
new file mode 100644
index 0000000..6d5403d
--- /dev/null
+++ b/merge_sort.py
@@ -0,0 +1,29 @@
+# Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+# This file is licensed under the terms of the MIT License.
+# See LICENSE.txt for details.
+
+def merge(left, right):
+ result = []
+ l, r = 0, 0
+ while l < len(left) and r < len(right):
+ if left[l] <= right[r]:
+ result.append(left[l])
+ l += 1
+ else:
+ result.append(right[r])
+ r += 1
+ if left:
+ result.extend(left[l:])
+ if right:
+ result.extend(right[r:])
+ return result
+
+def merge_sort(xs):
+ if len(xs) < 2:
+ return xs
+ mid = len(xs) // 2
+ return merge(merge_sort(xs[:mid]), merge_sort(xs[mid:]))
+
+if __name__ == '__main__':
+ import sys
+ print(merge_sort(list(map(int, sys.argv[1:]))))
diff --git a/plot.bat b/plot.bat
new file mode 100644
index 0000000..e389c5c
--- /dev/null
+++ b/plot.bat
@@ -0,0 +1,40 @@
+@rem Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+@rem This file is licensed under the terms of the MIT License.
+@rem See LICENSE.txt for details.
+
+@setlocal enabledelayedexpansion
+
+@if E%1 == E goto :print_usage
+@set algorithm=%1
+
+@if not E%2 == E (
+ set repetitions=%2
+) else (
+ set repetitions=100
+)
+@if not E%3 == E (
+ set min=%3
+) else (
+ set min=0
+)
+@if not E%4 == E (
+ set max=%4
+) else (
+ set max=200
+)
+
+plot.py -l "%algorithm%" -a "%min%" -b "%max%" -r "%repetitions%" ^
+ -i sorted -o "%algorithm%_%repetitions%_sorted_%min%_%max%.png" ^
+ || exit /b !errorlevel!
+plot.py -l "%algorithm%" -a "%min%" -b "%max%" -r "%repetitions%" ^
+ -i randomized -o "%algorithm%_%repetitions%_randomized_%min%_%max%.png" ^
+ || exit /b !errorlevel!
+plot.py -l "%algorithm%" -a "%min%" -b "%max%" -r "%repetitions%" ^
+ -i reversed -o "%algorithm%_%repetitions%_reversed_%min%_%max%.png" ^
+ || exit /b !errorlevel!
+
+@exit /b
+
+:print_usage:
+@echo Usage: %0 ALGORITHM [REPETITIONS [MIN [MAX]]]
+@exit /b 1
diff --git a/plot.py b/plot.py
new file mode 100644
index 0000000..33743aa
--- /dev/null
+++ b/plot.py
@@ -0,0 +1,145 @@
+# Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+# This file is licensed under the terms of the MIT License.
+# See LICENSE.txt for details.
+
+from time import clock
+import gc
+
+_ALGORITHMS = (
+ 'bubble',
+ 'bubble_optimized',
+ 'heap',
+ 'insertion',
+ 'merge',
+ 'quick_first',
+ 'quick_second',
+ 'quick_middle',
+ 'quick_last',
+ 'quick_random',
+ 'selection',
+)
+
+def _get_context():
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--repetitions', '-r', type=int, default=1,
+ help='set number of sorting repetitions')
+ parser.add_argument('--input', '-i',
+ default='randomized', metavar='INPUT',
+ choices=('sorted', 'randomized', 'reversed'),
+ help='choose initial input state')
+ parser.add_argument('--algorithm', '-l', metavar='ALGORITHM',
+ choices=_ALGORITHMS, required=True,
+ help='select sorting algorithm to use')
+ parser.add_argument('--min', '-a', type=int, required=True,
+ help='set min input length',
+ dest='min_input_length')
+ parser.add_argument('--max', '-b', type=int, required=True,
+ help='set max input length',
+ dest='max_input_length')
+ parser.add_argument('--output', '-o', dest='plot_path',
+ help='set plot output path')
+ args = parser.parse_args()
+ if args.repetitions < 1:
+ parser.error('number of repetitions must be > 0')
+ if args.min_input_length < 0:
+ parser.error('min sequence length must be >= 0')
+ if args.max_input_length < 0:
+ parser.error('max sequence length must be >= 0')
+ if args.max_input_length < args.min_input_length:
+ parser.error('max sequence length cannot be less than min sequence length')
+ return args
+
+def get_timestamp():
+ return clock()
+
+def init_clock():
+ get_timestamp()
+
+def gen_input(args, n):
+ if args.input == 'sorted':
+ return list(range(n))
+ elif args.input == 'reversed':
+ return sorted(range(n), reverse=True)
+ elif args.input == 'randomized':
+ from random import sample
+ return sample(range(n), n)
+ else:
+ raise NotImplementedError(
+ 'unimplemented initial input state \'{}\''.format(args.input))
+
+def measure_running_time(ctx, algorithm, input_length):
+ xs = gen_input(ctx, input_length)
+ assert algorithm(list(xs)) == sorted(xs)
+ input_copies = [list(xs) for i in range(ctx.repetitions)]
+ gc.disable()
+ starting_time = get_timestamp()
+ for i in range(ctx.repetitions):
+ algorithm(input_copies[i])
+ running_time = get_timestamp() - starting_time
+ gc.enable()
+ return running_time
+
+def _decorate_plot(ctx, plt):
+ plt.grid()
+ plt.xlabel('Input length')
+ plt.ylabel('Running time (sec)')
+ plt.title('{} sort, {} repetition(s), {} input'.format(
+ ctx.algorithm, ctx.repetitions, ctx.input))
+
+def plot_algorithm(ctx, algorithm):
+ import matplotlib.pyplot as plt
+ _decorate_plot(ctx, plt)
+ input_length = range(ctx.min_input_length,
+ ctx.max_input_length + 1)
+ running_time = []
+ for n in input_length:
+ running_time.append(measure_running_time(ctx, algorithm, n))
+ plt.plot(input_length, running_time)
+ if ctx.plot_path is not None:
+ plt.savefig(ctx.plot_path)
+ else:
+ plt.show()
+
+def plot(ctx):
+ if ctx.algorithm == 'bubble':
+ from bubble_sort import bubble_sort
+ plot_algorithm(ctx, bubble_sort)
+ elif ctx.algorithm == 'bubble_optimized':
+ from bubble_sort import bubble_sort_optimized
+ plot_algorithm(ctx, bubble_sort_optimized)
+ elif ctx.algorithm == 'heap':
+ from heapsort import heapsort
+ plot_algorithm(ctx, heapsort)
+ elif ctx.algorithm == 'insertion':
+ from insertion_sort import insertion_sort
+ plot_algorithm(ctx, insertion_sort)
+ elif ctx.algorithm == 'merge':
+ from merge_sort import merge_sort
+ plot_algorithm(ctx, merge_sort)
+ elif ctx.algorithm == 'quick_first':
+ from quicksort import quicksort_first
+ plot_algorithm(ctx, quicksort_first)
+ elif ctx.algorithm == 'quick_second':
+ from quicksort import quicksort_second
+ plot_algorithm(ctx, quicksort_second)
+ elif ctx.algorithm == 'quick_middle':
+ from quicksort import quicksort_middle
+ plot_algorithm(ctx, quicksort_middle)
+ elif ctx.algorithm == 'quick_last':
+ from quicksort import quicksort_last
+ plot_algorithm(ctx, quicksort_last)
+ elif ctx.algorithm == 'quick_random':
+ from quicksort import quicksort_random
+ plot_algorithm(ctx, quicksort_random)
+ elif ctx.algorithm == 'selection':
+ from selection_sort import selection_sort
+ plot_algorithm(ctx, selection_sort)
+ else:
+ raise NotImplementedError(
+ 'unknown algorithm \'{}\''.format(ctx.algorithm))
+
+if __name__ == '__main__':
+ ctx = _get_context()
+ init_clock()
+ plot(ctx)
diff --git a/quicksort.py b/quicksort.py
new file mode 100644
index 0000000..b8ecc18
--- /dev/null
+++ b/quicksort.py
@@ -0,0 +1,65 @@
+# Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+# This file is licensed under the terms of the MIT License.
+# See LICENSE.txt for details.
+
+from random import randrange
+
+def _partition(xs, beg, end, select_pivot):
+ pivot = select_pivot(xs, beg, end)
+ xs[pivot], xs[end] = xs[end], xs[pivot]
+ for i in range(beg, end):
+ if xs[i] <= xs[end]:
+ xs[i], xs[beg] = xs[beg], xs[i]
+ beg += 1
+ xs[beg], xs[end] = xs[end], xs[beg]
+ return beg
+
+def _quicksort(xs, beg, end, select_pivot):
+ if beg < end:
+ pivot = _partition(xs, beg, end, select_pivot)
+ _quicksort(xs, beg, pivot - 1, select_pivot)
+ _quicksort(xs, pivot + 1, end, select_pivot)
+
+def _select_first(xs, beg, end):
+ return beg
+
+def _select_second(xs, beg, end):
+ return beg + 1
+
+def _select_middle(xs, beg, end):
+ return (beg + end) // 2
+
+def _select_last(xs, beg, end):
+ return end
+
+def _select_random(xs, beg, end):
+ return randrange(beg, end + 1)
+
+def quicksort_first(xs):
+ _quicksort(xs, 0, len(xs) - 1, _select_first)
+ return xs
+
+def quicksort_second(xs):
+ _quicksort(xs, 0, len(xs) - 1, _select_second)
+ return xs
+
+def quicksort_middle(xs):
+ _quicksort(xs, 0, len(xs) - 1, _select_middle)
+ return xs
+
+def quicksort_last(xs):
+ _quicksort(xs, 0, len(xs) - 1, _select_last)
+ return xs
+
+def quicksort_random(xs):
+ _quicksort(xs, 0, len(xs) - 1, _select_random)
+ return xs
+
+if __name__ == '__main__':
+ import sys
+ xs = list(map(int, sys.argv[1:]))
+ print(quicksort_first(list(xs)))
+ print(quicksort_second(list(xs)))
+ print(quicksort_middle(list(xs)))
+ print(quicksort_last(list(xs)))
+ print(quicksort_random(list(xs)))
diff --git a/selection_sort.py b/selection_sort.py
new file mode 100644
index 0000000..c466bea
--- /dev/null
+++ b/selection_sort.py
@@ -0,0 +1,17 @@
+# Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+# This file is licensed under the terms of the MIT License.
+# See LICENSE.txt for details.
+
+def selection_sort(xs):
+ for i in range(len(xs) - 1):
+ min_i = i
+ for j in range(i + 1, len(xs)):
+ if xs[j] < xs[min_i]:
+ min_i = j
+ if min_i != i:
+ xs[i], xs[min_i] = xs[min_i], xs[i]
+ return xs
+
+if __name__ == '__main__':
+ import sys
+ print(selection_sort(list(map(int, sys.argv[1:]))))