From d6de118675a192ce6e01574eb4eb2541db7aca7a Mon Sep 17 00:00:00 2001 From: Egor Tensin Date: Wed, 6 May 2015 06:14:54 +0300 Subject: initial commit --- .gitignore | 3 ++ LICENSE.txt | 21 ++++++++ README.md | 49 ++++++++++++++++++ bubble_sort.py | 33 +++++++++++++ heapsort.py | 54 ++++++++++++++++++++ insertion_sort.py | 15 ++++++ merge_sort.py | 29 +++++++++++ plot.bat | 40 +++++++++++++++ plot.py | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ quicksort.py | 65 ++++++++++++++++++++++++ selection_sort.py | 17 +++++++ 11 files changed, 471 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE.txt create mode 100644 README.md create mode 100644 bubble_sort.py create mode 100644 heapsort.py create mode 100644 insertion_sort.py create mode 100644 merge_sort.py create mode 100644 plot.bat create mode 100644 plot.py create mode 100644 quicksort.py create mode 100644 selection_sort.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c0da6bc --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +*.png +!plots/*.png diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..fbbdd68 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Egor Tensin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f765c8f --- /dev/null +++ b/README.md @@ -0,0 +1,49 @@ +# Sorting algorithms + +Gettting the hang out of sorting algorithms. +Hosted on [GitHub Pages](https://pages.github.com) at +https://egor-tensin.github.io/sorting_algorithms/. + +## Algorithm implementations + +Sorting algorithms are implemented in separate Python scripts. +You can test each of the implemented algorithms by passing a sequence of +integer numbers to the corresponding script. + +Currently the following sorting algorithms are implemented: +* bubble sort (`bubble_sort.py`), +* heapsort (`heapsort.py`), +* insertion sort (`insertion_sort.py`), +* merge sort (`merge_sort.py`), +* selection sort (`selection_sort.py`), +* quicksort (`quicksort.py`). + +Some scripts actually implement more than one version of a sorting algorithm. +For example, a quicksort is implemented in multiple versions depending on the +choice of the pivot element. + +## Plots + +Running time of the implemented sorting algorithms is measured and plotted. +The plots are stored in the `plots/` directory. + +Each algorithm is provided with three lists: +* a list of sorted numbers, +* a reversed list of sorted numbers, +* and a list of numbers in random order. + +## Usage + +### Prerequisites + +To use this software, you need to be able to run Python 3 scripts. + +To plot a sorting algorithm, use `plot.py` (on Windows, you can also use +`plot.bat`, which simply calls `plot.py` three times, providing the sorting +algorithm with sorted, reversed, and randomized inputs). + +## Licensing + +This project, including all of the files and their contents, is licensed under +the terms of the MIT License. +See LICENSE.txt for details. diff --git a/bubble_sort.py b/bubble_sort.py new file mode 100644 index 0000000..8309201 --- /dev/null +++ b/bubble_sort.py @@ -0,0 +1,33 @@ +# Copyright 2015 Egor Tensin +# This file is licensed under the terms of the MIT License. +# See LICENSE.txt for details. + +def bubble_sort(xs): + while True: + swapped = False + for i in range(1, len(xs)): + if xs[i - 1] > xs[i]: + xs[i], xs[i - 1] = xs[i - 1], xs[i] + swapped = True + if not swapped: + break + return xs + +def bubble_sort_optimized(xs): + n = len(xs) + while True: + new_n = 0 + for i in range(1, n): + if xs[i - 1] > xs[i]: + xs[i], xs[i - 1] = xs[i - 1], xs[i] + new_n = i + n = new_n + if not n: + break + return xs + +if __name__ == '__main__': + import sys + xs = list(map(int, sys.argv[1:])) + print(bubble_sort(list(xs))) + print(bubble_sort_optimized(list(xs))) diff --git a/heapsort.py b/heapsort.py new file mode 100644 index 0000000..3f44e51 --- /dev/null +++ b/heapsort.py @@ -0,0 +1,54 @@ +# Copyright 2015 Egor Tensin +# This file is licensed under the terms of the MIT License. +# See LICENSE.txt for details. + +# Disclaimer: implemented in the most literate way. + +def heapsort(xs): + heapify(xs) + first, last = 0, len(xs) - 1 + for end in range(last, first, -1): + xs[end], xs[first] = xs[first], xs[end] + siftdown(xs, first, end - 1) + return xs + +# In a heap stored in a zero-based array, +# left_child = node * 2 + 1 +# right_child = node * 2 + 2 +# parent = (node - 1) // 2 + +def _get_parent(node): + return (node - 1) // 2 + +def _get_left_child(node): + return node * 2 + 1 + +def _get_right_child(node): + return node * 2 + 2 + +def heapify(xs): + last = len(xs) - 1 + first_parent, last_parent = 0, _get_parent(last) + for parent in range(last_parent, first_parent - 1, -1): + siftdown(xs, parent, last) + +def siftdown(xs, start, end): + root = start + while True: + # We swap if there is at least one child + child = _get_left_child(root) + if child > end: + break + # If there are two children, select the minimum + right_child = _get_right_child(root) + if right_child <= end and xs[child] < xs[right_child]: + child = right_child + if xs[root] < xs[child]: + xs[root], xs[child] = xs[child], xs[root] + root = child + else: + break + +if __name__ == '__main__': + import sys + print(heapsort(list(map(int, sys.argv[1:])))) diff --git a/insertion_sort.py b/insertion_sort.py new file mode 100644 index 0000000..9e0912d --- /dev/null +++ b/insertion_sort.py @@ -0,0 +1,15 @@ +# Copyright 2015 Egor Tensin +# This file is licensed under the terms of the MIT License. +# See LICENSE.txt for details. + +def insertion_sort(xs): + for i in range(1, len(xs)): + j = i + while j > 0 and xs[j - 1] > xs[j]: + xs[j], xs[j - 1] = xs[j - 1], xs[j] + j -= 1 + return xs + +if __name__ == '__main__': + import sys + print(insertion_sort(list(map(int, sys.argv[1:])))) diff --git a/merge_sort.py b/merge_sort.py new file mode 100644 index 0000000..6d5403d --- /dev/null +++ b/merge_sort.py @@ -0,0 +1,29 @@ +# Copyright 2015 Egor Tensin +# This file is licensed under the terms of the MIT License. +# See LICENSE.txt for details. + +def merge(left, right): + result = [] + l, r = 0, 0 + while l < len(left) and r < len(right): + if left[l] <= right[r]: + result.append(left[l]) + l += 1 + else: + result.append(right[r]) + r += 1 + if left: + result.extend(left[l:]) + if right: + result.extend(right[r:]) + return result + +def merge_sort(xs): + if len(xs) < 2: + return xs + mid = len(xs) // 2 + return merge(merge_sort(xs[:mid]), merge_sort(xs[mid:])) + +if __name__ == '__main__': + import sys + print(merge_sort(list(map(int, sys.argv[1:])))) diff --git a/plot.bat b/plot.bat new file mode 100644 index 0000000..e389c5c --- /dev/null +++ b/plot.bat @@ -0,0 +1,40 @@ +@rem Copyright 2015 Egor Tensin +@rem This file is licensed under the terms of the MIT License. +@rem See LICENSE.txt for details. + +@setlocal enabledelayedexpansion + +@if E%1 == E goto :print_usage +@set algorithm=%1 + +@if not E%2 == E ( + set repetitions=%2 +) else ( + set repetitions=100 +) +@if not E%3 == E ( + set min=%3 +) else ( + set min=0 +) +@if not E%4 == E ( + set max=%4 +) else ( + set max=200 +) + +plot.py -l "%algorithm%" -a "%min%" -b "%max%" -r "%repetitions%" ^ + -i sorted -o "%algorithm%_%repetitions%_sorted_%min%_%max%.png" ^ + || exit /b !errorlevel! +plot.py -l "%algorithm%" -a "%min%" -b "%max%" -r "%repetitions%" ^ + -i randomized -o "%algorithm%_%repetitions%_randomized_%min%_%max%.png" ^ + || exit /b !errorlevel! +plot.py -l "%algorithm%" -a "%min%" -b "%max%" -r "%repetitions%" ^ + -i reversed -o "%algorithm%_%repetitions%_reversed_%min%_%max%.png" ^ + || exit /b !errorlevel! + +@exit /b + +:print_usage: +@echo Usage: %0 ALGORITHM [REPETITIONS [MIN [MAX]]] +@exit /b 1 diff --git a/plot.py b/plot.py new file mode 100644 index 0000000..33743aa --- /dev/null +++ b/plot.py @@ -0,0 +1,145 @@ +# Copyright 2015 Egor Tensin +# This file is licensed under the terms of the MIT License. +# See LICENSE.txt for details. + +from time import clock +import gc + +_ALGORITHMS = ( + 'bubble', + 'bubble_optimized', + 'heap', + 'insertion', + 'merge', + 'quick_first', + 'quick_second', + 'quick_middle', + 'quick_last', + 'quick_random', + 'selection', +) + +def _get_context(): + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--repetitions', '-r', type=int, default=1, + help='set number of sorting repetitions') + parser.add_argument('--input', '-i', + default='randomized', metavar='INPUT', + choices=('sorted', 'randomized', 'reversed'), + help='choose initial input state') + parser.add_argument('--algorithm', '-l', metavar='ALGORITHM', + choices=_ALGORITHMS, required=True, + help='select sorting algorithm to use') + parser.add_argument('--min', '-a', type=int, required=True, + help='set min input length', + dest='min_input_length') + parser.add_argument('--max', '-b', type=int, required=True, + help='set max input length', + dest='max_input_length') + parser.add_argument('--output', '-o', dest='plot_path', + help='set plot output path') + args = parser.parse_args() + if args.repetitions < 1: + parser.error('number of repetitions must be > 0') + if args.min_input_length < 0: + parser.error('min sequence length must be >= 0') + if args.max_input_length < 0: + parser.error('max sequence length must be >= 0') + if args.max_input_length < args.min_input_length: + parser.error('max sequence length cannot be less than min sequence length') + return args + +def get_timestamp(): + return clock() + +def init_clock(): + get_timestamp() + +def gen_input(args, n): + if args.input == 'sorted': + return list(range(n)) + elif args.input == 'reversed': + return sorted(range(n), reverse=True) + elif args.input == 'randomized': + from random import sample + return sample(range(n), n) + else: + raise NotImplementedError( + 'unimplemented initial input state \'{}\''.format(args.input)) + +def measure_running_time(ctx, algorithm, input_length): + xs = gen_input(ctx, input_length) + assert algorithm(list(xs)) == sorted(xs) + input_copies = [list(xs) for i in range(ctx.repetitions)] + gc.disable() + starting_time = get_timestamp() + for i in range(ctx.repetitions): + algorithm(input_copies[i]) + running_time = get_timestamp() - starting_time + gc.enable() + return running_time + +def _decorate_plot(ctx, plt): + plt.grid() + plt.xlabel('Input length') + plt.ylabel('Running time (sec)') + plt.title('{} sort, {} repetition(s), {} input'.format( + ctx.algorithm, ctx.repetitions, ctx.input)) + +def plot_algorithm(ctx, algorithm): + import matplotlib.pyplot as plt + _decorate_plot(ctx, plt) + input_length = range(ctx.min_input_length, + ctx.max_input_length + 1) + running_time = [] + for n in input_length: + running_time.append(measure_running_time(ctx, algorithm, n)) + plt.plot(input_length, running_time) + if ctx.plot_path is not None: + plt.savefig(ctx.plot_path) + else: + plt.show() + +def plot(ctx): + if ctx.algorithm == 'bubble': + from bubble_sort import bubble_sort + plot_algorithm(ctx, bubble_sort) + elif ctx.algorithm == 'bubble_optimized': + from bubble_sort import bubble_sort_optimized + plot_algorithm(ctx, bubble_sort_optimized) + elif ctx.algorithm == 'heap': + from heapsort import heapsort + plot_algorithm(ctx, heapsort) + elif ctx.algorithm == 'insertion': + from insertion_sort import insertion_sort + plot_algorithm(ctx, insertion_sort) + elif ctx.algorithm == 'merge': + from merge_sort import merge_sort + plot_algorithm(ctx, merge_sort) + elif ctx.algorithm == 'quick_first': + from quicksort import quicksort_first + plot_algorithm(ctx, quicksort_first) + elif ctx.algorithm == 'quick_second': + from quicksort import quicksort_second + plot_algorithm(ctx, quicksort_second) + elif ctx.algorithm == 'quick_middle': + from quicksort import quicksort_middle + plot_algorithm(ctx, quicksort_middle) + elif ctx.algorithm == 'quick_last': + from quicksort import quicksort_last + plot_algorithm(ctx, quicksort_last) + elif ctx.algorithm == 'quick_random': + from quicksort import quicksort_random + plot_algorithm(ctx, quicksort_random) + elif ctx.algorithm == 'selection': + from selection_sort import selection_sort + plot_algorithm(ctx, selection_sort) + else: + raise NotImplementedError( + 'unknown algorithm \'{}\''.format(ctx.algorithm)) + +if __name__ == '__main__': + ctx = _get_context() + init_clock() + plot(ctx) diff --git a/quicksort.py b/quicksort.py new file mode 100644 index 0000000..b8ecc18 --- /dev/null +++ b/quicksort.py @@ -0,0 +1,65 @@ +# Copyright 2015 Egor Tensin +# This file is licensed under the terms of the MIT License. +# See LICENSE.txt for details. + +from random import randrange + +def _partition(xs, beg, end, select_pivot): + pivot = select_pivot(xs, beg, end) + xs[pivot], xs[end] = xs[end], xs[pivot] + for i in range(beg, end): + if xs[i] <= xs[end]: + xs[i], xs[beg] = xs[beg], xs[i] + beg += 1 + xs[beg], xs[end] = xs[end], xs[beg] + return beg + +def _quicksort(xs, beg, end, select_pivot): + if beg < end: + pivot = _partition(xs, beg, end, select_pivot) + _quicksort(xs, beg, pivot - 1, select_pivot) + _quicksort(xs, pivot + 1, end, select_pivot) + +def _select_first(xs, beg, end): + return beg + +def _select_second(xs, beg, end): + return beg + 1 + +def _select_middle(xs, beg, end): + return (beg + end) // 2 + +def _select_last(xs, beg, end): + return end + +def _select_random(xs, beg, end): + return randrange(beg, end + 1) + +def quicksort_first(xs): + _quicksort(xs, 0, len(xs) - 1, _select_first) + return xs + +def quicksort_second(xs): + _quicksort(xs, 0, len(xs) - 1, _select_second) + return xs + +def quicksort_middle(xs): + _quicksort(xs, 0, len(xs) - 1, _select_middle) + return xs + +def quicksort_last(xs): + _quicksort(xs, 0, len(xs) - 1, _select_last) + return xs + +def quicksort_random(xs): + _quicksort(xs, 0, len(xs) - 1, _select_random) + return xs + +if __name__ == '__main__': + import sys + xs = list(map(int, sys.argv[1:])) + print(quicksort_first(list(xs))) + print(quicksort_second(list(xs))) + print(quicksort_middle(list(xs))) + print(quicksort_last(list(xs))) + print(quicksort_random(list(xs))) diff --git a/selection_sort.py b/selection_sort.py new file mode 100644 index 0000000..c466bea --- /dev/null +++ b/selection_sort.py @@ -0,0 +1,17 @@ +# Copyright 2015 Egor Tensin +# This file is licensed under the terms of the MIT License. +# See LICENSE.txt for details. + +def selection_sort(xs): + for i in range(len(xs) - 1): + min_i = i + for j in range(i + 1, len(xs)): + if xs[j] < xs[min_i]: + min_i = j + if min_i != i: + xs[i], xs[min_i] = xs[min_i], xs[i] + return xs + +if __name__ == '__main__': + import sys + print(selection_sort(list(map(int, sys.argv[1:])))) -- cgit v1.2.3