diff options
Diffstat (limited to 'algorithms')
-rw-r--r-- | algorithms/__init__.py | 0 | ||||
-rw-r--r-- | algorithms/algorithm.py | 23 | ||||
-rw-r--r-- | algorithms/impl/__init__.py | 30 | ||||
-rw-r--r-- | algorithms/impl/bubble_sort.py | 56 | ||||
-rw-r--r-- | algorithms/impl/heapsort.py | 82 | ||||
-rw-r--r-- | algorithms/impl/insertion_sort.py | 37 | ||||
-rw-r--r-- | algorithms/impl/median.py | 67 | ||||
-rw-r--r-- | algorithms/impl/merge_sort.py | 52 | ||||
-rw-r--r-- | algorithms/impl/quicksort.py | 103 | ||||
-rw-r--r-- | algorithms/impl/selection_sort.py | 39 | ||||
-rw-r--r-- | algorithms/input_kind.py | 34 | ||||
-rw-r--r-- | algorithms/params.py | 142 | ||||
-rw-r--r-- | algorithms/plotter.py | 25 | ||||
-rw-r--r-- | algorithms/registry.py | 17 | ||||
-rw-r--r-- | algorithms/timer.py | 27 |
15 files changed, 734 insertions, 0 deletions
diff --git a/algorithms/__init__.py b/algorithms/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/algorithms/__init__.py diff --git a/algorithms/algorithm.py b/algorithms/algorithm.py new file mode 100644 index 0000000..66f8724 --- /dev/null +++ b/algorithms/algorithm.py @@ -0,0 +1,23 @@ +# Copyright (c) 2016 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +from . import input_kind + + +class Algorithm: + def __init__(self, codename, display_name, f): + self.codename = codename + self.display_name = display_name + self.function = f + + @staticmethod + def gen_input(n, case=input_kind.InputKind.AVERAGE): + #raise NotImplementedError('input generation is not defined for a generic algorithm') + return input_kind.gen_input_for_sorting(n, case) + + +class SortingAlgorithm(Algorithm): + def gen_input(self, n, case=input_kind.InputKind.AVERAGE): + return input_kind.gen_input_for_sorting(n, case) diff --git a/algorithms/impl/__init__.py b/algorithms/impl/__init__.py new file mode 100644 index 0000000..84bd702 --- /dev/null +++ b/algorithms/impl/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) 2016 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +from importlib import import_module +import os.path +from pkgutil import iter_modules + +from .. import algorithm + + +_ALGORITHMS_NAME = '_ALGORITHMS' + + +def refresh_algorithms(): + all_algorithms = {} + + for _, module_name, is_pkg in iter_modules([os.path.dirname(__file__)]): + if is_pkg: + continue + module = import_module('.' + module_name, __package__) + if hasattr(module, _ALGORITHMS_NAME): + module_algorithms = getattr(module, _ALGORITHMS_NAME) + for descr in module_algorithms: + assert isinstance(descr, algorithm.Algorithm) + assert descr.codename not in all_algorithms + all_algorithms[descr.codename] = descr + + return all_algorithms diff --git a/algorithms/impl/bubble_sort.py b/algorithms/impl/bubble_sort.py new file mode 100644 index 0000000..0ea66bb --- /dev/null +++ b/algorithms/impl/bubble_sort.py @@ -0,0 +1,56 @@ +# Copyright (c) 2015 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +import sys + +from ..algorithm import SortingAlgorithm + + +def bubble_sort(xs): + while True: + swapped = False + for i in range(1, len(xs)): + if xs[i - 1] > xs[i]: + xs[i], xs[i - 1] = xs[i - 1], xs[i] + swapped = True + if not swapped: + break + return xs + + +def bubble_sort_optimized(xs): + n = len(xs) + while True: + new_n = 0 + for i in range(1, n): + if xs[i - 1] > xs[i]: + xs[i], xs[i - 1] = xs[i - 1], xs[i] + new_n = i + n = new_n + if not n: + break + return xs + + +_ALGORITHMS = [ + SortingAlgorithm('bubble_sort', 'Bubble sort', bubble_sort), + SortingAlgorithm('bubble_sort_optimized', 'Bubble sort (optimized)', bubble_sort_optimized), +] + + +def _parse_args(args=None): + if args is None: + args = sys.argv[1:] + return list(map(int, args)) + + +def main(args=None): + xs = _parse_args(args) + print(bubble_sort(list(xs))) + print(bubble_sort_optimized(list(xs))) + + +if __name__ == '__main__': + main() diff --git a/algorithms/impl/heapsort.py b/algorithms/impl/heapsort.py new file mode 100644 index 0000000..81bea20 --- /dev/null +++ b/algorithms/impl/heapsort.py @@ -0,0 +1,82 @@ +# Copyright (c) 2015 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +import sys + +from ..algorithm import SortingAlgorithm + +# Disclaimer: implemented in the most literate way. + + +def heapsort(xs): + _heapify(xs) + first, last = 0, len(xs) - 1 + for end in range(last, first, -1): + xs[end], xs[first] = xs[first], xs[end] + _siftdown(xs, first, end - 1) + return xs + + +# In a heap stored in a zero-based array, +# left_child = node * 2 + 1 +# right_child = node * 2 + 2 +# parent = (node - 1) // 2 + + +def _get_parent(node): + return (node - 1) // 2 + + +def _get_left_child(node): + return node * 2 + 1 + + +def _get_right_child(node): + return node * 2 + 2 + + +def _heapify(xs): + last = len(xs) - 1 + first_parent, last_parent = 0, _get_parent(last) + for parent in range(last_parent, first_parent - 1, -1): + _siftdown(xs, parent, last) + + +def _siftdown(xs, start, end): + root = start + while True: + # We swap if there is at least one child + child = _get_left_child(root) + if child > end: + break + # If there are two children, select the minimum + right_child = _get_right_child(root) + if right_child <= end and xs[child] < xs[right_child]: + child = right_child + if xs[root] < xs[child]: + xs[root], xs[child] = xs[child], xs[root] + root = child + else: + break + + +_ALGORITHMS = [ + SortingAlgorithm('heapsort', 'Heapsort', heapsort), +] + + +def _parse_args(args=None): + if args is None: + args = sys.argv[1:] + return list(map(int, args)) + + +def main(args=None): + xs = _parse_args(args) + print(heapsort(list(xs))) + + +if __name__ == '__main__': + main() diff --git a/algorithms/impl/insertion_sort.py b/algorithms/impl/insertion_sort.py new file mode 100644 index 0000000..13f16a0 --- /dev/null +++ b/algorithms/impl/insertion_sort.py @@ -0,0 +1,37 @@ +# Copyright (c) 2015 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +import sys + +from ..algorithm import SortingAlgorithm + + +def insertion_sort(xs): + for i in range(1, len(xs)): + j = i + while j > 0 and xs[j - 1] > xs[j]: + xs[j], xs[j - 1] = xs[j - 1], xs[j] + j -= 1 + return xs + + +_ALGORITHMS = [ + SortingAlgorithm('insertion_sort', 'Insertion sort', insertion_sort), +] + + +def _parse_args(args=None): + if args is None: + args = sys.argv[1:] + return list(map(int, args)) + + +def main(args=None): + xs = _parse_args(args) + print(insertion_sort(list(xs))) + + +if __name__ == '__main__': + main() diff --git a/algorithms/impl/median.py b/algorithms/impl/median.py new file mode 100644 index 0000000..a754cdb --- /dev/null +++ b/algorithms/impl/median.py @@ -0,0 +1,67 @@ +# Copyright (c) 2016 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +from heapq import heappush, heappop +import sys + +from ..algorithm import Algorithm +from .quicksort import quicksort_random + + +def calc_median_heaps(xs): + cur_median = 0.0 + min_heap, max_heap = [], [] + for x in xs: + if x < cur_median: + heappush(max_heap, -x) + elif x > cur_median or len(max_heap) > len(min_heap): + heappush(min_heap, x) + else: + heappush(max_heap, -x) + + if len(max_heap) > len(min_heap) + 1: + heappush(min_heap, -heappop(max_heap)) + elif len(min_heap) > len(max_heap) + 1: + heappush(max_heap, -heappop(min_heap)) + + if len(max_heap) > len(min_heap): + cur_median = -max_heap[0] + elif len(max_heap) == len(min_heap): + cur_median = -max_heap[0] / 2 + min_heap[0] / 2 + else: + cur_median = min_heap[0] + return cur_median + + +def calc_median_sorting(xs): + if not xs: + return 0.0 + quicksort_random(xs) + if len(xs) % 2: + return xs[len(xs) // 2] + else: + return xs[len(xs) // 2 - 1] / 2 + xs[len(xs) // 2] / 2 + + +_ALGORITHMS = [ + Algorithm('median_sorting', 'Median value (using explicit sorting)', calc_median_sorting), + Algorithm('median_heaps', 'Median value (using heaps)', calc_median_heaps), +] + + +def _parse_args(args=None): + if args is None: + args = sys.argv[1:] + return list(map(int, args)) + + +def main(args=None): + xs = _parse_args(args) + print(calc_median_sorting(list(xs))) + print(calc_median_heaps(list(xs))) + + +if __name__ == '__main__': + main() diff --git a/algorithms/impl/merge_sort.py b/algorithms/impl/merge_sort.py new file mode 100644 index 0000000..fecdf78 --- /dev/null +++ b/algorithms/impl/merge_sort.py @@ -0,0 +1,52 @@ +# Copyright (c) 2015 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +import sys + +from ..algorithm import SortingAlgorithm + + +def merge(left, right): + result = [] + l, r = 0, 0 + while l < len(left) and r < len(right): + if left[l] <= right[r]: + result.append(left[l]) + l += 1 + else: + result.append(right[r]) + r += 1 + if left: + result.extend(left[l:]) + if right: + result.extend(right[r:]) + return result + + +def merge_sort(xs): + if len(xs) < 2: + return xs + mid = len(xs) // 2 + return merge(merge_sort(xs[:mid]), merge_sort(xs[mid:])) + + +_ALGORITHMS = [ + SortingAlgorithm('merge_sort', 'Merge sort', merge_sort), +] + + +def _parse_args(args=None): + if args is None: + args = sys.argv[1:] + return list(map(int, args)) + + +def main(args=None): + xs = _parse_args(args) + print(merge_sort(list(xs))) + + +if __name__ == '__main__': + main() diff --git a/algorithms/impl/quicksort.py b/algorithms/impl/quicksort.py new file mode 100644 index 0000000..dd0c92b --- /dev/null +++ b/algorithms/impl/quicksort.py @@ -0,0 +1,103 @@ +# Copyright (c) 2015 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +from random import randrange, seed +import sys + +from ..algorithm import SortingAlgorithm + + +seed() + + +def _partition(xs, beg, end, select_pivot): + pivot = select_pivot(xs, beg, end) + xs[pivot], xs[end] = xs[end], xs[pivot] + for i in range(beg, end): + if xs[i] <= xs[end]: + xs[i], xs[beg] = xs[beg], xs[i] + beg += 1 + xs[beg], xs[end] = xs[end], xs[beg] + return beg + + +def _quicksort(xs, beg, end, select_pivot): + if beg < end: + pivot = _partition(xs, beg, end, select_pivot) + _quicksort(xs, beg, pivot - 1, select_pivot) + _quicksort(xs, pivot + 1, end, select_pivot) + + +def _select_first(xs, beg, end): + return beg + + +def _select_second(xs, beg, end): + return beg + 1 + + +def _select_middle(xs, beg, end): + return (beg + end) // 2 + + +def _select_last(xs, beg, end): + return end + + +def _select_random(xs, beg, end): + return randrange(beg, end + 1) + + +def quicksort_first(xs): + _quicksort(xs, 0, len(xs) - 1, _select_first) + return xs + + +def quicksort_second(xs): + _quicksort(xs, 0, len(xs) - 1, _select_second) + return xs + + +def quicksort_middle(xs): + _quicksort(xs, 0, len(xs) - 1, _select_middle) + return xs + + +def quicksort_last(xs): + _quicksort(xs, 0, len(xs) - 1, _select_last) + return xs + + +def quicksort_random(xs): + _quicksort(xs, 0, len(xs) - 1, _select_random) + return xs + + +_ALGORITHMS = [ + SortingAlgorithm('quicksort_first', 'Quicksort (first element as pivot)', quicksort_first), + SortingAlgorithm('quicksort_second', 'Quicksort (second element as pivot)', quicksort_second), + SortingAlgorithm('quicksort_middle', 'Quicksort (middle element as pivot)', quicksort_middle), + SortingAlgorithm('quicksort_last', 'Quicksort (last element as pivot)', quicksort_last), + SortingAlgorithm('quicksort_random', 'Quicksort (random element as pivot)', quicksort_random), +] + + +def _parse_args(args=None): + if args is None: + args = sys.argv[1:] + return list(map(int, args)) + + +def main(args=None): + xs = _parse_args(args) + print(quicksort_first(list(xs))) + print(quicksort_second(list(xs))) + print(quicksort_middle(list(xs))) + print(quicksort_last(list(xs))) + print(quicksort_random(list(xs))) + + +if __name__ == '__main__': + main() diff --git a/algorithms/impl/selection_sort.py b/algorithms/impl/selection_sort.py new file mode 100644 index 0000000..e9a5a09 --- /dev/null +++ b/algorithms/impl/selection_sort.py @@ -0,0 +1,39 @@ +# Copyright (c) 2015 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +import sys + +from ..algorithm import SortingAlgorithm + + +def selection_sort(xs): + for i in range(len(xs) - 1): + min_i = i + for j in range(i + 1, len(xs)): + if xs[j] < xs[min_i]: + min_i = j + if min_i != i: + xs[i], xs[min_i] = xs[min_i], xs[i] + return xs + + +_ALGORITHMS = [ + SortingAlgorithm('selection_sort', 'Selection sort', selection_sort), +] + + +def _parse_args(args=None): + if args is None: + args = sys.argv[1:] + return list(map(int, args)) + + +def main(args=None): + xs = _parse_args(args) + print(selection_sort(list(xs))) + + +if __name__ == '__main__': + main() diff --git a/algorithms/input_kind.py b/algorithms/input_kind.py new file mode 100644 index 0000000..08e1c9e --- /dev/null +++ b/algorithms/input_kind.py @@ -0,0 +1,34 @@ +# Copyright (c) 2016 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +from array import array +from enum import Enum +from random import seed, sample + + +seed() + + +class InputKind(Enum): + BEST, AVERAGE, WORST = 'best', 'average', 'worst' + + def __str__(self): + return self.value + + +def _gen_input_from(xs): + return array('l', xs) + + +def gen_input_for_sorting(n, case=InputKind.AVERAGE): + if n < 0: + raise ValueError('input length cannot be less than zero') + if case is InputKind.BEST: + return _gen_input_from(range(n)) + if case is InputKind.AVERAGE: + return _gen_input_from(sample(range(n), n)) + if case is InputKind.WORST: + return _gen_input_from(range(n - 1, -1, -1)) + raise NotImplementedError('invalid input kind: ' + str(case)) diff --git a/algorithms/params.py b/algorithms/params.py new file mode 100644 index 0000000..486cb0e --- /dev/null +++ b/algorithms/params.py @@ -0,0 +1,142 @@ +# Copyright (c) 2016 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +from enum import Enum +from numbers import Integral + +from .input_kind import InputKind +from .plotter import PlotBuilder +from . import registry +from .timer import Timer + + +class TimeUnits(Enum): + SECONDS = 'seconds' + MILLISECONDS = 'milliseconds' + MICROSECONDS = 'microseconds' + + def get_factor(self): + if self is TimeUnits.SECONDS: + return 1. + if self is TimeUnits.MILLISECONDS: + return 1000. + if self is TimeUnits.MICROSECONDS: + return 1000000. + raise NotImplementedError('invalid time units: ' + str(self)) + + def __str__(self): + return self.value + + +class AlgorithmParameters: + def __init__(self, algorithm, min_len, max_len, + input_kind=InputKind.AVERAGE, iterations=1): + + if isinstance(algorithm, str): + algorithm = registry.get(algorithm) + self.algorithm = algorithm + + self.input_kind = input_kind + + self._min_len = None + self._max_len = None + self.min_len = min_len + self.max_len = max_len + + self._iterations = None + self.iterations = iterations + + @property + def min_len(self): + return self._min_len + + @min_len.setter + def min_len(self, val): + if not isinstance(val, Integral): + raise TypeError('must be an integral value') + val = int(val) + if val < 0: + raise ValueError('must be non-negative') + if self.max_len is not None and self.max_len < val: + raise ValueError('must not be greater than the maximum length') + self._min_len = val + + @property + def max_len(self): + return self._max_len + + @max_len.setter + def max_len(self, val): + if not isinstance(val, Integral): + raise TypeError('must be an integral value') + val = int(val) + if val < 0: + raise ValueError('must be non-negative') + if self.min_len is not None and self.min_len > val: + raise ValueError('must not be lesser than the minimum length') + self._max_len = val + + @property + def iterations(self): + return self._iterations + + @iterations.setter + def iterations(self, val): + if not isinstance(val, Integral): + raise TypeError('must be an integral value') + val = int(val) + if val < 1: + raise ValueError('must be positive') + self._iterations = val + + def measure_running_time(self): + input_len_range = list(range(self.min_len, self.max_len + 1)) + running_time = [] + for input_len in input_len_range: + input_sample = self.algorithm.gen_input(input_len, self.input_kind) + input_copies = [list(input_sample) for _ in range(self.iterations)] + with Timer(running_time, self.iterations): + for i in range(self.iterations): + self.algorithm.function(input_copies[i]) + return input_len_range, running_time + + @staticmethod + def _format_plot_xlabel(): + return 'Input length' + + @staticmethod + def _format_plot_ylabel(units): + return 'Running time ({})'.format(units) + + def _format_plot_title(self): + return '{}, {} case'.format( + self.algorithm.display_name, self.input_kind) + + def _format_plot_suptitle(self): + return self.algorithm.display_name + + @staticmethod + def _derive_time_units(ys): + max_y = max(ys) + if max_y > 0.1: + return TimeUnits.SECONDS + if max_y > 0.0001: + return TimeUnits.MILLISECONDS + return TimeUnits.MICROSECONDS + + def plot_running_time(self, output_path=None): + xs, ys = self.measure_running_time() + units = self._derive_time_units(ys) + ys = [y * units.get_factor() for y in ys] + + plot_builder = PlotBuilder() + title = self._format_plot_title() + xlabel = self._format_plot_xlabel() + ylabel = self._format_plot_ylabel(units) + plot_builder.plot(title, xlabel, ylabel, xs, ys) + if output_path is None: + plot_builder.show() + else: + plot_builder.save(output_path) diff --git a/algorithms/plotter.py b/algorithms/plotter.py new file mode 100644 index 0000000..d3e4ee4 --- /dev/null +++ b/algorithms/plotter.py @@ -0,0 +1,25 @@ +# Copyright (c) 2016 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +import matplotlib.pyplot as plt + + +class PlotBuilder: + def __init__(self): + self._fig, self._ax = plt.subplots(figsize=(8, 6), dpi=200) + self._ax.grid(alpha=0.8, linestyle=':') + + def plot(self, title, xlabel, ylabel, xs, ys): + self._ax.set_title(title) + self._ax.set_xlabel(xlabel) + self._ax.set_ylabel(ylabel) + self._ax.plot(xs, ys) + + @staticmethod + def show(): + plt.show() + + def save(self, output_path): + self._fig.savefig(output_path) diff --git a/algorithms/registry.py b/algorithms/registry.py new file mode 100644 index 0000000..3bc6495 --- /dev/null +++ b/algorithms/registry.py @@ -0,0 +1,17 @@ +# Copyright (c) 2016 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +from . import impl + + +_ALL_ALGORITHMS = impl.refresh_algorithms() + + +def get_codenames(): + return _ALL_ALGORITHMS.keys() + + +def get(codename): + return _ALL_ALGORITHMS[codename] diff --git a/algorithms/timer.py b/algorithms/timer.py new file mode 100644 index 0000000..c567a3f --- /dev/null +++ b/algorithms/timer.py @@ -0,0 +1,27 @@ +# Copyright (c) 2016 Egor Tensin <Egor.Tensin@gmail.com> +# This file is part of the "Sorting algorithms" project. +# For details, see https://github.com/egor-tensin/sorting-algorithms. +# Distributed under the MIT License. + +import gc +import time + + +def get_timestamp(): + return time.perf_counter() + + +class Timer: + def __init__(self, dest, iterations=1): + self._dest = dest + self._iterations = iterations + + def __enter__(self): + gc.disable() + self._start = get_timestamp() + return self + + def __exit__(self, *args): + end = get_timestamp() + gc.enable() + self._dest.append((end - self._start) / self._iterations) |