排序

作者: 喵帕斯0_0 | 来源:发表于2018-08-18 19:16 被阅读4次

    排序是生活中常常会遇到的问题,也是面试中经常会问的算法,本文简单记录了常见的排序算法,使用C++与Python分别实现。

    稳定性

    将设ki = kj ( 1 <= i < j <=n ),在序列中的位置ri 领先于 rj,如果排序后 ri依旧领先于 rj,则成为算法是稳定的,反之,如果可能使得排序后的序列中 rj 领先于ri,则称排序为不稳定的。

    排序的种类
    代码用到公用函数
    c++
    //utils.h
    #include<random>
    #include<ctime>
    #include<vector>
    #include<iostream>
    
    typedef std::vector<int> IntArray;
    typedef IntArray::iterator ArrayIter;
    
    void GetRandomArray(IntArray &array, int min, int max, uint32_t size);
    int GetRandom(int min, int max);
    void PrintArray(const IntArray &array);
    void swap(ArrayIter val1, ArrayIter val2);
    void swap(int &a, int &b);
    
    //utils.cc
    #include "utils.h"
    
    int GetRandom(int min, int max)
    {
        srand(clock());
        int interval = max - min;
        int random = rand() % interval;
    
        return random + min;
    }   
    
    void GetRandomArray(IntArray &array, int min, int max, uint32_t size){
        while(size--){
            array.push_back(GetRandom(min, max));
        }
    }
    
    void PrintArray(const IntArray &array)
    {
        const int ROW_NUM = 20;
        const int WIDTH = 8;
        int row_index = 0;   
        for (IntArray::const_iterator iter = array.begin(); iter != array.end(); iter++)
        {
            std::cout.setf(std::ios::left);
            std::cout.width(WIDTH);
            std::cout << *iter;
            row_index ++;
            if(row_index == ROW_NUM){
                std::cout << std::endl;
                row_index = 0;
            }   
        }
        std::cout << std::endl;
    }
    
    void swap(ArrayIter val1, ArrayIter val2){
        int temp = *val1;
        *val1 = *val2;
        *val2 = temp;
    }
    
    void swap(int &a, int &b)
    {
        int temp = a;
        a = b;
        b = temp;
    }
    
    python
    import random
    
    CONST_ROW_NUM = 10
    
    def get_random(min, max, num):
        return [int(random.random()*(max-min) + min) for i in range(num)]
    
    def print_nums(nums):
        for i in range(len(nums)):
            if i!=0 and i%CONST_ROW_NUM == 0:
                print()
            print("%-10d"%(nums[i]), end="")
    
        print()
    
    冒泡排序

    两两比较相邻记录,若反序则交换,直到没有反序的记录为止。

    冒泡排序的过程

    从图中可以看到,每一轮比较可以得到待比较序列的最大值,每次将最大值往上移动后,对剩下的序列进行冒泡排序,最终可以得到有序序列。

    c++
    #include <iostream>
    
    #include "utils.h"
    
    using namespace std;
    
    void BubbleSort(IntArray &data)
    {
        bool sort_flag = true;
    
        for (ArrayIter out_iter = data.begin(); out_iter != data.end() && sort_flag; out_iter++)
        {
            sort_flag = false;
            for (ArrayIter in_iter = data.end() - 1; in_iter > out_iter; in_iter--)
            {
                if(*in_iter < *(in_iter - 1)){
                    sort_flag = true;           //优化,如果没有走到这一步,那么序列现在已经有序,无需在进行下去
                    int temp = *in_iter;
                    *in_iter = *(in_iter - 1);
                    *(in_iter - 1) = temp;
                }
            }
        }
    }
    
    int main(int argc, char const *argv[])
    {
        IntArray data;
        GetRandomArray(data, 0, 100, 10);
        cout << "Raw input:" << endl;;
        PrintArray(data);
    
        BubbleSort(data);
    
        cout << "Result output:"<<endl;
        PrintArray(data);
    
        return 0;
    }
    
    
    python
    import utils 
    
    def bubble_sort(nums):
        sort_flag = True
    
        for i in range(len(nums) - 1):
            sort_flag = False
            for j in range(len(nums)-1, i, -1):
                if nums[j] < nums[j-1]:
                    sort_flag = True
                    nums[j],nums[j-1] = nums[j-1],nums[j]
    
            if not sort_flag:
                break
    def main():
        nums = utils.get_random(0,100,10)
        print("Raw input:")
        utils.print_nums(nums)
        bubble_sort(nums)
        print("Result output:")
        utils.print_nums(nums)
    
    if __name__ == "__main__":
        main()
    
    时间复杂度

    最坏情况下,即序列是逆序的,需要进行1+2+3+...+n=n(n-1)/2次比较,并做同等数量的移动,因此,总的时间复杂度为o(n^2)

    选择排序

    通过n-i次的比较,从n-i+1序列中选择最小(最大)的元素,并和第i个记录进行交换。

    选择排序的过程
    c++
    #include <iostream>
    
    #include "utils.h"
    
    using namespace std;
    
    void SelectSort(IntArray &data)
    {
        for (ArrayIter out_iter = data.begin(); out_iter != data.end(); out_iter++)
        {
            ArrayIter min_iter = out_iter;
            for (ArrayIter in_iter = out_iter + 1; in_iter != data.end(); in_iter++)
            {
                if (*min_iter > *(in_iter))
                {
                    min_iter = in_iter;
                }
            }
    
            if (min_iter != out_iter)
            {
                swap(min_iter, out_iter);
            }
        }
    }
    
    int main(int argc, char const *argv[])
    {
        IntArray data;
        GetRandomArray(data, 0, 100, 10);
        cout << "Raw input:" << endl;
        PrintArray(data);
    
        SelectSort(data);
    
        cout << "Result output:" << endl;
        PrintArray(data);
    
        return 0;
    }
    
    python
    import utils
    
    def select_sort(nums):
        for i in range(len(nums) - 1):
            min_index = i
            for j in range(i+1, len(nums)):
                if nums[min_index] > nums[j]:
                    min_index = j
    
            if min_index != i:
                nums[min_index], nums[i] = nums[i], nums[min_index]
    
    def main():
        nums = utils.get_random(0,100,10)
        print("Raw input:")
        utils.print_nums(nums)
        select_sort(nums)
        print("Result output:")
        utils.print_nums(nums)
    
    if __name__ == "__main__":
        main()
    
    时间复杂度

    选择排序无论最好还是最坏的情况下,其比较次数一样多,第i趟排序需要n-i次比较,因此需要比较n-1+n-2+...+1 = n(n-1)/2次,对于交换次数,最好情况下,交换0次,最坏情况下交换n-1次,因此总的时间复杂度仍然是O(n^2)

    直接插入排序

    将一个记录插入到已排序的序列当中。

    直接插入排序的过程
    c++
    #include <iostream>
    #include <limits>
    
    #include "utils.h"
    
    using namespace std;
    
    void InsertSort(IntArray &data)
    {
        IntArray temp;
        for (ArrayIter input_iter = data.begin(); input_iter != data.end(); input_iter++)
        {
            if (temp.empty() || *input_iter > temp.back())
            {
                temp.push_back(*input_iter);
            }
            else
            {
                temp.push_back(INT_MIN);
                ArrayIter output_iter;
                // 终止条件比较复杂
                for (output_iter = temp.end() - 1; *(output_iter - 1) > *input_iter && output_iter > temp.begin(); output_iter--)
                {
                    *(output_iter) = *(output_iter-1);
                }
    
                *output_iter = *input_iter;
            }
        }
        data = temp;
    }
    
    int main(int argc, char const *argv[])
    {
        IntArray data;
        GetRandomArray(data, 0, 100, 10);
        cout << "Raw input:" << endl;
        PrintArray(data);
    
        InsertSort(data);
    
        cout << "Result output:" << endl;
        PrintArray(data);
    
        return 0;
    }
    
    python
    import utils
    
    def insert_sort(nums):
        temp = []
        for num in nums:
            if len(temp) == 0 or num > temp[-1]:
                temp.append(num)
            else:
                temp.append(num)
                i = len(temp) - 2
                while temp[i] > num and i > -1:
                    temp[i+1] = temp[i]
                    i -= 1
                temp[i+1] = num
    
        for i in range(len(nums)):
            nums[i] = temp[i]
    
    def main():
        nums = utils.get_random(0,100,10)
        print("Raw input:")
        nums= [2,0,2,1,1,0]
        utils.print_nums(nums)
        insert_sort(nums)
        print("Result output:")
        utils.print_nums(nums)
    
    if __name__ == "__main__":
        main()
    
    时间复杂度

    最好情况下,即序列本身是有序的,无需移动,只需比较n次,因此时间复杂度为O(n);最坏情况下,即序列本身是逆序的,因此需要比较 2 + 3 + 4 + ... + n = (n+2)(n-1)/2次,记录的移动次数也达到最大值(n+4)(n-1)/2次,因此最大时间复杂度为O(n^2);平均时间复杂度约为n^2/4

    堆排序

    堆是具有下列性质的完全二叉树,每个节点的值都大于或等于其左右孩子节点的值,成为大顶堆;或者每个几点的值小于或等于其左右孩子节点的值,称为小顶堆。
    堆排序的具体过程为,将待排序的序列构成一个大顶堆,此时根节点一定是最大值,将根节点与尾节点进行交换,然后将剩余的n-1个序列重新构成一个堆,这样就可以得到n个元素中的次大值,如此反复执行,最后就构成一个有序序列。

    堆排序过程
    c++
    #include <iostream>
    
    #include "utils.h"
    
    using namespace std;
    
    void BuildSort(IntArray &data, int index, int len)
    {
        //置顶向下调整
        for (int i = index; i < data.size();)
        {
            int left_child_index = 2 * i + 1;
            int max_child_index = left_child_index;
    
            if (left_child_index >= len)
            {
                //无子节点
                break;
            }
    
            if (left_child_index + 1 < len && data[left_child_index + 1] > data[left_child_index])
            {
                max_child_index = left_child_index + 1;
            }
    
            if (data[i] > data[left_child_index])
            {
                //接下去的不用调整了
                break;
            }
            else
            {
                //交换
                int temp = data[max_child_index];
                data[max_child_index] = data[i];
                data[i] = temp;
            }
            i = max_child_index;
        }
    }
    
    void HeapSort(IntArray &data)
    {
        for (int i = data.size() / 2 - 1; i >= 0; i--)
        {
            BuildSort(data, i, data.size());
        }
    
        for (int i = data.size(); i > 0; i--)
        {
    
            swap(data[0], data[i - 1]);
            BuildSort(data, 0, i - 1); //是i-1而不是i
        }
    }
    
    int main(int argc, char const *argv[])
    {
        IntArray data;
        GetRandomArray(data, 0, 100, 100);
        cout << "Raw input:" << endl;
    
        PrintArray(data);
    
        HeapSort(data);
    
        cout << "Result output:" << endl;
        PrintArray(data);
    
        return 0;
    }
    
    python
    import utils
    
    #从heap_index往下重建堆
    def build_heap(nums, heap_index, end):
        while heap_index <=end:
    
            left_index = 2 * heap_index + 1
            right_index = 2 * heap_index + 2
            max_child_index = left_index
            
            #到堆底
            if left_index > end:  
                return 
    
            if right_index <= end and nums[right_index] > nums[left_index]:
                max_child_index = right_index
    
            if nums[max_child_index] <= nums[heap_index]:
                return 
    
            nums[heap_index],nums[max_child_index] = nums[max_child_index],nums[heap_index]
            heap_index = max_child_index
            
    def heap_sort(nums):
        for heap_index in range(int(len(nums)/2) - 1, -1, -1):
            # 初始重建需要从下往上建立
            build_heap(nums, heap_index, len(nums) - 1)
    
        for i in range(len(nums)-1, 0, -1):
            nums[i], nums[0] = nums[0], nums[i]
            #调整
            build_heap(nums, 0, i-1)
    
    def main():
        nums = utils.get_random(0,100,10)
        print("Raw input:")
        utils.print_nums(nums)
        heap_sort(nums)
        print("Result output:")
        utils.print_nums(nums)
    
    if __name__ == "__main__":
        main()
    
    时间复杂度

    构建堆的时间复杂度为O(n),每次重建对堆的需要用O(logn),需要取n-1次堆顶记录,因此时间复杂度为O(nlongn)

    归并排序

    假设初始序列有n个记录,则可以看成是有n个有序的子序列,每个子序列的长度为1,然后两两归并,得到[n/2]个长度为2或为1的子序列,然后再两两归并,如此重复,直到一个长度为n的有序序列位置,这种成为2路归并排序。

    递归的归并排序 非归并排序的过程
    c++
    //递归版
    #include <iostream>
    #include <vector>
    
    #include "utils.h"
    
    using namespace std;
    
    void Merge(IntArray::iterator iter, uint32_t left, uint32_t mid, uint32_t right)
    {
        IntArray temp_array;
        uint32_t left_index = left, right_index = mid + 1;
        //比较
        while (left_index <= mid && right_index <= right)
        {
            if (*(iter + left_index) < *(iter + right_index))
            {
                temp_array.push_back(*(iter + left_index));
                left_index++;
            }
            else
            {
                temp_array.push_back(*(iter + right_index));
                right_index++;
            }
        }
        //将剩余的复制过去
        while (left_index <= mid)
        {
            temp_array.push_back(*(iter + (left_index++)));
        }
        //将剩余的复制过去
        while (right_index <= right)
        {
            temp_array.push_back(*(iter + (right_index++)));
        }
        //注意坐标的变化
        int begin_index = 0;
        while (begin_index + left <= right)
        {
            *(iter + left + begin_index) = temp_array[begin_index];
            begin_index++;
        }
    }
    
    void _MergeSort(IntArray::iterator iter, uint32_t left, uint32_t right)
    {
        if (left == right)
        {
            return;
        }
    
        uint32_t mid = (right + left) / 2;
    
        _MergeSort(iter, left, mid); //递归归并排序
        _MergeSort(iter, mid + 1, right);
        Merge(iter, left, mid, right); //合并
        return;
    }
    
    void MergeSort(IntArray &input_data)
    {
        _MergeSort(input_data.begin(), 0, input_data.size() - 1);
    }
    
    int main(int argc, char const *argv[])
    {
        const uint32_t TEST_NUM_COUNT = 10;
        const int MIN_NUM = 0;
        const int MAX_NUM = 100;
    
        IntArray input_data;
        GetRandomArray(input_data, MIN_NUM, MAX_NUM, TEST_NUM_COUNT);
        cout << "Raw input:" << endl;
        PrintArray(input_data);
    
        MergeSort(input_data);
    
        cout << "Raw input:" << endl;
        PrintArray(input_data);
    
        return 0;
    }
    
    //非递归版
    #include <iostream>
    #include <unistd.h>
    #include "utils.h"
    
    using namespace std;
    
    //合并子序列
    void Merge(IntArray &input, IntArray &output, int left, int mid, int right)
    {
        int left_index = left, right_index = mid + 1;
        int start_index = left;
    
        while (left_index <= mid && right_index <= right)
        {
            if (input[left_index] < input[right_index])
            {
                output[start_index++] = input[left_index];
                left_index++;
            }
            else
            {
                output[start_index++] = input[right_index];
                right_index++;
            }
        }
        while (left_index <= mid)
        {
            output[start_index++] = input[left_index];
            left_index++;
        }
    
        while (right_index <= right)
        {
            output[start_index++] = input[right_index];
            right_index++;
        }
    }
    
    void MergeSort(IntArray &input)
    {
        int len = (int32_t)(input.size());      //转化为有符号整型,防止下面计算的时候负数变为正整数(len-2*k -1) 
        IntArray output(input.size(), 0);
        //k表示合并的子序列长度,大于len就无意义了,每次以2的倍数增长
        for (int k = 1; k < len; k *= 2)      
        {
            int l = 0;
            //序号0开始, 一直到要合并的第二对合并子序列,因为最后一对可能长度不够,因此终止条件为  len-1 - 2*k
            //如果恰好最后一对也是两个k序列,那么  刚刚len-1-2*k +2*k 为最后一个序号
            for (l = 0; l <= len  - 2 * k - 1; l += 2 * k)
            {
                Merge(input, output, l,  l + k - 1  , l+2*k - 1 );
            }
            //l是最后一对要合并的序列的起始序号,如果剩余的序列长度>k要合并,否则不需要合并
            if(len - l >= k)
            {
                Merge(input, output, l, l + k - 1, len - 1);
            }
            input = output;
        }
    }                          
    int main(int argc, char const *argv[])
    {
        IntArray input_data, output_data;
        GetRandomArray(input_data, 0, 100, 10);
        cout << "Raw input:" << endl;
    
        PrintArray(input_data);
    
        MergeSort(input_data);
    
        cout << "Result output:" << endl;
        PrintArray(input_data);
    
        return 0;
    
        return 0;
    }
    
    python
    #非递归
    import utils 
    
    def _merge(nums, left, mid, right):
        temp = []
        left_index = left
        right_index = mid+1
    
        while left_index <= mid and right_index <= right:
            if nums[left_index] <= nums[right_index]:
                temp.append(nums[left_index])
                left_index += 1
            else:
                temp.append(nums[right_index])
                right_index += 1
    
        while left_index <= mid:
            temp.append(nums[left_index])
            left_index += 1
    
        while right_index <= right:
            temp.append(nums[right_index])
            right_index += 1
    
        for index,num in enumerate(temp):
            nums[left + index] = num
    
    def _merge_sort(nums, left, right):
        if left == right:
            return
    
        mid = int((left+right) / 2)
    
        _merge_sort(nums, left, mid)
        _merge_sort(nums, mid+1, right)
        _merge(nums, left, mid, right)
    
    def merge_sort(nums):
        _merge_sort(nums, 0, len(nums)-1)
    
    def main():
        nums = utils.get_random(0,100,10)
        print("Raw input:")
        utils.print_nums(nums)
        merge_sort(nums)
        print("Result output:")
        utils.print_nums(nums)
    
    if __name__ == "__main__":
        main()
    
    #非递归
    import utils
    
    def _merge(nums, left, mid, right):
        temp = []
        left_index = left
        right_index = mid+1
    
        while left_index <= mid and right_index <= right:
            if nums[left_index] <= nums[right_index]:
                temp.append(nums[left_index])
                left_index += 1
            else:
                temp.append(nums[right_index])
                right_index += 1
    
        while left_index <= mid:
            temp.append(nums[left_index])
            left_index += 1
    
        while right_index <= right:
            temp.append(nums[right_index])
            right_index += 1
    
        for index,num in enumerate(temp):
            nums[left + index] = num
    
    def merge_sort(nums):
        k = 1      #子序列大小从1开始
        while k < len(nums):
            
            start = 0
    
            while start <= len(nums) - 2*k :
                _merge(nums, start, start + k - 1, start + 2*k -1)
                start += 2*k
            #剩余长度不足2k长度的序列
            if len(nums) > start + k:
                _merge(nums, start, start + k -1, len(nums) - 1)
    
            k *= 2  #每次扩大到原来的2倍
    
    def main():
        nums = utils.get_random(0,100,10)
        nums = [0,0,1,1,2,2]
        print("Raw input:")
        utils.print_nums(nums)
        merge_sort(nums)
        print("Result output:")
        utils.print_nums(nums)
    
    if __name__ == "__main__":
        main()
    
    快速排序

    对于一个顺序的序列来说,对于其中的每一个数字,它左边的数字总是小于或等于它,它右边的数字总是大于或等于它,根据这个思想,提出了快速排序的概念。
    基本思想是通过一趟排序将待排序的记录分割成独立的两部分,其中一部分记录的关键字比另一个部分的关键字小,则可分别对这两部分记录进行排序,已达到整体有序的目的。

    快速排序的过程
    c++
    #include <iostream>
    
    #include "utils.h"
    using namespace std;
    
    void Partition(IntArray &input, int low, int high, int &partition_index)
    {
        int key = input[low];
    
        while (low < high)
        {
            //找到右侧小于key的值
            while (low < high && input[high] > key)
            {
                high--;
            }
            //移动到左边
            if (low < high)
            {
                int temp = input[low];
                input[low] = input[high];
                input[high] = temp;
            }
            //寻找左边大于key 的值
            while (low < high && input[low] <= key)
            {
                low++;
            }
            //移动到右边
            if (low < high)
            {
                int temp = input[low];
                input[low] = input[high];
                input[high] = temp;
            }
            //low~hight又是跟初始状态一样,再继续寻找
        }
    
        partition_index = low;
    }
    
    void QSort(IntArray &input, int low, int high)
    {
        if (low >= high)
        {
            return;
        }
    
        int partition_index = -1;
        /*
        原理:元素在排序中所在的位置,之前的元素都比该元素小,之后的元素都比该元素大
        因此快排的原理就是寻找位置,使得之前的元素比该值小,之后的元素比该值大
        */
        Partition(input, low, high, partition_index);
        QSort(input, low, partition_index - 1);
        QSort(input, partition_index + 1, high);
    }
    
    void QuickSort(IntArray &input_data)
    {
        int low = 0;
        int high = input_data.size() - 1;
    
        QSort(input_data, low, high);
    }
    
    int main(int argc, char const *argv[])
    {
        IntArray input_data, output_data;
        GetRandomArray(input_data, 0, 100, 10);
        cout << "Raw input:" << endl;
    
        PrintArray(input_data);
    
        QuickSort(input_data);
    
        cout << "Result output:" << endl;
        PrintArray(input_data);
    
        return 0;
    
        return 0;
    }
    
    python
    import utils
    import time
    
    def _partitions(nums, low, high):
        key = nums[low]
    
        while low < high:
            while low < high and nums[high] >= key:
                high -= 1
    
            if low < high:
                nums[low] = nums[high]
    
            while low < high and nums[low] <= key:
                low += 1
    
            if low < high:
                nums[high] = nums[low]
    
        nums[low] = key
        return low
    
    def _quick_sort(nums, low, high):
        if low >= high:    #条件为大于或等于
            return 
        partition_index = _partitions(nums, low, high)
        _quick_sort(nums, low, partition_index)
        _quick_sort(nums, partition_index+1, high)
    
    def quick_sort(nums):
        _quick_sort(nums, 0, len(nums) - 1 )
    
    def main():
        nums = utils.get_random(0,100,10)
        print("Raw input:")
        utils.print_nums(nums)
        quick_sort(nums)
        print("Result output:")
        utils.print_nums(nums)
    
    if __name__ == "__main__":
        main()
    
    总结
    排序方法 平均复杂度 最好情况 最坏情况 辅助空间 稳定性
    冒泡 O(n^2) O(n) O(n^2) O(1) 稳定
    选择排序 O(n^2) O(n^2) (n^2) O(1) 稳定
    插入排序 O(n^2) O(n) O(n^2) O(n) 稳定
    堆排序 O(nlogn)~O(n^2) O(n^1.3) O(n^2) O(1) 不稳定
    归并排序 O(nlogn) O(nlogn) O(nlogn) O(n) O(n) 不稳定
    快速排序 O(nlogn) O(nlogn) O(n^2) O(logn)~O(n) 不稳定

    相关文章

      网友评论

          本文标题:排序

          本文链接:https://www.haomeiwen.com/subject/tlgxiftx.html