第k大个数

导言

寻找数组第K大数是大厂面试中经常考到的一题，有的小伙伴会直接用sort()进行排序，两行代码解决，这样看似可行，实则掉入了出题人的陷阱。面试官希望看到的是你对算法的理解，而不是函数的调用。

我们都知道，排序算法的复杂度是$O(nlogn)$，挑选最大值的算法是遍历一遍，复杂度为$O(n)$。

对于第k大个数这种情况，既然不能用排序的话，我可以扫描k次，从而求解，复杂度为$O(kn)$。代码如下：

class Solution {
public:
    int findKthLargest(vector<int> a, int n, int k) {
        for(int i=0; i<k; ++i) {
            for(int j=0; j<n-i-1; ++j) {
                if(a[j]>a[j+1]) {
                    int temp = a[j];
                    a[j] = a[j+1];
                    a[j+1] = temp;
                }
            }
        }
        return a[n-K];
    }
};

这个和冒泡排序的思想是一样的，对于第一层每一趟for循环的i（i的范围是[0,k]），都挑出区间[0,n-i)最大的元素，并将其放到末尾的位置。

当k为1时，复杂度为$O(n)$，k为n的时候，复杂度变为了$O(n^2)$。这个算法还有优化的空间。

堆排序

对于第k大的数，我们可以理解成是前k个大的数中最小的那个，因此我们可以建立一个k大小的小顶堆，然后对数组进行遍历，依次将其与堆顶元素进行比较，那么最终遍历完成之后，第k大的数将处于堆顶的位置。代码如下：

class Solution {
public:
    // 最小堆调整
    void heap_adjust(vector<int>& nums, int i, int n) {
        int j = 2*i+1;
        while (j < n) {
            if (j + 1 < n && nums[j+1] < nums[j]) j++;
            if (nums[j] < nums[i]) {
                swap(nums[i], nums[j]);
                i = j;
                j = 2*i+1;
            } else {
                break;
            }
        }
    }
    int findKthLargest(vector<int>& nums, int k) {

        // 构建k大小的最小堆
        vector<int> heap(nums.begin(), nums.begin() + k);
        for (int i = (k-1) / 2; i >= 0; --i) {
            heap_adjust(heap, i, k);
        }
        int n = nums.size();
        for (int i = k; i < n; ++i) {
            if (heap[0] < nums[i]) {
                // 最小的元素换成当前较大的元素
                heap[0] = nums[i];
                // 调整
                heap_adjust(heap, 0, k);
            }
        }
        return heap[0];

    }
};

当然手写堆比较麻烦，我们可以直接用优先队列。

class Solution {
    public int findKthLargest(int[] nums, int k) {
        int n = nums.length;
        Queue<Integer> pq = new PriorityQueue<Integer>();
        for (int i = 0; i < k; i++) {
            pq.offer(nums[i]);
        }

        for (int i = k; i < n; i++) {
            if (pq.peek() < nums[i]) {
                pq.poll();
                pq.offer(nums[i]);
            }
        }
        return pq.peek();
    }
}

空间复杂度$O(k)$，时间复杂度$O(n)$

快速选择

快排我们知道，就是通过划分，使两边整体有序，然后继续不停地划分，当所有小的区间都整体有序的时候，就排好了。

快速选择是建立在快排基础上的，通过划分得出位置loc，可以保证loc两边元素的大致位置，然后我们就知道第k个数处于哪个，然后再对该区间进行考虑。相当于每次少了一半的子问题，复杂度降为了$O(n)$（这个大家可以自行推导一下）。

代码如下：

class Solution {
    public int findKthLargest(int[] nums, int k) {
        int n = nums.length;
        int l = 0, r = n-1;
        while (true) {
            int loc = quickSelect(nums, l, r);
            if (loc == n-k) {
                return nums[n-k];
            } else if (loc > n-k) {
                r = loc-1;
            } else {
                l = loc+1;
            }
        }       
    }

    private int quickSelect(int[] nums, int l, int r) {
        int i = l, j = r+1;
        while (true) {
            do {
                j--;
            } while (j > i && nums[j] >= nums[l]);
            do {
                i++;
            } while (j > i && nums[i] <= nums[l]);
            if (j > i) {
                int t = nums[j];
                nums[j] = nums[i];
                nums[i] = t;
            } else {
                int t = nums[j];
                nums[j] = nums[l];
                nums[l] = t;
                return j;
            }
        }  
    }
}

这里有两个要注意的地方，quickSelect函数体中的else部分，最终是和右指针j进行交换，而不是左指针，因为右指针是先移动的，右指针移动到了i的位置以后，左指针会往右移动一格（因为是do-while循环），不在正确的位置上。也有可能右指针移动了以后，左指针移动到了j的位置，因此右指针最终的位置是对的，而左指针的位置有可能在右指针的右边一格，有可能和右指针重叠。要想和左指针交换的话就得这么写：

class Solution {
    public int findKthLargest(int[] nums, int k) {
        int n = nums.length;
        int l = 0, r = n-1;
        while (true) {
            int loc = quickSelect(nums, l, r);
            if (loc == n-k) {
                return nums[n-k];
            } else if (loc > n-k) {
                r = loc-1;
            } else {
                l = loc+1;
            }
        }       
    }

    private int quickSelect(int[] nums, int l, int r) {
        int i = l, j = r+1;
        while (true) {
            do {
                j--;
            } while (j > i && nums[j] >= nums[l]);
            do {
                i++;
            } while (j > i && nums[i] <= nums[l]);
            if (j > i) {
                int t = nums[j];
                nums[j] = nums[i];
                nums[i] = t;
            } else if (i > j){ // 左指针在右指针的右边一格
                int t = nums[i-1];
                nums[i-1] = nums[l];
                nums[l] = t;
                return i-1;
            } else { // 左指针和右指针重叠
                int t = nums[i];
                nums[i] = nums[l];
                nums[l] = t;
                return i;
            }
        }  
    }
}

第二个要注意的地方，还是要强调一下for、while、do-while循环。这三个循环都有个共同之处，就是循环正常结束完以后的的状态和循环的状态是正好相反的，也就是结束的终点是一致的。举个例子：

do {
    j--;
} while (j > i && nums[j] >= nums[l]);

// 执行完之后 j == i || nums[j] < nums[l]
// 也就是说j == i和nums[j] < nums[l]两个条件都会使循环终止

do-while和for、while也有不同之处：do-while循环的起点一般是要遍历的开始位置的前一个位置，而while循环和for循环的起点都是要遍历的开始位置，这也就是为什么while循环和for循环用的更多一点。

看似是一个比较基础的东西，但实际写代码的时候很多人犯错，尤其是写双指针的时候，所以一定要注意。