最快的排序算法是什么,很多人的第一反应是快排,感觉QuickSort 当然应该最快了,其实并非如此,快排是不稳定的,最坏情况下,快排序并不是最优,Java7 中引入的 TimSort 就是一个结合了插入排序和归并排序的高效算法.
Timsort最早是 Tim Peters 于2001年为 Python 写的排序算法。自从发明该算法以来,它已被用作Python,Java,Android平台和GNU Octave中的默认排序算法。
关于此算法的详细描述参见 http://svn.python.org/projects/python/trunk/Objects/listsort.txt
看看它与另外两个高效排序算法的比较
相比之下, TimSort 的最佳,平均和最坏情况综合起来最佳。在数据量比较少(<=64)的情况下,它直接用 Insert Sort,否则使用 MergeSort + BinarySearch 来提高排序效率
下面写一个给扑克牌排序的例子,比较一下冒泡,插入,快排,归并排序,TimSort的性能:
- 扑克牌对象类如下
package com.github.walterfan.helloalgorithm;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @Author: Walter Fan
**/
public class Poker {
public static class Card {
enum Suite {
Spades(4), Hearts(3), Clubs(2), Diamonds(1);
int value;
Suite(int value) {
this.value = value;
}
private static Map<Integer, Suite> valueMap = new HashMap<>();
static {
for (Suite suite : Suite.values()) {
valueMap.put(suite.value, suite);
}
}
public static Suite valueOf(int pageType) {
return valueMap.get(pageType);
}
}
Suite suite;
//1~13
int point;
public Card(int suiteValue, int point) {
this.suite = Suite.valueOf(suiteValue);
this.point = point;
}
public String toString() {
String strPoint = Integer.toString(point);
if (point > 10) {
switch (point) {
case 11:
strPoint = "J";
break;
case 12:
strPoint = "Q";
break;
case 13:
strPoint = "K";
break;
}
}
return suite.name() + ":" + strPoint;
}
public int getScore() {
return suite.value * 100 + point;
}
}
public static List<Card> createCardList(int suiteCount) {
List<Card> cards = new ArrayList<>(52);
for(int i = 1; i < 5; i++) {
for(int j = 1; j < 14 ;++j) {
cards.add(new Card(i, j));
}
}
List<Card> totalCards = new ArrayList<>(suiteCount );
for(int j = 0; j < suiteCount; j++) {
totalCards.addAll(new ArrayList<>(cards));
}
Collections.shuffle(totalCards);
return totalCards;
}
public static class CardComparator implements Comparator<Card> {
@Override
public int compare(Card o1, Card o2) {
return o1.getScore() - o2.getScore();
}
}
}
- 各种排序算法的实现如下
package com.github.walterfan.helloalgorithm;
import com.google.common.base.Stopwatch;
import lombok.extern.slf4j.Slf4j;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
@Slf4j
public class SortCardTask implements Callable<Long> {
public enum SortMethod {
BUBBLE_SORT,
INSERT_SORT,
QUICK_SORT,
MERGE_SORT,
TIM_SORT}
private final List<Poker.Card> cards;
private final SortMethod sortMethod;
private final int taskNumber;
private final AtomicInteger taskCounter;
public SortCardTask(List<Poker.Card> cards, SortMethod method, int taskNumber, AtomicInteger taskCounter) {
this.cards = cards;
this.sortMethod = method;
this.taskNumber = taskNumber;
this.taskCounter = taskCounter;
}
@Override
public Long call() {
Stopwatch stopwatch = Stopwatch.createStarted();
log.info("* {} begin to sort {} cards ({} suite) by {}", this.taskNumber, cards.size(), cards.size()/52, sortMethod);
Comparator<Poker.Card> comparator = new Poker.CardComparator();
switch(sortMethod) {
case BUBBLE_SORT:
bubbleSort(cards, comparator);
break;
case INSERT_SORT:
insertSort(cards, comparator);
break;
case QUICK_SORT:
quickSort(cards, comparator);
break;
case MERGE_SORT:
mergeSort(cards, comparator);
break;
case TIM_SORT:
timSort(cards, comparator);
break;
}
stopwatch.stop();
long millis = stopwatch.elapsed(MILLISECONDS);
log.info("* {} end to sort {} cards ({} suite)sort by {} spend {} milliseconds - {}" , this.taskNumber, cards.size(), cards.size()/52, sortMethod, millis, stopwatch); // formatted string like "12.3 ms"
taskCounter.incrementAndGet();
return millis;
}
public static <T> void bubbleSort(List<T> aList, Comparator<T> comparator) {
boolean sorted = false;
int loopCount = aList.size() - 1;
while (!sorted) {
sorted = true;
for (int i = 0; i < loopCount; i++) {
if (comparator.compare(aList.get(i), aList.get(i + 1)) > 0) {
Collections.swap(aList, i, i + 1);
sorted = false;
}
}
}
}
public static <T> void insertSort(List<T> aList, Comparator<T> comparator) {
int size = aList.size();
for (int i = 1; i < size; ++i) {
T selected = aList.get(i);
if (size < 10) {
log.info("{} insert to {}", selected, aList.subList(0, i).stream().map(String::valueOf).collect(Collectors.joining(", ")));
}
int j = i - 1;
//find a position for insert currentElement in the left sorted collection
while (j >= 0 && comparator.compare(selected, aList.get(j)) < 0) {
//it does not overwrite existed element because the j+1=i that is currentElement at beginging
aList.set(j + 1, aList.get(j));
j--;
}
aList.set(j + 1, selected);
}
}
public static <T> void timSort(List<T> aList, Comparator<T> comparator) {
//aList.parallelStream().sorted(comparator).collect(Collectors.toList());
aList.sort(comparator);
}
public static <T> void quickSort(List<T> aList, Comparator<T> comparator) {
T[] arr = (T[]) aList.toArray();
quickSort(arr, comparator, 0, aList.size() - 1);
}
public static <T> void quickSort(T[] arr, Comparator<T> comparator, int begin, int end) {
if (begin < end) {
int partitionIndex = partition(arr, comparator, begin, end);
quickSort(arr, comparator, begin, partitionIndex-1);
quickSort(arr, comparator, partitionIndex+1, end);
}
}
private static <T> int partition(T[] arr, Comparator<T> comparator, int begin, int end) {
T pivot = arr[end];
int i = (begin-1);
for (int j = begin; j < end; j++) {
if (comparator.compare(arr[j], pivot) <= 0) { //arr[j] <= pivot
i++;
T swapTemp = arr[i];
arr[i] = arr[j];
arr[j] = swapTemp;
}
}
T swapTemp = arr[i+1];
arr[i+1] = arr[end];
arr[end] = swapTemp;
return i+1;
}
public static <T> void mergeSort(List<T> aList, Comparator<T> comparator) {
T[] arr = (T[]) aList.toArray();
mergeSort(arr, 0, aList.size() - 1, comparator);
}
public static <T> void mergeSort(T arr[], int l, int r, Comparator<T> comparator)
{
if (l < r) {
// Same as (l+r)/2, but avoids overflow for
// large l and h
int m = l + (r - l) / 2;
// Sort first and second halves
mergeSort(arr, l, m, comparator);
mergeSort(arr, m + 1, r, comparator);
merge(arr, l, m, r, comparator);
}
}
public static <T> void merge(T arr[], int l, int m, int r, Comparator<T> comparator)
{
int i, j, k;
int n1 = m - l + 1;
int n2 = r - m;
/* create temp arrays */
T[] L = (T[])new Object[n1];
T[] R = (T[])new Object[n2];
/* Copy data to temp arrays L[] and R[] */
for (i = 0; i < n1; i++)
L[i] = arr[l + i];
for (j = 0; j < n2; j++)
R[j] = arr[m + 1 + j];
/* Merge the temp arrays back into arr[l..r]*/
i = 0; // Initial index of first subarray
j = 0; // Initial index of second subarray
k = l; // Initial index of merged subarray
while (i < n1 && j < n2) {
if (comparator.compare(L[i], R[j]) <= 0) {
arr[k] = L[i];
i++;
}
else {
arr[k] = R[j];
j++;
}
k++;
}
/* Copy the remaining elements of L[], if there
are any */
while (i < n1) {
arr[k] = L[i];
i++;
k++;
}
/* Copy the remaining elements of R[], if there
are any */
while (j < n2) {
arr[k] = R[j];
j++;
k++;
}
}
@Override
public String toString() {
return "SortCardTask{" +
"taskNumber=" + taskNumber +
", sortMethod=" + sortMethod +
'}';
}
}
然后分别用以上5种排序方法来做下性能比较
package com.github.walterfan.helloalgorithm;
import com.google.common.base.Stopwatch;
import com.google.common.util.concurrent.Uninterruptibles;
import lombok.extern.slf4j.Slf4j;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
@Slf4j
public class SortAlgorithmDemo {
public static final int SORT_TIMEOUT_SEC = 30;
private static AtomicInteger finishCounter = new AtomicInteger(0);
private AtomicInteger taskNumber = new AtomicInteger(0);
private ExecutorService executorService =Executors.newSingleThreadExecutor();
public Callable<Long> createTask(int cardSuiteCount, SortCardTask.SortMethod method) {
List<Poker.Card> cards = Poker.createCardList(cardSuiteCount);
return new SortCardTask(cards, method, taskNumber.incrementAndGet(), finishCounter);
}
public List<Future<Long>> exeucteTasks(List<Callable<Long>> tasks, Duration waitTime) {
try {
return this.executorService.invokeAll(tasks, waitTime.toMillis(), TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
log.warn("invokeAll interrupt", e);
return Collections.emptyList();
}
}
private void runTasks(int cardSuiteCount, SortCardTask.SortMethod sortMethod) {
List<Callable<Long>> tasks = new ArrayList<>();
tasks.add(this.createTask(cardSuiteCount, sortMethod));
List<Future<Long>> results = this.exeucteTasks(tasks, Duration.ofSeconds(SORT_TIMEOUT_SEC));
results.stream().filter(x -> !x.isDone()).forEach(x -> log.info("{} is not done", x));
}
private void waitAndstop(int seconds) {
Uninterruptibles.sleepUninterruptibly(seconds, TimeUnit.SECONDS);
this.executorService.shutdownNow();
}
public static void main(String[] args) {
SortAlgorithmDemo demo = new SortAlgorithmDemo();
int cardSuiteCount = 1000;
demo.runTasks(cardSuiteCount, SortCardTask.SortMethod.BUBBLE_SORT);
demo.runTasks(cardSuiteCount, SortCardTask.SortMethod.INSERT_SORT);
demo.runTasks(cardSuiteCount, SortCardTask.SortMethod.QUICK_SORT);
demo.runTasks(cardSuiteCount, SortCardTask.SortMethod.MERGE_SORT);
demo.runTasks(cardSuiteCount, SortCardTask.SortMethod.TIM_SORT);
demo.waitAndstop(30);
}
}
将1000 副牌打乱顺序的扑克牌排序下来,结果如下
- 冒泡排序:23.77 秒
- 插入排序:1.231 秒
- 快速排序: 135 毫秒
- 归并排序: 23.96 毫秒
- TimSort: 18.98 毫秒
09:06:35.315: c.g.w.h.SortCardTask * 1 begin to sort 52000 cards (1000 suite) by BUBBLE_SORT
09:06:59.082: c.g.w.h.SortCardTask * 1 end to sort 52000 cards (1000 suite)sort by BUBBLE_SORT spend 23769 milliseconds - 23.77 s
09:06:59.151: c.g.w.h.SortCardTask * 2 begin to sort 52000 cards (1000 suite) by INSERT_SORT
09:07:00.382: c.g.w.h.SortCardTask * 2 end to sort 52000 cards (1000 suite)sort by INSERT_SORT spend 1230 milliseconds - 1.231 s
09:07:00.388: c.g.w.h.SortCardTask * 3 begin to sort 52000 cards (1000 suite) by QUICK_SORT
09:07:00.523: c.g.w.h.SortCardTask * 3 end to sort 52000 cards (1000 suite)sort by QUICK_SORT spend 135 milliseconds - 135.0 ms
09:07:00.525: c.g.w.h.SortCardTask * 4 begin to sort 52000 cards (1000 suite) by MERGE_SORT
09:07:00.549: c.g.w.h.SortCardTask * 4 end to sort 52000 cards (1000 suite)sort by MERGE_SORT spend 23 milliseconds - 23.96 ms
09:07:00.551: c.g.w.h.SortCardTask * 5 begin to sort 52000 cards (1000 suite) by TIM_SORT
09:07:00.570: c.g.w.h.SortCardTask * 5 end to sort 52000 cards (1000 suite)sort by TIM_SORT spend 18 milliseconds - 18.98 ms
但是 TimSort 也有一个问题, 在 JDK7 的描述中提到它有如下兼容性问题
- Area: API: Utilities
-
Synopsis: Updated sort behavior for Arrays and Collections may throw an
IllegalArgumentException
-
Description: The sorting algorithm used by
java.util.Arrays.sort
and (indirectly) byjava.util.Collections.sort
has been replaced. The new sort implementation may throw anIllegalArgumentException
if it detects aComparable
that violates theComparable
contract. The previous implementation silently ignored such a situation. If the previous behavior is desired, you can use the new system property,java.util.Arrays.useLegacyMergeSort
, to restore previous mergesort behavior. - Nature of Incompatibility: behavioral
- RFE: 6804124
所以在JDK7以后,实现Comparable接口的比较器需要满足以下三个约束条件:
1) 自反性:x,y 的比较结果和 y,x 的比较结果相反。
2) 传递性:x>y, y>z,则 x>z。
3) 对称性:x=y,则 x,z 比较结果和 y,z 比较结果相同。
如果你的比较方法违反了以上的约束,要么你不使用这个新的算法,还是回到传统的归并排序
System.setProperty("java.util.Arrays.useLegacyMergeSort", "true");
要么修改你的比较器以符合上述的约束条件。
举两个例子如下
- 例一: 违反了传递性
package com.github.walterfan.helloalgorithm;
import lombok.extern.slf4j.Slf4j;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Collectors;
/**
* @Author: Walter Fan
**/
@Slf4j
public class TimSortExam {
public static class WorkLoadComparator implements Comparator<WorkNode> {
@Override
public int compare(WorkNode o1, WorkNode o2) {
double ratio1 = o1.getRatio();
double ratio2 = o2.getRatio();
double distance = ratio1 - ratio2;
if(Double.compare(Math.abs(distance), 0.02) < 0) {
int ret = Long.compare(o1.getConnections(), o2.getConnections());
log.info("o1={}, o2={}, distance={}, ret={}" , ratio1, ratio2, distance, ret);
return ret;
}
return Double.compare(ratio1, ratio2);
}
}
public static void main(String[] args) {
WorkNode node1 = new WorkNode(100, 20, 30);
WorkNode node2 = new WorkNode(100, 22, 20);
WorkNode node3 = new WorkNode(100, 23, 10);
WorkNode node4 = new WorkNode(100, 25, 50);
Comparator<WorkNode> comparator = new WorkLoadComparator();
int ret = comparator.compare(node1, node2); log.info("node1 {} node2 ", ret > 0? ">":"<=");
ret = comparator.compare(node2, node3); log.info("node2 {} node3", ret > 0? ">":"<=");
ret = comparator.compare(node1, node3); log.info("node1 {} node3", ret > 0? ">":"<=");
List<WorkNode> aList = new LinkedList<>();
for(int i = 0; i < 1; ++i) {
aList.addAll(Arrays.asList(node1, node2, node3, node4));
}
List<WorkNode> sortedList = aList.stream().sorted(comparator).collect(Collectors.toList());
log.info("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
sortedList.stream().forEach(x -> log.info("{}", x));
}
}
- 例二: 违反了对称性
package com.github.walterfan.helloalgorithm;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import java.util.Random;
public class TimSortDemo {
public static void sortMessageError(List<String> list) {
System.out.println("this is error comparator");
Collections.sort(list, new Comparator<String>() {
public int compare(String arg1, String arg2) {
if (StringUtils.isBlank(arg1) || StringUtils.isBlank(arg2)) {
return 0;
}
return arg1.compareTo(arg2);
}
});
}
public static void sortMessageCorrect(List<String> list) {
System.out.println("this is correct comparator");
Collections.sort(list, new Comparator<String>() {
public int compare(String arg1, String arg2) {
if(StringUtils.isBlank(arg1) ) {
if(StringUtils.isBlank(arg2)) {
return 0;
}else {
return -1;
}
}else if(StringUtils.isBlank(arg2)){
return 1;
}
return arg1.compareTo(arg2);
}
});
}
public static List<String> createList() {
List<String> list = new ArrayList<String>();
Random random = new Random();
for (int i = 10000; i > 0; i--) {
if (i % 5000 != 0) {
list.add(random.nextInt(1000) + "");
} else {
list.add("");
}
}
return list;
}
public static void main(String[] args) throws InterruptedException {
sortMessageCorrect(createList());
sortMessageError(createList());
}
}
网友评论