思想
- 超大文件无法一次性全部加载到内存中;
- 可以将超大文件按固定行数切分为若干分片,每个分片在内存中排序后写入临时小文件;然后对所有分片做多路归并(每次取各分片当前行中的最小值),将排序后内容输出至指定文件;
编码
创建超大文件
/**
 * Generates the input file for the sort: writes {@code LINE_COUNT} lines to
 * {@code BIG_FILE_NAME}, each holding one random non-negative int followed by
 * {@code LINE_SEPARATOR}.
 *
 * <p>An {@link IOException} is not propagated; its stack trace is printed and the
 * method returns, possibly leaving a partially written file.
 */
private static void createBigFile() {
    try (FileWriter out = new FileWriter(BIG_FILE_NAME)) {
        Random generator = new Random();
        int written = 0;
        while (written < LINE_COUNT) {
            // nextInt(bound) yields a value in [0, Integer.MAX_VALUE).
            out.write(generator.nextInt(Integer.MAX_VALUE) + LINE_SEPARATOR);
            written++;
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
}
超大文件分片排序
/**
 * Splits {@code BIG_FILE_NAME} into sorted chunk files.
 *
 * <p>Lines are read one at a time and parsed as integers. Every {@code BATCH_SIZE}
 * lines are sorted in memory and written to a file named
 * {@code BIG_FILE_NAME + ".tmp." + index}. Any lines left over after the last full
 * batch are written as a final, smaller chunk, so no input line is dropped when the
 * line count is not a multiple of {@code BATCH_SIZE}.
 *
 * <p>An {@link IOException} is not propagated; its stack trace is printed and the
 * chunk files completed so far are returned.
 *
 * @return the names of the chunk files written, in creation order
 */
private static List<String> separateFile() {
    List<String> fileNameList = new ArrayList<>();
    try (BufferedReader reader = new BufferedReader(new FileReader(BIG_FILE_NAME))) {
        int index = 0;
        List<Integer> batchLineList = new ArrayList<>(BATCH_SIZE);
        String line;
        while ((line = reader.readLine()) != null) {
            batchLineList.add(Integer.valueOf(line));
            if (batchLineList.size() == BATCH_SIZE) {
                String fileName = BIG_FILE_NAME + ".tmp." + index++;
                writeSortedBatch(batchLineList, fileName);
                fileNameList.add(fileName);
                batchLineList.clear();
            }
        }
        // Flush the trailing partial batch; without this the last
        // (lineCount % BATCH_SIZE) lines would never reach any chunk file.
        if (!batchLineList.isEmpty()) {
            String fileName = BIG_FILE_NAME + ".tmp." + index;
            writeSortedBatch(batchLineList, fileName);
            fileNameList.add(fileName);
            batchLineList.clear();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return fileNameList;
}

/**
 * Sorts {@code batch} in ascending order (in place) and writes it to {@code fileName},
 * one value per line.
 */
private static void writeSortedBatch(List<Integer> batch, String fileName) throws IOException {
    batch.sort(Comparator.naturalOrder());
    try (FileWriter tmpWriter = new FileWriter(fileName)) {
        for (Integer val : batch) {
            tmpWriter.write(val + LINE_SEPARATOR);
        }
    }
}
分片合并输出
/**
 * Merges the given sorted chunk files into {@code SORT_FILE_NAME}.
 *
 * <p>One reader is opened per chunk file and its current (not yet written) line is
 * kept in a map. On each round the entry whose current line has the smallest integer
 * value is written to the output and replaced by the next line from the same reader.
 * A {@code null} current line marks an exhausted reader. The loop ends when every
 * reader is exhausted.
 *
 * <p>Any exception raised while merging is not propagated; its stack trace is
 * printed. All chunk readers that were opened are closed before returning.
 *
 * @param fileNameList names of the chunk files to merge; each is expected to hold
 *                     one integer per line in ascending order
 */
private static void mergeFile(List<String> fileNameList) {
    Map<BufferedReader, String> map = new HashMap<>();
    try (FileWriter writer = new FileWriter(SORT_FILE_NAME)) {
        for (String fileName : fileNameList) {
            BufferedReader tmpReader = new BufferedReader(new FileReader(fileName));
            // Register the reader before the first read so that the finally block
            // still closes it if readLine() throws.
            map.put(tmpReader, null);
            map.put(tmpReader, tmpReader.readLine());
        }
        while (true) {
            Map.Entry<BufferedReader, String> minEntry = null;
            for (Map.Entry<BufferedReader, String> entry : map.entrySet()) {
                String value = entry.getValue();
                if (value == null) {
                    // This reader has no more lines.
                    continue;
                }
                // Track the entry whose current line is the smallest.
                if ((minEntry == null)
                        || (Integer.parseInt(value) < Integer.parseInt(minEntry.getValue()))) {
                    minEntry = entry;
                }
            }
            // No candidate means every reader is exhausted: the merge is complete.
            if (minEntry == null) {
                break;
            }
            writer.write(minEntry.getValue() + LINE_SEPARATOR);
            minEntry.setValue(minEntry.getKey().readLine());
        }
    } catch (Exception e) {
        // Kept broad on purpose: callers have always seen failures here only as a
        // printed stack trace, including malformed-number errors.
        e.printStackTrace();
    } finally {
        // Close every chunk reader that was opened.
        for (BufferedReader reader : map.keySet()) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
网友评论