美文网首页
Java 多线程——统计文件中各单词数量

Java 多线程——统计文件中各单词数量

作者: BitterOutsider | 来源:发表于2020-11-26 20:30 被阅读0次

问题描述

Word Count 是一个著名的练手程序。一个文本文件包含若干行，每行包含若干个只由小写字母组成的单词，单词之间以空格分隔（空行或连续空格不应被计为单词）。编写一个程序，统计一个文件列表中每个单词出现的总次数。

方法一:使用Future与线程池

/**
 * Counts how often each word occurs across a list of text files, using a fixed-size
 * thread pool and {@link Future}s to collect the partial results.
 *
 * <p>For every file, {@code threadNum} tasks are submitted that all pull lines from the
 * same {@link BufferedReader}; each task builds its own private map, and the calling
 * thread merges those maps once the tasks have finished.
 */
public class MultiThreadWordCount {
    /**
     * Counts the words contained in {@code files}.
     *
     * @param threadNum size of the worker pool and number of tasks submitted per file;
     *                  must be positive ({@code Executors.newFixedThreadPool} rejects other values)
     * @param files     files to read; words on a line are separated by a single space
     * @return a map from word to its total number of occurrences over all files
     * @throws FileNotFoundException if one of the files cannot be opened
     * @throws ExecutionException    if a worker task failed, e.g. with an I/O error while reading
     * @throws InterruptedException  if the calling thread is interrupted while waiting for a task
     */
    public static Map<String, Integer> count(int threadNum,
                                             List<File> files) throws FileNotFoundException, ExecutionException, InterruptedException {
        final ExecutorService threadPool = Executors.newFixedThreadPool(threadNum);
        final List<BufferedReader> openedReaders = new ArrayList<>();
        try {
            final List<Future<Map<String, Integer>>> allFilesFutureWordsCount = new ArrayList<>();
            for (File file : files) {
                final BufferedReader reader = new BufferedReader(new FileReader(file));
                // Remember the reader first so it is closed even if a later file fails to open.
                openedReaders.add(reader);
                allFilesFutureWordsCount.addAll(readAFileAndCountWord(reader, threadPool, threadNum));
            }

            final Map<String, Integer> result = new HashMap<>();
            for (Future<Map<String, Integer>> future : allFilesFutureWordsCount) {
                mergeAFileWordCountIntoResultMap(future.get(), result);
            }
            return result;
        } finally {
            // The pool's threads are non-daemon: without a shutdown they keep the JVM alive.
            // On the success path every task has already completed; on a failure path
            // shutdownNow also discards tasks that have not started yet.
            threadPool.shutdownNow();
            closeAll(openedReaders);
        }
    }

    /** Adds every count of {@code aFileWordCount} onto the matching entry of {@code result}. */
    private static void mergeAFileWordCountIntoResultMap(Map<String, Integer> aFileWordCount, Map<String, Integer> result) {
        for (Map.Entry<String, Integer> entry : aFileWordCount.entrySet()) {
            result.merge(entry.getKey(), entry.getValue(), Integer::sum);
        }
    }

    /**
     * Submits {@code threadNum} workers that share {@code reader} and returns their futures.
     * Each line is consumed by exactly one worker, so the sum of the partial maps is the
     * word count of the whole file.
     */
    private static List<Future<Map<String, Integer>>> readAFileAndCountWord(BufferedReader reader,
                                                                            ExecutorService threadPool,
                                                                            int threadNum) {
        final List<Future<Map<String, Integer>>> partialCounts = new ArrayList<>();
        for (int i = 0; i < threadNum; i++) {
            partialCounts.add(threadPool.submit(new JobWorker(reader)));
        }
        return partialCounts;
    }

    /** Closes every reader; a failure to close one reader does not prevent closing the rest. */
    private static void closeAll(List<BufferedReader> readers) {
        for (BufferedReader reader : readers) {
            try {
                reader.close();
            } catch (java.io.IOException ignored) {
                // The stream was only read from, so a failed close cannot lose data,
                // and throwing here would mask the exception that is already propagating.
            }
        }
    }

    /**
     * Task that drains lines from a (possibly shared) reader and counts the words it sees
     * in a map private to the task.
     */
    static class JobWorker implements Callable<Map<String, Integer>> {
        // NOTE(review): shared between workers; this relies on BufferedReader serializing
        // concurrent readLine() calls internally - confirm for the targeted JDK.
        private final BufferedReader reader;

        public JobWorker(BufferedReader reader) {
            this.reader = reader;
        }

        /**
         * Reads until the reader is exhausted.
         *
         * @return the word counts of the lines this task consumed
         * @throws Exception if reading a line fails
         */
        @Override
        public Map<String, Integer> call() throws Exception {
            final Map<String, Integer> aFileWordCount = new HashMap<>();
            String line;
            while ((line = reader.readLine()) != null) {
                for (String word : line.split(" ")) {
                    // split(" ") yields "" for blank lines and for consecutive or leading
                    // spaces; those tokens are not words and must not be counted.
                    if (!word.isEmpty()) {
                        aFileWordCount.merge(word, 1, Integer::sum);
                    }
                }
            }
            return aFileWordCount;
        }
    }
}

方法二:使用ReentrantLock

/**
 * Counts how often each word occurs across a list of text files, using plain threads
 * that update one shared map per file under a {@link ReentrantLock}.
 */
public class MultiThreadWordCount {
    /**
     * Counts the words contained in {@code files}.
     *
     * <p>For every file {@code threadNum} worker threads are started. The method waits for
     * all of them to finish before merging, so the returned map is complete. If the calling
     * thread is interrupted while waiting, it keeps waiting and restores the interrupt flag
     * before returning. An I/O error inside a worker is printed to standard error and ends
     * only that worker, so the result may then be partial (best effort).
     *
     * @param threadNum number of worker threads started per file; with a value of zero or
     *                  less no worker is started and nothing is counted
     * @param files     files to read; words on a line are separated by a single space
     * @return a map from word to its total number of occurrences over all files
     * @throws FileNotFoundException if one of the files cannot be opened
     */
    public static Map<String, Integer> count(int threadNum, List<File> files) throws FileNotFoundException {
        final List<BufferedReader> openedReaders = new ArrayList<>();
        final List<Thread> workers = new ArrayList<>();
        final List<Map<String, Integer>> allFilesWordsCount = new ArrayList<>();
        try {
            for (File file : files) {
                final BufferedReader reader = new BufferedReader(new FileReader(file));
                // Remember the reader first so it is closed even if a later file fails to open.
                openedReaders.add(reader);
                allFilesWordsCount.add(readAFileAndCountWord(reader, threadNum, workers));
            }
        } finally {
            // Wait before closing: a worker must never see its reader closed underneath it,
            // and the per-file maps may only be read after their writers have terminated.
            joinAll(workers);
            closeAll(openedReaders);
        }

        final Map<String, Integer> result = new HashMap<>();
        for (Map<String, Integer> aFileWordCount : allFilesWordsCount) {
            mergeAFileWordCountIntoResultMap(aFileWordCount, result);
        }
        return result;
    }

    /** Adds every count of {@code aFileWordCount} onto the matching entry of {@code result}. */
    private static void mergeAFileWordCountIntoResultMap(Map<String, Integer> aFileWordCount, Map<String, Integer> result) {
        for (Map.Entry<String, Integer> entry : aFileWordCount.entrySet()) {
            result.merge(entry.getKey(), entry.getValue(), Integer::sum);
        }
    }

    /**
     * Starts {@code threadNum} workers that share {@code reader}, one lock and one map.
     *
     * @param workers receives every started thread so the caller can join them
     * @return the shared map; it is only safe to read once all started workers have been joined
     */
    private static Map<String, Integer> readAFileAndCountWord(BufferedReader reader, int threadNum, List<Thread> workers) {
        final Map<String, Integer> aFileWordCount = new HashMap<>();
        final ReentrantLock lock = new ReentrantLock();
        for (int i = 0; i < threadNum; i++) {
            final Thread worker = new Thread(new JobWorker(lock, reader, aFileWordCount));
            workers.add(worker);
            worker.start();
        }
        return aFileWordCount;
    }

    /**
     * Waits until every thread has terminated. An interrupt does not abort the wait
     * (the workers would otherwise keep mutating maps the caller is about to read);
     * it is remembered and re-asserted on the current thread afterwards.
     */
    private static void joinAll(List<Thread> workers) {
        boolean interrupted = false;
        for (Thread worker : workers) {
            boolean joined = false;
            while (!joined) {
                try {
                    worker.join();
                    joined = true;
                } catch (InterruptedException e) {
                    interrupted = true;
                }
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /** Closes every reader; a failure to close one reader does not prevent closing the rest. */
    private static void closeAll(List<BufferedReader> readers) {
        for (BufferedReader reader : readers) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // The stream was only read from, so a failed close cannot lose data,
                // and throwing here would mask an exception that is already propagating.
            }
        }
    }

    /**
     * Worker that pulls lines from a shared reader and adds their words to a shared map.
     * Both the reader and the map are only touched while {@code lock} is held.
     */
    static class JobWorker implements Runnable {
        private final ReentrantLock lock;
        private final BufferedReader reader;
        private final Map<String, Integer> aFileWordCount;

        public JobWorker(ReentrantLock lock, BufferedReader reader, Map<String, Integer> aFileWordCount) {
            this.lock = lock;
            this.reader = reader;
            this.aFileWordCount = aFileWordCount;
        }

        /** Processes lines until the reader is exhausted or an I/O error occurs. */
        @Override
        public void run() {
            try {
                String line;
                while ((line = readLineLocked()) != null) {
                    // Tokenizing happens outside the lock so workers can overlap on it.
                    final String[] words = line.split(" ");
                    lock.lock();
                    try {
                        for (String word : words) {
                            // split(" ") yields "" for blank lines and for consecutive or
                            // leading spaces; those tokens are not words.
                            if (!word.isEmpty()) {
                                aFileWordCount.merge(word, 1, Integer::sum);
                            }
                        }
                    } finally {
                        // Always release, otherwise one failing worker blocks all others forever.
                        lock.unlock();
                    }
                }
            } catch (IOException e) {
                // Runnable cannot propagate a checked exception; report it and stop this
                // worker, keeping whatever has been counted so far.
                e.printStackTrace();
            }
        }

        /** Reads the next line while holding the lock; returns null at end of input. */
        private String readLineLocked() throws IOException {
            lock.lock();
            try {
                return reader.readLine();
            } finally {
                lock.unlock();
            }
        }
    }
}

方法三:使用synchronized同步块

/**
 * Worker that pulls lines from a shared reader and adds their words to a shared map,
 * guarding both with a {@code synchronized} block on a common lock object.
 *
 * <p>The monitor is held only while reading one line and while updating the map, not for
 * the whole file, so several workers can actually take turns instead of the first one
 * processing everything while the others wait.
 */
static class JobWorker implements Runnable {
    private final Object lock;
    private final BufferedReader reader;
    private final Map<String, Integer> aFileWordCount;

    /**
     * @param lock           monitor shared by all workers of the same file
     * @param reader         source of lines, shared by all workers of the same file
     * @param aFileWordCount shared result map; only accessed while holding {@code lock}
     */
    public JobWorker(Object lock, BufferedReader reader, Map<String, Integer> aFileWordCount) {
        this.lock = lock;
        this.reader = reader;
        this.aFileWordCount = aFileWordCount;
    }

    /** Processes lines until the reader is exhausted or an I/O error occurs. */
    @Override
    public void run() {
        try {
            String line;
            while ((line = readLineLocked()) != null) {
                // Tokenizing happens outside the monitor so workers can overlap on it.
                final String[] words = line.split(" ");
                synchronized (lock) {
                    for (String word : words) {
                        // split(" ") yields "" for blank lines and for consecutive or
                        // leading spaces; those tokens are not words.
                        if (!word.isEmpty()) {
                            aFileWordCount.merge(word, 1, Integer::sum);
                        }
                    }
                }
            }
        } catch (IOException e) {
            // Runnable cannot propagate a checked exception; report it and stop this
            // worker, keeping whatever has been counted so far.
            e.printStackTrace();
        }
    }

    /** Reads the next line while holding the monitor; returns null at end of input. */
    private String readLineLocked() throws IOException {
        synchronized (lock) {
            return reader.readLine();
        }
    }
}

相关文章

网友评论

      本文标题:Java 多线程——统计文件中各单词数量

      本文链接:https://www.haomeiwen.com/subject/jovziktx.html