美文网首页后端小树林数据结构和算法
Java实现大量文件中读取关键字

Java实现大量文件中读取关键字

作者: 奔跑的蛙牛 | 来源:发表于2018-09-27 22:12 被阅读24次
    package searchWorld;
    
    // Find the keywords that occur more than one hundred times across a large number of files
    
    import java.io.File;
    import java.io.IOException;
    import java.util.Scanner;
    import java.util.concurrent.ArrayBlockingQueue;
    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.atomic.LongAdder;
    
    /**
     * Counts word occurrences across every file under a directory and prints the
     * words that occur more than {@link #REPORT_THRESHOLD} times.
     *
     * <p>One producer thread walks the directory tree and feeds files into a bounded
     * {@link BlockingQueue}; a pool of consumer threads takes files from the queue and
     * accumulates per-word counts in a shared {@link ConcurrentHashMap}. The
     * {@link #DUMMY} sentinel marks the end of the stream: a consumer that takes it
     * puts it back (so the remaining consumers also see it) and terminates.
     */
    public class BlockingQueueTest {
        /** Capacity of the bounded file queue. */
        private static final int FILE_QUEUE_SIZE = 10;
        /** Number of consumer threads started by {@link #main(String[])}. */
        private static final int SEARCH_THREADS = 1000;
        /** Only words whose count is strictly greater than this value are reported. */
        private static final long REPORT_THRESHOLD = 100L;
        /** End-of-stream sentinel; compared by identity, never opened as a real file. */
        private static final File DUMMY = new File("");
        private static final BlockingQueue<File> queue = new ArrayBlockingQueue<>(FILE_QUEUE_SIZE);
        private static final ConcurrentHashMap<String, LongAdder> hashMap = new ConcurrentHashMap<>();

        /**
         * Reads a directory name from standard input, counts the words of every file
         * below it, and prints the frequent words once all workers have finished.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            try (Scanner in = new Scanner(System.in)) {
                System.out.println("请输入一个目录");
                String directory = in.nextLine();

                Thread producer = new Thread(() -> {
                    try {
                        try {
                            enumrate(new File(directory));
                        } finally {
                            // Always publish the sentinel, even if enumeration failed;
                            // otherwise every consumer would block in take() forever.
                            queue.put(DUMMY);
                        }
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                    }
                });
                producer.start();

                Thread[] workers = new Thread[SEARCH_THREADS];
                for (int i = 0; i < workers.length; i++) {
                    workers[i] = new Thread(BlockingQueueTest::drainQueue);
                    workers[i].start();
                }

                // Report exactly once, after every consumer has seen the sentinel.
                try {
                    producer.join();
                    for (Thread worker : workers) {
                        worker.join();
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
                getMoreHundredWord();
            }
        }

        /**
         * Consumer loop: processes files from the queue until the sentinel is seen.
         * The sentinel is put back so that the other consumers terminate as well.
         */
        private static void drainQueue() {
            try {
                while (true) {
                    File file = queue.take();
                    if (file == DUMMY) {
                        queue.put(file);
                        return;
                    }
                    try {
                        search(file);
                    } catch (IOException e) {
                        // A single unreadable file must not stop the worker.
                        System.err.println("Cannot read " + file + ": " + e);
                    }
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Recursively puts every regular file below {@code directory} into the queue,
         * blocking while the queue is full.
         *
         * @param directory root of the tree to walk; if it is not a readable directory
         *                  nothing is enqueued
         * @throws InterruptedException if interrupted while waiting for queue space
         */
        public static void enumrate(File directory) throws InterruptedException {
            File[] files = directory.listFiles();
            if (files == null) {
                // listFiles() returns null for a non-directory or on an I/O error.
                return;
            }
            for (File file : files) {
                if (file.isDirectory()) {
                    enumrate(file);
                } else {
                    queue.put(file);
                }
            }
        }

        /**
         * Reads {@code file} as UTF-8 text and increments the shared counter of every
         * word found. Characters other than ASCII letters, whitespace and '+' are
         * deleted first; the remaining text is split on runs of whitespace and '+'.
         *
         * @param file the file to scan
         * @throws IOException if the file cannot be opened
         */
        public static void search(File file) throws IOException {
            try (Scanner in = new Scanner(file, "UTF-8")) {
                while (in.hasNextLine()) {
                    String line = in.nextLine().replaceAll("[^a-zA-Z\\s+]", "");
                    for (String word : line.split("[\\s+]+")) {
                        if (word.isEmpty()) {
                            // A leading separator yields an empty first token; not a word.
                            continue;
                        }
                        // Atomic create-or-get, then a contention-friendly increment.
                        hashMap.computeIfAbsent(word, k -> new LongAdder()).increment();
                    }
                }
            }
        }

        /**
         * Prints every word counted more than {@link #REPORT_THRESHOLD} times as
         * {@code word -> count}, one per line, in unspecified order.
         */
        public static void getMoreHundredWord() {
            hashMap.forEach((word, count) -> {
                if (count.sum() > REPORT_THRESHOLD) {
                    System.out.println(word + " -> " + count);
                }
            });
        }
    }
    
    

    相关文章

      网友评论

        本文标题:Java实现大量文件中读取关键字

        本文链接:https://www.haomeiwen.com/subject/jabyoftx.html