美文网首页
java爬取12306查询余票的操作

java爬取12306查询余票的操作

作者: 魔幻有趣 | 来源:发表于2018-05-21 15:31 被阅读0次

    首先访问12306进行余票查询,比如我查询武汉到深圳、日期为2018-05-22的余票

    通过浏览器F12可以看到查询信息的接口是如下:

    https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2018-05-22&leftTicketDTO.from_station=WHN&leftTicketDTO.to_station=SZQ&purpose_codes=ADULT

    可以看到接口中传入四个参数:leftTicketDTO.train_date,leftTicketDTO.from_station,leftTicketDTO.to_station,

    purpose_codes,分别代表出发日期,起始地,目的地,票的类型(默认为成年人)

    参数里面的地点用的是字典码,需要我们找到字典,

    还是通过浏览器F12查看,来自于一个js(我也是看别人的文章知道的)

    https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9053

    可以直接打开看到里面是一串很长的字符串,

    1526887430(1).png

    接下来就是对这个字符串的处理:

    我这里用java的httpclient处理的:

     // Download the station dictionary script (station_name.js) from 12306 and
     // extract every "<Chinese station name>|<UPPERCASE code>" pair from it.
     CloseableHttpClient httpClient = HttpClients.createDefault();
            URI uri = new URIBuilder()
                    .setScheme("https")
                    .setHost("kyfw.12306.cn")
                    // Leading slash, consistent with the ticket query request; does not rely on
                    // URIBuilder inserting the separator between host and path.
                    .setPath("/otn/resources/js/framework/station_name.js")
                    .setParameter("station_version", "1.9053")
                    .build();
            System.out.println(uri);
            HttpGet httpGet = new HttpGet(uri);
            httpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36");
            // map:     station code -> station name
            // rev_map: station name -> station code
            HashMap<String, String> map = new HashMap<>();
            HashMap<Object, Object> rev_map = new HashMap<>();
            try {
                CloseableHttpResponse response = httpClient.execute(httpGet);
                HttpEntity entity = response.getEntity();
                // UTF-8 is only the fallback used when the response declares no charset;
                // the regex below needs the Chinese names decoded correctly.
                String mess = EntityUtils.toString(entity, "UTF-8");
                System.out.println(mess); // the complete script body is received
                // group(1) = Chinese station name, group(2) = upper-case station code
                String reg = "([\\u4e00-\\u9fa5]+)\\|([A-Z]+)";
                Pattern compile = Pattern.compile(reg);
                // One matcher walks the body left to right; no need to re-create the matcher
                // and re-slice the string after every hit.
                Matcher matcher = compile.matcher(mess);
                while (matcher.find()) {
                    String stationName = matcher.group(1);
                    String stationCode = matcher.group(2);
                    map.put(stationCode, stationName);
                    rev_map.put(stationName, stationCode);
                }
    

    这里将得到的字典序列化到本地,避免每次使用字典都去查一次.

     FileOutputStream out = new FileOutputStream("D:\\station.txt");
                ObjectOutputStream oos = new ObjectOutputStream(out);
                oos.writeObject(map);
                oos.close();
    

    接下来直接调用接口就行了,还是用的httpclient

    /**
     * Queries the 12306 left-ticket endpoint for trains between two stations on a given date.
     *
     * <p>The station arguments are converted with {@code station_name.getCodeByCity} before
     * being sent. Every element of {@code data.result} in the JSON response is a
     * '|'-separated record; each record with enough fields becomes one insertion-ordered map
     * with the keys {@code train_no}, {@code from_station_name}, {@code to_station_name},
     * {@code start_time}, {@code arrive_time}, {@code first_class_seat},
     * {@code second_class_seat}, {@code soft_sleep}, {@code hard_sleep}, {@code hard_seat}
     * and {@code no_seat}.
     *
     * <p>The request URI, the raw response body and the parsed list are printed to stdout.
     * An {@link IOException} is printed and swallowed; the rows collected so far are returned.
     *
     * @param from_station departure station, as accepted by {@code station_name.getCodeByCity}
     * @param to_station   arrival station, as accepted by {@code station_name.getCodeByCity}
     * @param date         value sent as {@code leftTicketDTO.train_date}, e.g. 2018-05-04
     * @return a list of {@code Map<String, String>}, one per train; empty when the response
     *         carries no {@code data.result} or the request failed
     * @throws URISyntaxException if the query URI cannot be built
     */
    public static List getINfo(String from_station,String to_station,String date) throws URISyntaxException {
        String from_code = station_name.getCodeByCity(from_station);
        String to_code = station_name.getCodeByCity(to_station);

        URI uri = new URIBuilder()
                .setScheme("https")
                .setHost("kyfw.12306.cn")
                .setPath("/otn/leftTicket/query")
                .setParameter("leftTicketDTO.train_date", date) // e.g. 2018-05-04
                .setParameter("leftTicketDTO.from_station", from_code)
                .setParameter("leftTicketDTO.to_station", to_code)
                .setParameter("purpose_codes", "ADULT")
                .build();
        System.out.println(uri);

        HttpGet httpGet = new HttpGet(uri);
        httpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36");

        List<Map<String,String>> list = new ArrayList<>();
        // try-with-resources releases the response and the client on every path,
        // including when parsing fails half-way through.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // UTF-8 is only the fallback used when the response declares no charset.
            String message = EntityUtils.toString(response.getEntity(), "UTF-8");
            System.out.println(message);

            JsonNode root = new ObjectMapper().readTree(message);
            if (root != null) {
                // path() yields an empty "missing" node instead of null, so a response
                // without data/result simply produces no rows.
                for (JsonNode record : root.path("data").path("result")) {
                    // asText() gives the raw string (no JSON quotes); limit -1 keeps trailing
                    // empty fields so the positional indexes below stay valid.
                    String[] fields = record.asText().split("\\|", -1);
                    if (fields.length < 32) {
                        continue; // not a full train record; highest index read is 31
                    }
                    list.add(toTrainRow(fields));
                }
            }
            System.out.println(list);
            System.out.println("-------------------------------------------------");
        } catch (IOException e) {
            e.printStackTrace();
        }
        return list;
    }

    /** Maps the positional fields of one train record (at least 32 entries) to named values. */
    private static Map<String, String> toTrainRow(String[] fields) {
        Map<String, String> row = new LinkedHashMap<>();
        row.put("train_no", fields[3]);
        row.put("from_station_name", station_name.getCityByCode(fields[6]));
        row.put("to_station_name", station_name.getCityByCode(fields[7]));
        row.put("start_time", fields[8]);
        row.put("arrive_time", fields[9]);
        row.put("first_class_seat", fields[31]);  // first-class seat
        row.put("second_class_seat", fields[30]); // second-class seat
        row.put("soft_sleep", fields[23]);        // soft sleeper
        row.put("hard_sleep", fields[28]);        // hard sleeper
        row.put("hard_seat", fields[29]);         // hard seat
        row.put("no_seat", fields[26]);           // standing ticket
        return row;
    }
    

    最后处理了下,做了个简单的查询页面:


    1526887668(1).png

    相关文章

      网友评论

          本文标题:java爬取12306查询余票的操作

          本文链接:https://www.haomeiwen.com/subject/lunfjftx.html