JAVA抓取网页方法

JAVA抓取网页方法

作者: 龍狮虎 | 来源:发表于2017-11-21 14:14 被阅读0次

package com.xxx.core;

import org.apache.http.client.methods.CloseableHttpResponse;

import org.apache.http.client.methods.HttpGet;

import org.apache.http.impl.client.CloseableHttpClient;

import org.apache.http.impl.client.HttpClients;

import org.apache.http.util.EntityUtils;

import org.jsoup.Jsoup;

import org.jsoup.nodes.Document;

import org.jsoup.select.Elements;

import java.io.*;

/**
 * Small command-line scraper: downloads a web page and a JSON endpoint over HTTP
 * and offers helpers to persist the results to a UTF-8 text file.
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and never propagate
 * out of the helper methods; callers receive an empty string (fetch) or a possibly
 * incomplete file (save) in that case.
 */
public class GlodonFinanceInfo {

    /**
     * Fetches the hard-coded quote page and JSON endpoint, then prints the JSON
     * body to standard output. Command-line arguments are currently ignored.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // String url = args[0];
        // String filePath = args[1];
        String url = "http://quote.eastmoney.com/sz002410.html?StockCode=002410";
        String jsonUrl = "http://nuff.eastmoney.com/EM_Finance2015TradeInterface/JS.ashx?id=0024102";
        String filePath = "E:\\text2.txt";

        GlodonFinanceInfo info = new GlodonFinanceInfo();
        String html = info.getHtml(url, "gb2312");
        String json = info.getHtml(jsonUrl, "utf-8");
        System.out.println(json);

        // Persistence is intentionally disabled; enable one of these to write the file.
        // info.saveHtml(filePath, html);
        // info.saveJson(filePath, json);
    }

    /**
     * Performs an HTTP GET and decodes the response body with the given charset.
     *
     * <p>The HTTP client and the response are both closed before returning, so no
     * connection is left open. Unlike a {@code return} inside {@code finally}, this
     * version does not hide runtime exceptions (for example a malformed URL).
     *
     * @param url  address to request
     * @param code name of the charset used to decode the body, e.g. {@code "utf-8"}
     * @return the decoded body when the status code is 200 and a body is present;
     *         an empty string when the status differs, the body is missing, or an
     *         {@link IOException} (including an unsupported charset name) occurs
     */
    private String getHtml(String url, String code) {
        String html = "";
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
            int statusCode = response.getStatusLine().getStatusCode();
            // A response may legitimately carry no entity; treat that like "no content".
            if (statusCode == 200 && response.getEntity() != null) {
                byte[] bytes = EntityUtils.toByteArray(response.getEntity());
                html = new String(bytes, code);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return html;
    }

    /**
     * Parses {@code text} as HTML, selects the rows of every element matching the
     * CSS selector {@code .cwzb}, and writes them to {@code filePath} as UTF-8.
     *
     * <p>Each written row becomes one line terminated by {@code \r\n}; the text of
     * its {@code td} cells is joined with {@code $}. The row at index 0 is skipped.
     *
     * @param filePath destination file; created or truncated
     * @param text     HTML markup to parse
     */
    private void saveHtml(String filePath, String text) {
        try (FileOutputStream out = new FileOutputStream(new File(filePath));
             OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
            Document parse = Jsoup.parse(text);
            Elements table = parse.select(".cwzb");
            Elements tr = table.select("tr");

            // Start at 1: index 0 is skipped (presumably a header row - TODO confirm).
            for (int i = 1; i < tr.size(); i++) {
                if (i == 4) {
                    // NOTE(review): this strips <div> elements from ALL selected rows,
                    // not only row 4; rows 1-3 are already written at this point, so
                    // only rows 4 and later are affected. Confirm this is intended.
                    tr.select("div").remove();
                }

                Elements td = tr.get(i).select("td");
                for (int j = 0; j < td.size(); j++) {
                    writer.write(td.get(j).text());
                    // Separator goes between cells only, not after the last one.
                    if (j < td.size() - 1) {
                        writer.write("$");
                    }
                }
                writer.write("\r\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes {@code text} verbatim to {@code filePath} encoded as UTF-8.
     *
     * <p>Previously this method only created/truncated the file and never wrote
     * the payload; it now stores the supplied text.
     *
     * @param filePath destination file; created or truncated
     * @param text     content to store; {@code null} produces an empty file
     */
    private void saveJson(String filePath, String text) {
        try (FileOutputStream out = new FileOutputStream(new File(filePath));
             OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
            if (text != null) {
                writer.write(text);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

相关文章

网友评论

本文标题：JAVA抓取网页方法

本文链接：https://www.haomeiwen.com/subject/kyqwvxtx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

栏目导航

热点阅读

关于我们|服务条款|联系我们|JAVA抓取网页方法|投稿指南|网站地图|RSS订阅|排版工具|手机版

提供经典美文摘抄,优美散文欣赏,现代诗歌精选,短篇小说,心情随笔,表白情书范文,故事会在线阅读欣赏

Copyright © 2014-2023 Haomeiwen.com All Rights Reserved. 好美文阅读网版权所有

备案信息：桂公网安备 45052102000051号 · 桂ICP备13007215号-3

本站所收录作品、热点评论等信息部分来源互联网，目的只是为了系统归纳学习和传递资讯

所有作品版权归原创作者所有，与本站立场无关，如不慎侵犯了你的权益，请联系我们告知，我们将做删除处理！