package com.wonder.spider;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.http.HttpEntity;
import org.apache.http.StatusLine;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.junit.Test;
import java.io.IOException;
import java.nio.charset.Charset;
/**
 * Small HTTP crawling examples: plain GET requests issued with Apache HttpClient 4.x
 * ({@code org.apache.http}) and a form login followed by a cookie-carrying GET issued
 * with commons-httpclient 3.x ({@code org.apache.commons.httpclient}).
 *
 * Created by xiaohuihui on 2018-05-02
 */
public class HttpClientGet {

    /** Charset handed to {@code EntityUtils.toString} when reading response bodies. */
    private static final Charset RESPONSE_CHARSET = Charset.forName("utf-8");

    /** Only responses with this status code have their body printed. */
    private static final int STATUS_OK = 200;

    /**
     * Fetches {@code http://www.itcast.cn?id=11} and prints the response body to standard
     * output when the status code is 200.
     *
     * @param args unused
     * @throws Exception if the request or reading the response fails
     */
    public static void main(String[] args) throws Exception {
        printPage("http://www.itcast.cn?id=11");
    }

    /**
     * Crawls a ChangBa page and prints the response body to standard output when the
     * status code is 200.
     *
     * @throws IOException if the request or reading the response fails
     */
    @Test
    public void getChangBa() throws IOException {
        printPage("http://changba.com/s/QpT3lOar-4DtKbmCFNr_wA?code=011pjohh01Qnsz1aUnfh0mrxhh0pjohq&state=STATE");
    }

    /**
     * Sends a GET request to {@code url} and prints the body when the status code is 200.
     * Both the client and the response are closed on every path, including when executing
     * the request or reading the body throws.
     *
     * @param url address to request
     * @throws IOException if the request or reading the response fails
     */
    private static void printPage(String url) throws IOException {
        HttpGet get = new HttpGet(url);
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(get)) {
            StatusLine line = response.getStatusLine();
            if (line.getStatusCode() == STATUS_OK) {
                HttpEntity entity = response.getEntity();
                // Convert the response data to a string.
                String html = EntityUtils.toString(entity, RESPONSE_CHARSET);
                System.out.println(html);
            }
        }
    }

    /**
     * Logs in with a POST request, then requests a second page while sending the cookies
     * collected from the login, and prints that page's body to standard output.
     *
     * @throws IllegalStateException if either request fails with an {@link IOException};
     *         the original exception is kept as the cause so the test fails visibly
     *         instead of continuing after a failed login
     */
    @Test
    public void testLoginPinyougou() {
        String loginUrl = "http://localhost:9102/login";
        // Page to visit once logged in.
        String afterLoginUrl = "http://localhost:9102/itemCat/findByParentId.do?parentId=1";

        HttpClient httpClient = new HttpClient();
        // Have HttpClient handle cookies with the browser-compatible policy.
        httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);

        // Log in with a POST carrying the login form fields.
        PostMethod postMethod = new PostMethod(loginUrl);
        NameValuePair[] data = {
            new NameValuePair("username", "wangyi"),
            new NameValuePair("password", "123456"),
            new NameValuePair("m1", "2")
        };
        postMethod.setRequestBody(data);
        try {
            httpClient.executeMethod(postMethod);
        } catch (IOException e) {
            throw new IllegalStateException("Login request to " + loginUrl + " failed", e);
        } finally {
            // Hand the connection back so the follow-up request can use it.
            postMethod.releaseConnection();
        }

        // Collect the cookies stored in the client state after the login.
        StringBuilder cookieHeader = new StringBuilder();
        for (Cookie c : httpClient.getState().getCookies()) {
            cookieHeader.append(c.toString()).append(';');
        }

        GetMethod getMethod = new GetMethod(afterLoginUrl);
        // Send the login cookies along with the request to the protected page.
        getMethod.setRequestHeader("cookie", cookieHeader.toString());
        // Extra request headers go on the request that is still to be sent; setting them
        // on the already-executed login POST had no effect.
        getMethod.setRequestHeader("Referer", "http://localhost:9102/admin/goods_edit.html");
        getMethod.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36");
        try {
            httpClient.executeMethod(getMethod);
            // Print the returned data to check whether the request succeeded.
            String text = getMethod.getResponseBodyAsString();
            System.out.println(text);
        } catch (IOException e) {
            throw new IllegalStateException("Request to " + afterLoginUrl + " failed", e);
        } finally {
            getMethod.releaseConnection();
        }
    }
}

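// Residue from the web page this code was copied from: "图片.png" (image placeholder)
// and "网友评论" (reader-comments heading).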