前言
马上要中秋节了,白又白已经给各位看官准备好了月饼。有了月饼我们就得赏月了,可光赏月好像或多或少有点单调哈,那我们先赏嫦娥姐姐(本来白又白是想自己画嫦娥的,但是实在太复杂就放弃了)。
采集数据目标
工具使用
开发工具:pycharm
开发环境:Python 3.7、Windows 10;使用工具包:requests
项目思路解析
请求的接口里有对应的图片链接,思路清晰后就可以准备编写代码。1. 获取到 url 地址后,通过 requests 发送网络请求。请求时需要注意带上请求头,因为百度图片的数据也是采集来的,会有网页重定向。
# Request headers sent with every search request.
headers = {
    # Cookie captured from a browser session.
    # NOTE(review): this value appears to include login/session fields
    # (e.g. BDUSS); substitute your own cookie and avoid publishing real ones.
    "Cookie": "BDqhfp=%E6%98%8E%E6%98%9F%E5%8D%95%E4%BA%BA%E7%85%A7%26%260-10-1undefined%26%261132%26%263; PSTM=1606885275; BAIDUID=D9B7A2A3C7555B9A30BC448DE032D13B:FG=1; BIDUPSID=5EEB8A912FC8FDCB0C187D519FABA455; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDSFRCVID=RxtOJeC62CaLjt3rvnJAhLqAYfS_AG3TH6ao5bUjY3stbaPxsXJ7EG0P8f8g0KubzcDrogKKLmOTHpKF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF=tJPD_CLKtDI3fP36qRQtbt00qxby26nfa6T9aJ5nJDoNqIopb5bK54CkXM7rbxok3gQ3Lqo8QpP-HJ7zbxRqQhkD3NQXJU3p2erEKl0MLU7tbb0xynoDMbtNMfnMBMnramOnaPJc3fAKftnOM46JehL3346-35543bRTLnLy5KJYMDF4jj-hj5QLjaRf-b-X2CjyWb88Kb7VbUo95MnkbfJBD4bKWPTJt5rqWqcp2pRaEfTI0pnNQTt7yajK25QaQCQko-O2KJjmJ-Oyy6JpQT8reMDOK5OibCrE3hb-ab3vOpRzXpO1KMPzBN5thURB2DkO-4bCWJ5TMl5jDh3Mb6ksDMDtqtJHKbDDVILMJMK; BDUSS=g5U3pnMlhCRWJWT1lYQzR3SVBuSUN0MUNkWERaSDJHV2xKN3NHUDJzMFhGZTlmRUFBQUFBJCQAAAAAAAAAAAEAAAAIs6iyAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABeIx18XiMdfRE; BDUSS_BFESS=g5U3pnMlhCRWJWT1lYQzR3SVBuSUN0MUNkWERaSDJHV2xKN3NHUDJzMFhGZTlmRUFBQUFBJCQAAAAAAAAAAAEAAAAIs6iyAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABeIx18XiMdfRE; delPer=0; PSINO=6; __yjsv5_shitong=1.0_7_65901da49a5a59037e920cbd2020d5073dda_300_1606918958343_113.240.215.138_cdf8224b; H_PS_PSSID=1468_32855_33059_33098_33100_33199_33147_22160; BA_HECTOR=20050k2gakah8100r71fsguaf0q; BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; firstShowTip=1; cleanHistoryStatus=0; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; userFrom=tupian.baidu.com; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; indexPageSugList=%5B%22%E6%98%8E%E6%98%9F%E5%8D%95%E4%BA%BA%E7%85%A7%22%2C%22%E6%98%8E%E6%98%9F%22%5D",
    # User agent string copied from a desktop browser.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
    # Page the request claims to originate from.
    "Referer": "https://tupian.baidu.com/search/index",
    "Host": "tupian.baidu.com",
}

# Walk result pages 5..49; each page holds 30 results (rn=30), so the
# offset passed as `pn` is the page index times 30.
# NOTE(review): `key` (the search keyword) is not defined in this snippet;
# it is presumably set earlier in the full script - confirm.
for i in range(5, 50):
    # The original text had "latest=©right=", an HTML-entity corruption of
    # "latest=&copyright="; the two query parameters are restored here.
    url = "https://tupian.baidu.com/search/acjson?tn=resultjson_com&logid=11528842549528169565&ipn=rj&ct=201326592&is=&fp=result&queryWord={}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&word={}&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&pn={}&rn=30&gsm=78&1606974013784=".format(key, key, i * 30)
    # Send the request; the timeout keeps a stalled connection from
    # blocking the loop indefinitely.
    response = requests.get(url, headers=headers, timeout=10)
获取到的数据为 json 数据,可以转换成字典后直接取出里面的图片 url;这里直接用正则从响应文本中匹配出图片 url。
# Pull every thumbnail URL out of the raw response text with a regex.
# This runs once per fetched page, i.e. as part of the request loop body.
url_list = re.findall('"thumbURL":"(.*?)",', response.text)
print(url_list)
对图片网址发送网络请求,并用图片的网络地址来命名、保存对应的图片。
关注我持续为您分享干货内容,你的收藏、评论、点赞就是对我最大的支持!
网友评论