参考四月党同好的 crawl 思路,爬取 URL:https://www.pixiv.net/search.php?s_mode=s_tag&word=%E5%AE%AB%E5%9B%AD%E8%96%B0
analysis.jpg

1) 模拟登录:pixiv 登录入口
解析 DOM 结构时,未发现表单字段的 name 值;模拟表单 POST 时,
email 字段的 name 值为 pixiv_id,password 字段的 name 值为 password。
Form Data
分析URL
- 登录URL https://accounts.pixiv.net/login?lang=zh&source=pc&view_type=page&ref=wwwtop_accounts_index
- 用户设置URL https://www.pixiv.net/setting_profile.php
测试是否允许跟踪Cookies(ps:这里用题主的账号测试,求不黑)
import os
import sys

import requests

# Login form endpoint and the user settings page fetched after logging in.
LOGIN_URL = "https://accounts.pixiv.net/login?lang=zh&source=pc&view_type=page&ref=wwwtop_accounts_index"
SETTINGS_URL = "https://www.pixiv.net/setting_profile.php"
# Seconds to wait on each request; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# A Session stores cookies from each response and sends them on later requests,
# which is what this script is testing.
session = requests.Session()

# Credentials are read from the environment so that no real account details
# are committed to (or published with) the source.
try:
    params = {
        'pixiv_id': os.environ['PIXIV_ID'],
        'password': os.environ['PIXIV_PASSWORD'],
    }
except KeyError as missing:
    sys.exit(f"Set the {missing.args[0]} environment variable before running.")

# Passed positionally, `params` is the `data` argument, i.e. the form-encoded
# POST body.
s = session.post(LOGIN_URL, params, timeout=REQUEST_TIMEOUT)
print("Cookies is set to:")
# Only the cookies set by this particular response, not the whole session jar.
print(s.cookies.get_dict())
print("-------------------")
print("Going to Page")
s = session.get(SETTINGS_URL, timeout=REQUEST_TIMEOUT)
print(s.text)

# The returned page reports the login state in an inline script; receiving
# cookies alone does not prove the login succeeded. Warn on stderr so the
# stdout output stays the same as before.
if "pixiv.user.loggedIn = false" in s.text:
    print("Login failed: the fetched page reports pixiv.user.loggedIn = false",
          file=sys.stderr)
console output
C:\Users\26645\AppData\Local\Programs\Python\Python36\python.exe F:/pythonProject/PixivSpider/PixivChange.py
Cookies is set to:
{'PHPSESSID': '215f3e623ac9dda164ba310349d62f34', 'p_ab_id': '7', 'p_ab_id_2': '0'}
-------------------
Going to Page
<!DOCTYPE html>
<!-- https://bugbounty.jp/program/0602f8c6f136dbbd92fbb909 --><html lang="ja" class=" page-cool-index" xmlns:wb="http://open.weibo.com/wb">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=970">
<meta name="format-detection" content="telephone=no">
<meta property="og:site_name" content="pixiv">
<meta property="fb:app_id" content="140810032656374">
<meta property="wb:webmaster" content="4fd391fccdb49500" />
<meta property="twitter:card" content="summary_large_image">
<meta property="twitter:site" content="@pixiv">
<meta property="twitter:title" content="イラストコミュニケーションサービス [pixiv]">
<meta property="twitter:description" content="pixiv(ピクシブ)は、作品の投稿・閲覧が楽しめる「イラストコミュニケーションサービス」です。幅広いジャンルの作品が投稿され、ユーザー発の企画やメーカー公認のコンテストが開催されています。">
<meta property="twitter:image" content="https://source.pixiv.net/www/images/share/pictures.jpg">
<meta property="og:title" content="イラストコミュニケーションサービス [pixiv]">
<meta property="og:type" content="website">
<meta property="og:description" content="pixiv(ピクシブ)は、作品の投稿・閲覧が楽しめる「イラストコミュニケーションサービス」です。幅広いジャンルの作品が投稿され、ユーザー発の企画やメーカー公認のコンテストが開催されています。">
<meta property="og:image" content="https://source.pixiv.net/www/images/share/pictures.jpg">
<meta name="application-name" content="pixiv">
<meta name="msapplication-tooltip" content="イラストコミュニケーションサービス">
<meta name="msapplication-starturl" content="https://www.pixiv.net/"><meta name="msapplication-navbutton-color" content="#0096db">
<meta name="msapplication-task" content="name=作品投稿;action-uri=https://www.pixiv.net/upload.php;icon-uri=https://source.pixiv.net/www/images/ico/upload.ico">
<meta name="msapplication-task" content="name=作品管理;action-uri=https://www.pixiv.net/member_illust.php;icon-uri=https://source.pixiv.net/www/images/ico/settings.ico">
<meta name="msapplication-task" content="name=ブックマーク;action-uri=https://www.pixiv.net/bookmark.php;icon-uri=https://source.pixiv.net/www/images/ico/bookmarks.ico">
<meta name="msapplication-task" content="name=受信箱;action-uri=https://www.pixiv.net/msgbox.php;icon-uri=https://source.pixiv.net/www/images/ico/messages.ico">
<meta name="msapplication-task" content="name=フィード;action-uri=https://www.pixiv.net/stacc/;icon-uri=https://source.pixiv.net/www/images/ico/stacc.ico">
<meta name="google" content="nositelinkssearchbox">
<title>イラスト コミュニケーションサービス[pixiv(ピクシブ)]</title>
<meta name="keywords" content="pixiv,ピクシブ,イラスト,イラストレーション,マンガ,漫画,manga,コミュニティ,SNS,投稿,コンテスト">
<meta name="description" content="pixiv(ピクシブ)は、作品の投稿・閲覧が楽しめる「イラストコミュニケーションサービス」です。幅広いジャンルの作品が投稿され、ユーザー発の企画やメーカー公認のコンテストが開催されています。">
<script>
var pageLoadStartTime = +(new Date);
</script>
<script>
console && console.log && console.log("https://bugbounty.jp/program/0602f8c6f136dbbd92fbb909"); </script>
<link rel="alternate" media="only screen and (max-width: 640px)" href="https://touch.pixiv.net/index.php?return_to=%2Fsetting_profile.php" >
<link rel="canonical" href="https://www.pixiv.net/">
<link rel="shortcut icon" type="image/vnd.microsoft.icon" href="https://www.pixiv.net/favicon.ico">
<script>
!function(){"use strict";function a(){for(var a=[/\bMSIE\b/,/\bBingPreview\b/],b=0,c=a;b<c.length;b++){if(c[b].test(navigator.userAgent))return!0}return!1}function b(a,b){var c=b?"Promise.reject: ":"";if(d(a))return c+["type: "+a.type,f(a.target)?"target: {src: "+a.target.src+"}":"target: "+a.target,"currentTarget: "+a.currentTarget,"eventPhase: "+a.eventPhase].join(", ");if(e(a))return c+a.toString();if("object"==typeof a)try{return c+JSON.stringify(a)}catch(g){}return c+a}function c(a,b){return Object.prototype.toString.call(a)==="[object "+b+"]"}function d(a){return!!c(a,"Event")||/^\[object \w+Event\]$/.test(Object.prototype.toString.call(a))}function e(a){return c(a,"Error")}function f(a){return c(a,"HTMLScriptElement")}window.ErrorLogger=function(){function c(a,b,c){this.userId=a,this.production=b,this.premium=c,b?(this.service="www.pixiv.net",this.api="https://www.pixiv.net/rpc/js_error.php"):(this.service=location.host,this.api="/rpc/js_error.php")}return c.prototype.install=function(){var b=this;if(!a()){var c=!1;window.onerror=function(a,d,e,f,g){window.onerror=null,b.handle(a,d,e,f,g,c)};var d=function(a){e(a.reason)&&(c||(c=!0,setTimeout(function(){throw a.reason})))};window.onunhandledrejection=d,window.addEventListener&&window.addEventListener("unhandledrejection",d)}},c.prototype.time=function(a,b,c){this.send("js_time",location.href,0,a,b,c)},c.prototype.send=function(a,c,d,e,f,g,h){var i=encodeURIComponent;try{var j=b(a,!!h),k=["service="+i(this.service),"message="+i(j),"url="+i(""+c),"line="+i(""+d),"location="+i(location.href),"user_id="+i(this.userId),"premium="+this.premium];null!=e&&k.push("html_end_sec="+i(""+e),"dom_ready_sec="+i(""+(f||0)),"onload_sec="+i(""+(g||0))),(new Image).src=this.api+"?"+k.join("&")}catch(l){(new Image).src=this.api+"?service="+i(this.service)+"&message="+i("send error: "+l.message)+"&line="+i(""+(l.line||""))}},c.prototype.handle=function(a,b,c,d,e,f){if(b||0!==c){var 
g=null!=e?e:a;this.production?this.send(g,b,c,undefined,undefined,undefined,!!f):"undefined"!=typeof console&&console&&console.debug?"undefined"!=typeof navigator&&navigator.userAgent.match(/Firefox\//)&&console.error(g):alert(["[JavaScript Error]","",g,"",(b||"unknown")+":"+(c||"unknown"),"","---",location.href].join("\n"))}},c}()}();
</script><script>
(function() {
var h = new ErrorLogger("",true,0);
delete window.ErrorLogger;
h.install();
window._time = function () { h.time.apply(h, arguments) };
window._send = function () { h.send.apply(h, arguments) };
})()
</script>
<!--[if IE 8]>
<link rel="stylesheet" href="https://source.pixiv.net/www/css/ie.css?9955c9072470e7612f9192fb8238fa08">
<![endif]-->
<!--[if IE 9]>
<link rel="stylesheet" href="https://source.pixiv.net/www/css/ie9.css?e2871da6753d43533b513cdac2d82a38">
<![endif]-->
<!--[if lte IE 8]>
<script src="https://source.pixiv.net/www/js/lib/html5shiv/html5shiv.js"></script>
<![endif]-->
<!--[if gte IE 9]><!-->
<script src="https://source.pixiv.net/www/js/lib/svg4everybody/svg4everybody.ie8.js"></script>
<!--<![endif]-->
<script>
Object.defineProperty(window, 'bundle_public_path', {
value: "https:\/\/source.pixiv.net\/www\/js\/bundle\/"
})
</script>
<link rel="stylesheet" href="https://source.pixiv.net/www/js/bundle/app.826b11dbfb9b59b8f927e42397790e96.css">
<script src="https://source.pixiv.net/www/js/bundle/bootstrap.339540ab3f02832e3b02.js" crossorigin="anonymous"></script>
<script src="https://source.pixiv.net/www/js/bundle/lib.39b9f9b70abb49df7d92.js" crossorigin="anonymous"></script>
<script src="https://source.pixiv.net/www/js/bundle/colon.74b72b6a439a1014ee2c.js" crossorigin="anonymous"></script>
<script src="https://source.pixiv.net/www/js/bundle/app.a8874808f2e169fbdc82.js" crossorigin="anonymous"></script>
<!--[if lte IE 9]>
<script src="https://source.pixiv.net/www/js/lib/ajaxhooks/xdr.js"></script>
<![endif]-->
<script>
pixiv.development = false;
pixiv.sourcePath = "https:\/\/source.pixiv.net\/www\/";
pixiv.commonSourcePath = "https:\/\/source.pixiv.net\/common\/";
pixiv.config.sketchUrlBase = "https:\/\/sketch.pixiv.net";
pixiv.context.token = "6d5f43b58dbdd4f240a55565c6c1c9f8";
</script>
<script>
pixiv.user.loggedIn = false;
pixiv.ads = {};
pixiv.ads.is_active_www_illustup = false;
</script>
<script>
var _gaq = _gaq || [];
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://' : 'http://') + 'stats.g.doubleclick.net/dc.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
_gaq.push(['_setAccount', 'UA-1830249-3']);
_gaq.push(['_setDomainName', 'pixiv.net']);
if (window.pixiv) {
if (pixiv.user && pixiv.user.loggedIn) {
_gaq.push(['_setCustomVar', 1, 'login', 'yes', 3]);
_gaq.push(['_setCustomVar', 3, 'plan', pixiv.user.premium ? 'premium' : 'normal', 1]);
_gaq.push(['_setCustomVar', 5, 'gender', pixiv.user.gender, 1]);
_gaq.push(['_setCustomVar', 6, 'user_id', pixiv.user.id, 1]);
_gaq.push(['_setCustomVar', 12, 'illustup_flg', pixiv.user.illustup_flg ? 'uploaded' : 'not_uploaded', 3]);
} else {
_gaq.push(['_setCustomVar', 1, 'login', 'no', 3]);
}
(function() {
// クッキーあれば、一回でもログインした人とみなす
if (pixiv.user && window.colon && colon.storage) {
var cookie_name = 'login_ever';
if (colon.storage.cookie(cookie_name)) {// 一度でもログインしたことある
_gaq.push(['_setCustomVar', 2, 'login ever', 'yes', 1]);
} else if (pixiv.user.loggedIn) { // ログインしてる
colon.storage.cookie(cookie_name, 'yes', {
expires: 1000 * 60 * 60 * 24 * 365 * 5, // 5 years
domain: location.hostname
});
_gaq.push(['_setCustomVar', 2, 'login ever', 'yes', 1]);
} else { // ログインしたこと無いし、ログインしてもない
_gaq.push(['_setCustomVar', 2, 'login ever', 'no', 1]);
}
var p_ab_id = colon.storage.cookie('p_ab_id');
var p_ab_id_2 = colon.storage.cookie('p_ab_id_2');
_gaq.push(['_setCustomVar', 9, 'p_ab_id', p_ab_id, 1]);
_gaq.push(['_setCustomVar', 10, 'p_ab_id_2', p_ab_id_2, 1]);
}
} ())
_gaq.push(['_setCustomVar', 11, 'lang', "ja", 1]);
}
if (window.pixiv && pixiv.tracking && pixiv.tracking.URL) {
_gaq.push(['_trackPageview', pixiv.tracking.URL]);
} else {
_gaq.push(['_trackPageview']);
}
</script>
<script>
if (window.pixiv && !pixiv.text) pixiv.text = {};
pixiv.text.modalDefaultCaption = '他にも便利な機能がいっぱい!';
pixiv.text.registerMypixiv = 'pixivに登録して<br>ユーザーとマイピクになろう!!!!';
pixiv.text.loginMypixiv = 'pixivにログインして<br>ユーザーにマイピク申請しよう!!!!';
pixiv.text.registerFavorite = 'pixivに登録して<br>ユーザーをフォローしよう!!!!';
pixiv.text.loginFavorite = 'pixivにログインして<br>ユーザーをフォローしよう!!!!';
pixiv.text.registerMessage = 'pixivに登録して<br>ユーザーにメッセージを送ろう!!!!';
pixiv.text.loginMessage = 'pixivにログインして<br>ユーザーにメッセージを送ろう!!!!';
pixiv.text.registerImage = 'pixivに登録すると<br>さらに大きな画像で閲覧できる!!!!';
pixiv.text.registerBookmark = 'pixivに登録して<br>気に入った作品をブックマークに追加しよう!!!!';
pixiv.text.loginBookmark = 'pixivにログインして<br>気に入った作品をブックマークに追加しよう!!!!';
pixiv.text.registerRate = 'pixivに登録して<br>気に入った作品をいいね!しよう!!!!';
pixiv.text.loginRate = 'pixivにログインして<br>気に入った作品にいいね!を送ろう!!!!';
pixiv.text.overaccess = 'もっと見るならpixivの会員になってさらに快適に!!!';
pixiv.text.overaccessCaption = '大きな画像で閲覧できたり、お気に入り作品をブックマーク!!';
pixiv.text.visit = 'まずは無料登録!<br>pixivに登録して、<br>お絵かきをもっと楽しもう!!!!';
pixiv.text.searchModalPremium = 'pixivプレミアムなら人気の作品がすぐにみつかる!';
pixiv.text.searchModalPremiumCaption = '1番人気の作品を一発検索!<br>1日、1週間、1ヶ月の期間で絞り込むこともできます。';
pixiv.text.reuploadModalPremiumCaption = '塗り忘れを見つけても安心!<br>コメントやいいね!を消さずに再投稿できます!';
pixiv.text.reuploadModalPremium = 'pixivプレミアムになると<br>作品を再投稿できます!';
pixiv.text.captionModalPremiumCaption = '作品の説明などに利用するキャプションの、文字色や太さを変更して装飾することができます。';
pixiv.text.bookmarkRange = 'pixivプレミアムで<br>作品をブックマーク数<br>ごとに<br>絞り込み表示できます';
</script>
<script>
pixiv.context.popular_search_trial_cookie_name = "popular_search_trial_cookie_12";
pixiv.context.popular_search_trial_is_target_user = false;
</script>
<link rel="stylesheet" href="https://source.pixiv.net/www/css/beta/app/app.css?547c10ee1ce15c6866be4c609c65e46d"><link rel="stylesheet" href="https://source.pixiv.net/www/css/novel.css?940d7e622d68e0dfd717a3103d24036b"><link rel="stylesheet" href="https://source.pixiv.net/www/js/bundle/app.826b11dbfb9b59b8f927e42397790e96.css"> <link rel="stylesheet" type="text/css" href="https://source.pixiv.net/www/css/accounts-index.css?632ed6e393fd310954a21583f8858e86">
<script>pixiv.context.backgroundSlideshow = true</script>
<script src="https://source.pixiv.net/common/background-slideshow/bundle.js?d254c75c1687037fa95b"></script>
</head>
<body class="not-logged-in">
<!-- Google Tag Manager -->
<noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-55FG"
height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
'//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
})(window,document,'script','dataLayer','GTM-55FG');</script>
<!-- End Google Tag Manager -->
<div id="ui-tooltip-container" class="_hidden">
<div class="wrapper">
<div class="content"></div>
<div class="nipple"></div>
</div>
</div>
<div id="status-bar"></div>
<script>
pixiv.text.today = '本日';
pixiv.text.yesterday = '昨日';
pixiv.text.notifications = 'メッセージ・ポップボード';
pixiv.text.dailyRanking = 'デイリーランキング';
pixiv.text.weeklyRanking = 'ウィークリーランキング';
pixiv.text.monthlyRanking = 'マンスリーランキング';
pixiv.text.rookieRanking = 'ルーキーランキング';
pixiv.text.daily_r18Ranking = 'R-18 デイリーランキング';
pixiv.text.r18gRanking = 'R-18G ランキング';
pixiv.text.maleRanking = '男子に人気ランキング';
pixiv.text.femaleRanking = '女子に人気ランキング';
</script>
<div id="wrapper">
<noscript>
<div style="background-color:#F2F4F6;text-align:center;margin-bottom:10px;padding:5px;">
<p style="color:#ff0000;">ウェブブラウザのJavaScript(ジャバスクリプト)の設定が無効になっています。<br>Javascriptが無効になっていると、サイト内の一部機能がご利用いただけません。</p>
</div>
</noscript>
<div id="background-slideshow"></div><div class="signup-form"><div class="signup-form__logo-box"><div class="signup-form__logo"></div><div class="signup-form__catchphrase">創作活動をもっとたのしく</div></div><div><a href="https://accounts.pixiv.net/signup?return_to=http%3A%2F%2Fwww.pixiv.net%2Fsetting_profile.php&lang=ja&source=pc&view_type=page&ref=wwwtop_accounts_index" class="signup-form__submit ga-event" data-category="signup_pc_page" data-action="step1" data-label="mail">新規登録</a><a href="https://accounts.pixiv.net/login?return_to=http%3A%2F%2Fwww.pixiv.net%2Fsetting_profile.php&lang=ja&source=pc&view_type=page&ref=wwwtop_accounts_index" class="signup-form__submit--login">ログイン</a></div><div class="signup-form__sns-btn-area"><div>持っているアカウントではじめる</div><div class="sns-button-list"><a href="https://accounts.pixiv.net/gigya-auth?mode=signin&provider=googleplus&source=pc&view_type=page&return_to=http%3A%2F%2Fwww.pixiv.net%2Fsetting_profile.php&lang=ja&ref=wwwtop_accounts_index_google" class="btn-item btn-gplus ga-event" data-category="signup_pc_page" data-action="step1" data-label="googleplus"></a><a href="https://accounts.pixiv.net/gigya-auth?mode=signin&provider=facebook&source=pc&view_type=page&return_to=http%3A%2F%2Fwww.pixiv.net%2Fsetting_profile.php&lang=ja&ref=wwwtop_accounts_index_facebook" class="btn-item btn-facebook ga-event" data-category="signup_pc_page" data-action="step1" data-label="facebook"></a><a href="https://accounts.pixiv.net/gigya-auth?mode=signin&provider=twitter&source=pc&view_type=page&return_to=http%3A%2F%2Fwww.pixiv.net%2Fsetting_profile.php&lang=ja&ref=wwwtop_accounts_index_twitter" class="btn-item btn-twitter ga-event" data-category="signup_pc_page" data-action="step1" data-label="twitter"></a></div></div></div><div id="footer"><div class="_footer-container"><div class="_footer-content"><div class="_footer-content-item"><span class="_icon-text">pixivについて</span><i class="_pico-12 _icon-menu"></i><div class="content-popup 
about">![](https://img.haomeiwen.com/i5309010/90b9750ae2c326bd.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)<span class="footer-pixiv-motto-desc">pixiv(ピクシブ)は、イラストの投稿・閲覧が楽しめる「イラストコミュニケーションサービス」です。幅広いジャンルの作品が投稿され、ユーザー発の企画やメーカー公認のコンテストが開催されています。</span><div></div><dl class="links"><dt>サービス</dt><dd><ul><li><a href="https://comic.pixiv.net/" target="_blank">pixivコミック</a></li><li><a href="https://novel.pixiv.net/" target="_blank">pixivノベル</a></li><li><a href="https://factory.pixiv.net/" target="_blank">pixivFACTORY</a><a href="https://factory.pixiv.net/books" target="_blank">BOOKS</a></li><li><a href="https://booth.pm" target="_blank">BOOTH</a></li><li><a href="https://booth.pm/apollo/" target="_blank">APOLLO</a></li><li><a href="https://www.pixivision.net/ja/" target="_blank">pixivision</a></li><li><a href="https://sketch.pixiv.net/" target="_blank">pixiv Sketch</a></li><li><a href="https://sensei.pixiv.net/" target="_blank">sensei</a></li><li><a href="https://dic.pixiv.net/" target="_blank">ピクシブ百科事典</a></li><li><a href="http://dai2noare.com/" target="_blank">pixiv×テレビ東京 第2のアレ</a></li><li><a href="https://pawoo.net/" target="_blank">Pawoo</a><a href="https://music.pawoo.net/" target="_blank">Pawoo Music</a></li><li><a href="http://drawr.net/" target="_blank">drawr</a></li></ul></dd></dl><dl class="links"><dt>ご利用について</dt><dd><ul><li><a href="https://www.pixiv.net/terms.php">利用規約</a></li><li><a href="https://www.pixiv.net/guideline.php">ガイドライン</a></li><li><a href="https://www.pixiv.net/privacy.php">プライバシーポリシー</a></li><li><a href="https://www.pixiv.help/hc/">お問い合わせ</a></li><li><a href="https://www.pixiv.help/hc/" target="_blank">ヘルプ</a></li></ul></dd></dl><dl class="links"><dt>お知らせ</dt><dd><ul><li><a href="https://www.pixiv.net/info.php">お知らせ</a></li><li><a href="http://inside.pixiv.blog/" target="_blank">pixiv inside</a></li><li><a href="http://twitter.com/pixiv" target="_blank">Twitter</a></li><li><a href="http://www.facebook.com/pixiv" 
target="_blank">Facebook</a></li><li><a href="https://plus.google.com/108650212710562225539" target="_blank" rel="publisher">Google+</a></li><li><a href="http://instagram.com/pixiv" target="_blank">Instagram</a></li><li><a href="http://www.plurk.com/pixiv_tw" target="_blank">Plurk</a></li><li><a href="http://weibo.com/2230227495" target="_blank">weibo</a></li></ul></dd></dl><dl class="links"><dt>広告</dt><dd><ul><li><a href="http://www.pixiv.co.jp/ads">広告掲載</a></li><li><a href="http://www.pixiv.co.jp/ads">公式イラストコンテスト</a></li><li><a href="http://www.pixiv.co.jp/ads">広告資料ダウンロード</a></li></ul></dd></dl><dl class="links"><dt>会社情報</dt><dd><ul><li><a href="http://www.pixiv.co.jp/" target="_blank">運営会社</a></li><li><a href="http://recruit.pixiv.net/" target="_blank" class="js-click-trackable" data-click-category="recruit" data-click-action="From_Footer_ja" data-click-label="">採用情報</a></li></ul></dd></dl><div class="footer-pixiv-c">© pixiv</div></div></div><div class="_footer-content-item"><span class="_icon-text">日本語</span><i class="_pico-12 _icon-menu"></i><ul class="content-popup language"><li class="item ja current"><i class="_pico-12 _icon-check"></i><span class="_icon-text">日本語</span></li><li class="item en "><form name="seten" method="GET" action="/"><input type="hidden" name="lang" value="en"><input type="hidden" name="return_to" value="http://www.pixiv.net/setting_profile.php"><input class="button" type="submit" value="English"></form></li><li class="item ko "><form name="setko" method="GET" action="/"><input type="hidden" name="lang" value="ko"><input type="hidden" name="return_to" value="http://www.pixiv.net/setting_profile.php"><input class="button" type="submit" value="한국어"></form></li><li class="item zh "><form name="setzh" method="GET" action="/"><input type="hidden" name="lang" value="zh"><input type="hidden" name="return_to" value="http://www.pixiv.net/setting_profile.php"><input class="button" type="submit" value="简体中文"></form></li><li class="item zh_tw 
"><form name="setzh_tw" method="GET" action="/"><input type="hidden" name="lang" value="zh_tw"><input type="hidden" name="return_to" value="http://www.pixiv.net/setting_profile.php"><input class="button" type="submit" value="繁體中文"></form></li></ul></div></div></div></div><input type="hidden" id="init-config" class="json-data" value='{"pixivBackgroundSlideshow.illusts":{"portrait":[],"landscape":[{"illust_id":"36919122","illust_title":"Chiptune Hell","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2013\/07\/08\/01\/50\/25\/36919122_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2013\/07\/08\/01\/50\/25\/36919122_p0_master1200.jpg"},"user_name":"shirakaba","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2016\/08\/18\/03\/56\/50\/11367123_035c3d7f5ed4b3c3214d0040d1aa73d3_50.png"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=36919122","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=10163"},{"illust_id":"16848987","illust_title":"\u5d29\u58de\u306e\u4e16\u754c","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2011\/02\/21\/14\/30\/27\/16848987_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2011\/02\/21\/14\/30\/27\/16848987_p0_master1200.jpg"},"user_name":"\u7121\u8a00","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2008\/03\/19\/18\/48\/41\/87063_5b0f9ee7f1f3ca45b36c0be8f8252ded_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=16848987","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=104247"},{"illust_id":"15126670","illust_title":"brunch 
table","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2010\/12\/12\/00\/38\/32\/15126670_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2010\/12\/12\/00\/38\/32\/15126670_p0_master1200.jpg"},"user_name":"comet","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2010\/09\/14\/23\/43\/37\/2198389_b5cacb037104a8d4220783db2fa7ca9b_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=15126670","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=113726"},{"illust_id":"43409888","illust_title":"\u3086\u3089\u3086\u3089","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2014\/05\/10\/05\/54\/44\/43409888_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2014\/05\/10\/05\/54\/44\/43409888_p0_master1200.jpg"},"user_name":"\u6708\u5ca1\u6708\u7a42","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2014\/10\/21\/23\/40\/05\/8537098_38e0f21511a621d5549e768d01d4d4dd_50.png"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=43409888","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=122500"},{"illust_id":"37203249","illust_title":"Vanity","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2013\/07\/21\/01\/10\/21\/37203249_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2013\/07\/21\/01\/10\/21\/37203249_p0_master1200.jpg"},"user_name":"\u5343\u5e74\u51db","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2016\/10\/15\/23\/07\/58\/11624649_ec6b0d60444e422ac596612a160bdafc_50.png"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=37203249","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=143452"},{"illust_id":"22600763","illust_title":"\uff06 
Gothika","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2011\/10\/24\/23\/02\/44\/22600763_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2011\/10\/24\/23\/02\/44\/22600763_p0_master1200.jpg"},"user_name":"chibi","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2011\/09\/28\/23\/42\/32\/3667550_112278a588cf43b2625ff22dce8c5ac6_50.gif"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=22600763","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=14914"},{"illust_id":"31251762","illust_title":"\u3042\u306e\u65e5\u898b\u305f\u7a7a\u3092\u50d5\u306f\u5fd8\u308c\u306a\u3044","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2012\/11\/03\/15\/05\/14\/31251762_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2012\/11\/03\/15\/05\/14\/31251762_p0_master1200.jpg"},"user_name":"\u3068\u308d\u3063\u3061","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2012\/09\/23\/09\/37\/16\/5188950_847637cb21364a05efe0823eae6f4436_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=31251762","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=1661432"},{"illust_id":"37016225","illust_title":"\u590f\u98a8\u524d\u7dda","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2013\/07\/13\/00\/10\/28\/37016225_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2013\/07\/13\/00\/10\/28\/37016225_p0_master1200.jpg"},"user_name":"\u9577\u4e43","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2016\/07\/01\/00\/20\/00\/11135863_048d96d55816dc89fbd236d526330fd8_50.png"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=37016225","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=1975032"},{"illust_id":"47621790","illust_title":"\u671d\u306e\u6d77\u6e2f","
url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2014\/12\/18\/23\/09\/24\/47621790_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2014\/12\/18\/23\/09\/24\/47621790_p0_master1200.jpg"},"user_name":"ArseniXC","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2012\/02\/06\/20\/44\/48\/4172636_cf06fd6cfd7be58ee2990a9f97db6cb8_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=47621790","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=202175"},{"illust_id":"57080648","illust_title":"\u5922\u98a8\u306e\u30e9\u30d7\u30bd\u30c7\u30a3\u30fc","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2016\/05\/27\/00\/12\/07\/57080648_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2016\/05\/27\/00\/12\/07\/57080648_p0_master1200.jpg"},"user_name":"\u9632\u4eba","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2014\/12\/02\/18\/57\/54\/8681178_97ea866b8a4d52acac0d7a820783d00e_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=57080648","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=211515"},{"illust_id":"29601755","illust_title":"\u304a\u5f15\u3063\u8d8a\u3057","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2012\/08\/24\/00\/00\/27\/29601755_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2012\/08\/24\/00\/00\/27\/29601755_p0_master1200.jpg"},"user_name":"\u84bc\u5ddd\u308f\u304b","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2017\/01\/30\/22\/18\/24\/12078436_97aa21b0ffb8d7e7f61b2c53f1e1e156_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=29601755","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=25587"},{"illust_id":"24958388","illust_title":"\u63a2\u5075\u306e\u30d6\u30e9\u30f3\u30c1","url":{"me
dium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2012\/02\/10\/00\/12\/59\/24958388_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2012\/02\/10\/00\/12\/59\/24958388_p0_master1200.jpg"},"user_name":"\u96e8","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2014\/05\/29\/18\/32\/16\/7926046_bcd4d216d7d0c8fbfeb1062bf70617f5_50.png"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=24958388","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=27087"},{"illust_id":"40191798","illust_title":"\u4e0b\u304c\u3063\u3066\u3066","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2013\/12\/09\/20\/52\/42\/40191798_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2013\/12\/09\/20\/52\/42\/40191798_p0_master1200.jpg"},"user_name":"psk","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2013\/03\/21\/15\/52\/43\/5990070_c93b9aa0b1fd1da49dcf7bc9d4edbdca_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=40191798","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=3351039"},{"illust_id":"11333874","illust_title":"\u96ea\u5c71","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2010\/06\/17\/15\/52\/09\/11333874_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2010\/06\/17\/15\/52\/09\/11333874_p0_master1200.jpg"},"user_name":"Gu","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2014\/11\/19\/20\/23\/24\/8638105_e851c496724f437d94262c58d0c13e1b_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=11333874","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=366411"},{"illust_id":"8913281","illust_title":"Vision","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2010\/02\/21\/00\/03\/29\/8913281_p0_master1200.jpg","1200x120
0":"https:\/\/i.pximg.net\/img-master\/img\/2010\/02\/21\/00\/03\/29\/8913281_p0_master1200.jpg"},"user_name":"\u3075\u3049\uff5e\u3069","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2016\/02\/24\/16\/02\/08\/10580950_076b832608f8d62bc20808d84e77d7e8_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=8913281","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=370712"},{"illust_id":"35231457","illust_title":"\u30e1\u30ab\u30cb\u30c3\u30af\u30fb\u30ed\u30f3\u30c9","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2013\/04\/25\/18\/54\/59\/35231457_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2013\/04\/25\/18\/54\/59\/35231457_p0_master1200.jpg"},"user_name":"\u304a\u306b\u306d\u3053","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2017\/04\/16\/22\/36\/57\/12427531_354afa0665b7e041a2a0005772f44900_50.png"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=35231457","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=3952"},{"illust_id":"38631998","illust_title":"\u6d41\u661f\u96e8","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2013\/09\/20\/17\/53\/12\/38631998_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2013\/09\/20\/17\/53\/12\/38631998_p0_master1200.jpg"},"user_name":"\u3052\u307f","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2014\/03\/12\/18\/32\/50\/7589860_5117ba8cd0df0963633ae6a6b5616b5c_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=38631998","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=396769"},{"illust_id":"35019721","illust_title":"epitaph","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2013\/04\/15\/00\/43\/25\/35019721_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img
\/2013\/04\/15\/00\/43\/25\/35019721_p0_master1200.jpg"},"user_name":"\u3042\u3055\u304e\u308a","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2013\/01\/25\/18\/08\/14\/5731959_f1fed145f5fd399fe3ed04a3aa9a134d_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=35019721","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=4357166"},{"illust_id":"49281286","illust_title":"\u68ee\u306e\u90fd","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2015\/03\/15\/01\/01\/48\/49281286_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2015\/03\/15\/01\/01\/48\/49281286_p0_master1200.jpg"},"user_name":"\u30b8\u30e3\u30f3\u30fb\u30dd\u30dd","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2014\/12\/06\/00\/12\/01\/8691367_1d45847544a736f7962b050d6394cf43_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=49281286","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=455626"},{"illust_id":"26339586","illust_title":"\u30c0\u30a4\u30d6","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2012\/04\/04\/21\/24\/46\/26339586_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2012\/04\/04\/21\/24\/46\/26339586_p0_master1200.jpg"},"user_name":"\u98f4\u6751","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2009\/11\/16\/22\/14\/55\/1239185_31374e5c882f5a649f877f516e630480_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=26339586","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=47488"},{"illust_id":"37602900","illust_title":"StarTLiNe","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2013\/08\/06\/20\/38\/58\/37602900_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2013\/08\/06\/20\/38\/58\/37602900_p0_master1200.jpg"},"user
_name":"\u3076\u30fc\u305f","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2014\/10\/25\/01\/29\/21\/8546824_f685017cf0c3afe25d47ccbce9f488b7_50.png"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=37602900","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=652196"},{"illust_id":"22208183","illust_title":"\u8da3\u3044\u305f\u307e\u307e\u751f\u304d\u306a\u3055\u3044","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2011\/10\/06\/16\/55\/08\/22208183_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2011\/10\/06\/16\/55\/08\/22208183_p0_master1200.jpg"},"user_name":"\u70cf\u9d28","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2013\/05\/27\/15\/28\/17\/6291423_d6b16f0dcfffe7b5140a394503c130a8_50.png"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=22208183","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=750680"},{"illust_id":"57196809","illust_title":"\u5c45\u9152\u5c4b\u8857","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2016\/06\/03\/01\/55\/24\/57196809_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2016\/06\/03\/01\/55\/24\/57196809_p0_master1200.jpg"},"user_name":"\u30dd\u30b3","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2016\/02\/15\/01\/03\/56\/10535307_95c460fa30c71e972dd7445ab9248d49_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=57196809","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=76266"},{"illust_id":"34512986","illust_title":"\u3055\u3056\u306a\u307f","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2013\/03\/25\/12\/44\/18\/34512986_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2013\/03\/25\/12\/44\/18\/34512986_p0_master1200.jpg"},"user_name":"loundraw","profile_im
g":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2015\/10\/26\/13\/27\/37\/10040113_f5351110064281797872cabe956a8404_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=34512986","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=772547"},{"illust_id":"16983806","illust_title":"\u5de3\u7acb\u3061","url":{"medium":"https:\/\/i.pximg.net\/c\/600x600\/img-master\/img\/2011\/02\/27\/00\/12\/23\/16983806_p0_master1200.jpg","1200x1200":"https:\/\/i.pximg.net\/img-master\/img\/2011\/02\/27\/00\/12\/23\/16983806_p0_master1200.jpg"},"user_name":"\u30b3\u30fc\u30e9","profile_img":{"main_s":"https:\/\/i.pximg.net\/user-profile\/img\/2010\/02\/21\/13\/25\/25\/1508237_25b902302d5f03fd6688b5afd466d4b1_50.jpg"},"www_member_illust_medium_url":"https:\/\/www.pixiv.net\/member_illust.php?mode=medium\u0026illust_id=16983806","www_user_url":"https:\/\/www.pixiv.net\/member.php?id=810305"}]}}'>
</div>
<!-- Google Tag Manager -->
<noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-NH5MTD"
height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
'//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
})(window,document,'script','dataLayer','GTM-NH5MTD');</script>
<!-- End Google Tag Manager -->
<script id="capybara-status-check" data-t-code="0ec06063442619c76e42d6ebed9fa6dd" data-m-code="5276059c3a4531f3da239d1ea8771242"></script>
</body>
</html>
- 构造请求头
def __init__(self):
    """Store the Pixiv endpoints and the browser-like request headers.

    The method is spelled ``__init__`` (double underscores) so that Python
    runs it when the object is created.
    """
    # Login page; login() reads the post_key form field from it.
    self.base_url = 'https://accounts.pixiv.net/login?lang=zh&source=pc&view_type=page&ref=wwwtop_accounts_index'
    # Endpoint the login form is posted to.
    self.login_url = 'https://accounts.pixiv.net/api/login?lang=zh'
    # Tag search whose result pages are crawled.
    self.target_url = 'https://www.pixiv.net/search.php?s_mode=s_tag&word=%E5%AE%AB%E5%9B%AD%E8%96%B0'
    self.main_url = 'http://www.pixiv.net'
    self.headers = {
        'Host': 'accounts.pixiv.net',
        'Origin': 'https://accounts.pixiv.net',
        # Reuse base_url instead of a hand-typed copy that contained spaces.
        'Referer': self.base_url,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
                      ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest'
    }
- 构造请求体
# Placeholder credentials; plain strings (a trailing comma would make them tuples).
self.pixiv_id = 'userid'
self.password = '*****'
self.post_key = []
self.return_to = 'https://www.pixiv.net/'
# Directory where downloaded images are stored (raw string keeps the backslashes literal).
self.load_path = r'F:\work\picture'
self.ip_list = []
- 捕捉需要获取的字段
def login(self):
    """Read the post_key token from the login page and submit the login form.

    Requests go through ``se``, the module-level ``requests.Session`` used by
    the rest of the crawler, so the login cookies are kept for later requests.

    :raises RuntimeError: when the login page has no ``post_key`` field.
    """
    post_key_xml = se.get(self.base_url, headers=self.headers).text
    post_key_soup = BeautifulSoup(post_key_xml, 'lxml')
    # Look the hidden field up by its name attribute, not by tag name.
    token_field = post_key_soup.find('input', attrs={'name': 'post_key'})
    if token_field is None:
        raise RuntimeError('post_key field not found on the login page')
    self.post_key = token_field['value']
    # Build the request body
    data = {
        'pixiv_id': self.pixiv_id,
        'password': self.password,
        'post_key': self.post_key,
        'return_to': self.return_to
    }
    # Simulate the login POST against the API endpoint, not the HTML page
    se.post(self.login_url, data=data, headers=self.headers)
- 测试登录
import requests
from bs4 import BeautifulSoup
# Module-level session shared by every request the crawler makes.
se = requests.Session()
class Pixiv(object):
    """Minimal Pixiv client used to test the login flow."""

    def __init__(self):
        """Store endpoints, request headers and placeholder credentials."""
        # Login page; login() reads the post_key form field from it.
        self.base_url = 'https://accounts.pixiv.net/login?lang=zh&source=pc&view_type=page&ref=wwwtop_accounts_index'
        # Endpoint the login form is posted to.
        self.login_url = 'https://accounts.pixiv.net/api/login?lang=zh'
        self.target_url = 'https://www.pixiv.net/search.php?s_mode=s_tag&word=%E5%AE%AB%E5%9B%AD%E8%96%B0'
        self.main_url = 'http://www.pixiv.net'
        self.headers = {
            # Reuse base_url instead of a hand-typed copy that contained spaces.
            'Referer': self.base_url,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
                          ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        }
        # Plain strings; a trailing comma would turn them into tuples.
        self.pixiv_id = 'userid'
        self.password = 'password'
        self.post_key = []
        self.return_to = 'https://www.pixiv.net/'
        # Directory where downloaded images are stored
        self.load_path = r'F:\work\picture'
        self.ip_list = []

    def login(self):
        """Read the post_key token from the login page and submit the form.

        :raises RuntimeError: when the login page contains no input field.
        """
        post_key_xml = se.get(self.base_url, headers=self.headers).text
        post_key_soup = BeautifulSoup(post_key_xml, 'lxml')
        # Prefer the field named post_key; fall back to the first <input>.
        token_field = (post_key_soup.find('input', attrs={'name': 'post_key'})
                       or post_key_soup.find('input'))
        if token_field is None:
            raise RuntimeError('post_key field not found on the login page')
        self.post_key = token_field['value']
        # Build the request body
        data = {
            'pixiv_id': self.pixiv_id,
            'password': self.password,
            'post_key': self.post_key,
            'return_to': self.return_to
        }
        se.post(self.login_url, data=data, headers=self.headers)
# Run the login test only when the file is executed as a script.
if __name__ == '__main__':
    pixiv = Pixiv()
    pixiv.login()
console output:事故现场,遭遇反爬
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "F:/pythonProject/PixivSpider/PixivChange.py", line 99, in <module>
pixiv.login()
File "F:/pythonProject/PixivSpider/PixivChange.py", line 84, in login
post_key_xml = se.get(self.base_url, headers=self.headers).text
File "C:\Users\26645\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\sessions.py", line 515, in get
return self.request('GET', url, **kwargs)
File "C:\Users\26645\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\sessions.py", line 502, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\26645\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\sessions.py", line 612, in send
r = adapter.send(request, **kwargs)
File "C:\Users\26645\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\adapters.py", line 514, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: EOF occurred in violation of protocol (_ssl.c:749)
看了一下这位朋友的讲解,爬虫的作用是将找到的大量数据爬取到本地,
通常网站管理员会用限制ip频率的方式进行反爬虫,找了一个ip代理池维护访问ip
-
获取ip代理池 (ps:若要使用正则:
r'<td data-title="IP">(.*?)</td>'
)
def get_proxy(self):
    """Scrape the free proxy list and add new 'ip:port' entries to self.ip_list.

    IP and PORT cells are paired with zip(), so a mismatched cell count cannot
    raise IndexError, and entries already in the list are not added twice.
    """
    html = requests.get("http://www.kuaidaili.com/free/", timeout=10)
    bsObj = BeautifulSoup(html.text, 'html.parser')
    ip_cells = bsObj.findAll("td", {"data-title": "IP"})
    port_cells = bsObj.findAll("td", {"data-title": "PORT"})
    for ip_cell, port_cell in zip(ip_cells, port_cells):
        # Strip surrounding whitespace so the entry can be used in a proxy URL.
        ip_info = ip_cell.text.strip() + ':' + port_cell.text.strip()
        if ip_info not in self.ip_list:
            self.ip_list.append(ip_info)
        print(ip_info)
console output: 拿到IP代理池
ip_list.jpg接着我们需要设置爬取网页错误时,设置延时及时更换ip代理
6)设置http代理延时
构建思路:
- 检查爬取网站是否有空闲的动态端口(ps:p站看起来不太可能有,加个判断以防万一),如果有发出一个get请求获取页面
- 随机从ip列表中用5个ip进行尝试
- 每次请求失败后等待5秒再重试,剩余尝试次数减1;直连的重试次数用完后,从之前构建的代理池中随机选择一个ip作为代理继续请求,代理失败则更换另一个ip并将剩余尝试次数减1
def get_html(self, url, timeout, proxy=None, num_entries=5):
    """GET ``url`` through the shared session, retrying and falling back to proxies.

    Without ``proxy`` the page is requested directly and retried up to
    ``num_entries`` times, 5 seconds apart. If that still fails, or when
    ``proxy`` is given, the request goes through a proxy and every failure
    switches to another random entry of ``self.ip_list``.

    :param url: address to fetch.
    :param timeout: per-request timeout in seconds, passed to requests.
    :param proxy: optional requests-style ``proxies`` dict to start with.
    :param num_entries: number of retries after the first attempt.
    :return: the ``requests.Response``.
    :raises requests.exceptions.RequestException: when every attempt failed.
    :raises IndexError: when no proxy address is available.
    """
    retries = max(num_entries, 0)
    if proxy is None:
        for remaining in range(retries, -1, -1):
            try:
                return se.get(url, headers=self.headers, timeout=timeout)
            except requests.exceptions.RequestException:
                if remaining > 0:
                    print('获取网页出错,5秒后将会重新倒数第', remaining, '次')
                    time.sleep(5)
        print('开始使用代理')
        time.sleep(5)
        # The proxy phase starts with the default budget of 5 retries.
        retries = 5
    for remaining in range(retries, -1, -1):
        if proxy is None:
            if not self.ip_list:
                # Load the proxy pool lazily the first time it is needed.
                self.get_proxy()
            address = str(random.choice(self.ip_list)).strip()
            # Register the proxy for both schemes; the target site is https.
            proxy = {'http': 'http://' + address, 'https': 'http://' + address}
        try:
            return se.get(url, headers=self.headers, proxies=proxy, timeout=timeout)
        except requests.exceptions.RequestException:
            if remaining == 0:
                print('使用代理失败')
                raise
            print('正在更换代理,5秒后重新获取第', remaining, '次')
            time.sleep(5)
            proxy = None  # pick a different proxy on the next round
def work(self):
    """Log in, then fetch result pages 1-9 of the target search."""
    self.login()
    for page_num in range(1, 10):
        # The result page is selected through the "p" query parameter.
        page_url = self.target_url + '&p=' + str(page_num)
        self.get_html(page_url, 3)
        print('第{page}被保存完毕'.format(page=page_num))
        time.sleep(2)
console output
python.jpg依然反爬。。emmm查看全部日志发现,json字符串中的post_key未拿到,于是回过来单独拿下json试试
json.jpg- 获取json测试SSLERROR
def get_json(self):
    """Fetch the login page and store the text of its init-config <input> tag.

    ``self.json_data`` receives the page text from the start of the tag up to
    (not including) the tag's closing '>', or '' when the tag is missing.
    """
    html = se.get(self.base_url, params=self.params)
    marker = r'<input type="hidden" id="init-config" class="json-data" value='
    start = html.text.find(marker)
    if start == -1:
        self.json_data = ''
    else:
        # Search for '>' from the tag onwards; an unanchored search would hit
        # the first '>' of the document and produce an empty slice.
        end = html.text.find('>', start)
        self.json_data = html.text[start:] if end == -1 else html.text[start:end]
    print(self.json_data)
def work(self):
    """Log in, then fetch result pages 1-9 of the target search."""
    self.login()
    for page_num in range(1, 10):
        # The result page is selected through the "p" query parameter.
        page_url = self.target_url + '&p=' + str(page_num)
        self.get_html(page_url, 3)
        print('第{page}被保存完毕'.format(page=page_num))
        time.sleep(2)
# Run the JSON extraction test only when the file is executed as a script.
if __name__ == '__main__':
    pixiv = Pixiv()
    pixiv.get_json()
console output
SSLError.jpg解决方法 :原来是因为在访问服务器的同时,需要配置SSL证书进行伪装,对比了下自己的情况,执行 pip install pyOpenSSL 就好了
7)利用BeautifulSoup模拟登录
import random
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import os
import time
from selenium import webdriver
import ssl
# Module-level session shared by every request the crawler makes.
se = requests.Session()
# Raw string keeps the Windows backslashes literal (no invalid escape sequences).
chromedriver = r"F:\dirver\chromedriver.exe"
os.environ["webdriver.chrome.driver"] = chromedriver
class Pixiv(object):
    """Pixiv client: login, proxy pool and page fetching with retries."""

    def __init__(self):
        """Store endpoints, request headers and placeholder credentials."""
        # Login page; login() reads the post_key form field from it.
        self.base_url = 'https://accounts.pixiv.net/login?lang=zh&source=pc&view_type=page&ref=wwwtop_accounts_index'
        # Endpoint the login form is posted to.
        self.login_url = 'https://accounts.pixiv.net/api/login?lang=zh'
        self.target_url = 'https://www.pixiv.net/search.php?s_mode=s_tag&word=%E5%AE%AB%E5%9B%AD%E8%96%B0'
        self.main_url = 'http://www.pixiv.net'
        self.params = {
            'lang': 'zh',
            'source': 'pc',
            'view_type': 'page',
            'ref': 'wwwtop_accounts_index'
        }
        self.headers = {
            # Reuse base_url instead of a hand-typed copy that contained spaces.
            'Referer': self.base_url,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
                          ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        }
        # Plain strings; a trailing comma would turn them into tuples.
        self.pixiv_id = 'userid'
        self.password = 'pwd'
        self.post_key = []
        self.return_to = 'https://www.pixiv.net/'
        # Directory where downloaded images are stored
        self.load_path = r'F:\work\picture'
        self.ip_list = []
        self.json_data = ""

    def login(self):
        """Log in and print the login response and the main page.

        :raises RuntimeError: when the login page contains no input field.
        """
        post_key_html = se.get(self.base_url, headers=self.headers).text
        post_key_soup = BeautifulSoup(post_key_html, 'lxml')
        # Prefer the field named post_key; fall back to the first <input>.
        token_field = (post_key_soup.find('input', attrs={'name': 'post_key'})
                       or post_key_soup.find('input'))
        if token_field is None:
            raise RuntimeError('post_key field not found on the login page')
        self.post_key = token_field['value']
        # Build the request body
        data = {
            'pixiv_id': self.pixiv_id,
            'password': self.password,
            'post_key': self.post_key,
            'return_to': self.return_to
        }
        q = se.post(self.login_url, data=data, headers=self.headers)
        result = se.get(self.main_url)
        print(q.text)
        print(result.text)

    def get_proxy(self):
        """Scrape the free proxy list and add new 'ip:port' entries to self.ip_list."""
        html = requests.get("http://www.kuaidaili.com/free/", timeout=10)
        bsObj = BeautifulSoup(html.text, 'html.parser')
        ip_cells = bsObj.findAll("td", {"data-title": "IP"})
        port_cells = bsObj.findAll("td", {"data-title": "PORT"})
        # zip() pairs the cells without manual indexing.
        for ip_cell, port_cell in zip(ip_cells, port_cells):
            ip_info = ip_cell.text.strip() + ':' + port_cell.text.strip()
            if ip_info not in self.ip_list:
                self.ip_list.append(ip_info)
            print(ip_info)

    def get_html(self, url, timeout, proxy=None, num_entries=5):
        """GET ``url`` through the shared session, retrying and falling back to proxies.

        Without ``proxy`` the page is requested directly and retried up to
        ``num_entries`` times, 5 seconds apart. If that still fails, or when
        ``proxy`` is given, the request goes through a proxy and every failure
        switches to another random entry of ``self.ip_list``. The proxy pool
        is only scraped when it is needed and still empty.

        :param url: address to fetch.
        :param timeout: per-request timeout in seconds, passed to requests.
        :param proxy: optional requests-style ``proxies`` dict to start with.
        :param num_entries: number of retries after the first attempt.
        :return: the ``requests.Response``.
        :raises requests.exceptions.RequestException: when every attempt failed.
        :raises IndexError: when no proxy address is available.
        """
        retries = max(num_entries, 0)
        if proxy is None:
            for remaining in range(retries, -1, -1):
                try:
                    return se.get(url, headers=self.headers, timeout=timeout)
                except requests.exceptions.RequestException:
                    if remaining > 0:
                        print('获取网页出错,5秒后将会重新倒数第', remaining, '次')
                        time.sleep(5)
            print('开始使用代理')
            time.sleep(5)
            # The proxy phase starts with the default budget of 5 retries.
            retries = 5
        for remaining in range(retries, -1, -1):
            if proxy is None:
                if not self.ip_list:
                    self.get_proxy()
                address = str(random.choice(self.ip_list)).strip()
                # Register the proxy for both schemes; the target site is https.
                proxy = {'http': 'http://' + address, 'https': 'http://' + address}
            try:
                return se.get(url, headers=self.headers, proxies=proxy, timeout=timeout)
            except requests.exceptions.RequestException:
                if remaining == 0:
                    print('使用代理失败')
                    raise
                print('正在更换代理,5秒后重新获取第', remaining, '次')
                time.sleep(5)
                proxy = None  # pick a different proxy on the next round

    def work(self):
        """Log in, then fetch result pages 1-9 of the target search."""
        self.login()
        for page_num in range(1, 10):
            # The result page is selected through the "p" query parameter.
            page_url = self.target_url + '&p=' + str(page_num)
            self.get_html(page_url, 3)
            print('第{page}被保存完毕'.format(page=page_num))
            time.sleep(2)
# Run the login only when the file is executed as a script.
if __name__ == '__main__':
    pixiv = Pixiv()
    pixiv.login()
console output*:拿到验证信息,验证返回校验的hash字符串一致
token.jpg entype.jpg2.创建爬取文件夹
def mkdir(self, path):
    """Create ``<load_path>/<path>`` if needed and make it the working directory.

    :param path: folder name relative to ``self.load_path``; surrounding
        whitespace is stripped.
    :return: True when the folder did not exist before this call, else False.
    """
    path = path.strip()
    target = os.path.join(self.load_path, path)
    created = not os.path.exists(target)
    if created:
        print('创建一个名字为 ' + path + ' 的文件夹')
        # exist_ok covers a folder that appears between the check and the call.
        os.makedirs(target, exist_ok=True)
    else:
        print('名字为 ' + path + ' 的文件夹已经存在')
    os.chdir(target)
    return created
mkdir.jpg
target_url.jpg jump_to_img.jpg3 从爬取页面抓取图片
以第一张素描照片为例:图片信息URL =main_url+href包含标签值
素描图躺着的地方~:div class="works_display"下
1)获取保存图片信息的所有li
标签列表
def get_img(self, html, page_num):
    """Open the detail page of every search result and download its image.

    :param html: HTML text of one search result page.
    :param page_num: result page number, forwarded to download_img().
    """
    li_soup = BeautifulSoup(html, 'lxml')
    # attrs must be a dict ({'class': ...}), not a set literal.
    li_list = li_soup.find_all('li', attrs={'class': 'image-item'})
    for li in li_list:
        link = li.find('a')
        if link is None or not link.get('href'):
            continue
        href = link['href']
        print(href)
        jump_to_url = self.main_url + href
        jump_to_html = self.get_html(jump_to_url, 3).text
        img_soup = BeautifulSoup(jump_to_html, 'lxml')
        # Look the containers up one at a time so a missing outer <div>
        # skips the item instead of raising AttributeError.
        works_display = img_soup.find('div', attrs={'class': 'works_display'})
        if works_display is None:
            continue
        img_info = works_display.find('div', attrs={'class': '_layout-thumbnail'})
        if img_info is None:
            continue
        self.download_img(img_info, jump_to_url, page_num)
- 获取图片信息同时,保存图片信息到指定文件中
def download_img(self, img_info, href, page_num):
    """Download the image inside ``img_info`` into ``<load_path>/<page_num>``.

    :param img_info: parsed element that contains the <img> tag.
    :param href: URL of the detail page, sent as the Referer header.
    :param page_num: result page number; names the destination folder.
    :return: True when the image was saved, False when it could not be fetched.
    """
    img_tag = img_info.find('img')
    if img_tag is None:
        print('爬取图片失败')
        return False
    title = img_tag['alt']
    src = img_tag['src']
    # Copy the headers so the per-image Referer does not leak into self.headers.
    src_headers = dict(self.headers)
    src_headers['Referer'] = href
    try:
        response = requests.get(src, headers=src_headers)
        # Treat HTTP error pages as failures instead of saving them as images.
        response.raise_for_status()
    except requests.exceptions.RequestException:
        print('爬取图片失败')
        return False
    img = response.content
    # Replace characters that are not allowed in file names; strip() removes line breaks.
    title = title.translate(str.maketrans(dict.fromkeys('?/\\*|><:"', '_'))).strip()
    target_dir = os.path.join(self.load_path, str(page_num))
    os.makedirs(target_dir, exist_ok=True)
    if os.path.exists(os.path.join(target_dir, title + '.jpg')):
        # Append the first free numeric suffix to avoid overwriting a file.
        for i in range(1, 100):
            candidate = title + str(i)
            if not os.path.exists(os.path.join(target_dir, candidate + '.jpg')):
                title = candidate
                break
    print('正在保存名字为: ' + title + ' 的图片')
    with open(os.path.join(target_dir, title + '.jpg'), 'wb') as f:
        f.write(img)
    print('保存该图片完毕')
    return True
console output
onSuccess.jpg picture.jpg然而因为中途撤掉了代理,所以爬了五张就挂了,qwq我的大四月啊~
NoProxy.jpg代码清单
import random
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import os
import time
from selenium import webdriver
import ssl
# Module-level session shared by every request the crawler makes.
se = requests.Session()
# Raw string keeps the Windows backslashes literal (no invalid escape sequences).
chromedriver = r"F:\dirver\chromedriver.exe"
os.environ["webdriver.chrome.driver"] = chromedriver
class Pixiv(object):
    """Crawler that logs in to Pixiv and downloads the images of a search."""

    def __init__(self):
        """Store endpoints, request headers and placeholder credentials."""
        # Login page; login() reads the post_key form field from it.
        self.base_url = 'https://accounts.pixiv.net/login?lang=zh&source=pc&view_type=page&ref=wwwtop_accounts_index'
        # Endpoint the login form is posted to.
        self.login_url = 'https://accounts.pixiv.net/api/login?lang=zh'
        # Search whose result pages are crawled.
        self.target_url = 'https://www.pixiv.net/search.php?word=%E5%AE%AB%E5%9B%AD%E8%96%B0&order=date_d'
        self.main_url = 'http://www.pixiv.net'
        self.params = {
            'lang': 'zh',
            'source': 'pc',
            'view_type': 'page',
            'ref': 'wwwtop_accounts_index'
        }
        self.headers = {
            # Reuse base_url instead of a hand-typed copy that contained spaces.
            'Referer': self.base_url,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
                          ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        }
        # Plain strings; a trailing comma would turn them into tuples.
        self.pixiv_id = 'userid'
        self.password = 'pwd'
        self.post_key = []
        self.return_to = 'https://www.pixiv.net/'
        # Directory where downloaded images are stored
        self.load_path = r'F:\work\picture'
        self.ip_list = []
        self.json_data = ""

    def login(self):
        """Read the post_key token from the login page and submit the form.

        :raises RuntimeError: when the login page contains no input field.
        """
        post_key_html = se.get(self.base_url, headers=self.headers).text
        post_key_soup = BeautifulSoup(post_key_html, 'lxml')
        # Prefer the field named post_key; fall back to the first <input>.
        token_field = (post_key_soup.find('input', attrs={'name': 'post_key'})
                       or post_key_soup.find('input'))
        if token_field is None:
            raise RuntimeError('post_key field not found on the login page')
        self.post_key = token_field['value']
        # Build the request body
        data = {
            'pixiv_id': self.pixiv_id,
            'password': self.password,
            'post_key': self.post_key,
            'return_to': self.return_to
        }
        se.post(self.login_url, data=data, headers=self.headers)
        # Visit the main page with the same session; the response is not inspected.
        se.get(self.main_url)

    def get_proxy(self):
        """Scrape the free proxy list and add new 'ip:port' entries to self.ip_list."""
        html = requests.get("http://www.kuaidaili.com/free/", timeout=10)
        bsObj = BeautifulSoup(html.text, 'html.parser')
        ip_cells = bsObj.findAll("td", {"data-title": "IP"})
        port_cells = bsObj.findAll("td", {"data-title": "PORT"})
        # zip() pairs the cells without manual indexing.
        for ip_cell, port_cell in zip(ip_cells, port_cells):
            ip = str(ip_cell.text) + ':' + str(port_cell.text)
            entry = re.sub('\n', '', ip).strip()
            if entry not in self.ip_list:
                self.ip_list.append(entry)
        # NOTE: the commented-out regex parsing experiments were removed.

    def get_html(self, url, timeout, proxy=None, num_entries=5):
        """GET ``url`` through the shared session, retrying and falling back to proxies.

        Without ``proxy`` the page is requested directly and retried up to
        ``num_entries`` times, 5 seconds apart. If that still fails, or when
        ``proxy`` is given, the request goes through a proxy and every failure
        switches to another random entry of ``self.ip_list``. The proxy pool
        is scraped on demand when it is still empty.

        :param url: address to fetch.
        :param timeout: per-request timeout in seconds, passed to requests.
        :param proxy: optional requests-style ``proxies`` dict to start with.
        :param num_entries: number of retries after the first attempt.
        :return: the ``requests.Response``.
        :raises requests.exceptions.RequestException: when every attempt failed.
        :raises IndexError: when no proxy address is available.
        """
        retries = max(num_entries, 0)
        if proxy is None:
            for remaining in range(retries, -1, -1):
                try:
                    return se.get(url, headers=self.headers, timeout=timeout)
                except requests.exceptions.RequestException:
                    if remaining > 0:
                        print('获取网页出错,5秒后将会重新倒数第', remaining, '次')
                        time.sleep(5)
            print('开始使用代理')
            time.sleep(5)
            # The proxy phase starts with the default budget of 5 retries.
            retries = 5
        for remaining in range(retries, -1, -1):
            if proxy is None:
                if not self.ip_list:
                    self.get_proxy()
                address = str(random.choice(self.ip_list)).strip()
                # Print the address that is actually used.
                print(address)
                # Register the proxy for both schemes; the target site is https.
                proxy = {'http': 'http://' + address, 'https': 'http://' + address}
            try:
                return se.get(url, headers=self.headers, proxies=proxy, timeout=timeout)
            except requests.exceptions.RequestException:
                if remaining == 0:
                    print('使用代理失败')
                    raise
                print('正在更换代理,5秒后重新获取第', remaining, '次')
                time.sleep(5)
                proxy = None  # pick a different proxy on the next round

    def mkdir(self, path):
        """Create ``<load_path>/<path>`` if needed and make it the working directory.

        :param path: folder name relative to ``self.load_path``; surrounding
            whitespace is stripped.
        :return: True when the folder did not exist before this call, else False.
        """
        path = path.strip()
        target = os.path.join(self.load_path, path)
        created = not os.path.exists(target)
        if created:
            print('创建一个名字为 ' + path + ' 的文件夹')
            # exist_ok covers a folder that appears between the check and the call.
            os.makedirs(target, exist_ok=True)
        else:
            print('名字为 ' + path + ' 的文件夹已经存在')
        os.chdir(target)
        return created

    def get_img(self, html, page_num):
        """Open the detail page of every search result and download its image.

        :param html: HTML text of one search result page.
        :param page_num: result page number, forwarded to download_img().
        """
        li_soup = BeautifulSoup(html, 'html5lib')
        # attrs must be a dict ({'class': ...}), not a set literal.
        li_list = li_soup.find_all('li', attrs={'class': 'image-item'})
        for li in li_list:
            link = li.find('a')
            if link is None or not link.get('href'):
                continue
            jump_to_url = self.main_url + link['href']
            jump_to_html = se.get(jump_to_url).text
            img_soup = BeautifulSoup(jump_to_html, 'html5lib')
            # Look the containers up one at a time so a missing outer <div>
            # is reported instead of raising AttributeError.
            works_display = img_soup.find('div', {'class': 'works_display'})
            img_info = None
            if works_display is not None:
                img_info = works_display.find('div', attrs={'class': '_layout-thumbnail'})
            if img_info is None:
                print("图片未找到")
                continue
            self.download_img(img_info, jump_to_url, page_num)

    def download_img(self, img_info, href, page_num):
        """Download the image inside ``img_info`` into ``<load_path>/<page_num>``.

        :param img_info: parsed element that contains the <img> tag.
        :param href: URL of the detail page, sent as the Referer header.
        :param page_num: result page number; names the destination folder.
        :return: True when the image was saved, False when it could not be fetched.
        """
        img_tag = img_info.find('img')
        if img_tag is None:
            print('爬取图片失败')
            return False
        title = img_tag['alt']
        src = img_tag['src']
        # Copy the headers so the per-image Referer does not leak into self.headers.
        src_headers = dict(self.headers)
        src_headers['Referer'] = href
        try:
            response = requests.get(src, headers=src_headers)
            # Treat HTTP error pages as failures instead of saving them as images.
            response.raise_for_status()
        except requests.exceptions.RequestException:
            print('爬取图片失败')
            return False
        img = response.content
        # Replace characters that are not allowed in file names; strip() removes line breaks.
        title = title.translate(str.maketrans(dict.fromkeys('?/\\*|><:"', '_'))).strip()
        target_dir = os.path.join(self.load_path, str(page_num))
        os.makedirs(target_dir, exist_ok=True)
        if os.path.exists(os.path.join(target_dir, title + '.jpg')):
            # Append the first free numeric suffix to avoid overwriting a file.
            for i in range(1, 100):
                candidate = title + str(i)
                if not os.path.exists(os.path.join(target_dir, candidate + '.jpg')):
                    title = candidate
                    break
        print('正在保存名字为: ' + title + ' 的图片')
        # 'wb' writes a fresh file; append mode could corrupt an existing image.
        with open(os.path.join(target_dir, title + '.jpg'), 'wb') as f:
            f.write(img)
        print('保存该图片完毕')
        return True

    def work(self):
        """Log in, then download the images of result pages 1-9."""
        self.login()
        for page_num in range(1, 10):
            self.mkdir(str(page_num))
            # The result page is selected through the "p" query parameter.
            page_url = self.target_url + "&p=" + str(page_num)
            print(page_url)
            # Pass the real page number so images land in the matching folder.
            self.get_img(se.get(page_url).text, page_num)
            print('第{page}被保存完毕'.format(page=page_num))
            time.sleep(2)
# Run the crawler only when the file is executed as a script.
if __name__ == '__main__':
    pixiv = Pixiv()
    pixiv.work()
# pixiv.get_proxy()
# pixiv.get_html(pixiv.ip_list,5)
# pixiv.get_html(pixiv.target_url,5)
# pixiv.get_proxy()
网友评论