100行代码做一个浓缩版知乎
#-*-coding:utf8;-*-
#qpy:2
#qpy:console
import requests,os,json,time,re
import warnings
warnings.filterwarnings("ignore")
# Load the contents of /sdcard/html into `html`. The context manager closes
# the file handle as soon as the text has been read instead of leaving it to
# the garbage collector.
with open('/sdcard/html','r') as html_file:
    html=html_file.read()
# Request headers passed to the HTTP calls made by get_json().
head={
    'X-Requested-With':'mark.via.gp',
    'User-Agent':'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_0 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/6.0.5 Mobile/8A93 Safari/5531.22.77',
}
def get_json(offset,ids,page):
    """Fetch one batch of answers for a question and collect their fields.

    Args:
        offset: value sent as the ``offset`` query parameter.
        ids: question id inserted into the API URL path.
        page: value sent as the ``limit`` query parameter.

    Returns:
        A dict with the keys ``'title'`` (question title taken from the
        first answer), ``'up_list'`` (vote counts), ``'name_list'`` (author
        names), ``'head_list'`` (author headlines) and ``'content_list'``
        (UTF-8 encoded answer bodies with image tags replaced by a text
        placeholder). The four lists are index-aligned.

    Raises:
        IndexError: if the response contains no answers (``data`` is empty).
        KeyError: if the response lacks one of the expected fields.
    """
    h='''https://www.zhihu.com/api/v4/questions/'''+str( ids)+'''/answers?include=data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cupvoted_followees%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url%3Bdata%5B%2A%5D.author.follower_count%2Cbadge%5B%3F%28type%3Dbest_answerer%29%5D.topics&limit='''+str(page)+'''&offset='''+str(offset)+'''&sort_by=default'''
    s = requests.Session()
    # NOTE(review): this is a hard-coded login cookie committed to source;
    # it should be loaded from configuration and the leaked value revoked.
    s.cookies['z_c0']='Mi4xWnVtakFnQUFBQUFBSUVJU0doMDNEQmNBQUFCaEFsVk5HX20zV1FDeU5uV2pHU2RQTTl2RnhDdllCbVE2WVNfSHdB|1502637083|5297b0bc003ddf3f9ab8767c3a9f276b61be180a'
    # NOTE(review): verify=False disables TLS certificate checking.
    r=s.get(h,headers=head,verify=False)
    js=json.loads(r.text)
    answers=js['data']
    question_title=answers[0]['question']['title']  # title of the question
    name_list=[]
    content_list=[]
    up_list=[]
    head_list=[]
    for item in answers:
        up_list.append(item['voteup_count'])
        name_list.append(item['author']['name'])
        head_list.append(item['author']['headline'])
        # The pattern here used to be an empty string, which matches between
        # every character and therefore scattered the placeholder through the
        # whole answer. Replace <img ...> tags instead, which is what the
        # placeholder text ("image") indicates was intended.
        content_list.append(re.sub(r'<img[^>]*>','图片',item['content'].encode('utf8')))
    return {
        'up_list':up_list,
        'content_list':content_list,
        'name_list':name_list,
        'title':question_title,
        'head_list':head_list,
    }
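#Data persistence function: download(dt,fm)
#dt: info dict; its 'title', 'name_list' and 'content_list' entries are read below
#    'content_list' - text content of the answers
#    'name_list'    - names of the answer authors
#    'up_list'      - upvote counts
#    'title'        - title of the question (used, after stripping illegal characters, as the folder name)
#fm: name of the directory under /sdcard/ in which the question's folder is created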
def download(dt,fm):
filename=re.sub(r'[\?/\\:\ *"<>|]','',dt['title'])
print filename.encode('utf8')
if False==os.path.exists('/sdcard/%s/%s'%(fm,filename)):
os.mkdir("/sdcard/%s/%s"%(fm,filename))
i=0
print len(dt['name_list'])
print len(dt['content_list'])
while i',r.content))
for item in u:
question(item)
global qq
print '开始读取下一个问题'
qq=0
print '#'*10+str(i)
网友评论