美文网首页 > 玩耍Python > Python
实践中学python:评论喜欢简书文章

实践中学python:评论喜欢简书文章

作者: 十一岁的加重 | 来源:发表于2017-08-22 09:32 被阅读332次
#!/usr/bin/python
# coding=utf-8
import io
import os
import re

import requests

def getArticleIDStringFromWebAtUrlString(urlString):
    """Return the article ids linked from the web page at ``urlString``.

    The page is downloaded and searched by ``getIDStringFromWebAtUrlString``;
    each result is the text captured after ``/p/`` in a ``class="title"``
    anchor.
    """
    articleLinkPattern = '<a class="title" target="_blank" href="/p/(.*?)">'
    return getIDStringFromWebAtUrlString(urlString, articleLinkPattern)

def getTopicIDStringFromWebAtUrlString(urlString):
    """Return the topic (collection) ids linked from the page at ``urlString``.

    The page is downloaded and searched by ``getIDStringFromWebAtUrlString``;
    each result is the text captured after ``/c/`` in a
    ``class="collection-tag"`` anchor.
    """
    topicLinkPattern = '<a class="collection-tag" target="_blank" href="/c/(.*?)">'
    return getIDStringFromWebAtUrlString(urlString, topicLinkPattern)

def getIDStringFromWebAtUrlString(urlString, regString, timeout=10):
    """Download a web page and return every match of ``regString`` in it.

    Args:
        urlString: URL of the page, fetched with ``requests.get``.
        regString: Regular expression applied to the response text with
            ``re.findall`` and the ``re.S`` flag, so ``.`` also matches
            newlines.
        timeout: Seconds to wait for the server before ``requests`` raises
            instead of blocking. Without a timeout a stalled connection
            would hang the script indefinitely.

    Returns:
        The list produced by ``re.findall`` (the captured group for
        single-group patterns); empty when nothing matches.
    """
    response = requests.get(urlString, timeout=timeout)
    return re.findall(regString, response.text, re.S)

def getComentsIDStringFromArticleUrlString(urlString):
    """Return the first ``data-note-id`` value found on an article page.

    The page is fetched and searched through the shared
    ``getIDStringFromWebAtUrlString`` helper rather than repeating the
    request/regex code here.

    Args:
        urlString: URL of the article page.

    Returns:
        The text captured from the first ``data-note-id="..."`` attribute.

    Raises:
        IndexError: If the page contains no ``data-note-id`` attribute. The
            exception type matches what indexing the empty result raised
            before; the message now names the offending URL.
    """
    noteIDStrings = getIDStringFromWebAtUrlString(urlString, 'data-note-id="(.*?)">')
    if not noteIDStrings:
        raise IndexError('no data-note-id attribute found in %s' % urlString)
    return noteIDStrings[0]

def getArticleIDStringFromArticleUrlString(urlString):
    """Extract the article id from an article URL.

    When ``urlString`` contains ``http://www.jianshu.com/p/``, every
    occurrence of that prefix is removed and the remainder is returned.
    Otherwise the placeholder text ``'this is not a articleUrlString'`` is
    returned instead of raising.
    """
    articlePrefix = 'http://www.jianshu.com/p/'
    if articlePrefix not in urlString:
        return 'this is not a articleUrlString'
    return urlString.replace(articlePrefix, '')

# Session cookie copied from a logged-in browser. It is the default for every
# authenticated request below and is kept in one place instead of being
# repeated in each function.
# NOTE(review): this is a credential stored in source. Prefer passing
# ``cookieString`` explicitly (for example read from an environment variable)
# and treat this value as compromised.
_DEFAULT_COOKIE_STRING = 'remember_user_token=W1sxOTI1NjJdLCIkMmEkMTAkeUdIRkRGdFN4L0RpZldKV3lKalR6ZSIsIjE1MDMzMjY2NTMuODY3NTI3Il0%3D--7255eb9eb5c5465a41781ef8e5772b10e1b2d72e; _ga=GA1.2.714896647.1497170140; _gid=GA1.2.462532388.1503150885; Hm_lvt_0c0e9d9b1e7d617b3e6842e85b9fb068=1503323642,1503323726,1503325746,1503326272; Hm_lpvt_0c0e9d9b1e7d617b3e6842e85b9fb068=1503326790; _session_id=ZkhpL1IzRzA4bEs2dTZRRlhSTlVaQ3JFMWQ0enc4akxtSjh1dlUrSlpCUGpJZGltZG42UDdDMmtodllJbkovZFZ1eEkxVlNjZDUzVG1RdVc1cysyQUZTVUxFZFBUUC9zL3dIZjNVdFJVeERsZWpET2NRK0J3dHFnN21xRkE3QlFuYlBPU1RZc09xTXFJRlBFNGwrU1U3Q0gzQnpTRnNmZXVvNmR1dzJOZ2J0MU5KMkQ4cnd5OTlpYlIyZXRyb21rdjBMeE9xS1lPWGRSL2t0UXlyeDN2bml2TWNwMDF4akpGSkVQaW5iSXBDdWJCNUxrZEJ3dGJDZUhnOU50a2wyV2N2b3g0dXkvd0M5VWkzNTh6eUtPRUdTZUJuUHo2REg1dXhqeGpNOExubm40enU5U3BPNG1xdnlOK0dxYTJFeU5GZVQ3OTlFTW1GdndUS043T0N6QlpGczY4VFJCYW5QTDlxUzRMOGJYN3RyeU05N2cvS0lDWDNidVJZNkgwbmF3aHVMY083TWgrc2RsRHRYVDBjYjIwZVJTdXRHc21MWnk4Z0I3VFdMWHNmOD0tLU8zaHdXcEFUZjNaLzVINWpIdUZnK0E9PQ%3D%3D--4ff5e23579bb725201c91a9bfa6d7bb5975bd076'

# Seconds to wait for the server on a POST before ``requests`` raises instead
# of blocking forever.
_POST_TIMEOUT_SECONDS = 10


def _buildRequestHeaders(refererUrlString, cookieString):
    """Return the browser-like header set sent with every comment/like POST."""
    return {
        'Host': 'www.jianshu.com',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36',
        'Accept': 'application/json',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Content-Type': 'application/json',
        'Cookie': cookieString,
        'Referer': refererUrlString,
        'Origin': 'http://www.jianshu.com',
        'Connection': 'keep-alive',
        'Accept-Encoding': 'gzip, deflate',
        'DNT': '1',
    }


def _postToNote(noteIDString, action, data, refererUrlString, cookieString):
    """POST ``data`` as JSON to ``/notes/<noteIDString>/<action>``.

    Prints the HTTP status code and reason, then returns the response.
    """
    if cookieString is None:
        cookieString = _DEFAULT_COOKIE_STRING
    response = requests.post(
        url='http://www.jianshu.com/notes/%s/%s' % (noteIDString, action),
        headers=_buildRequestHeaders(refererUrlString, cookieString),
        json=data,
        timeout=_POST_TIMEOUT_SECONDS)
    # A single pre-formatted string prints the same text on Python 2 and 3.
    print('%s %s' % (response.status_code, response.reason))
    return response


def commentArticle(articleUrlString, commentString, cookieString=None):
    """Post a comment on an article.

    Args:
        articleUrlString: Full URL of the article page.
        commentString: Text of the comment, sent as ``{"content": ...}``.
        cookieString: Optional ``Cookie`` header value for the session to
            post as; defaults to the cookie embedded in this module.

    Returns:
        The ``requests`` response of the POST (its status is also printed).

    Raises:
        IndexError: Propagated from
            ``getComentsIDStringFromArticleUrlString`` when the page carries
            no note id.
    """
    articleIDString = getArticleIDStringFromArticleUrlString(articleUrlString)
    # The endpoints are addressed by the note id scraped from the page, not by
    # the id in the article URL; the latter is only used for the Referer.
    noteIDString = getComentsIDStringFromArticleUrlString(articleUrlString)
    return _postToNote(
        noteIDString,
        'comments',
        {"content": commentString},
        'http://www.jianshu.com/p/%s' % articleIDString,
        cookieString)


def likeTopic(topicUrlString, cookieString=None):
    """Like every article linked from a topic page.

    Args:
        topicUrlString: URL of the topic page to scan for article links.
        cookieString: Optional ``Cookie`` header value forwarded to
            ``likeArticle``; defaults to the cookie embedded in this module.
    """
    for articleIDString in getArticleIDStringFromWebAtUrlString(topicUrlString):
        # likeArticle accepts a bare id and prepends the article URL prefix.
        likeArticle(articleIDString, cookieString)


def likeArticle(articleUrlString, cookieString=None):
    """Like one article.

    Args:
        articleUrlString: Either a full article URL or a bare article id; a
            value that does not contain ``http://www.jianshu.com/p/`` is
            treated as an id and prefixed with it.
        cookieString: Optional ``Cookie`` header value for the session to
            like as; defaults to the cookie embedded in this module.

    Returns:
        The ``requests`` response of the POST (its status is also printed).

    Raises:
        IndexError: Propagated from
            ``getComentsIDStringFromArticleUrlString`` when the page carries
            no note id.
    """
    prefixStr = 'http://www.jianshu.com/p/'
    if prefixStr not in articleUrlString:
        articleUrlString = prefixStr + articleUrlString
    noteIDString = getComentsIDStringFromArticleUrlString(articleUrlString)
    # The like endpoint is sent an empty JSON object as its body.
    return _postToNote(noteIDString, 'like', {}, articleUrlString, cookieString)

按照上面的做法会遇到一个问题:直接请求网页时,拿不到一个用户的所有文章,也拿不到一个专题的所有文章。解决方案:先把网页离线保存到本地,再从保存下来的网页源码中抓取文章 id。


def getArticalIDStringFromLocalHtmlFile(localHtmlFilePath):
    """Return the article ids linked from a locally saved HTML page.

    Args:
        localHtmlFilePath: Path of the saved page. A leading ``~`` is
            expanded to the user's home directory, so ``~/Desktop/x.htm``
            works as well as an absolute path.

    Returns:
        A list with the text captured after ``http://www.jianshu.com/p/`` in
        every ``class="title"`` anchor; empty when the file has none.
    """
    resolvedPath = os.path.expanduser(localHtmlFilePath)
    # Decode explicitly instead of relying on the platform default encoding.
    # Only the ids are extracted, so undecodable bytes elsewhere in the page
    # are replaced rather than allowed to abort the read.
    with io.open(resolvedPath, 'r', encoding='utf-8', errors='replace') as f:
        page = f.read()
    return re.findall(
        '<a class="title" target="_blank" href="http://www.jianshu.com/p/(.*?)">',
        page,
        re.S)

# Script entry point; these statements run as soon as the module is executed.
# Author's note: give the full path to the saved page here rather than a
# "~/Desktop"-style path.
articalIDStrings = getArticalIDStringFromLocalHtmlFile('/Users/mac/Desktop/test.htm')
# Each element is a bare article id; likeArticle prepends the article URL
# prefix itself when it is missing.
for articalIDString in articalIDStrings:
    likeArticle(articalIDString)

小弟新手请大神们赐教

相关文章

  • 实践中学python:评论喜欢简书文章

    按上面我们会遇到一个问题,就是拿不到一个用户的所有文章,一个专题的所有文章,解决方案:离线网页,再抓取网页源码 小...

  • 长话短说

    近来有时间看简书一些文章,评论,喜欢,点赞,发现一些问题。 简书作者,简友之间的文章,都是相互点赞,喜欢,评论,相...

  • 创作你的创作

    简书的消息有:评论,简信,投稿请求,喜欢和赞,关注,赞赏,其他提醒。 评论是发布的文章有小伙伴评论了,或者你评论的...

  • js事件自动执行

    简书文章自动评论

  • 写给新手|换一个角度思考,让写作之路更顺畅

    最近这几天,经常收到简书的消息提醒:XXX喜欢了你的文章;XXX关注了你;XXX评论了你的文章。 读者喜欢和评论的...

  • 评论区中文字的足迹

    在简书平台上,评论区中和好友们互动的评论,由心而发,有的自己很喜欢。看着评论,就想起评论后面的这些文章,文章后面的...

  • 摘自简书文章评论

    内向的人不太喜欢长期处于一种嘈杂的环境中,往往喧嚣过后,都愿意置于一种安静独处的状态,以此获得能量。

  • 实践中学python:刷简书阅读量

  • 有趣的评论

    昨天看了一篇文章,让我喜欢的不是文章本身,而是下方的评论。 这些人,自命为“简书监察员”、“简书扫黑大...

  • 喜欢简书的评论

    没想到最近喜欢上了简书的评论 感觉文章底下的评论就像是以前黑板下各路同学之间的窃窃私语 要是有人给我点赞都会小小雀...

网友评论

本文标题:实践中学python:评论喜欢简书文章

本文链接:https://www.haomeiwen.com/subject/wvpkdxtx.html