用python写一个cnBeta阅读器

作者: bigtrace | 来源:发表于2017-07-21 04:22 被阅读373次

    我个人平时喜欢逛cnBeta和百度贴吧,于是利用之前写百度贴吧客户端的code,写了一个cnBeta的阅读器。

    用python写一个百度贴吧客户端

    由于cnBeta http://www.cnbeta.com/ 电脑端广告实在太多,要想阅读新闻和评论实在十分费时,于是我用Python抓取手机版 http://m.cnbeta.com/wap 的内容,方便大家阅读。

    功能和界面与我之前写的百度贴吧Python客户端十分相似。

    一打开便会显示首页的最新新闻,如果想看第2页的新闻则输入s 2, 以此类推。

    s 2

    阅读某一个新闻,则输入t index , 比如查看index 为1 的新闻

    t 1

    你不用亲自去查看评论,程序会抓取评论(目前的代码只请求评论的第1页)并直接显示在文章下方。

    输入 b 可以返回新闻列表。

    由于我自己不喜欢评论,所以我没有添加评论该新闻的功能。想要加评论功能,也很简单,可以参考我的百度客户端的文章。

    新增预览图片功能

    输入pic, 可以打开由PyQt库写的一个小窗口,用来预览该新闻内的图片,并且可以上下翻页。

    以下附上code:

    # coding=utf-8
    import sys
    import pycurl
    import os
    import time
    from StringIO import StringIO
    import re
    import lxml.html
    import unicodedata
    from PyQt4.QtGui import *
    from PyQt4 import QtGui
    from colorama import Fore, Back, Style,init
    from termcolor import colored
    
    
    # class definition
    
    class Example(QtGui.QWidget):
        """Qt window that previews a list of remote images, one at a time.

        The window holds one picture label and two buttons, "Next" and
        "Previous", which step through the URL list with wrap-around.
        """

        def __init__(self, all_pic_list):
            """Remember the image URLs and build the window.

            all_pic_list -- sequence of image URLs. It may be empty; the
                            window then opens without a picture instead of
                            failing with an IndexError.
            """
            super(Example, self).__init__()
            self.url_list = all_pic_list
            self.current_pic_index = 0
            self.initUI()

        def initUI(self):
            """Create the picture label and the two buttons, then show the window."""
            QtGui.QToolTip.setFont(QtGui.QFont('Test', 10))
            self.setToolTip('This is a <b>QWidget</b> widget')

            # Label that displays the current picture.
            self.pic = QtGui.QLabel(self)
            self.pic.setGeometry(0, 0, 600, 500)
            self._show_current()

            # Navigation buttons.
            btn_next = QtGui.QPushButton('Next', self)
            btn_next.setToolTip('This is a <b>QPushButton</b> widget')
            btn_next.resize(btn_next.sizeHint())
            btn_next.clicked.connect(self.fun_next)
            btn_next.move(300, 50)

            btn_prev = QtGui.QPushButton('Previous', self)
            btn_prev.setToolTip('This is a <b>QPushButton</b> widget')
            btn_prev.resize(btn_prev.sizeHint())
            btn_prev.clicked.connect(self.fun_prev)
            btn_prev.move(50, 50)

            self.setGeometry(300, 300, 500, 500)
            self.setWindowTitle('ImgViewer')
            self.show()

        def retrieve_from_url(self, pic_url):
            """Download ``pic_url`` through the configured proxy and return the raw body."""
            c = pycurl.Curl()
            buf = StringIO()
            try:
                # NOTE(review): proxy address and user name are hard-coded;
                # presumably specific to the author's network -- adjust or
                # remove these three options elsewhere.
                c.setopt(pycurl.PROXY, 'http://192.168.87.15:8080')
                c.setopt(pycurl.PROXYUSERPWD, 'LL66269:')
                c.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_NTLM)
                c.setopt(pycurl.URL, pic_url)
                c.setopt(c.WRITEDATA, buf)
                c.perform()
            finally:
                # Release the handle even when perform() raises.
                c.close()
            return buf.getvalue()

        def _show_current(self):
            """Download the image at ``current_pic_index`` and put it into the label."""
            if not self.url_list:
                # Nothing to display; indexing an empty list would raise.
                return
            pixmap = QtGui.QPixmap()
            pixmap.loadFromData(
                self.retrieve_from_url(self.url_list[self.current_pic_index]))
            self.pic.setPixmap(pixmap)

        def fun_next(self):
            """Slot of the "Next" button: advance one picture, wrapping to the first."""
            if not self.url_list:
                return
            self.current_pic_index = (self.current_pic_index + 1) % len(self.url_list)
            self._show_current()

        def fun_prev(self):
            """Slot of the "Previous" button: go back one picture, wrapping to the last."""
            if not self.url_list:
                return
            self.current_pic_index = (self.current_pic_index - 1) % len(self.url_list)
            self._show_current()
    
    
    def main(all_pic_list):
        """Run the picture viewer for ``all_pic_list`` and exit with Qt's return code."""
        qt_app = QtGui.QApplication(sys.argv)
        # Bound to a name so the widget object stays referenced while exec_() runs.
        viewer = Example(all_pic_list)
        exit_code = qt_app.exec_()
        sys.exit(exit_code)
    
    
    #---------------------------------------------
    class Browser_cnbeta:
        """Console reader for the mobile cnBeta pages (m.cnbeta.com/wap).

        It lists the news of one index page, prints a single article together
        with the first page of its comments, and can open a Qt window to
        preview the pictures of the article read last.
        """

        # Proxy applied to every request. Set PROXY to None/'' in a subclass or
        # on the class to connect directly.
        # NOTE(review): these values are hard-coded; presumably specific to the
        # author's network.
        PROXY = 'http://192.168.87.15:8080'
        PROXY_USERPWD = 'LL66269:'
        USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'

        # One curl handle, created with the class and used for all page requests.
        c = pycurl.Curl()

        def __init__(self):
            """Clear the screen, print the welcome banner and load index page 1."""
            os.system('cls')
            print("""
            
                   _                       _                     ____  _____ _____  _    
     __      _____| | ___ ___  _ __ ___   | |_ ___     ___ _ __ | __ )| ____|_   _|/ \   
     \ \ /\ / / _ \ |/ __/ _ \| '_ ` _ \  | __/ _ \   / __| '_ \|  _ \|  _|   | | / _ \  
      \ V  V /  __/ | (_| (_) | | | | | | | || (_) | | (__| | | | |_) | |___  | |/ ___ \ 
       \_/\_/ \___|_|\___\___/|_| |_| |_|  \__\___/   \___|_| |_|____/|_____| |_/_/   \_\
                                                                                          
    made by bigtrace
    http://www.jianshu.com/p/f04e514c2902
    7/20/2017
            """)
            time.sleep(2)
            self.read_shouye(1)

        def wide_chars(self, s):
            """Return the number of East Asian full-width/wide characters in ``s``.

            Byte strings are decoded as UTF-8 first. The result is used to
            reduce the padding width of the formatted list columns.
            """
            if isinstance(s, bytes):
                s = s.decode('utf-8')
            return sum(unicodedata.east_asian_width(x) in ('F', 'W') for x in s)

        def _configure_curl(self):
            """Apply proxy, redirect, error and user-agent options to the curl handle."""
            curl = self.c
            if self.PROXY:
                curl.setopt(pycurl.PROXY, self.PROXY)
                if self.PROXY_USERPWD:
                    curl.setopt(pycurl.PROXYUSERPWD, self.PROXY_USERPWD)
                curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_NTLM)
            curl.setopt(pycurl.FOLLOWLOCATION, 1)
            curl.setopt(pycurl.VERBOSE, 0)
            curl.setopt(pycurl.FAILONERROR, True)
            curl.setopt(pycurl.USERAGENT, self.USER_AGENT)

        def _fetch_doc(self, url):
            """Download ``url`` and return it parsed as an lxml HTML document.

            The body is decoded as UTF-8; undecodable bytes are dropped.
            """
            buf = StringIO()
            self.c.setopt(pycurl.URL, url)
            self.c.setopt(self.c.WRITEDATA, buf)
            self.c.perform()
            body = buf.getvalue().decode('utf-8', 'ignore')
            return lxml.html.fromstring(body)

        def read_shouye(self, index):
            """Fetch index page ``index`` and print its news list.

            index -- page number (int or digit string).

            Stores the article links in ``self.tiezi_link``, the titles in
            ``self.shouye_titles`` and the printed lines in
            ``self.display_shouye_list``; all three share the same positions,
            which are the "index" numbers shown to the user.
            """
            os.system('cls')
            self._configure_curl()

            url_tbs = 'http://m.cnbeta.com/wap/index.htm?page=' + str(index)
            print(colored(url_tbs, 'blue'))
            print(colored("\n---------------------", 'green'))

            doc = self._fetch_doc(url_tbs)
            news_list = doc.xpath("//div[@class='list']")

            # Article links look like http://m.cnbeta.com/wap/view/633687.htm
            Header_list = []
            link_list = []
            display_shouye = []

            self.header_max_width = 12
            self.title_max_width = 70
            for each_news in news_list:
                hrefs = each_news.xpath(".//a/@href")
                if not hrefs:
                    # An entry without a link cannot be opened; skip it rather
                    # than abort the whole listing.
                    continue
                link_url = "http://m.cnbeta.com" + hrefs[0]
                title = each_news.xpath(".//a")[0].text_content()

                # The displayed index is the position in link_list.
                Header = "index " + colored(str(len(link_list)), 'yellow')
                each_title = ":   " + title
                Header_list.append(title)
                link_list.append(link_url)

                Header_fmt = u'{0:<%s}' % (self.header_max_width - self.wide_chars(Header))
                Title_fmt = u'{0:<%s}' % (self.title_max_width - self.wide_chars(each_title))
                try:
                    each_display = (Header_fmt.format(Header) + Title_fmt.format(each_title)).encode("gb18030")
                except Exception:
                    # Best effort: formatting or encoding of the title failed.
                    each_display = (Header_fmt.format(Header) + "Title can't be displayed").encode("gb18030")

                print(each_display)
                display_shouye.append(each_display)
                print("")

            self.tiezi_link = link_list
            self.shouye_titles = Header_list
            self.display_shouye_list = display_shouye

            print(colored("\n---------------------", 'green'))

        def read_each_news(self, index):
            """Print the article at list position ``index``, then its comments.

            index -- position in the list printed last by read_shouye (int or
                     digit string).

            The image URLs found in the article paragraphs are stored in
            ``self.current_thread_pic_list`` for view_image().
            """
            os.system('cls')
            link = self.tiezi_link[int(index)]
            title = self.shouye_titles[int(index)]
            print("===================================================\n\n\n")
            print(colored(title, 'magenta') + colored("  <" + link + "> \n", 'blue'))

            doc = self._fetch_doc(link)

            # Publication line: concatenated text of all spans in the "time" div.
            time_subtitle = ""
            for each_span in doc.xpath("//div[@class='time']/span"):
                time_subtitle = time_subtitle + each_span.text_content()

            print("")
            print(colored((time_subtitle).encode("gb18030"), 'cyan'))
            print("")

            self.current_thread_pic_list = []
            for each_paragraph in doc.xpath("//div[@class='content']/p"):
                print("")
                # Replace non-breaking spaces (\xa0) with plain spaces.
                text_content = each_paragraph.text_content().replace(u'\xa0', u' ')
                print(text_content)
                for each_img in each_paragraph.xpath(".//img/@src"):
                    print(colored("<img url: " + each_img + ">", 'yellow'))
                    self.current_thread_pic_list.append(each_img)

            blockquotes = doc.xpath("//div[@class='content']/blockquote")
            for j, each_blockquote in enumerate(blockquotes, 1):
                print("blockquote <" + str(j) + "> ~~~~~~~~~~~\n")
                print(each_blockquote.text_content())
                print("~~~~~~~~~~~~~~~~~~~~~~~~~~\n")

            self.view_comment(link)

        def Get_Back_To_shouye(self):
            """Clear the screen and reprint the news list stored by read_shouye."""
            os.system('cls')
            for each_display in self.display_shouye_list:
                print(each_display)

        def exit(self):
            """Close the curl handle and show the goodbye banner for one second."""
            self.c.close()
            os.system('cls')
            print("""
     _                    _                 
    | |                  | |                
    | |__  _   _ _____   | |__  _   _ _____ 
    |  _ \| | | | ___ |  |  _ \| | | | ___ |
    | |_) ) |_| | ____|  | |_) ) |_| | ____|
    |____/ \__  |_____)  |____/ \__  |_____)
          (____/               (____/       
    
    """)
            time.sleep(1)
            os.system('cls')

        def view_comment(self, url):
            """Print the comments of the article at ``url``.

            The article id is the first run of digits in ``url``. Only page 1
            of the comment listing is requested
            (e.g. http://m.cnbeta.com/wap/comment/633621.htm?page=1).
            """
            tid = re.search(r"(\d+)", url).group(1)
            comment_url = "http://m.cnbeta.com/wap/comment/" + str(tid) + ".htm?page="

            doc = self._fetch_doc(comment_url + "1")
            content_nodes = doc.xpath("//div[@class='content']")

            print(colored("\n--------------- comment ---------------", 'green'))
            if content_nodes:
                # A page without a content div prints an empty section
                # instead of raising IndexError.
                print(content_nodes[0].text_content())
            print(colored("--------------- finished ---------------", 'green'))

        def view_image(self):
            """Open the picture viewer for the article read last.

            Returns when the viewer window is closed. Prints a notice and
            returns immediately when no article has been read yet or the
            article has no pictures.
            """
            pictures = getattr(self, 'current_thread_pic_list', None)
            if not pictures:
                print("no picture to preview")
                return

            print("launch picture viewer...")
            # Reuse the application object of an earlier preview, if any.
            viewer_app = QtGui.QApplication.instance() or QtGui.QApplication(sys.argv)
            # Bound to a name so the widget object stays referenced while exec_() runs.
            ex = Example(pictures)
            # No sys.exit() here: closing the viewer must return to the
            # command loop, not raise SystemExit.
            viewer_app.exec_()
    
    
    # Interactive command loop.
    #   s <page>   show index page <page>
    #   t <index>  read the article with that list index
    #   b          reprint the last news list
    #   c          clear the screen
    #   pic        preview the pictures of the article read last
    #   e          leave the reader
    app = Browser_cnbeta()

    while True:
        print("""
    
    
    
        """)
        nb = raw_input('Give me your command: \n')
        try:
            if nb.startswith('s '):
                found = re.search(r"s (\d+)", nb)
                if found:
                    app.read_shouye(found.group(1))
                else:
                    print("type correct command")
            elif nb.startswith('t '):
                found = re.search(r"t\s+(\d+)", nb)
                if found:
                    app.read_each_news(found.group(1))
                else:
                    print("type correct command")
            elif nb == "b":
                app.Get_Back_To_shouye()
            elif nb == "c":
                os.system('cls')  # on windows
            elif nb == "e":
                break
            elif nb == "pic":
                app.view_image()
            else:
                print("type correct command")
        except SystemExit:
            # A handler that ends via sys.exit() must not kill the interactive loop.
            pass
        except Exception as err:
            # Report the failure instead of silently printing an empty line.
            print("command failed: %r" % (err,))

    app.exit()
    

    相关文章

      网友评论

        本文标题:用python写一个cnBeta阅读器

        本文链接:https://www.haomeiwen.com/subject/kgdskxtx.html