Fetching a single news article with a crawler (2018-11-03)

Author: 画奴 | Published 2018-11-03 23:29

The script below fetches one news page from cuc.edu.cn, extracts the title, source, date, view count, and body with BeautifulSoup, and saves the record to MySQL through pymysql:

    import pymysql
    import requests
    from bs4 import BeautifulSoup

    def getUrl():
        # Placeholder: intended to collect a list of news URLs later.
        pass

    def saverec(url, ntitle, nfrom, ndate, ncount, ncontent):
        # Connect to the local MySQL database: host, user, password, database.
        db = pymysql.connect(host="localhost", user="root", password="2017",
                             database="engword", charset="utf8")
        cursor = db.cursor()
        try:
            cursor.execute(
                "INSERT INTO cucnews(newsurl,title,newsfrom,newsdate,contents,newscount) "
                "VALUES(%s,%s,%s,%s,%s,%s)",
                (url, ntitle, nfrom, ndate, ncontent, ncount))
            db.commit()
        except Exception as e:
            print(e)
            db.rollback()
        finally:
            db.close()

    try:
        url = "http://www.cuc.edu.cn/zcyw/11569.html"
        r = requests.get(url)
        soup = BeautifulSoup(r.text, "html.parser")
        title = soup.find_all("h1")
        newsfrom = soup.find_all("span")
        newsdate = soup.find_all("span")
        viewcount = soup.find_all("span", attrs={"id": "hits"})
        newscontent = soup.find_all("article", attrs={"class": "con-area"})
        ntitle = title[0].get_text()
        # The source and date are sliced out of the first <span>'s text by
        # fixed character positions, which breaks if the page layout changes.
        nfrom = newsfrom[0].get_text()[27:30]
        ndate = newsdate[0].get_text()[67:77]
        ncount = int(viewcount[0].get_text())
        ncontent = newscontent[0].get_text()
        saverec(url, ntitle, nfrom, ndate, ncount, ncontent)
    except Exception as e:
        print("error:", e)
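For the INSERT in saverec() to succeed, a cucnews table must already exist in the engword database. Below is a minimal sketch of a schema that would satisfy it: the column names come from the INSERT statement above, but the types and lengths are assumptions, not taken from the original post.

    import pymysql

    # Assumed schema sketch: column names match the INSERT in saverec();
    # the types and lengths are guesses, not from the original post.
    DDL = """
    CREATE TABLE IF NOT EXISTS cucnews (
        id        INT AUTO_INCREMENT PRIMARY KEY,
        newsurl   VARCHAR(255),
        title     VARCHAR(255),
        newsfrom  VARCHAR(64),
        newsdate  VARCHAR(32),
        contents  TEXT,
        newscount INT
    ) DEFAULT CHARSET = utf8
    """

    db = pymysql.connect(host="localhost", user="root", password="2017",
                         database="engword", charset="utf8")
    try:
        with db.cursor() as cursor:
            cursor.execute(DDL)
    finally:
        db.close()

Keeping newsdate as a VARCHAR mirrors how the scraper passes it (a sliced string such as 2018-11-03); a DATE column would also work if the string were parsed first.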
