美文网首页
2018-09-06

2018-09-06

作者: 自闭的皮卡秋 | 来源:发表于2019-05-07 12:26 被阅读0次

    title: Python爬虫_一言
    date: 2018-12-20 01:07:14
    tags:


    最近新学了 Python 的 requests 库,闲来无事就简单地爬取了一下「一言」:https://hitokoto.cn/

    一言 分析

    一言没啥可以分析的,最简单的办法就是直接使用它的 API 接口 https://hitokoto.cn/api ,它会返回一串 JSON 格式的数据

    image

    爬取

    导入库

    import requests
    import json
    import pymysql
    import time
    

    直接使用 get 请求得到数据,并把各个字段保存到变量,最后将这些参数放入列表并返回

    def getyiyan(i):
        """Fetch one sentence from the hitokoto API.

        Args:
            i: Attempt counter; only used in the progress message.

        Returns:
            A list ``[id, contents, type, from]`` taken from the JSON response.

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
            KeyError: If the response lacks one of the expected fields.
        """
        # A timeout keeps the endless crawl loop from hanging on a stalled socket.
        r = requests.get('https://v1.hitokoto.cn/', timeout=10)
        # Fail loudly on an HTTP error instead of parsing an error page.
        r.raise_for_status()
        s = json.loads(r.text)
        # Local names avoid shadowing the builtins ``id`` and ``type``.
        sentence_id = s["id"]
        contents = s["hitokoto"]
        kind = s["type"]
        froms = s["from"]
        print("第 ", i, " 次 ", sentence_id)
        return [sentence_id, contents, kind, froms]
    

    数据库

    数据库基本参数

    # Connection settings for the local MySQL server.
    host = 'localhost'
    user = 'root'
    # NOTE(review): looks like a masked placeholder — supply the real password.
    password = '******'
    port = 3306
    db = 'yiyan'
    # Interpolated as the table name in the INSERT and SELECT statements.
    dbname = 'yiyan'
    

    数据库调用

    def mysql(sql):
        """Execute one SQL statement on a fresh connection and commit it.

        The connection is opened from the module-level settings (``host``,
        ``user``, ``password``, ``port``, ``db``) and is always closed before
        returning.

        Args:
            sql: Complete SQL statement text to execute.

        Returns:
            The value returned by ``cursor.execute`` (the row count), or 0 if
            the statement failed and the transaction was rolled back.
        """
        conn = pymysql.connect(host=host, user=user, password=password, port=port, db=db)
        # Pre-set so a failed statement yields a falsy result instead of
        # an UnboundLocalError at the return below.
        result = 0
        try:
            cursor = conn.cursor()
            result = cursor.execute(sql)
            conn.commit()
        except pymysql.MySQLError as exc:
            conn.rollback()
            result = 0
            print("SQL failed, rolled back:", exc)
        finally:
            conn.close()
        return result
    

    存储到数据库

    def intomysql(yiyan):
        """Insert one fetched sentence into the table named by ``dbname``.

        Args:
            yiyan: Sequence ``[id, contents, type, from]`` as returned by
                ``getyiyan``.

        Raises:
            ValueError: If the id cannot be converted to an integer.
        """
        from pymysql.converters import escape_string

        # The id is interpolated unquoted, so force it to an integer.
        sentence_id = int(yiyan[0])
        # The text fields come from a remote API and may contain quotes or
        # backslashes; escape them so they cannot break out of the SQL literal.
        contents = escape_string(str(yiyan[1]))
        kind = escape_string(str(yiyan[2]))
        froms = escape_string(str(yiyan[3]))
        sql = "INSERT INTO %s(`id`,`contents`,`type`,`from`) VALUES(%s,'%s','%s','%s')" % (dbname, sentence_id, contents, kind, froms)
        if mysql(sql):
            print(yiyan,"保存到数据库成功 ")
    

    数据库查重

    def isinsql(yiyan):
        """Report whether the sentence's id is already stored.

        Args:
            yiyan: Sequence whose first element is the sentence id.

        Returns:
            -1 (after printing a notice) when the lookup query gives a truthy
            result, otherwise 1.
        """
        sentence_id = yiyan[0]
        print("正在检查id",sentence_id)
        lookup = "Select * FROM %s WHERE `id`=%s" %(dbname,sentence_id)
        # Guard clause: a falsy result means no matching row was reported.
        if not mysql(lookup):
            return 1
        print(sentence_id,"已存在于数据库")
        return -1
    

    运行

    if __name__ == '__main__':
        # Crawl forever: the counter only advances after a new sentence has
        # been handed to intomysql; duplicates are fetched again at once.
        attempt = 1
        while attempt > 0:
            print("--------------------------------")
            # Optional throttle between requests (disabled): time.sleep(1)
            record = getyiyan(attempt)
            if isinsql(record) >= 0:
                intomysql(record)
                attempt += 1
    

    源代码

    import requests
    import json
    import pymysql
    import time
    
    
    # Connection settings for the local MySQL server.
    host = 'localhost'
    user = 'root'
    # Empty password string.
    password = ''
    port = 3306
    db = 'yiyan'
    # Interpolated as the table name in the INSERT and SELECT statements.
    dbname = 'yiyan'
    
    
    
    # Database access helper
    def mysql(sql):
        """Execute one SQL statement on a fresh connection and commit it.

        The connection is opened from the module-level settings (``host``,
        ``user``, ``password``, ``port``, ``db``) and is always closed before
        returning.

        Args:
            sql: Complete SQL statement text to execute.

        Returns:
            The value returned by ``cursor.execute`` (the row count), or 0 if
            the statement failed and the transaction was rolled back.
        """
        conn = pymysql.connect(host=host, user=user, password=password, port=port, db=db)
        # Pre-set so a failed statement yields a falsy result instead of
        # an UnboundLocalError at the return below.
        result = 0
        try:
            cursor = conn.cursor()
            result = cursor.execute(sql)
            conn.commit()
        except pymysql.MySQLError as exc:
            conn.rollback()
            result = 0
            print("SQL failed, rolled back:", exc)
        finally:
            conn.close()
        return result
    
    # Fetch data from hitokoto
    def getyiyan(i):
        """Fetch one sentence from the hitokoto API.

        Args:
            i: Attempt counter; only used in the progress message.

        Returns:
            A list ``[id, contents, type, from]`` taken from the JSON response.

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
            KeyError: If the response lacks one of the expected fields.
        """
        # A timeout keeps the endless crawl loop from hanging on a stalled socket.
        r = requests.get('https://v1.hitokoto.cn/', timeout=10)
        # Fail loudly on an HTTP error instead of parsing an error page.
        r.raise_for_status()
        s = json.loads(r.text)
        # Local names avoid shadowing the builtins ``id`` and ``type``.
        sentence_id = s["id"]
        contents = s["hitokoto"]
        kind = s["type"]
        froms = s["from"]
        print("第 ", i, " 次 ", sentence_id)
        return [sentence_id, contents, kind, froms]
    # Store a sentence in the database
    def intomysql(yiyan):
        """Insert one fetched sentence into the table named by ``dbname``.

        Args:
            yiyan: Sequence ``[id, contents, type, from]`` as returned by
                ``getyiyan``.

        Raises:
            ValueError: If the id cannot be converted to an integer.
        """
        from pymysql.converters import escape_string

        # The id is interpolated unquoted, so force it to an integer.
        sentence_id = int(yiyan[0])
        # The text fields come from a remote API and may contain quotes or
        # backslashes; escape them so they cannot break out of the SQL literal.
        contents = escape_string(str(yiyan[1]))
        kind = escape_string(str(yiyan[2]))
        froms = escape_string(str(yiyan[3]))
        sql = "INSERT INTO %s(`id`,`contents`,`type`,`from`) VALUES(%s,'%s','%s','%s')" % (dbname, sentence_id, contents, kind, froms)
        if mysql(sql):
            print(yiyan,"保存到数据库成功 ")
    
    
    
    # Duplicate check against the database
    def isinsql(yiyan):
        """Report whether the sentence's id is already stored.

        Args:
            yiyan: Sequence whose first element is the sentence id.

        Returns:
            -1 (after printing a notice) when the lookup query gives a truthy
            result, otherwise 1.
        """
        sentence_id = yiyan[0]
        print("正在检查id",sentence_id)
        lookup = "Select * FROM %s WHERE `id`=%s" %(dbname,sentence_id)
        # Guard clause: a falsy result means no matching row was reported.
        if not mysql(lookup):
            return 1
        print(sentence_id,"已存在于数据库")
        return -1
    
    
    
    if __name__ == '__main__':
        # Crawl forever: the counter only advances after a new sentence has
        # been handed to intomysql; duplicates are fetched again at once.
        attempt = 1
        while attempt > 0:
            print("--------------------------------")
            # Optional throttle between requests (disabled): time.sleep(1)
            record = getyiyan(attempt)
            if isinsql(record) >= 0:
                intomysql(record)
                attempt += 1
    

    数据库参数信息

    image

    相关文章

      网友评论

          本文标题:2018-09-06

          本文链接:https://www.haomeiwen.com/subject/hayxgftx.html