美文网首页
Python爬虫练习:保存为excel文件和存入MySQL数据库

Python爬虫练习:保存为excel文件和存入MySQL数据库

作者: python小哥哥2020 | 来源:发表于2022-03-18 14:37 被阅读0次

    大家好,我是天空之城,今天给大家带来,爬取周杰伦歌曲信息分别保存为excel文件和存入MySQL数据库,此处周董可以替换为任意一位歌手,还可以将歌曲信息全部发送到任何一个人的邮箱中。

    <font size=5>第一部分,爬取周董歌曲信息分别保存为excel文件</font>

    import requests, openpyxl
    
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = 'geci'
    
    sheet['A1'] = '歌曲名'  # 加表头,给A1单元格赋值
    sheet['B1'] = '所属专辑'  # 加表头,给B1单元格赋值
    sheet['C1'] = '播放时长'  # 加表头,给C1单元格赋值
    sheet['D1'] = '播放链接'  # 加表头,给D1单元格赋值
    url = 'https://c.y.qq.com/soso/fcgi-bin/client_search_cp'
    for x in range(5):
    
        params = {
            'ct': '24',
            'qqmusic_ver': '1298',
            'new_json': '1',
            'remoteplace': 'sizer.yqq.song_next',
            'searchid': '64405487069162918',
            't': '0',
            'aggr': '1',
            'cr': '1',
            'catZhida': '1',
            'lossless': '0',
            'flag_qc': '0',
            'p': str(x + 1),
            'n': '20',
            'w': '周杰伦',
            'g_tk': '5381',
            'loginUin': '0',
            'hostUin': '0',
            'format': 'json',
            'inCharset': 'utf8',
            'outCharset': 'utf-8',
            'notice': '0',
            'platform': 'yqq.json',
            'needNewCode': '0'
        }
    
        res_music = requests.get(url, params=params)
        json_music = res_music.json()
        list_music = json_music['data']['song']['list']
        for music in list_music:
            # print(music['name'])
            # print('所属专辑:' + music['album']['name'])
            # print('播放时长:' + str(music['interval']) + '秒')
            # print('播放链接:https://y.qq.com/n/yqq/song/' + music['file']['media_mid'] + '.html\n\n')
    
            name = music['name']
            album = music['album']['name']
            time = str(music['interval'])
            link = 'https://y.qq.com/n/yqq/song/' + music['file']['media_mid'] + '.html\n\n'
    
            row = [name, album, time, link]
            sheet.append(row)
    wb.save('zhoujielun1.xlsx')
    
    

    看下Excel截图


    在这里插入图片描述

    <font size=5>第二部分,爬取周董(任何一个人)歌曲信息发送到任何一个人邮箱</font>

    import requests
    from bs4 import BeautifulSoup
    import smtplib
    from email.mime.text import MIMEText
    from email.header import Header
    
    
    headers = {
          'Referer': 'https://movie.douban.com/top250?start=1&filter=',
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:46.0) Gecko/20100101 Firefox/46.0'}
    url = 'https://c.y.qq.com/soso/fcgi-bin/client_search_cp'
    
    list_all=[]
    
    def getdata():
        name0 = input('请输入歌手名字:')
        for x in range(5):
            params = {
                'ct': '24',
                'qqmusic_ver': '1298',
                'new_json': '1',
                'remoteplace': 'sizer.yqq.song_next',
                'searchid': '64405487069162918',
                't': '0',
                'aggr': '1',
                'cr': '1',
                'catZhida': '1',
                'lossless': '0',
                'flag_qc': '0',
                'p': str(x + 1),
                'n': '20',
                'w': name0,
                'g_tk': '5381',
                'loginUin': '0',
                'hostUin': '0',
                'format': 'json',
                'inCharset': 'utf8',
                'outCharset': 'utf-8',
                'notice': '0',
                'platform': 'yqq.json',
                'needNewCode': '0'
            }
    
            res_music = requests.get(url, params=params,headers=headers)
            json_music = res_music.json()
            list_music = json_music['data']['song']['list']
            for music in list_music:
                name = music['name']
                album = music['album']['name']
                time = str(music['interval'])
                link = 'https://y.qq.com/n/yqq/song/' + music['file']['media_mid'] + '.html\n\n'
                list_all.append(name + '\n' + album + '\n' + time+'s' + '\n' + link )
        return '\n\n'.join(list_all)
    
    def send_email(list):
        mailhost = 'smtp.qq.com'
        qqmail = smtplib.SMTP()
        qqmail.connect(mailhost, 25)
        qqmail.login(account, password)
        content = '亲爱的,今天的推荐歌曲是:' + list
        message = MIMEText(content, 'plain', 'utf-8')
        subject = '今日推荐'
        message['Subject'] = Header(subject, 'utf-8')
        try:
            qqmail.sendmail(account, receiver, message.as_string())
            print('邮件发送成功')
        except:
            print('邮件发送失败')
        qqmail.quit()
    
    if __name__ == '__main__':
        account = input('请输入你的邮箱:')
        password = input('请输入你的邮箱密码:')
        receiver = input('请输入收件人:')
        mess = getdata()
        send_email(mess)
    

    <font size=5>第三部分,爬取周董(任何一个人)歌曲信息存入MySQL数据库</font>
    第一小步,先建表

    
    import pymysql
    
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='你的数据库密码', db='你的数据库名称', charset='utf8')
    
    cursor = conn.cursor()
    
    
    sql = """create table qqmusic3(
            m_id int primary key auto_increment,
            m_name varchar (100) not null,
            m_album text null,
            m_time varchar(10) not null,
            m_link varchar(100) not null
            
            )"""
    
    cursor.execute(sql)
    
    cursor.close()
    conn.close()
    
    

    第二小步,爬取数据存入数据库

    
    import requests,pymysql
    
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='你的数据库密码', db='你的数据库名称', charset='utf8')
    
    cursor = conn.cursor()
    
    name0=input('请输入你要爬取歌手的名字')
    
    headers={'Referer':'https://y.qq.com/',
        'User-Agent':'pgv_pvid=1717322000; ts_uid=6297933380; pgv_pvi=3979401216; RK=+ZbNcEN3Qw; ptcz=4278eb1e104c210e009b4fa2e86ef406c85fb5712664ab041b0ec59f39a70f78; ts_refer=www.baidu.com/link; yqq_stat=0; pgv_info=ssid=s7548246693; pgv_si=s6134429696; ts_last=y.qq.com/'
    
    }
    
    url = 'https://c.y.qq.com/soso/fcgi-bin/client_search_cp'
    for x in range(5):
    
        params = {
            'ct': '24',
            'qqmusic_ver': '1298',
            'new_json': '1',
            'remoteplace': 'sizer.yqq.song_next',
            'searchid': '64405487069162918',
            't': '0',
            'aggr': '1',
            'cr': '1',
            'catZhida': '1',
            'lossless': '0',
            'flag_qc': '0',
            'p': str(x + 1),
            'n': '20',
            'w': name0,
            'g_tk': '5381',
            'loginUin': '0',
            'hostUin': '0',
            'format': 'json',
            'inCharset': 'utf8',
            'outCharset': 'utf-8',
            'notice': '0',
            'platform': 'yqq.json',
            'needNewCode': '0'
        }
    
        res_music = requests.get(url, params=params,headers=headers)
        json_music = res_music.json()
        list_music = json_music['data']['song']['list']
        for music in list_music:
            # print(music['name'])
            # print('所属专辑:' + music['album']['name'])
            # print('播放时长:' + str(music['interval']) + '秒')
            # print('播放链接:https://y.qq.com/n/yqq/song/' + music['file']['media_mid'] + '.html\n\n')
    
            name = music['name']
            album = music['album']['name']
            time = str(music['interval'])
            link = 'https://y.qq.com/n/yqq/song/' + music['file']['media_mid'] + '.html\n\n'
            sql = 'insert into qqmusic3(m_name,m_album, m_time,m_link) values ("%s","%s","%s","%s")' % (
                name,album,time, link)
    
            cursor.execute(sql)
    
    
    
    conn.commit()
    cursor.close()
    conn.close()
    
    
    

    看一下数据库的数据


    在这里插入图片描述

    相关文章

      网友评论

          本文标题:Python爬虫练习:保存为excel文件和存入MySQL数据库

          本文链接:https://www.haomeiwen.com/subject/kkgidrtx.html