美文网首页
Python爬虫:从TXT导入数据

Python爬虫:从TXT导入数据

作者: 洋阳酱 | 来源:发表于2019-05-21 22:23 被阅读0次
    程序运行结果

    # -*- coding: utf-8 -*-
    """
    Created on Tue May 14 2019
    
    @author: YangYang
    """
    
    from urllib.request import urlopen
    import datetime
    import re
    import xlwt
    
    # Create the in-memory workbook with a single sheet and fill in row 0
    # with the column titles.
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('单词群打卡')
    # Titles, left to right: Shanbay ID, Shanbay username, word total,
    # average, study time.
    head = ['扇贝ID', '扇贝用户名', '单词总计', '平均', '学习时间']
    for column, title in enumerate(head):
        sheet.write(0, column, title)
    
    # Work out the reporting window for the check-in statistics.
    now = datetime.datetime.now()        # the check starts from today
    # To check a specific day instead, replace the line above with e.g.:
    #     now = datetime.date(2019, 5, 13)
    print("查卡日期:",now)
    print('\n')

    # Both bounds are kept as "YYYY-MM-DD" text and compared as strings later
    # in the script. day_end lies 8 days back and is an exclusive lower bound;
    # day_now is excluded as well, which leaves the 7 days before today.
    window = datetime.timedelta(days=8)
    day_now = now.isoformat()[:10]
    day_end = (now - window).isoformat()[:10]
    
    print("开始读取ID数据")
    print("数据位置:")
    print("C:/Users/Administrator/Desktop/user.txt")
    print('\n')

    # Load the user IDs from the text file, one ID per line.
    # The with-block closes the file as soon as its content has been read.
    with open('C:/Users/Administrator/Desktop/user.txt') as ID_total_input:
        ID_total = ID_total_input.read()
    # Strip every line and drop the empty ones, so that a trailing newline or
    # stray whitespace does not become a bogus ID that is later inserted into
    # a request URL.
    ID_total = [line.strip() for line in ID_total.split("\n") if line.strip()]
    
    # Excel row to write next; row 0 already holds the header.
    i = 1

    for ID in ID_total:
        # Ignore blank entries (e.g. produced by a trailing newline in the ID
        # file); they would otherwise be requested as an invalid URL.
        ID = str(ID).strip()
        if not ID:
            continue

        web = "https://www.shanbay.com/api/v1/checkin/user/"+ID+"/"
        # The with-block closes the HTTP response once the body has been read.
        with urlopen(web) as shanbay:
            shanbay_data = shanbay.read().decode()

        # Nickname: the text between 'username": "' and the closing '",'.
        find_username = re.findall("username\".*?,",shanbay_data)
        if find_username:
            username = find_username[0][len("username\": \""):-2]
        else:
            username = ""    # the response carried no username field

        # One text chunk per check-in record, holding that record's statistics.
        find_data = re.findall("\"stats\".*?track_object_img" ,shanbay_data)

        # Date of every check-in record: the 10 characters that follow the
        # '"checkin_time": "' prefix.
        find_checkin = re.findall("\"checkin_time\".*?\"share_urls\"",shanbay_data)
        date_start = len("\"checkin_time\": \"")
        checkin_time = [
            checkin.split(",")[0][date_start:date_start+10]
            for checkin in find_checkin
        ]

        time_bdc = 0     # accumulated study time
        bdc_total = 0    # accumulated word count

        # Statistics chunks and dates are paired by position. zip() stops at
        # the shorter list, so a response in which the two counts differ can
        # no longer raise IndexError.
        for data, day in zip(find_data, checkin_time):
            if day >= day_now:
                continue     # today (or later) is not counted
            if day <= day_end:
                break        # left the reporting window; stop scanning

            bdc = re.findall("\"bdc\":.*?}",data)
            if bdc:
                # The first number is taken as the word count and the second
                # as the study time.
                # NOTE(review): this relies on the key order inside "bdc" in
                # the API response - confirm against a live response.
                numbers = re.findall(r"\d+\.?\d*", " ".join(bdc))
                bdc_num, bdc_time = numbers[0], numbers[1]
            else:
                # No vocabulary data in this record: count it as zero.
                bdc_num, bdc_time = "0", "0.0"

            time_bdc = time_bdc+float(bdc_time)
            bdc_total = bdc_total+float(bdc_num)

        # Daily average over a fixed 7 days, rounded to two decimals.
        average = round(bdc_total/7, 2)
        print("ID:{},昵称:{},背单词总计:{},平均:{},时长:{}分钟".format(ID,username,bdc_total,average,time_bdc))

        # Store the result in row i, columns 0-4, in header order.
        sheet.write(i, 0, ID)
        sheet.write(i, 1, username)
        sheet.write(i, 2, bdc_total)
        sheet.write(i, 3, average)
        sheet.write(i, 4, time_bdc)
        i += 1

    # Write the workbook to disk and tell the user where it went.
    workbook.save('C:/Users/Administrator/Desktop/单词群打卡.xls')
    print('\n')
    print('写入excel成功')
    print("文件位置:")
    print("C:/Users/Administrator/Desktop/单词群打卡.xls")
    print('\n')
    # Keep the console window open until the user presses Enter.
    input("查卡完毕,点击回车退出")
    

    小组打卡数据输出到 Excel 的结果如下(昵称和 ID 已做打码处理):

    数据保存至Excel

    相关文章

      网友评论

          本文标题:Python爬虫:从TXT导入数据

          本文链接:https://www.haomeiwen.com/subject/egaszqtx.html