美文网首页
用Python3爬取NBA官方统计数据

用Python3爬取NBA官方统计数据

作者: 多啦A梦詹 | 来源:发表于2020-02-23 23:59 被阅读0次
    import sys
    import os
    import requests
    import time
    import urllib
    from bs4 import BeautifulSoup
    import re
    from xlwt import Workbook
    import urllib.request
    # Switch to a fixed working directory: the relative output paths used by
    # this script ('nba_data.txt' and the dated .xls workbook) resolve against it.
    # NOTE(review): hard-coded Windows path; os.chdir raises if the directory
    # does not exist, so the script fails at import time on other machines.
    os.chdir("D:/python")
    # https://github.com/schnauzers/predict/blob/master/getrecord.py
    
    def getURLLists(url_header,url_tail,pages):
      """
      Build the URL of every result page, from page 0 through page ``pages``.

      Each URL is ``url_header + <page number> + url_tail``. The page-0 URL is
      printed to stdout as a side effect.

      Returns a list of ``pages + 1`` URLs in page order; when ``pages`` is
      less than 1 the list holds only the page-0 URL.
      """
      first_url = url_header + '0' + url_tail
      print(first_url)
      later_urls = [url_header + str(page_no) + url_tail
                    for page_no in range(1, pages + 1)]
      return [first_url] + later_urls
    def getNBAAllData(url_lists):
      """
      Fetch every page in ``url_lists`` and merge the cells into one flat list.

      Each URL is passed to ``getNBASingleData``; the returned cell strings are
      appended in page order and empty strings are discarded.

      Returns a flat list of non-empty strings (an empty list when
      ``url_lists`` is empty).
      """
      datasets = []
      for page_url in url_lists:
        # Filter empties while merging: one pass over the data instead of
        # calling list.remove() once per empty cell, which rescans the list
        # from the start every time.
        datasets.extend(cell for cell in getNBASingleData(page_url)
                        if len(cell) != 0)
      return datasets
    def getNBASingleData(url):
      """
      Download one result page and return the text of its first <tbody>,
      split on newlines.

      The returned list still contains empty strings wherever the table text
      has blank lines; getNBAAllData removes them after merging.

      Raises:
        ValueError: the page contains no <tbody> element.
        Whatever urllib.request.urlopen raises when the request fails.
      """
      # The context manager closes the HTTP response even if read() fails.
      with urllib.request.urlopen(url) as response:
        html = response.read()
      # NOTE(review): no parser is named, so bs4 picks whichever parser is
      # installed; pin one explicitly once the page has been checked with it.
      soup = BeautifulSoup(html)
      table_body = soup.find('tbody')
      if table_body is None:
        # Fail with the offending URL instead of an AttributeError on None.
        raise ValueError('no <tbody> element found in page: ' + url)
      return table_body.text.split('\n')
    def saveDataToExcel(datasets,sheetname,filename):
      """
      Write ``datasets`` into a new workbook sheet, 24 cells per row, and save it.

      Row 0 receives the 24 column headers. The flat ``datasets`` list is then
      laid out row by row starting at row 1. If its length is not a multiple
      of 24, the final row is written partially instead of raising IndexError.

      Args:
        datasets: flat list of cell values in row-major order.
        sheetname: name of the sheet to create.
        filename: path the workbook is saved to.

      Prints the total cell count and the number of each row as it is written.
      """
      headers = (
        u'序号', u'球队', u'时间', u'结果', u'主客', u'比赛',
        u'投篮命中率', u'命中数', u'出手数',
        u'三分命中率', u'三分命中数', u'三分出手数',
        u'罚球命中率', u'罚球命中数', u'罚球出手数',
        u'篮板', u'前场篮板', u'后场篮板',
        u'助攻', u'抢断', u'盖帽', u'失误', u'犯规', u'得分',
      )
      num = len(headers)

      book = Workbook()
      sheet = book.add_sheet(sheetname)
      for col, title in enumerate(headers):
        sheet.write(0, col, title)

      data_len = len(datasets)
      print('data_len:',data_len)
      # Walk the flat list in strides of one row; slicing bounds the last row
      # to the cells that actually exist.
      for row_cnt, start in enumerate(range(0, data_len, num), start=1):
        print('序号:',row_cnt)
        for col, value in enumerate(datasets[start:start + num]):
          sheet.write(row_cnt, col, value)
      book.save(filename)
    def writeDataToTxt(datasets):
      """
      Write ``datasets`` to 'nba_data.txt' as tab-separated text, 24 cells per line.

      Every cell is followed by one tab. The second cell of each line (the
      team name) gets two tabs when it is shorter than 5 characters, and any
      cell equal to '费城76人' also gets two tabs, to keep the columns aligned.
      A newline is written after every 24th cell.

      All cells are written, including the last one. The file is overwritten
      on each call and encoded as UTF-8.
      """
      # 'with' closes the file even if a write fails. UTF-8 is explicit so the
      # Chinese team names are written the same way on every platform.
      with open('nba_data.txt','w',encoding='utf-8') as fp:
        for line_cnt, cell in enumerate(datasets, start=1):
          is_team_column = line_cnt % 24 == 2
          # Parentheses spell out the precedence: the 76ers check applies to
          # any column, the short-name check only to the team column.
          if (is_team_column and len(cell) < 5) or cell == u'费城76人':
            fp.write(cell+'\t\t')
          else:
            fp.write(cell+'\t')
          if line_cnt % 24 == 0:
            fp.write('\n')
    
    if __name__ == "__main__":
      # Query URL pieces for the 2016-2017 regular-season team game log; the
      # page number is inserted between the two parts.
      url_header = 'http://stat-nba.com/query_team.php?page='
      url_tail = '&QueryType=game&order=1&crtcol=date_out&GameType=season&PageNum=3000&Season0=2016&Season1=2017#label_show_result'
      # Index of the last page to request.
      # NOTE(review): presumably 1132 result rows at 150 per page; confirm.
      pages = 1132 // 150

      # Download all pages, then export the same data as text and as .xls.
      url_lists = getURLLists(url_header,url_tail,pages)
      datasets = getNBAAllData(url_lists)
      writeDataToTxt(datasets)

      # The workbook file name carries today's local date.
      str_time = time.strftime('%Y-%m-%d', time.localtime())
      filename = 'nba_normal_data' + str_time + '.xls'
      sheetname = 'nba normal data 2016-2017'
      saveDataToExcel(datasets,sheetname,filename)
    

    相关文章

      网友评论

          本文标题:用Python3爬取NBA官方统计数据

          本文链接:https://www.haomeiwen.com/subject/bofyqhtx.html