美文网首页
上交所最新公告爬取年报

上交所最新公告爬取年报

作者: 月夜星空下 | 来源:发表于2022-05-07 14:22 被阅读0次

    部分上交所年报在“定期报告”栏目和“发行上市公告”栏目中都取不到,但在“最新公告”栏目中能够拿到数据。
    该程序用于爬取“最新公告”栏目中的年报数据。

    import json
    import requests
    import datetime
    
    
    def noticeToAnnals(stock_code, START_DATE='2022-01-01', END_DATE=None):
        """Print the annual-report entries from the SSE "latest announcements" query.

        Sends one GET request to the SSE ``commonQuery.do`` endpoint, unwraps the
        JSONP response, and for every record in ``result`` that has a title
        containing '年度报告' (annual report) but not '摘要' (summary), prints the
        date, bulletin types, bulletin year, URL and title of that entry.

        Only the page described by the request parameters is fetched
        (``pageHelp.pageNo`` = 1, ``pageHelp.pageSize`` = 25).

        Args:
            stock_code: Security code sent as ``SECURITY_CODE``.
            START_DATE: Start of the date range, sent as ``START_DATE``.
            END_DATE: End of the date range, sent as ``END_DATE``. ``None``
                (the default) means today's date, computed at call time.

        Returns:
            None. Matching entries are written to stdout.
        """
        # Resolve the default on every call; a default expression in the signature
        # would be evaluated only once, when the function is defined.
        if END_DATE is None:
            END_DATE = datetime.datetime.now().strftime('%Y-%m-%d')

        URL_QUERY_COMPANY = 'http://query.sse.com.cn/commonQuery.do'
        # NOTE(review): the Referer header is presumably required by the endpoint -- confirm.
        HEADER = {
            'Referer': 'http://www.sse.com.cn',
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
        }
        # Name of the JSONP callback; the response body is expected to be wrapped
        # as "<callback>(...)".
        JSONP_CALLBACK = 'jsonpCallback72875491'
        param = {
            'jsonCallBack': JSONP_CALLBACK,
            'isPagination': 'true',
            'pageHelp.pageSize': '25',
            'pageHelp.cacheSize': '1',
            'type': 'inParams',
            'sqlId': 'COMMON_PL_SSGSXX_ZXGG_L',
            'START_DATE': START_DATE,
            'END_DATE': END_DATE,
            'SECURITY_CODE': stock_code,
            'TITLE': '',
            'BULLETIN_TYPE': '0101',
            'pageHelp.pageNo': '1',
            'pageHelp.beginPage': '1',
            'pageHelp.endPage': '1',
            '_': '1651221153479'
        }

        def listIndex(titles):
            """Return the position of the first full annual-report title, or None."""
            for position, title in enumerate(titles):
                if '年度报告' in title and '摘要' not in title:
                    return position
            return None

        # A timeout keeps the call from blocking forever on an unresponsive server.
        response = requests.get(URL_QUERY_COMPANY, params=param, headers=HEADER, timeout=30)
        payload = response.text.strip()

        # Strip the JSONP wrapper only when it is actually present.
        wrapper_prefix = JSONP_CALLBACK + '('
        if payload.startswith(wrapper_prefix) and payload.endswith(')'):
            payload = payload[len(wrapper_prefix):-1]

        # Treat a missing/null result list as "no records".
        records = json.loads(payload)['result'] or []
        for record in records:
            # Each field may hold several '<br>'-separated values; the same
            # position is used in every field to describe one announcement.
            TITLE = str(record['TITLE']).split('<br>')
            index = listIndex(TITLE)
            if index is None:
                # No full annual report in this record: nothing to print.
                continue

            SSEDATE = str(record['SSEDATE']).split('<br>')
            OLD_BULLETIN_TYPE = str(record['OLD_BULLETIN_TYPE']).split('<br>')
            BULLETIN_YEAR = str(record['BULLETIN_YEAR']).split('<br>')
            ORG_BULLETIN_TYPE = str(record['ORG_BULLETIN_TYPE']).split('<br>')
            URL = str(record['URL']).split('<br>')
            print(
                f'SSEDATE:{SSEDATE[index]}\nOLD_BULLETIN_TYPE:{OLD_BULLETIN_TYPE[index]}\nBULLETIN_YEAR:{BULLETIN_YEAR[index]}\nORG_BULLETIN_TYPE:{ORG_BULLETIN_TYPE[index]}'
                f'\nURL:{URL[index]}\nTITLE:{TITLE[index]}')
    
    
    if __name__ == '__main__':
        # Script entry point: run the query for one example stock code,
        # using the function's default date range.
        noticeToAnnals(stock_code='603993')
    
    

    相关文章

      网友评论

          本文标题:上交所最新公告爬取年报

          本文链接:https://www.haomeiwen.com/subject/flcxurtx.html