Here is the Python script first:
# -*- coding: utf-8 -*-
import csv
import re
from urllib.parse import urljoin

import chardet
import requests

header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/70.0.3538.77 Safari/537.36',
}


def web_link_collection(page_url):
    """Collect every <a href> on the page and return [url, status, anchor text] rows."""
    lis = []
    try:
        resp = requests.get(page_url, headers=header, timeout=3)
    except Exception as e:
        print('error:', e)
    else:
        # Detect the page encoding so non-UTF-8 pages decode correctly
        encoding = chardet.detect(resp.content)['encoding']
        resp.encoding = encoding
        # Capture the href value and the anchor text of every <a> tag
        link_list = re.findall(r'<a.*?href=["\'](.*?)["\'].*?>(.*?)</a>', resp.text, re.S | re.I)
        for link in link_list:
            # Resolve relative links against the page URL
            url = urljoin(page_url, link[0])
            sc = url_status(url.strip())
            text = link[1]
            print([url, sc, text])
            lis.append([url, sc, text])
    return lis


def url_status(url):
    """Request the URL without following redirects and return its status code."""
    sc = None  # stays None if the request fails
    try:
        res = requests.get(url, headers=header, allow_redirects=False, timeout=3)
    except Exception as e:
        print('error:', e)
    else:
        sc = res.status_code
        if sc == 301 or sc == 302:
            # For redirects, append the target URL after the status code
            loc_url = res.headers['Location']
            sc = '%s#%s' % (sc, loc_url)
    return sc


if __name__ == '__main__':
    url = ''  # page to check
    lis = web_link_collection(url)
    with open('result.csv', 'w', encoding='utf-8', newline='') as file:
        cw = csv.writer(file)
        cw.writerow(['URL', 'Status_code', 'Anchor text'])
        for line in lis:
            cw.writerow(line)
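For illustration, here is roughly what one row of result.csv might look like when a link returns a 301 redirect (the URLs below are made up, not real output):

URL,Status_code,Anchor text
https://example.com/old-page,301#https://example.com/new-page,Old product page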
Script notes
- What it does: collects every link on a page and writes them to a CSV file.
- Output format: URL - status code (for a 301 or 302, the redirect target is also recorded) - anchor text, as in the sample row after the script above.
- What it is useful for:
  - Finding whether any links return 500, 404, 301, 302 and similar statuses.
  - For text links, checking that the anchor text is set correctly; for image links, checking whether the alt attribute is present (see the sketch after this list).
  - Deciding whether certain links need rel="nofollow", a title="" attribute, or should be replaced with JS.
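The script above only records anchor text, so the alt check for image links has to be done separately. Below is a minimal sketch of that idea using the same requests/re approach; the function name find_img_missing_alt and its regexes are my own illustration, not part of the original script:

# -*- coding: utf-8 -*-
import re

import requests

header = {'User-Agent': 'Mozilla/5.0'}


def find_img_missing_alt(page_url):
    """Return the src of every <img> on the page that has no alt attribute."""
    resp = requests.get(page_url, headers=header, timeout=3)
    missing = []
    # Grab each complete <img ...> tag, then inspect its attributes
    for img_tag in re.findall(r'<img\b[^>]*>', resp.text, re.I):
        if not re.search(r'\balt\s*=', img_tag, re.I):
            src = re.search(r'src=["\'](.*?)["\']', img_tag, re.I)
            missing.append(src.group(1) if src else img_tag)
    return missing


if __name__ == '__main__':
    for src in find_img_missing_alt(''):  # page to check
        print('missing alt:', src)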
Further reading: https://searchengineland.com/heres-what-happened-when-i-followed-googlebot-for-3-months-308674