可以更好地下载网页上的内容了!

该程序第一版见此:
https://www.jianshu.com/p/5055edb0867b
前后共写了 5 版, 很多问题终于改进完成。

后端源码
import sys, io, re, os, shutil, ast
from urllib import request
from PySide2.QtWidgets import QApplication, QMainWindow, QFileDialog
from PySide2 import QtGui, QtWidgets, QtCore
from demo import Ui_MainWindow
from PySide2.QtGui import QIcon
class MainWindow(QMainWindow, Ui_MainWindow):
    """Main window of the redmine attachment downloader.

    Wires the widgets created by ``Ui_MainWindow.setupUi`` to two actions:
    choosing a target folder, and downloading the matching attachments of one
    redmine issue into a ``redmine_<number>-<title>`` folder.
    """

    # Browser-like User-Agent sent with every HTTP request.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'

    # Base URL of each supported redmine server, keyed by the internal type code.
    _REDMINE_HOSTS = {
        '01': "http://redmine.springgroup.cn",
        '02': "http://redmine02.springgroup.cn",
    }

    # Optional configuration file, looked up in the current working directory.
    _CFG_FILE = 'settings.cfg'

    # First "/redmine..." path found after an href=" marker, up to the next quote.
    _HREF_PATTERN = re.compile(r'href=".*?(/redmine[^"]*)"')

    # Text between the first '>' and the next '<' on a line.
    _TAG_TEXT_PATTERN = re.compile(r">(.*?)<")

    def __init__(self, base_dir):
        """Build the UI and connect the two buttons.

        Args:
            base_dir: Directory the process switches back to before every
                download; ``settings.cfg`` is read relative to it.
        """
        super().__init__()
        self.base_dir = base_dir
        self.setupUi(self)
        self.select_floder.clicked.connect(self.open_floder)
        self.submit_ok.clicked.connect(self.download_readmine)

    @staticmethod
    def _first_tag_text(text):
        """Return the text between the first ``>`` and the following ``<``.

        Raises:
            ValueError: If *text* contains no ``>...<`` span.
        """
        found = MainWindow._TAG_TEXT_PATTERN.search(text)
        if found is None:
            raise ValueError("no tag text found in: %s" % text.strip())
        return found.group(1)

    @staticmethod
    def _safe_name(name):
        """Replace characters that are not allowed in Windows file names by ``_``.

        The names come from the downloaded HTML, so path separators must not
        be allowed to redirect where files are written.
        """
        return re.sub(r'[\\/:*?"<>|]', '_', name)

    def _open_url(self, url, cookie_str):
        """Open *url* with the session cookie and the browser User-Agent."""
        req = request.Request(url)
        req.add_header('cookie', cookie_str)
        req.add_header('User-Agent', self._USER_AGENT)
        return request.urlopen(req)

    def get_download_url(self, sub_url, redmine_type):
        """Build the absolute download URL from one HTML line.

        Args:
            sub_url: HTML line containing ``href="...redmine..."``.
            redmine_type: ``'01'`` or ``'02'``, selecting the server.

        Returns:
            The server base URL followed by the ``/redmine...`` path.

        Raises:
            ValueError: If the type is unknown or the line has no such link.
        """
        if redmine_type not in self._REDMINE_HOSTS:
            raise ValueError("unknown redmine type: %s" % redmine_type)
        found = self._HREF_PATTERN.search(str(sub_url))
        if found is None:
            raise ValueError("no redmine link found in: %s" % str(sub_url).strip())
        return self._REDMINE_HOSTS[redmine_type] + found.group(1)

    def get_file_name(self, sub_url):
        """Return the link text that follows the first ``href="`` in the line.

        Raises:
            ValueError: If the line has no ``href="`` or no link text.
        """
        text = str(sub_url)
        start = text.find('href="')
        if start < 0:
            raise ValueError("no href found in: %s" % text.strip())
        # Only look at the rest of the line that holds the href.
        return self._first_tag_text(text[start:].partition('\n')[0])

    def get_span_info(self, span_url):
        """Return the text between the first ``>`` and the next ``<`` of the line."""
        return self._first_tag_text(str(span_url))

    def get_cfg_value(self, cfg_name):
        """Look up one ``name = value`` entry in ``settings.cfg``.

        Lines containing ``#`` are skipped. Values of names containing
        ``_list`` are parsed with ``ast.literal_eval``.

        Args:
            cfg_name: Name of the entry to read.

        Returns:
            The value, or the string ``'null'`` when the file or the entry is
            missing, so callers can fall back to their built-in defaults.
        """
        try:
            cfg_file = open(self._CFG_FILE, 'r', encoding="utf-8")
        except FileNotFoundError:
            return 'null'
        with cfg_file:
            for line in cfg_file:
                if '#' in line:
                    continue
                parts = line.split(' = ')
                if parts[0].strip() != cfg_name:
                    continue
                cfg_value = parts[-1].strip()
                # Only the requested entry is evaluated, so a malformed
                # unrelated list cannot break this lookup.
                if '_list' in cfg_name:
                    cfg_value = ast.literal_eval(cfg_value)
                return cfg_value
        return 'null'

    def pre_fun(self, save_dir, redmine_number):
        """Resolve the settings for one issue and prepare its working folder.

        Six-digit numbers select server ``'01'``, four-digit numbers server
        ``'02'``. Inside *save_dir*, folders left by an earlier download of the
        same issue are removed, a folder named after the number is created,
        and the process changes into it.

        Returns:
            ``(redmine_url, cookie_str, redmine_type, match_list, match_info_list)``.

        Raises:
            ValueError: If *redmine_number* is not 4 or 6 ASCII digits. This is
                raised before anything on disk is touched.
        """
        # The configuration is read first, while the working directory is
        # still the one set by the caller.
        match_list = self.get_cfg_value('match_list')
        match_info_list = self.get_cfg_value('match_info_list')
        if match_list == 'null':
            match_list = ["rar", "zip", "tgz", "tar", "配置", "更新操作手册.html", "war", "发布清单", "conf", "更新清单"]
        if match_info_list == 'null':
            match_info_list = ["rar", "zip", "tgz", "tar", "配置", "war", "发布清单", "conf", "更新清单"]
        self.textEdit_3.append(save_dir)
        self.textEdit_3.append(redmine_number)

        if not re.fullmatch(r'[0-9]+', redmine_number) or len(redmine_number) not in (4, 6):
            raise ValueError("redmine号 可能有误")

        # NOTE(review): the fallback cookies below are session credentials
        # hard-coded in source; prefer cookie_str_01 / cookie_str_02 in
        # settings.cfg.
        if len(redmine_number) == 6:
            redmine_type = '01'
            default_cookie = r'_redmine_session=ajhuOC9xbG9NaWlyUjJ4RTBzcDF4cjl1SVVzUlF4V1dURitCQ2x1U0FpQ1kva1ZrM1ppZ3FDTjVXbnNkdlNHSld3WCt4UjVIYlFBcFhMd29mTVdTc290ZGk5WGRERzl0RmR6V3VubFMxQkF1VGQvQlVGcHdEZWhkMTJFMzNGbVdQSlhYcnJldG81aGxUMGMxK0k0TWxLV1FJdHNnakx5Zm8yNWFMdXk4NWpYZVpGQmVBeFc1eUVDQWJFUGlwNjV4SXB1S0IxKy9EYUVOczNJMUU4VnEyMzh3eDRLYW9JRWVKL2toZVNhTnkvN051RFVVVnExYlU2Zmk5U2xPUkFzc0tqdWl2eDFJMFBOeFJoUmE1dklGaVNsQm1NNi8xazRLcG1icHRFS01DUTR4MGt3dENnYWs4TFB6WENZemZZQW45MU1qNlBTWmcxTytXbHZhWkZPck9pdXZVMzZpWVl6VlBHUmtHMjVTQkNTWS9TRWtjbFhRdGJGaWVHazVseDFSMHJ5WGd3dDBTTGNNK3plV3VaZWI0NVcxVmZBeWR6N3ZkUnQ4UWN6ZStIRHdCUi9tVm5GR044dEMvQW14czR3TFpKMHZEWDFSYlhqbXN4dzlYZWY0S1ZxZVpBRE5Vc2ZaSmlzdFFkZ0xuQ0FhZi9DMExtTkR0T0dSY2VIRHl2L0hqcjVYME02S3E5blBHeGdYVkh2LzhyeW5iRmxiRHJJTW05UG56MTIzSGpZPS0tK2tCc0cway9MNHZTdzQ0Ynd5K21YQT09--0d7676e2c2d3bad6d03b6fce8bb19986b06a3be0'
        else:
            redmine_type = '02'
            default_cookie = r'_redmine_session=Yk9lZEcvUG5KYVNDMXAvMGs3MG5McExRbFBSdmc5ZUl0ZEVXKzJSUi93Y2lYSlV6bDhndGpVMlB3UGxHOWdlK3pqaFV3TTBjREFvOEVQWHUraklsbHU0TUcvdmRsVE9aUTZtSFdIVWV3c1diY0VucUZWQnhTNTdyZXI0TzM3WWtobWF2dnp0VUFUNWQ4bCtCQTN1VXFEMENlY1VmRWZuQk9na3l6TFRXcDd3eU9TY3psZVZsanIveGpaT3ZaeDlXS1R4eUFtRGtaWEltQmFqOTZLR1JhUjZhWUFNWXJvbkhTYlJnMmpTOGxUR0VXUGJTYWRGNjhDU0F5TGJDZjlMUUl5ZnNmRnNPcDg1OWF5d2h0QU8rVFVBdmd5SkphYjhDRGJYS3hDNGg3cGxLOGllSERXam5lWVZvVHBWMFdEUXlZbUx6d1pxRjN1SEttT0kwR0VqSXdVWlZYSGxNaUxneUFpNTNMOVRzVDlIeFY4cVhEUXVPZGhuNWRRY1BzakFiYzM1UVJFWnpLODQ1ZmsxYndnUmFkWW1CVDg0VHlRcDF4U3hOYm5lanUyV3J0elNuRTdHczh6VEZ2ZC9yM3VkYnBKc0czRXo2Vk9QUW9rRUhVWS9WcnVLdEFIeSs4dldOdzBYcE9HSDZqZjkvOVRpS2hQeHhvazBPVTdzYzFsaWVNcUVtR1J2eHpRa1BqNStPa3ZWcmZLUFZBT05seUxsR1hPV0RsMzRnb2FWSmwwbG1pT1hVOGNpRGVSWE8vQ01zbUNSamtOcjRZcmN5eHQvajUzYXpoelg1T2psd2RBd0VNalU2WlV5TWhScz0tLWRVc2swV1FpRGlEdGZ5SVZLeEtHemc9PQ%3D%3D--8c8f689ced7f53990688848cb0ea665f0d1a011b'
        cookie_str = self.get_cfg_value('cookie_str_' + redmine_type)
        if cookie_str == 'null':
            cookie_str = default_cookie
        redmine_url = self._REDMINE_HOSTS[redmine_type] + '/redmine/issues/' + redmine_number

        os.chdir(save_dir)
        stale_prefix = 'redmine_' + redmine_number + '-'
        for entry in os.listdir('.'):
            # Only folders this tool itself creates are removed; plain files
            # and unrelated names are left alone.
            if (entry.startswith(stale_prefix) or entry == redmine_number) and os.path.isdir(entry):
                shutil.rmtree(entry)
        os.makedirs(redmine_number, exist_ok=True)
        os.chdir(redmine_number)
        return redmine_url, cookie_str, redmine_type, match_list, match_info_list

    def _download_attachment(self, li, cookie_str, redmine_type):
        """Download the attachment linked on HTML line *li* into the current folder."""
        file_name = self._safe_name(self.get_file_name(li))
        download_url = self.get_download_url(li, redmine_type)
        self.textEdit_3.append("--- %s ---" % (file_name))
        # Stream to disk instead of holding the whole attachment in memory.
        with self._open_url(download_url, cookie_str) as src, open(file_name, "wb") as dst:
            shutil.copyfileobj(src, dst)

    def start_download(self, redmine_url, cookie_str, redmine_type, redmine_number, match_list, match_info_list):
        """Scan the issue page, download matching attachments and write the info file.

        Expects the current working directory to be the folder named after
        *redmine_number* (as left by ``pre_fun``). When done, the process
        moves one level up and the folder is renamed to
        ``redmine_<number>-<title>`` if a title line was found.

        Args:
            redmine_url: URL of the issue page.
            cookie_str: Session cookie sent with every request.
            redmine_type: ``'01'`` or ``'02'``, selecting the download server.
            redmine_number: Issue number as a string.
            match_list: Keywords; a download link line containing one of them
                is downloaded.
            match_info_list: Keywords; an attachment paragraph containing one
                of them is recorded in the info file.
        """
        attachment_mark = '<p><a class="icon icon-attachment" href="'
        title_name = ''
        dir_name = ''
        in_info_block = False
        info_parts = [redmine_url + '\n' + 'redmine' + redmine_number + '_v' + '\n\n']

        with self._open_url(redmine_url, cookie_str) as resp:
            # Iterating the response ends at EOF, so a page without
            # "</html>" can no longer loop forever.
            for raw_line in resp:
                li = raw_line.decode('utf-8', errors='replace')
                if "</html>" in li:
                    break
                excluded = "数据库" in li or "已添加" in li

                if not in_info_block and attachment_mark in li and not excluded:
                    in_info_block = any(key in li for key in match_info_list)
                elif '</p>' in li:
                    in_info_block = False

                if in_info_block:
                    if 'icon icon-attachment' in li:
                        info_parts.append(str(self.get_file_name(li)) + ' ')
                    elif '<span class="author">' in li:
                        info_parts.append(str(self.get_span_info(li)) + '\n')
                    elif '<span class="size">' in li:
                        info_parts.append(str(self.get_span_info(li)) + ' ')

                if redmine_number in li and '</title' in li:
                    title_parts = li.split(':')
                    if len(title_parts) > 1:
                        title_name = self._safe_name(title_parts[1].split('-')[0].strip())
                        dir_name = 'redmine_' + redmine_number + '-' + title_name

                if not excluded and "href" in li and "download" in li \
                        and any(key in li for key in match_list):
                    self._download_attachment(li, cookie_str, redmine_type)

        # Written as UTF-8, the same encoding settings.cfg is read with.
        info_name = '001_' + title_name + '_info.txt'
        with open(info_name, 'w', encoding='utf-8') as info_file:
            info_file.write(''.join(info_parts))

        # The listing is taken before the manual folder is created.
        file_list = os.listdir()
        os.makedirs('更新操作手册', exist_ok=True)
        for entry in file_list:
            if '更新操作手册.html' in entry:
                shutil.move(entry, '更新操作手册')

        os.chdir('..')
        # Without a title line there is no target name; keep the plain folder.
        if dir_name:
            os.rename(redmine_number, dir_name)

    def open_floder(self):
        """Let the user pick the target folder and show it in the path field.

        Also removes a ``__pycache__`` folder from the current working
        directory, if there is one.
        """
        this_dir = os.getcwd()
        open_floder_name = QFileDialog.getExistingDirectory(self, "选择下载保存的文件夹", this_dir)
        temp_dir = '__pycache__'
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir, ignore_errors=True)
        # An empty result means the dialog was cancelled: keep the old path.
        if open_floder_name:
            self.lineEdit_2.setText(str(open_floder_name))

    def download_readmine(self):
        """Run one download for the number and folder entered in the window.

        Any error is shown in the log field instead of being raised, because
        this method is invoked as a button slot.
        """
        try:
            os.chdir(self.base_dir)
            self.textEdit_3.setText("")
            save_dir = str(self.lineEdit_2.text())
            # Fall back to the base directory when no usable folder is given.
            if not os.path.isdir(save_dir):
                save_dir = os.getcwd()
            redmine_number = str(self.lineEdit.text()).strip()
            redmine_url, cookie_str, redmine_type, match_list, match_info_list = self.pre_fun(str(save_dir), str(redmine_number))
            self.start_download(redmine_url, cookie_str, redmine_type, redmine_number, match_list, match_info_list)
            self.textEdit_3.append("下载完成")
        except Exception as e:
            self.textEdit_3.append(str(e) or repr(e))
if __name__ == '__main__':
    # Remember the start directory; the window switches back to it before
    # every download.
    launch_dir = os.getcwd()

    # Create the Qt application and give it the window icon.
    qt_app = QtWidgets.QApplication(sys.argv)
    qt_app.setWindowIcon(QIcon('logo.ico'))

    # Show the main window and hand control to the Qt event loop; its return
    # value becomes the process exit code.
    main_window = MainWindow(launch_dir)
    main_window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
前端源码
注: 以下 demo.py 由 Qt 的 UI 编译器根据 demo.ui 自动生成, 请勿手动修改
# -*- coding: utf-8 -*-
################################################################################
## Form generated from reading UI file 'demo.ui'
##
## Created by: Qt User Interface Compiler version 5.15.2
##
## WARNING! All changes made in this file will be lost when recompiling UI file!
################################################################################
from PySide2.QtCore import *
from PySide2.QtGui import *
from PySide2.QtWidgets import *
class Ui_MainWindow(object):
    """Widget layout generated from 'demo.ui'.

    ``setupUi`` creates the widgets as attributes of the object it is called
    on; ``retranslateUi`` sets their visible texts.
    """

    def setupUi(self, MainWindow):
        """Create all widgets and layouts on *MainWindow*."""
        if not MainWindow.objectName():
            MainWindow.setObjectName(u"MainWindow")
        # Identical minimum and maximum sizes fix the window at 600x430.
        MainWindow.resize(600, 430)
        MainWindow.setMinimumSize(QSize(600, 430))
        MainWindow.setMaximumSize(QSize(600, 430))
        self.centralwidget = QWidget(MainWindow)
        self.centralwidget.setObjectName(u"centralwidget")
        # Text area, fixed at 450x250.
        self.textEdit_3 = QTextEdit(self.centralwidget)
        self.textEdit_3.setObjectName(u"textEdit_3")
        self.textEdit_3.setGeometry(QRect(40, 100, 450, 250))
        self.textEdit_3.setMinimumSize(QSize(450, 250))
        self.textEdit_3.setMaximumSize(QSize(450, 250))
        # "ok" button placed to the right of the text area.
        self.submit_ok = QPushButton(self.centralwidget)
        self.submit_ok.setObjectName(u"submit_ok")
        self.submit_ok.setGeometry(QRect(500, 320, 71, 31))
        self.submit_ok.setMaximumSize(QSize(589, 500))
        # Container holding the two input rows in a vertical layout.
        self.widget = QWidget(self.centralwidget)
        self.widget.setObjectName(u"widget")
        self.widget.setGeometry(QRect(40, 30, 272, 55))
        self.verticalLayout = QVBoxLayout(self.widget)
        self.verticalLayout.setObjectName(u"verticalLayout")
        self.verticalLayout.setContentsMargins(0, 0, 0, 0)
        # First row: label + line edit.
        self.horizontalLayout_2 = QHBoxLayout()
        self.horizontalLayout_2.setObjectName(u"horizontalLayout_2")
        self.label = QLabel(self.widget)
        self.label.setObjectName(u"label")
        self.label.setMaximumSize(QSize(8777215, 8777215))
        self.horizontalLayout_2.addWidget(self.label)
        self.lineEdit = QLineEdit(self.widget)
        self.lineEdit.setObjectName(u"lineEdit")
        self.horizontalLayout_2.addWidget(self.lineEdit)
        self.verticalLayout.addLayout(self.horizontalLayout_2)
        # Second row: label + line edit + push button.
        self.horizontalLayout = QHBoxLayout()
        self.horizontalLayout.setObjectName(u"horizontalLayout")
        self.label_2 = QLabel(self.widget)
        self.label_2.setObjectName(u"label_2")
        self.horizontalLayout.addWidget(self.label_2)
        self.lineEdit_2 = QLineEdit(self.widget)
        self.lineEdit_2.setObjectName(u"lineEdit_2")
        self.horizontalLayout.addWidget(self.lineEdit_2)
        self.select_floder = QPushButton(self.widget)
        self.select_floder.setObjectName(u"select_floder")
        self.horizontalLayout.addWidget(self.select_floder)
        self.verticalLayout.addLayout(self.horizontalLayout)
        MainWindow.setCentralWidget(self.centralwidget)
        # Menu bar and status bar; no entries are added to them here.
        self.menubar = QMenuBar(MainWindow)
        self.menubar.setObjectName(u"menubar")
        self.menubar.setGeometry(QRect(0, 0, 600, 23))
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QStatusBar(MainWindow)
        self.statusbar.setObjectName(u"statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.retranslateUi(MainWindow)
        QMetaObject.connectSlotsByName(MainWindow)
    # setupUi

    def retranslateUi(self, MainWindow):
        """Set the window title and the visible texts of the labels and buttons."""
        # Window title: "redmine" followed by the Chinese for "downloader".
        MainWindow.setWindowTitle(QCoreApplication.translate("MainWindow", u"redmine\u4e0b\u8f7d\u5668", None))
        self.submit_ok.setText(QCoreApplication.translate("MainWindow", u"ok", None))
        # Label text: "redmine" followed by the Chinese for "number".
        self.label.setText(QCoreApplication.translate("MainWindow", u"redmine\u53f7\u7801", None))
        # Label text: Chinese for "file path".
        self.label_2.setText(QCoreApplication.translate("MainWindow", u"\u6587\u4ef6\u8def\u5f84", None))
        # Button text: Chinese for "select folder".
        self.select_floder.setText(QCoreApplication.translate("MainWindow", u"\u9009\u62e9\u6587\u4ef6\u5939", None))
    # retranslateUi
配置文件见此

打包命令
pyinstaller -F main_v5.pyw demo.py --hidden-import PySide2.QtXml --icon="logo.ico"
运行结果

网友评论