def delete_wartermark(target_path, area_chart, VPT=0.6):
    """
    Delete images matching a watermark sample from a PDF and save a copy.

    Each GRAY/RGB image referenced by the pages of ``target_path`` is written
    to a temporary PNG under ``./imgs/func/`` and handed to ``is_same_img``
    together with ``area_chart`` and ``VPT``. When that call returns a truthy
    value the image's PDF object is deleted. The document is then saved
    beside the original with ``1`` appended to the file stem
    (``a.pdf`` -> ``a1.pdf``).

    :param target_path: target path; only paths containing ``.pdf`` are processed
    :param area_chart: watermark sample image, passed to ``is_same_img``
    :param VPT: image-similarity threshold, passed to ``is_same_img``
    :return: path of the saved PDF, or ``''`` when nothing was saved
        (not a PDF path, or any step failed)
    """
    temp_dir = "./imgs/func"
    try:
        if '.pdf' not in target_path:
            return ''
        stem, ext = os.path.splitext(target_path)
        output_path = stem + '1' + ext
        # The temporary PNGs are written here; make sure the folder exists.
        os.makedirs(temp_dir, exist_ok=True)
        with fitz.open(target_path) as pdf_document:
            # An image reused on several pages shares one xref. Examine each
            # xref once so an object deleted on an earlier page is never
            # extracted again.
            checked = set()
            for current_page in range(len(pdf_document)):
                for image in pdf_document.getPageImageList(current_page):
                    xref = image[0]
                    if xref in checked:
                        continue
                    checked.add(xref)
                    pix = fitz.Pixmap(pdf_document, xref)
                    if pix.n >= 4:  # only GRAY or RGB images are compared
                        continue
                    save_path = "%s/page%s_%s.png" % (temp_dir, current_page, xref)
                    # NOTE(review): del_filepath is defined elsewhere; presumably
                    # it clears a stale file at this path - confirm.
                    del_filepath(save_path)
                    try:
                        pix.writePNG(save_path)
                        if is_same_img(area_chart, save_path, VPT):
                            pdf_document._deleteObject(xref)
                    finally:
                        # Never leave the temporary PNG behind, even when the
                        # comparison raises.
                        if os.path.exists(save_path):
                            os.remove(save_path)
            # An incremental save is only permitted onto the file the document
            # was opened from; the output has a different name, so a full
            # save is required.
            pdf_document.save(output_path, incremental=False, encryption=False)
    except Exception as e:
        print(e)
        print('失败----删除水印')
        # Report failure with an empty path instead of a file that was never
        # written.
        return ''
    return output_path
class Del_Warter(object):
    """
    Strip advertising files and PDF watermarks from ``.rar`` archives.

    For every not-yet-processed ``.rar`` file in ``source_dir`` the archive is
    extracted with WinRAR, unwanted entries are dropped, advertising
    fragments are removed from file names, PDFs are passed through
    ``delete_wartermark``, the kept files are moved to ``new_dir`` and the
    archive is rebuilt. Processed archive names are appended to ``old.log``.
    """

    # WinRAR executable used to unpack and repack archives.
    WINRAR_EXE = 'C:/WinRAR/WinRAR.exe'
    # Directory listed to discover the watermark sample images.
    WATERMARK_DIR = r'C:\Users\Administrator\OneDrive\all_huaqiu\huaqiu_spider\test\target_img'
    # Extracted entries whose name contains any of these markers are dropped.
    SKIP_MARKERS = ('家电维修资料', '.url', '.txt')
    # Advertising fragments removed from the names of kept files.
    AD_TOKENS = ('www.520101.com', '.P')

    def __init__(self, source_dir, new_dir):
        """
        :param source_dir: directory holding the ``.rar`` archives
        :param new_dir: directory the cleaned files are moved into
        """
        self.source_dir = source_dir
        self.new_dir = new_dir
        # Text file recording the archives that were already processed.
        self.old_log = 'old.log'

    def readonly_handler(self, func, path, exc_info=None):
        """
        ``shutil.rmtree`` error callback: make ``path`` writable and retry.

        ``rmtree`` invokes ``onerror`` with three arguments
        ``(function, path, excinfo)``; ``exc_info`` is accepted (and ignored)
        so the callback can actually be called, while the two-argument form
        keeps working.

        :param func: the function that failed (e.g. ``os.remove``)
        :param path: the path it failed on
        :param exc_info: exception information supplied by ``rmtree``; unused
        """
        os.chmod(path, stat.S_IWRITE)
        func(path)

    def del_dir(self, source):
        """
        Remove ``self.new_dir`` and the extraction directory ``source``.

        Read-only entries below ``new_dir`` are made writable by
        ``readonly_handler`` and retried.

        :param source: extraction directory to delete if it exists
        """
        if os.path.exists(self.new_dir):
            shutil.rmtree(self.new_dir, onerror=self.readonly_handler)
            time.sleep(0.5)
        if os.path.exists(source):
            shutil.rmtree(source)
            time.sleep(0.5)

    def _find_payload_dir(self, root):
        """Follow the first entry of each directory until it is a file; return that directory."""
        current = root
        while os.path.isdir(current):
            entries = os.listdir(current)
            if not entries:
                raise ValueError(f'empty directory in extracted archive: {current}')
            first = f'{current}/{entries[0]}'
            if os.path.isfile(first):
                return current
            current = first
        # Neither a file nor a directory (e.g. a broken link): stop instead of
        # looping forever.
        raise ValueError(f'unexpected entry in extracted archive: {current}')

    def _strip_watermarks(self, pdf_path):
        """Run ``delete_wartermark`` for every sample image and keep the cleaned result in place."""
        for area_chart in os.listdir(self.WATERMARK_DIR):
            # NOTE(review): samples are listed from WATERMARK_DIR but handed on
            # relative to the working directory, as before - confirm both
            # refer to the same folder.
            cleaned = delete_wartermark(pdf_path, f'./target_img/{area_chart}', VPT=0.81)
            # delete_wartermark writes its output to a new file; swap it in so
            # the cleaned PDF (not the original) is the one that gets archived.
            if cleaned and cleaned != pdf_path and os.path.isfile(cleaned):
                os.replace(cleaned, pdf_path)

    def _clean_entries(self, payload_dir):
        """Drop unwanted entries, rename and de-watermark the rest; return the kept paths."""
        files = []
        for entry in os.listdir(payload_dir):
            end_path = f'{payload_dir}/{entry}'
            if any(marker in entry for marker in self.SKIP_MARKERS):
                print(f'del : {end_path}')
                continue
            print(f'succeed : {end_path}')
            # Strip advertising fragments from the file name only, never from
            # the directory part of the path.
            clean_name = entry
            for token in self.AD_TOKENS:
                if token in entry:
                    clean_name = clean_name.replace(token, '')
            if clean_name != entry:
                renamed = f'{payload_dir}/{clean_name}'
                os.rename(end_path, renamed)
                end_path = renamed
            if '.pdf' in end_path:
                self._strip_watermarks(end_path)
            files.append(end_path)
        return files

    def del_ad(self, source_name):
        """
        Clean one archive: extract, filter, de-watermark, repack and log it.

        Any exception is printed and followed by removal of ``new_dir`` and
        the extraction directory; nothing is re-raised.

        :param source_name: file name of the archive inside ``source_dir``
        """
        # Local import so this class does not depend on a file-level import.
        import subprocess

        abs_path = f'{self.source_dir}/{source_name}'
        # WinRAR's -ad switch extracts into a folder named after the archive
        # without its extension, so only the last suffix is removed.
        name = os.path.splitext(source_name)[0]
        source = f"{self.source_dir}/{name}"
        try:
            self.del_dir(source)
            # Argument list + cwd: safe for names containing spaces and works
            # across drives, unlike a "cd ... &&" shell string.
            subprocess.run([self.WINRAR_EXE, 'x', source_name, '-ad'], cwd=self.source_dir)
            time.sleep(0.5)
            if not os.path.exists(source):
                return
            payload_dir = self._find_payload_dir(source)
            time.sleep(0.5)
            files = self._clean_entries(payload_dir)
            if not os.path.exists(self.new_dir):
                os.makedirs(self.new_dir)
                time.sleep(0.5)
            for kept in files:
                shutil.move(kept, self.new_dir)
            time.sleep(0.5)
            # The original archive is removed before the new one is created.
            if os.path.exists(abs_path):
                os.remove(abs_path)
                time.sleep(0.5)
            # NOTE(review): the archive is rebuilt from a path literally named
            # "elecfans" relative to source_dir; presumably new_dir points
            # there - confirm.
            subprocess.run(
                [self.WINRAR_EXE, 'a', '-ibck', source_name, 'elecfans'],
                cwd=self.source_dir,
            )
            time.sleep(0.5)
            self.del_dir(source)
            with open(self.old_log, 'a') as ww:
                ww.write(source_name + ' ')
            print('ok\n')
        except Exception as e:
            print(e)
            time.sleep(2)
            if os.path.exists(self.new_dir):
                shutil.rmtree(self.new_dir)
                time.sleep(0.5)
            if os.path.exists(source):
                shutil.rmtree(source)

    def run(self):
        """
        Process every ``.rar`` file in ``source_dir`` not yet listed in the log.

        A missing log file is treated as "nothing processed yet".
        """
        try:
            with open(self.old_log) as rr:
                old = rr.read()
        except FileNotFoundError:
            old = ''
        # Entries are written as "<name> "; pad and normalise whitespace so a
        # name only matches a whole entry, not a fragment of a longer name.
        processed = ' ' + ' '.join(old.split()) + ' '
        for source_name in os.listdir(self.source_dir):
            if '.rar' not in source_name:
                continue
            if ' ' + ' '.join(source_name.split()) + ' ' in processed:
                continue
            print(f'target : {source_name}')
            self.del_ad(source_name)
# 网友评论 ("Reader comments" - web-page residue, not part of the program)