美文网首页
python去除pdf水印图

python去除pdf水印图

作者: 是东东 | 来源:发表于2021-03-16 15:12 被阅读0次
def delete_wartermark(target_path, area_chart, VPT=0.6):
    """
    Delete images that match a watermark sample from a PDF and save a copy.

    Every GRAY/RGB image referenced by each page is written to a temporary
    PNG under ``./imgs/func/`` and compared with ``area_chart`` through
    ``is_same_img``; matching image objects are deleted from the document.
    The document is then saved next to the input with ``1`` appended to the
    file stem (``a.pdf`` -> ``a1.pdf``).

    Any exception is caught, printed and reported as a failure; it is never
    propagated to the caller.

    :param target_path: path of the PDF to process; paths that do not contain
        ``.pdf`` are skipped
    :param area_chart: path of the watermark sample image
    :param VPT: similarity threshold forwarded to ``is_same_img``
    :return: path of the saved PDF, or ``''`` when nothing was saved
    """
    save_pdf_path = ''
    try:
        if '.pdf' in target_path:
            with fitz.open(target_path) as pdf_document:
                for current_page in range(len(pdf_document)):
                    for image in pdf_document.getPageImageList(current_page):
                        xref = image[0]
                        pix = fitz.Pixmap(pdf_document, xref)
                        if pix.n >= 4:
                            # Only GRAY or RGB pixmaps are compared.
                            continue
                        save_path = "./imgs/func/page%s_%s.png" % (current_page, xref)
                        try:
                            del_filepath(save_path)
                            pix.writePNG(save_path)
                            if is_same_img(area_chart, save_path, VPT):
                                pdf_document._deleteObject(xref)
                        finally:
                            # Remove the temporary PNG even when the
                            # comparison or the export raised.
                            if os.path.exists(save_path):
                                os.remove(save_path)
                stem, extension = os.path.splitext(target_path)
                output_path = stem + '1' + extension
                # NOTE(review): PyMuPDF documents incremental saves as valid
                # only when writing back to the original file name; confirm
                # that this call succeeds with the installed version.
                pdf_document.save(output_path, incremental=True, encryption=False)
                # Publish the path only once the file has actually been
                # written, so a failed save is reported as ''.
                save_pdf_path = output_path

    except Exception as e:
        print(e)
        print('失败----删除水印')
    return save_pdf_path
class Del_Warter(object):
    """
    Batch-clean the ``.rar`` archives found in a directory.

    For every archive that is not yet recorded in the log file, ``del_ad``
    unpacks it with WinRAR, drops advertisement files, strips marker strings
    from file names, runs PDFs through ``delete_wartermark``, moves the kept
    files to ``new_dir`` and re-creates the archive.
    """

    # Location of the WinRAR executable used for unpacking and re-packing.
    _WINRAR_EXE = 'C:/WinRAR/WinRAR.exe'
    # A file whose name contains any of these fragments is not kept.
    _AD_MARKERS = ('家电维修资料', '.url', '.txt')
    # Fragments removed from the names of the files that are kept.
    _NAME_NOISE = ('www.520101.com', '.P')

    def __init__(self, source_dir, new_dir):
        """
        :param source_dir: directory that contains the ``.rar`` archives
        :param new_dir: scratch directory that receives the cleaned files
        """
        self.source_dir = source_dir
        self.new_dir = new_dir
        # Names of processed archives are appended here, space separated.
        self.old_log = 'old.log'

    def readonly_handler(self, func, path, exc_info=None):
        """
        Error callback for ``shutil.rmtree``: make ``path`` writable, retry.

        :param func: the function that failed (for example ``os.remove``)
        :param path: the path that function failed on
        :param exc_info: exception details supplied by ``shutil.rmtree``;
            accepted because the callback is invoked with three arguments,
            otherwise unused
        """
        os.chmod(path, stat.S_IWRITE)
        func(path)

    def del_dir(self, source):
        """
        Remove ``new_dir`` and the ``source`` directory if they exist.

        :param source: directory the archive was (or will be) unpacked into
        """
        if os.path.exists(self.new_dir):
            for entry in os.listdir(self.new_dir):
                entry_path = f'{self.new_dir}/{entry}'
                try:
                    os.remove(entry_path)
                except OSError:
                    # Could not delete it directly (read-only file or a
                    # directory): make it writable and let rmtree handle it.
                    os.chmod(entry_path, stat.S_IWRITE)
            shutil.rmtree(self.new_dir, onerror=self.readonly_handler)
            time.sleep(0.5)
        if os.path.exists(source):
            shutil.rmtree(source)
            time.sleep(0.5)

    def _run_winrar(self, *args):
        """Run WinRAR with ``args``, using ``source_dir`` as working directory."""
        import subprocess

        # An argument list plus ``cwd`` replaces the former shell string, so
        # names containing spaces or shell metacharacters are passed through
        # intact and the working directory is honoured across drives.
        subprocess.run([self._WINRAR_EXE, *args], cwd=self.source_dir)

    def _locate_payload_dir(self, root):
        """
        Follow the first entry of each directory, starting at ``root``, until
        that entry is a regular file; return the directory containing it.

        An empty directory raises ``IndexError``; the caller treats any
        exception as a failed archive.
        """
        current = root
        while True:
            first_entry = f'{current}/{os.listdir(current)[0]}'
            if os.path.isfile(first_entry):
                return current
            current = first_entry

    def _strip_markers(self, directory, file_name):
        """
        Rename ``file_name`` inside ``directory`` so that it no longer
        contains the noise fragments; return the resulting path.

        Only the file name is rewritten, never the directory part of the path.
        """
        cleaned = file_name
        for marker in self._NAME_NOISE:
            if marker in file_name:
                cleaned = cleaned.replace(marker, '')
        cleaned_path = f'{directory}/{cleaned}'
        if cleaned != file_name:
            os.rename(f'{directory}/{file_name}', cleaned_path)
        return cleaned_path

    def del_ad(self, source_name):
        """
        Clean one archive and re-create it.

        Steps: clear the work directories, unpack the archive with WinRAR
        (``x ... -ad``), locate the directory holding the files, skip
        advertisement files, strip noise from the remaining names, run PDFs
        through ``delete_wartermark``, move the kept files to ``new_dir``,
        delete the original archive, re-create it from the ``elecfans``
        entry (hard-coded in the WinRAR command; presumably ``new_dir`` -
        confirm), clean up and append the archive name to the log.

        Any exception is printed and both work directories are removed.

        :param source_name: file name of the archive inside ``source_dir``
        """
        abs_path = f'{self.source_dir}/{source_name}'
        # Strip only the extension so that multi-dot names keep their full
        # stem; assumes ``-ad`` unpacks into a folder named after the archive
        # without its extension - TODO confirm against WinRAR behaviour.
        name = os.path.splitext(source_name)[0]
        source = f"{self.source_dir}/{name}"
        try:
            self.del_dir(source)
            self._run_winrar('x', source_name, '-ad')
            time.sleep(0.5)
            if not os.path.exists(source):
                return

            payload_dir = self._locate_payload_dir(source)
            files = []
            for file_name in os.listdir(payload_dir):
                if any(tag in file_name for tag in self._AD_MARKERS):
                    print(f'del : {payload_dir}/{file_name}')
                    continue
                print(f'succeed : {payload_dir}/{file_name}')
                end_path = self._strip_markers(payload_dir, file_name)
                if '.pdf' in end_path:
                    # NOTE(review): samples are listed from an absolute path
                    # but passed on relative to the current working
                    # directory, and the path returned by delete_wartermark
                    # is ignored (the original file is what gets moved).
                    area_charts = os.listdir(r'C:\Users\Administrator\OneDrive\all_huaqiu\huaqiu_spider\test\target_img')
                    for area_chart in area_charts:
                        delete_wartermark(end_path, f'./target_img/{area_chart}', VPT=0.81)
                files.append(end_path)

            if not os.path.exists(self.new_dir):
                os.makedirs(self.new_dir)
                time.sleep(0.5)

            for kept in files:
                shutil.move(kept, self.new_dir)
                time.sleep(0.5)
            if os.path.exists(abs_path):
                os.remove(abs_path)
                time.sleep(0.5)
            self._run_winrar('a', '-ibck', source_name, 'elecfans')
            time.sleep(0.5)
            self.del_dir(source)
            with open(self.old_log, 'a') as ww:
                ww.write(source_name + ' ')
            print('ok\n')
        except Exception as e:
            print(e)
            time.sleep(2)
            if os.path.exists(self.new_dir):
                shutil.rmtree(self.new_dir)
            time.sleep(0.5)
            if os.path.exists(source):
                shutil.rmtree(source)

    def run(self):
        """
        Process every ``.rar`` archive in ``source_dir`` that is not logged.

        A missing log file is treated as empty. Log entries are compared as
        whole, whitespace-delimited names, so an archive is not skipped just
        because its name is a substring of another logged name. Only names
        ending in ``.rar`` (case-insensitive) are considered.
        """
        try:
            with open(self.old_log) as rr:
                old = rr.read()
        except FileNotFoundError:
            # First run: nothing has been processed yet.
            old = ''
        # Normalise whitespace and pad with spaces so that each entry can be
        # matched as ' name '.
        processed = ' ' + ' '.join(old.split()) + ' '
        for source_name in os.listdir(self.source_dir):
            if not source_name.lower().endswith('.rar'):
                continue
            entry = ' ' + ' '.join(source_name.split()) + ' '
            if entry in processed:
                continue
            print(f'target : {source_name}')
            self.del_ad(source_name)

相关文章

网友评论

      本文标题:python去除pdf水印图

      本文链接:https://www.haomeiwen.com/subject/xyjdcltx.html