美文网首页
手打爬虫 python 抓取银行联行号 大额行号

手打爬虫 python 抓取银行联行号 大额行号

作者: 辣辣不乖 | 来源:发表于2020-11-18 10:33 被阅读0次

    每月几千笔不重复银行转账业务,每笔跨行转账都需要狗屁的联行号,很无奈啊
    话说手机转账都不需要这玩意,干嘛非要跟工作电脑过不去...

    爬取联行号并保存为 CSV 文件(可用 Excel 打开),代码比较简陋,但已经可以用了,功能尚未全部完成...

    import lxml
    import csv
    from bs4 import BeautifulSoup
    import requests
    
    # Scrape listing pages 1 and 2 of the Nanjing bank directory and save the
    # first two columns of every table row to numbank.csv.
    header={'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'}
    listbank = []
    for i in range(1, 3):
        url = 'http://www.5cm.cn/bank/nanjing/' + str(i) + '/'
        # A timeout keeps a stalled connection from hanging the script forever.
        html = requests.get(url, headers=header, timeout=10).text
        soup = BeautifulSoup(html, 'lxml')  # parse the fetched page with the lxml parser
        # The first <tr> is skipped (presumably the table header -- confirm against the site).
        for row in soup.find_all('tr')[1:]:
            print(row)
            cells = row.find_all('td')
            if len(cells) < 2:
                # Not a data row; indexing it would raise IndexError.
                continue
            # More columns can be collected the same way, e.g. cells[2].text.
            listbank.append([cells[0].text, cells[1].text])
    print(listbank)
    # The context manager guarantees the file is flushed and closed; the
    # original `file.close` (no parentheses) never actually closed it.
    with open('numbank.csv', 'w', newline='') as file:
        csvwriter = csv.writer(file)
        csvwriter.writerows(listbank)
    

    爬取完整信息存档

    #!/usr/bin/python
    # -*- coding: UTF-8 -*-
    
    import lxml
    import csv
    from bs4 import BeautifulSoup
    import requests
    import time
    import random
    from retrying import retry
    
    # Crawl every listing page of every province on 5cm.cn, collect
    # (code, name, phone, address, province) records and save them to backnum.csv.
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'}
    listbank = []
    citys = ['jiangsu','guangdong','shandong','hebei','zhejiang','fujian','liaoning','anhui','hubei','sichuan','shanxisheng','hunan','shanxi','guizhou','henan','heilongjiang','jilin','xinjiang','shanghai','gansu','yunnan','beijing','neimenggu','tianjin','jiangxi','chongqing','guangxi','ningxia','hainan','qinghai','xianggang','xicang','aomen']
    last_page = 332  # pages 1..332 are requested per province, as in the original range(1, 333)


    # Defined once instead of being re-created for every page.
    @retry(stop_max_attempt_number=3, wait_fixed=3000)  # up to 3 attempts, 3000 ms apart
    def get_request(url):
        """Fetch one listing page and return its table rows.

        Returns a list of [code, name, phone, address] lists taken from
        columns 0, 1, 2 and 4 of every <tr> after the first one. Rows are
        returned rather than appended to a global, so a retried attempt
        cannot leave partial or duplicated records behind.
        """
        html = requests.get(url, headers=headers, timeout=3).text
        soup = BeautifulSoup(html, 'lxml')  # parse the fetched page with the lxml parser
        rows = []
        for row in soup.find_all('tr')[1:]:
            cells = row.find_all('td')
            if len(cells) < 5:
                # Too few columns to be a data row; indexing would raise IndexError.
                continue
            rows.append([cells[0].text, cells[1].text, cells[2].text, cells[4].text])
        return rows


    try:
        # idx is the 1-based province counter, used only for progress output.
        for idx, i in enumerate(citys, 1):
            for j in range(1, last_page + 1):
                url = 'http://www.5cm.cn/bank/' + str(i) + '/' + str(j) + '/'
                print('正在抓取:' + str(idx) + '/' + str(len(citys)) + '的' + str(i) + '省,' + '进度: {:.2%}'.format(j / last_page))
                try:
                    rows = get_request(url)
                except requests.RequestException:
                    # All retry attempts failed: skip this page and keep what
                    # has been collected so far instead of aborting the run.
                    print('抓取失败,已跳过: ' + url)
                    continue
                for record in rows:
                    listbank.append(record + [str(i)])
                if rows:
                    # Show a sample of the page; guarded so an empty page
                    # cannot raise IndexError on listbank[-1].
                    print(str(listbank[-1][:3]))
                    print('···省略显示···若干记录···')
            # Random pause between provinces, drawn as a progress bar.
            rest_seconds = random.randint(3, 33)  # was named `sum`, which shadowed the builtin
            interval = 0.3  # seconds between screen refreshes
            steps = int(rest_seconds / interval)
            for y in range(0, steps + 1):
                print("\r休息一会:" + "|" + "*" * y + " " * (steps + 1 - y) + "|" + '体力恢复: {:.0f}%'.format(y / (rest_seconds / interval) * 100), end="")
                time.sleep(interval)
            print("\r满血复活!")
            time.sleep(1)
    finally:
        # Runs on normal completion and on interruption alike, so collected
        # rows are always saved. The file is opened exactly once; the original
        # reopened it in 'w' mode per province and never called close().
        print('正在写入到EXCEL···请稍候···')
        print('---')
        with open('backnum.csv', 'w', newline='') as file:
            csvwriter = csv.writer(file)
            csvwriter.writerows(listbank)
        print('写入完成,文件名backnum.csv,收工!')
    

    pyinstaller打包联行号查询工具(cmd 版)

    360截图20201219131024924.png
    #!/usr/bin/env python
    
    import os
    import sys
    
    #生成资源文件目录访问路径
    def resource_path(relative_path):
        """Return the path of a bundled resource file.

        The base directory is ``sys._MEIPASS`` when that attribute exists
        (PyInstaller sets it for one-file builds); otherwise it is the
        directory containing this script.
        """
        if hasattr(sys, '_MEIPASS'):
            root_dir = sys._MEIPASS
        else:
            script_file = os.path.abspath(__file__)
            root_dir = os.path.dirname(script_file)
        return os.path.join(root_dir, relative_path)
    # os.startfile(resource_path('banks.txt'))
    # pyinstaller -F --add-data banks.txt;. banknum.py
    # Load every record of the bundled banks.txt into memory once. The context
    # manager closes the handle immediately instead of keeping it open for the
    # lifetime of the program; `banks` stays bound, and calling close() on it
    # again later is a harmless no-op.
    with open(resource_path('banks.txt'), 'r', encoding='utf-8') as banks:
        lines = banks.readlines()
    
    def inputx():
        """Run the main menu until the user chooses to exit.

        Choice ``1`` starts a keyword query, choice ``3`` closes the data file
        and exits via ``sys.exit()``; anything else prints a warning and shows
        the menu again. Implemented as a loop: the original re-entered the
        menu through mutual recursion (inputx -> inputy -> selecty -> inputx),
        so the call stack grew with every query until RecursionError.
        """
        while True:
            print('--------------------------------------------')
            x = input('1.查询银行名称或行号\n3.退出\n$ 请输入序号数字后回车:')
            if x == '1':
                inputy()
            elif x == '3':
                banks.close()
                sys.exit()
            else:
                print('--------------------------------------------')
                print('# 注意:输入错误,请按屏幕提示操作!')


    def inputy():
        """Prompt for a non-empty keyword, then print the matching records."""
        while True:
            print('--------------------------------------------')
            y = input('$ 输入查询关键字后回车:')
            if y != '':
                break
            # Empty keyword: warn and ask again (loop instead of recursion).
            print('--------------------------------------------')
            print('# 注意:关键字不能为空,请重新输入!')
        print('--------------------------------------------')
        print('关键字' + '<' + y + '>' + '查询中,请稍后!')
        print('--------------------------------------------')
        print('# 注意:查询结果为空将不显示任何信息!')
        selecty(y)


    def selecty(y, records=None):
        """Print every record that contains the keyword ``y``.

        Args:
            y: substring to look for.
            records: iterable of text lines to search; defaults to the
                module-level ``lines`` loaded from banks.txt.

        Each record is stripped of surrounding whitespace before matching and
        printing. Control returns to the caller afterwards; the menu loop in
        ``inputx`` shows the menu again.
        """
        if records is None:
            records = lines
        for line in records:
            line = line.strip()
            if y in line:
                print(line)
    
    if __name__ == '__main__':
        # `mode con` is a Windows console command that resizes the window;
        # run it only on Windows so other platforms do not get a shell error.
        if os.name == 'nt':
            os.system("mode con cols=150 lines=30")
        print('--------------------------------------------')
        print('- 联行号查询工具 - by lala 2020 ')
        inputx()
    

    pyinstaller打包联行号查询工具(TK 界面版)

    360截图20201219131202572.png 360截图20201219131224846.png
    # ! /usr/bin/env python
    # encoding:utf-8
    
    import os
    import sys
    from tkinter import *
    from tkinter import ttk
    
    def resource_path(relative_path):
        """Return the path of a bundled resource file.

        Uses ``sys._MEIPASS`` as the base directory when that attribute exists
        (PyInstaller sets it for one-file builds), otherwise the directory
        containing this script.
        """
        if hasattr(sys, '_MEIPASS'):
            bundle_dir = sys._MEIPASS
        else:
            bundle_dir = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(bundle_dir, relative_path)
    
    # Load every record of the bundled banks.txt into memory once; the context
    # manager closes the handle, which the original never did.
    with open(resource_path('banks.txt'), 'r', encoding='utf-8') as banks:
        lines = banks.readlines()
    
    def statusx(*args):
        """Clear the keyword entry and give it keyboard focus.

        ``*args`` is accepted and ignored so the function can also be used as
        an event-binding callback.
        """
        try:
            loader_entry.delete(0, 'end')
            loader_entry.focus()
        except TclError:
            # Best effort, as in the original. It named an undefined exception
            # (`searchxError`), which turned any failure into a NameError.
            pass
    
    def clsx(*args):
        """Clear the keyword entry, focus it and update the status line.

        ``*args`` is accepted and ignored so the function can also be used as
        an event-binding callback.
        """
        try:
            # The state toggle is kept from the original; no text is written
            # to the output area here.
            txtout.configure(state='normal')
            loader_entry.delete(0, 'end')
            loader_entry.focus()
            meters.set('输入框已清空,请指示.')
            txtout.configure(state='disabled')
        except TclError:
            # Best effort, as in the original. It named an undefined exception
            # (`searchxError`), which turned any failure into a NameError.
            pass
    
    def likes(*args):
        """Insert the `photo` image at the top of the output area.

        The text widget is made editable for the insert and is always switched
        back to read-only, even if an insert fails. ``*args`` is accepted and
        ignored so the function can also be used as an event-binding callback.
        """
        try:
            txtout.configure(state='normal')
            try:
                txtout.insert('1.0', '\n')
                txtout.image_create('1.0', image=photo)
                meters.set('用支付宝或微信打赏我一杯咖啡好吗.(^_^)♪')
                txtout.insert('1.0', '\n')
            finally:
                txtout.configure(state='disabled')
        except TclError:
            # Best effort, as in the original. It named an undefined exception
            # (`searchxError`), which turned any failure into a NameError.
            pass
    
    def queryx(*args):
        """Search the loaded records for the entry text and show the hits.

        Every record in `lines` containing the keyword is inserted at the top
        of the output area, all occurrences of the keyword are tagged with
        'tagx', and the usage text `msg` is appended at the end. An empty
        keyword shows `msg` and a warning in the status line instead.
        ``*args`` is accepted and ignored so the function also works as the
        <Return> key binding.
        """
        try:
            txtout.configure(state='normal')
            try:
                txtout.delete('1.0', 'end')
                searchx = loader.get()
                if searchx == '':
                    txtout.insert('1.0', msg)
                    meters.set('查询关键字不能为空,请重新输入.')
                    return
                found = False
                for line in lines:
                    line = line.strip()
                    if searchx in line:
                        found = True
                        # Inserting at '1.0' puts later matches above earlier ones.
                        txtout.insert('1.0', '------------\n' + line + '\n')
                if found:
                    # Set once after the scan rather than once per matching line.
                    meters.set('关键字' + ' [' + searchx + '] ' + '查询已完成,双击内容后按Ctrl+C复制.')
                # Tag every occurrence of the keyword in the inserted results.
                start = '1.0'
                while True:
                    pos = txtout.search(searchx, start, stopindex='end')
                    if not pos:
                        break
                    txtout.tag_add('tagx', pos, '{}+{}c'.format(pos, len(searchx)))
                    start = pos + '+1c'  # continue from the next character
                # Appended after tagging, so the usage text is never tagged.
                txtout.insert('end', msg + '\n')
            finally:
                # Always return the output area to read-only.
                txtout.configure(state='disabled')
        except TclError:
            # Best effort, as in the original. It named an undefined exception
            # (`searchxError`), which turned any failure into a NameError.
            pass
    
    # Main window: fixed size, with a single padded frame holding all widgets.
    root = Tk()
    root.title('联行号查询工具 v1.3')

    mainframe = ttk.Frame(root, padding='12 6 12 12')
    mainframe.grid(column=0, row=0, sticky='nwes')
    root.resizable(False, False)

    # Status line: a fixed caption plus a label bound to `meters`. The widgets
    # are created first and gridded second because grid() returns None; the
    # original chained the calls and stored None in `loader_label`.
    meters = StringVar()
    loader_label = ttk.Label(mainframe, text='当前状态:')
    loader_label.grid(column=1, row=1, sticky='w')
    status_label = ttk.Label(mainframe, textvariable=meters)
    status_label.grid(column=1, row=1, sticky='w', padx=60)
    meters.set('初始化完成,数据版本202012,请指示.')

    # Keyword entry bound to `loader`.
    loader = StringVar()
    loader_entry = ttk.Entry(mainframe, width=23, textvariable=loader)
    loader_entry.grid(column=3, row=1, sticky='e', padx=160)

    # Buttons share grid cell (3, 1); differing padx values place them side by side.
    ttk.Button(mainframe, text='←', width=3, command=clsx).grid(column=3, row=1, sticky='e', padx=125)
    ttk.Button(mainframe, text='¥.', width=3, command=likes).grid(column=3, row=1, sticky='e', padx=90)
    ttk.Button(mainframe, text='查询', command=queryx).grid(column=3, row=1, sticky='e')


    # Kept in a module-level name so the image object stays referenced while
    # the text widget displays it.
    photo = PhotoImage(file=resource_path('like.png'))
    # Banner and usage text shown in the output area. Backslashes in the ASCII
    # art are doubled: `\_`, `\ `, `\|` and `\/` are invalid escape sequences
    # that trigger warnings on recent Python versions. The runtime value is
    # unchanged.
    msg = (
        '\n'
        '  .__         .__          \n'
        '  |  | _____  |  | _____   \n'
        '  |  | \\__  \\ |  | \\__  \\  \n'
        '  |  |__/ __ \\|  |__/ __ \\_\n'
        '  |____(____  /____(____  /\n'
        '            \\/          \\/ \n'
        '------------\n'
        '# 使用说明:\n'
        '------------\n'
        '  输入银行名称或行号点击查询按钮或回车。双击查询结果,Ctrl+C复制,CTRL+V粘贴,查询结果为空将不显示任何内容。\n'
        '-------------------------\n'
        '@ 2020 by lala q.33818121\n'
    )
    # Output area: starts with the usage text and is kept read-only.
    txtout = Text(mainframe, height=33, width=133)
    txtout.grid(row=2, column=1, columnspan=3, sticky='nwes')
    txtout.insert('1.0', msg)
    txtout.tag_configure('tagx', foreground='red')  # 'tagx' renders tagged text in red
    txtout['state'] = 'disabled'

    # Vertical scrollbar wired to the output area in both directions.
    sbar = ttk.Scrollbar(mainframe, command=txtout.yview, orient='vertical')
    txtout.configure(yscrollcommand=sbar.set)
    sbar.grid(row=2, column=4, sticky='ns')

    # Give every widget in the frame the same vertical outer padding.
    for child in mainframe.winfo_children():
        child.grid_configure(pady=5)

    # Start with the cursor in the keyword entry; Return triggers a query.
    loader_entry.focus()
    root.bind('<Return>', queryx)

    root.mainloop()
    
    # pyinstaller -F -w --add-data banks.txt;. --add-data like.png;. bankttk.py
    

    pyinstaller打包命令参照

    下一步计划,批量付款明细用excel公式根据城市信息自动匹配银行行号

    相关文章

      网友评论

          本文标题:手打爬虫 python 抓取银行联行号 大额行号

          本文链接:https://www.haomeiwen.com/subject/tlykiktx.html