美文网首页
手打爬虫 python 抓取银行联行号 大额行号

手打爬虫 python 抓取银行联行号 大额行号

作者: 辣辣不乖 | 来源:发表于2020-11-18 10:33 被阅读0次

每月几千笔不重复银行转账业务,每笔跨行转账都需要狗屁的联行号,很无奈啊
话说手机转账都不需要这玩意,干嘛非要跟工作电脑过不去...

爬取联行号并保存为 CSV 文件(可用 Excel 打开),简陋,但已经可以用了,未完成...

import lxml
import csv
from bs4 import BeautifulSoup
import requests

# Scrape the first two Nanjing listing pages of www.5cm.cn and store the
# (bank number, bank name) pairs in numbank.csv.

# Browser-like User-Agent sent with every request.
header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'}
listbank = []
for i in range(1, 3):
    url = 'http://www.5cm.cn/bank/nanjing/' + str(i) + '/'
    # A timeout keeps the script from hanging forever on a stalled connection.
    html = requests.get(url, headers=header, timeout=10).text
    soup = BeautifulSoup(html, 'lxml')  # parse the downloaded page
    banklist = soup.find_all('tr')[1:]  # the first <tr> is skipped
    for row in banklist:
        print(row)
        cells = row.find_all('td')
        banknum = cells[0].text
        bankname = cells[1].text
        # More columns can be collected the same way, e.g. cells[2].text.
        listbank.append([banknum, bankname])
print(listbank)

# Open the output only after scraping succeeded, and let the context manager
# close it (the original `file.close` lacked parentheses and never ran).
with open('numbank.csv', 'w', newline='') as file:
    csvwriter = csv.writer(file)
    csvwriter.writerows(listbank)

爬取完整信息存档

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import lxml
import csv
from bs4 import BeautifulSoup
import requests
import time
import random
from retrying import retry

# Scrape every province listing of www.5cm.cn and store the collected
# records (number, name, phone, address, province) in backnum.csv.

# Browser-like User-Agent sent with every request.
headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'}
listbank = []
citys = ['jiangsu','guangdong','shandong','hebei','zhejiang','fujian','liaoning','anhui','hubei','sichuan','shanxisheng','hunan','shanxi','guizhou','henan','heilongjiang','jilin','xinjiang','shanghai','gansu','yunnan','beijing','neimenggu','tianjin','jiangxi','chongqing','guangxi','ningxia','hainan','qinghai','xianggang','xicang','aomen']


# Retry a failing page up to 3 times, waiting 3000 ms between attempts.
@retry(stop_max_attempt_number=3, wait_fixed=3000)
def get_request(url):
    """Download one listing page and return its parsed table rows.

    Returns a list of ``[number, name, phone, address]`` lists. Rows are
    collected locally and returned, so a retry after a partial failure
    cannot leave duplicate records in the shared result list.
    """
    html = requests.get(url, headers=headers, timeout=3).text
    soup = BeautifulSoup(html, 'lxml')
    page_rows = []
    for row in soup.find_all('tr')[1:]:  # the first <tr> is skipped
        cells = row.find_all('td')
        if len(cells) < 5:
            # Not a full data row; indexing cells[4] would raise IndexError.
            continue
        page_rows.append([cells[0].text, cells[1].text, cells[2].text, cells[4].text])
    return page_rows


# idx is the 1-based position of the province, used only for progress output.
for idx, i in enumerate(citys, 1):
    for j in range(1, 333):  # page numbers to fetch
        url = 'http://www.5cm.cn/bank/' + str(i) + '/' + str(j) + '/'
        print('正在抓取:' + str(idx) + '/33的' + str(i) + '省,' + '进度: {:.2%}'.format(j/333))
        for record in get_request(url):
            listbank.append(record + [str(i)])
        if listbank:  # nothing to preview until at least one record exists
            print(str(listbank[-1][:3]))
        print('···省略显示···若干记录···')
    # Pause a random number of seconds between provinces, drawing a
    # progress bar that is refreshed every `interval` seconds.
    rest_seconds = random.randint(3, 33)
    interval = 0.3
    ticks = int(rest_seconds / interval) + 1
    for y in range(0, ticks):
        print("\r休息一会:" + "|" + "*"*y + " "*(ticks - y) + "|" + '体力恢复: {:.0f}%'.format(y/(rest_seconds/interval)*100), end="")
        time.sleep(interval)
    print("\r满血复活!")
    time.sleep(1)

print('正在写入到EXCEL···请稍候···')
# Open the output exactly once and let the context manager close it. The
# original reopened the file in 'w' mode for every province and never
# actually called close().
with open('backnum.csv', 'w', newline='') as file:
    csvwriter = csv.writer(file)
    csvwriter.writerows(listbank)
print('---')
print('写入完成,文件名backnum.csv,收工!')

pyinstaller打包联行号查询工具(cmd 版)

360截图20201219131024924.png
#!/usr/bin/env python

import os
import sys

def resource_path(relative_path):
    """Return the absolute path of a resource file shipped with the program.

    The base directory is ``sys._MEIPASS`` when that attribute exists,
    otherwise the directory containing this source file.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    base_dir = getattr(sys, '_MEIPASS', script_dir)
    resolved = os.path.join(base_dir, relative_path)
    return resolved
# Build command that bundles banks.txt into the executable:
#   pyinstaller -F --add-data banks.txt;. banknum.py
# Read the whole data file into memory once; the context manager closes the
# handle immediately instead of keeping it open for the life of the program.
with open(resource_path('banks.txt'), 'r', encoding='utf-8') as banks:
    lines = banks.readlines()

def inputx():
    """Run the main menu until the user chooses to exit.

    Option ``1`` starts a keyword query, option ``3`` closes the data file
    and exits the process; anything else prints a hint and shows the menu
    again. A loop is used instead of calling ``inputx`` recursively so the
    call stack does not grow with every query or mistyped choice.
    """
    while True:
        print('--------------------------------------------')
        x = input('1.查询银行名称或行号\n3.退出\n$ 请输入序号数字后回车:')
        if x == '1':
            inputy()
        elif x == '3':
            banks.close()
            sys.exit()
        else:
            print('--------------------------------------------')
            print('# 注意:输入错误,请按屏幕提示操作!')


def inputy():
    """Prompt for a non-empty keyword, then print the matching records.

    The prompt is repeated until the user enters a non-empty keyword.
    Control returns to the caller after the results have been printed.
    """
    while True:
        print('--------------------------------------------')
        y = input('$ 输入查询关键字后回车:')
        if y != '':
            break
        print('--------------------------------------------')
        print('# 注意:关键字不能为空,请重新输入!')
    print('--------------------------------------------')
    print('关键字' + '<' + y + '>' + '查询中,请稍后!')
    print('--------------------------------------------')
    print('# 注意:查询结果为空将不显示任何信息!')
    selecty(y)


def selecty(y):
    """Print every record that contains ``y`` and return those records.

    Each entry of the module-level ``lines`` list is stripped of
    surrounding whitespace before the substring test. The return value is
    the list of stripped matching records, in file order (empty when
    nothing matches).
    """
    matches = []
    for line in lines:
        line = line.strip()
        if y in line:
            print(line)
            matches.append(line)
    return matches

if __name__ == '__main__':
    # `mode con` is a Windows console command that resizes the window;
    # skip it elsewhere, where the shell would only report an unknown command.
    if os.name == 'nt':
        os.system("mode con cols=150 lines=30")
    print('--------------------------------------------')
    print('- 联行号查询工具 - by lala 2020 ')
    inputx()

pyinstaller打包联行号查询工具(TK 界面版)

360截图20201219131202572.png 360截图20201219131224846.png
# ! /usr/bin/env python
# encoding:utf-8

import os
import sys
from tkinter import *
from tkinter import ttk

def resource_path(relative_path):
    """Build the absolute path of a data file that ships with the program.

    Uses ``sys._MEIPASS`` as the base directory when that attribute is
    present; otherwise falls back to the directory of this source file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    if hasattr(sys, '_MEIPASS'):
        root_dir = sys._MEIPASS
    else:
        root_dir = here
    return os.path.join(root_dir, relative_path)

# Read the whole data file into memory once; the context manager closes the
# handle right away (the original never closed it).
with open(resource_path('banks.txt'), 'r', encoding='utf-8') as banks:
    lines = banks.readlines()

def statusx(*args):
    """Clear the search entry and give it keyboard focus.

    Extra positional arguments are accepted and ignored.
    """
    try:
        loader_entry.delete(0, 'end')
        loader_entry.focus()
    except TclError:
        # Best effort: ignore Tk widget errors. The original named an
        # undefined exception (`searchxError`), which would itself have
        # raised NameError as soon as any error occurred here.
        pass

def clsx(*args):
    """Clear the search entry, refocus it and report that in the status line.

    Extra positional arguments are accepted and ignored. The output pane is
    left in the read-only ('disabled') state.
    """
    try:
        txtout.configure(state='normal')
        loader_entry.delete(0, 'end')
        loader_entry.focus()
        meters.set('输入框已清空,请指示.')
        txtout.configure(state='disabled')
    except TclError:
        # Best effort: ignore Tk widget errors. The original named an
        # undefined exception (`searchxError`), which would itself have
        # raised NameError as soon as any error occurred here.
        pass

def likes(*args):
    """Insert the module-level ``photo`` image at the top of the output pane.

    The image is surrounded by blank lines and the status line is updated.
    Extra positional arguments are accepted and ignored. The pane is
    unlocked for the insert and set back to read-only afterwards.
    """
    try:
        txtout.configure(state='normal')
        txtout.insert('1.0', '\n')
        txtout.image_create('1.0', image=photo)
        meters.set('用支付宝或微信打赏我一杯咖啡好吗.(^_^)♪')
        txtout.insert('1.0', '\n')
        txtout.configure(state='disabled')
    except TclError:
        # Best effort: ignore Tk widget errors. The original named an
        # undefined exception (`searchxError`), which would itself have
        # raised NameError as soon as any error occurred here.
        pass

def queryx(*args):
    """Search the loaded records for the entry text and show the matches.

    Extra positional arguments are accepted and ignored, so the function
    works both as a button command and as a key-binding callback.

    The output pane is cleared first. With an empty keyword the help text
    (``msg``) is shown again. Otherwise every stripped record containing the
    keyword is inserted at the top of the pane, every occurrence of the
    keyword is tagged with 'tagx', and the help text is appended at the end.
    The status line reports whether anything was found.
    """
    try:
        txtout.configure(state='normal')
        txtout.delete('1.0', 'end')
        searchx = loader.get()
        if searchx == '':
            txtout.insert('1.0', msg)
            meters.set('查询关键字不能为空,请重新输入.')
        else:
            found = False
            for line in lines:
                line = line.strip()
                if searchx in line:
                    found = True
                    # Inserting at '1.0' puts later records above earlier ones.
                    txtout.insert('1.0', '------------\n' + line + '\n')
            # Set the status once, and also when nothing matched, so a
            # message from an earlier query is never left on screen.
            if found:
                meters.set('关键字' + ' [' + searchx + '] ' + '查询已完成,双击内容后按Ctrl+C复制.')
            else:
                meters.set('关键字' + ' [' + searchx + '] ' + '未查询到结果.')
            # Tag every occurrence of the keyword in the inserted text.
            start = '1.0'
            while True:
                pos = txtout.search(searchx, start, stopindex='end')
                if not pos:
                    break
                txtout.tag_add('tagx', pos, '{}+{}c'.format(pos, len(searchx)))
                start = pos + '+1c'  # continue one character after this hit
            txtout.insert('end', msg + '\n')
        txtout.configure(state='disabled')
    except TclError:
        # Best effort: ignore Tk widget errors. The original named an
        # undefined exception (`searchxError`), which would itself have
        # raised NameError as soon as any error occurred here.
        pass

# Build the main window: a status line, the search entry with its buttons,
# and a read-only text pane with a vertical scrollbar.
root = Tk()
root.title('联行号查询工具 v1.3')

mainframe = ttk.Frame(root, padding='12 6 12 12')
mainframe.grid(column=0, row=0, sticky='nwes')
root.resizable(0, 0)  # fixed-size window

# Status line: a caption plus a label bound to `meters`. grid() returns
# None, so the labels are not bound to a name (the original stored that
# None in `loader_label`).
meters = StringVar()
ttk.Label(mainframe, text='当前状态:').grid(column=1, row=1, sticky='w')
ttk.Label(mainframe, textvariable=meters).grid(column=1, row=1, sticky='w', padx=60)
meters.set('初始化完成,数据版本202012,请指示.')

# Search entry bound to `loader`.
loader = StringVar()
loader_entry = ttk.Entry(mainframe, width=23, textvariable=loader)
loader_entry.grid(column=3, row=1, sticky='e', padx=160)

# Buttons share grid cell (3, 1) and are offset from each other via padx.
ttk.Button(mainframe, text='←', width=3, command=clsx).grid(column=3, row=1, sticky='e', padx=125)
ttk.Button(mainframe, text='¥.', width=3, command=likes).grid(column=3, row=1, sticky='e', padx=90)
ttk.Button(mainframe, text='查询', command=queryx).grid(column=3, row=1, sticky='e')


photo = PhotoImage(file=resource_path('like.png'))
# Banner and usage text. Backslashes of the ASCII art are written as '\\'
# so the literal holds the same characters without invalid escape sequences.
msg = '\n  .__         .__          \n  |  | _____  |  | _____   \n  |  | \\__  \\ |  | \\__  \\  \n  |  |__/ __ \\|  |__/ __ \\_\n  |____(____  /____(____  /\n            \\/          \\/ \n------------\n# 使用说明:\n------------\n  输入银行名称或行号点击查询按钮或回车。双击查询结果,Ctrl+C复制,CTRL+V粘贴,查询结果为空将不显示任何内容。\n-------------------------\n@ 2020 by lala q.33818121\n'
txtout = Text(mainframe, width=133, height=33)
txtout.grid(column=1, row=2, columnspan=3, sticky='nwes')
txtout.insert('1.0', msg)
txtout.tag_configure('tagx', foreground='red')  # style of highlighted keyword hits
txtout.configure(state='disabled')  # read-only until a callback unlocks it

sbar = ttk.Scrollbar(mainframe, orient='vertical', command=txtout.yview)
txtout['yscrollcommand'] = sbar.set
sbar.grid(column=4, row=2, sticky='ns')

# Give every widget in the frame the same vertical outer padding.
for child in mainframe.winfo_children():
    child.grid_configure(pady=5)

loader_entry.focus()  # start with the cursor in the search entry

root.bind('<Return>', queryx)  # pressing Enter runs the query

root.mainloop()

# pyinstaller -F -w --add-data banks.txt;. --add-data like.png;. bankttk.py

pyinstaller打包命令参照

下一步计划,批量付款明细用excel公式根据城市信息自动匹配银行行号

相关文章

网友评论

      本文标题:手打爬虫 python 抓取银行联行号 大额行号

      本文链接:https://www.haomeiwen.com/subject/tlykiktx.html