先申请一个百度开发者账号,新建一个 OCR 应用,并记录下该应用的 AK(API Key)和 SK(Secret Key)。表格识别接口分成同步和异步两种:同步接口需要先提交申请才能调用,异步接口可以直接使用,每天可以免费使用 50 次。
凑合着先用,就是识别率好像没有文字识别那么高。
# encoding:utf-8
import requests
import base64
import time
# ---------------------------------------------------------------------------
# Step 1: obtain an access_token.
# client_id is the AK and client_secret is the SK obtained from the official
# site; replace the two bracketed placeholders in the URL below.
# ---------------------------------------------------------------------------
host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=[输入你的AK]&client_secret=[输入你的SK]'
response = requests.get(host, timeout=30)
# Stop here on an HTTP error instead of failing later with a NameError on `at`.
response.raise_for_status()
res_json = response.json()
if 'access_token' not in res_json:
    raise RuntimeError(
        'token request did not return an access_token: %r' % (res_json,))
at = res_json['access_token']

# ---------------------------------------------------------------------------
# Step 2: table text recognition (asynchronous interface) - submit the image.
# ---------------------------------------------------------------------------
request_url = "https://aip.baidubce.com/rest/2.0/solution/v1/form_ocr/request"
# Open the image in binary mode; the context manager closes the handle.
with open(r"D:\Python\b4cd4eb05a76fe15828a11d918f3a44.png", 'rb') as f:
    img = base64.b64encode(f.read())
params = {"image": img}
access_token = at
request_url = request_url + "?access_token=" + access_token
headers = {'content-type': 'application/x-www-form-urlencoded'}
response = requests.post(request_url, data=params, headers=headers, timeout=30)
response.raise_for_status()
res_json2 = response.json()
if not res_json2.get('result'):
    # Surface the service's own payload rather than a bare KeyError/IndexError.
    raise RuntimeError('recognition request was rejected: %r' % (res_json2,))
print(res_json2['result'])
# The first entry of 'result' is posted back as the form body of the query.
request_id = res_json2['result'][0]
print(request_id)

# ---------------------------------------------------------------------------
# Step 3: query the result, then download the generated spreadsheet.
# ---------------------------------------------------------------------------
query_url = "https://aip.baidubce.com/rest/2.0/solution/v1/form_ocr/get_request_result" + \
    "?access_token=" + at
wait_seconds = 10  # same delay the script always used before its first query
max_attempts = 6   # stop after about a minute instead of waiting forever
url = None
for _ in range(max_attempts):
    time.sleep(wait_seconds)
    response = requests.post(query_url, data=request_id, headers=headers,
                             timeout=30)
    response.raise_for_status()
    result_json = response.json()
    print(result_json)
    if 'result' not in result_json:
        raise RuntimeError('result query failed: %r' % (result_json,))
    # NOTE(review): assumes 'result_data' stays empty until the job has
    # finished - confirm against the API documentation.
    url = result_json['result'].get('result_data')
    if url:
        break
else:
    raise RuntimeError(
        'recognition result not ready after %d attempts' % max_attempts)
print(url)

r = requests.get(url, timeout=30)
# Do not save an HTTP error page as if it were the spreadsheet.
r.raise_for_status()
with open("111.xls", "wb") as code:
    code.write(r.content)
网友评论