# -*- coding:utf-8 -*-
# Copyright 2018 Huawei Technologies Co.,Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use
# this file except in compliance with the License. You may obtain a copy of the
# License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
from cloud.HWOcrClientAKSK import HWOcrClientAKSK
from cloud.HWOcrClientToken import HWOcrClientToken
import os
def getOCR(pic_dir, pic_content_dir, number_of_pics, start=1353):
    '''
    Run the general-text OCR service over numbered pictures and log the text.

    For every ``num`` in ``range(start, number_of_pics)`` the picture
    ``<pic_dir>/<num>.jpg`` is sent to the ``/v1.0/ocr/general-text`` endpoint.
    The ``words`` of every entry in ``result.words_block_list`` of the JSON
    response are concatenated and appended to ``<pic_content_dir>/content.txt``
    as ``<num>\\t<text>\\n``. If anything fails for a picture, the error is
    printed and appended to ``<pic_content_dir>/false_content.txt`` as
    ``<num>\\t<error>\\n``, and processing continues with the next picture.

    :param pic_dir: directory that holds the numbered ``.jpg`` pictures
    :param pic_content_dir: directory that receives ``content.txt`` and
        ``false_content.txt``
    :param number_of_pics: exclusive upper bound of the picture numbers
        (NOTE(review): picture ``number_of_pics`` itself is never requested --
        confirm this matches how the pictures are numbered)
    :param start: first picture number to process, used to resume an
        interrupted run; the default 1353 is the resume point that used to be
        hard-coded here
    :return: None
    '''
    # Imported locally so this function does not depend on the module's
    # top-of-file import list.
    import json

    # NOTE(review): the two literals below are placeholders ("hidden here");
    # real credentials should not be committed to source control.
    AK = "此处隐藏"  # AK from authentication
    SK = "此处隐藏"  # SK from authentication
    endpoint = "ocr.cn-north-4.myhuaweicloud.com"  # http endpoint information
    # initialize ocr_client from ak, sk and endpoint information
    ocr_client = HWOcrClientAKSK(AK, SK, endpoint)
    option = {}

    for num in range(start, number_of_pics):
        try:
            # call OCR interface to recognize picture
            response = ocr_client.request_ocr_service_base64(
                "/v1.0/ocr/general-text",
                pic_dir + '/' + str(num) + '.jpg',
                option)
            # Parse the body as JSON rather than eval()-ing it: the text comes
            # from a remote service, and eval() would both execute arbitrary
            # expressions and choke on JSON's true/false/null.
            payload = json.loads(response.text)
            content = ''.join(
                block['words']
                for block in payload['result']['words_block_list'])
            with open(pic_content_dir + '/content.txt', 'a', encoding='utf-8') as f1:
                f1.write(str(num) + '\t' + content + '\n')
            # Report the picture that was just processed (not the next one).
            print('generate ' + str(num) + '.jpg')
        except Exception as e:
            # Deliberately broad: this is a best-effort batch job, so any
            # failure (request, parsing, missing keys, file write) is recorded
            # and the loop moves on to the next picture.
            print('OOPS~ in ' + str(num) + 'location.')
            print(e)
            with open(pic_content_dir + '/false_content.txt', 'a', encoding='utf-8') as f1:
                f1.write(str(num) + '\t' + str(e) + '\n')
if __name__ == '__main__':
    # AK/SK demo entry point: the picture and output directories are resolved
    # relative to the parent of the current working directory.
    parent_dir = os.path.abspath(os.path.dirname(os.getcwd()) + os.path.sep + ".")
    pic_dir = parent_dir + '/精/pic/'
    pic_content_dir = parent_dir + '/精/pic_content/'
    number_of_pics = 4122
    getOCR(
        pic_dir=pic_dir,
        pic_content_dir=pic_content_dir,
        number_of_pics=number_of_pics,
    )
# 网友评论  ("Reader comments" -- apparently stray web-page text, not part of the program)