PaddleOCR's results on one of my images were worse than expected, and I needed to work out whether the recognition model or the detection model was at fault. The approach: take the coordinates of a known text box, crop that region out of the image, and run recognition on the crop alone. The code is simple, but there are a couple of small pitfalls, described below.
The image-cropping function lives in tools/infer/utility.py in the PaddleOCR repo; it is easiest to copy it out (e.g. into a local utils.py) rather than import it from the package:
# utils.py -- copied from PaddleOCR's tools/infer/utility.py
import cv2
import numpy as np

def get_rotate_crop_image(img, points):
    '''
    img_height, img_width = img.shape[0:2]
    left = int(np.min(points[:, 0]))
    right = int(np.max(points[:, 0]))
    top = int(np.min(points[:, 1]))
    bottom = int(np.max(points[:, 1]))
    img_crop = img[top:bottom, left:right, :].copy()
    points[:, 0] = points[:, 0] - left
    points[:, 1] = points[:, 1] - top
    '''
    assert len(points) == 4, "shape of points must be 4*2"
    # Target size comes from the longer of each pair of opposite edges
    img_crop_width = int(
        max(
            np.linalg.norm(points[0] - points[1]),
            np.linalg.norm(points[2] - points[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(points[0] - points[3]),
            np.linalg.norm(points[1] - points[2])))
    pts_std = np.float32([[0, 0], [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])
    # Perspective-warp the quadrilateral into an upright rectangle
    M = cv2.getPerspectiveTransform(points, pts_std)
    dst_img = cv2.warpPerspective(
        img,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)
    dst_img_height, dst_img_width = dst_img.shape[0:2]
    # Tall, narrow crops are likely vertical text; rotate them 90 degrees
    if dst_img_height * 1.0 / dst_img_width >= 1.5:
        dst_img = np.rot90(dst_img)
    return dst_img
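Two behaviors are worth knowing: the function warps the quadrilateral into an upright rectangle whose sides come from the longer opposite edges, and if the crop ends up at least 1.5 times taller than it is wide, it assumes vertical text and rotates it 90 degrees. A minimal sanity check on a synthetic image (my own sketch, not from the original post):

import numpy as np
from utils import get_rotate_crop_image

# A 100-wide by 50-high axis-aligned box inside a blank 400x300 image
test_img = np.zeros((300, 400, 3), dtype=np.uint8)
pts = np.array([[50, 60], [150, 60], [150, 110], [50, 110]], np.float32)
crop = get_rotate_crop_image(test_img, pts)
print(crop.shape)  # (50, 100, 3) -- ratio 0.5 < 1.5, so no 90-degree rotation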
Then comes the main script. Note that the box you pass in must be a np.array with dtype np.float32; otherwise cv2.getPerspectiveTransform will raise an error.
import cv2
import numpy as np
from paddleocr import PaddleOCR
from utils import get_rotate_crop_image

ocr = PaddleOCR(use_angle_cls=True, lang="ch")
image_file = 'pic/11.jpg'
# Decode from raw bytes; this also sidesteps cv2.imread's trouble with non-ASCII paths
with open(image_file, 'rb') as f:
    np_arr = np.frombuffer(f.read(), dtype=np.uint8)
    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
# print(img)
box = np.array([[481.0, 170.0], [1087.0, 155.0], [1089.0, 212.0], [482.0, 227.0]], np.float32)
box_img = get_rotate_crop_image(img, box)
result = ocr.ocr(box_img, det=False)  # det=False skips detection and recognizes the crop directly
print(result)
The output is as follows:
[('当事人送达地址确认书', 0.890923798084259)]
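With recognition confirmed on a hand-picked box, a natural next step for the original det-vs-rec question is to run the full pipeline on the same image and inspect the detected boxes. The sketch below is my own, not part of the original post; it reuses the ocr and img variables from the script above and assumes the result format of older PaddleOCR releases, where ocr.ocr(img) returns [[box, (text, score)], ...] (matching the det=False output shown above; newer releases wrap results per page).

# List every detected box with its end-to-end text so the boxes can be
# compared against the hand-picked one above.
full_result = ocr.ocr(img)
for det_box, (text, score) in full_result:
    print(det_box, '->', text, score)

If the detector's boxes are off while recognition on the hand-picked crop is correct, detection is the likelier culprit; if the boxes look fine but the text is wrong, the recognition model is.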