美文网首页
pytesseract

pytesseract

作者: SecondRocker | 来源:发表于2023-12-31 22:38 被阅读0次

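    一、安装

    pip install pytesseract pillow opencv-python numpy
    （pytesseract 只是封装，还需要单独安装 Tesseract OCR 引擎；识别简体中文需要 chi_sim 语言包。）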

    二、使用

    from PIL import Image
    import pytesseract
    
    # Basic OCR: load the image and run Tesseract on it.
    # The image is opened with Pillow (imported above as Image); the previous
    # cv2.imread call raised NameError because cv2 is not imported in this snippet.
    with Image.open('aaa.png') as img:
        # lang="chi_sim" asks Tesseract for its Simplified Chinese model;
        # the matching language data must be installed alongside Tesseract.
        text = pytesseract.image_to_string(img, lang="chi_sim")
    print(text)
    

    去除噪点

    import cv2
    import numpy as np
    import pytesseract
    from PIL import Image
    
    # Preprocessing: load the image, convert to grayscale, binarize, and
    # extract the outer contours of the white regions.
    img = cv2.imread('aaa.png')
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising, so fail here with a clear message.
        raise FileNotFoundError("cannot read image: 'aaa.png'")
    grayimg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale

    # Pixels above 127 become 255 (white), the rest 0 (black).
    _, bwimg = cv2.threshold(grayimg, 127, 255, cv2.THRESH_BINARY)  # binarize

    # Fixed typo: the binarized image is `bwimg` (was `bwing`, a NameError).
    # NOTE(review): the two-value unpacking assumes the OpenCV 4.x return
    # signature of findContours - confirm the installed version.
    contours, _ = cv2.findContours(bwimg, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # find contours
    
    
    # Method 1
    # Treat every contour whose area is at most 40 as noise.
    noise = [region for region in contours if cv2.contourArea(region) <= 40]

    # Erase the noise regions by painting them black (0) in place.
    cv2.fillPoly(bwimg, noise, 0)
    
    # Method 2
    # Dilate with a wide (3 rows x 11 columns) kernel so that neighbouring
    # white regions on the same line merge into one connected blob.
    kernel = np.ones((3, 11), np.uint8)
    dilation = cv2.dilate(bwimg, kernel, iterations=1)  # dilate

    # After dilation the merged regions form connected components; keep the
    # one with the largest area as the text region.
    # NOTE: max() raises ValueError if no contour is found (e.g. an all-black image).
    contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # find contours
    text_area = max(contours, key=cv2.contourArea)

    # Minimum-area (possibly rotated) bounding rectangle of the text region.
    rect = cv2.minAreaRect(text_area)
    box = cv2.boxPoints(rect)
    # np.int0 was removed in NumPy 2.0; cast the corner points to int32
    # explicitly, which is the integer point type OpenCV drawing functions use.
    box = box.astype(np.int32)

    # Build a mask that is white inside the rectangle and black elsewhere,
    # then AND it with the binary image so everything outside turns black.
    stencil = np.zeros_like(bwimg)
    color = [255, 255, 255]
    cv2.fillPoly(stencil, [box], color)
    result = cv2.bitwise_and(bwimg, stencil)
    

    相关文章

      网友评论

          本文标题:pytesseract

          本文链接:https://www.haomeiwen.com/subject/ytnyndtx.html