python - DocTR wrong number detection - Stack Overflow

admin2025-04-30  0

I’m trying to extract data from an image using docTR for OCR, with OpenCV preprocessing to improve the image quality by removing the background. However, the OCR output misreads the digit 6 as 9 and vice versa. Here’s the code I’m using to enhance the image and run the OCR:

    def enhance_negative_signs(image):
        """Force dark horizontal strokes (e.g. minus signs) to pure black.

        Args:
            image: Image array accepted by ``cv2.threshold``; presumably a
                single-channel 8-bit image (the visible caller passes a
                thresholded grayscale image) -- TODO confirm.

        Returns:
            The same ``image`` object. It is modified in place and also returned.
        """
        # Inverted threshold: pixels at or below 127 become foreground (255).
        _, binary = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY_INV)

        # Opening with a 1x3 (one row, three columns) kernel keeps only
        # foreground runs that are at least three pixels wide horizontally.
        kernel = np.ones((1, 3), np.uint8)  # horizontal kernel
        negative_signs = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

        # NOTE(review): only pixels that were already <= 127 can be selected
        # here, so on a strictly 0/255 input this assignment looks like a
        # no-op -- confirm whether a dilation was intended instead.
        image[negative_signs > 0] = 0

        return image
def replace_yellow_with_white(bgr):
    """Paint every pixel that falls inside a fixed yellow HSV band white.

    Args:
        bgr: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2HSV``).

    Returns:
        The same ``bgr`` object. It is modified in place and also returned.
    """
    # Inclusive HSV bounds of the band treated as "yellow".
    yellow_low = np.array([23, 104, 163])
    yellow_high = np.array([30, 246, 255])

    # Matching is done in HSV space; the write-back targets the BGR array.
    in_band = cv2.inRange(
        cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV),
        yellow_low,
        yellow_high,
    )
    bgr[in_band == 255] = [255, 255, 255]
    return bgr

def preprocess_image(image_path):
    """Load an image and produce a binarized version intended for OCR.

    Steps: read the file, upscale by 1.5x with bicubic interpolation, replace
    yellow regions with white, convert to grayscale, apply a Gaussian adaptive
    threshold, then run the morphological clean-up steps.

    Args:
        image_path: Path passed to ``cv2.imread``.

    Returns:
        A ``(processed_img, original_img)`` tuple, where ``processed_img`` is
        the thresholded image and ``original_img`` is the upscaled BGR image.
        Returns ``(None, None)`` when ``cv2.imread`` could not load the file.
    """
    source_img = cv2.imread(image_path)

    # cv2.imread reports failure by returning None, so this check must come
    # before any other cv2 call that consumes the image (resize would raise).
    if source_img is None:
        return None, None

    original_img = cv2.resize(
        source_img, None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC
    )

    # Work on a copy so the returned original_img keeps its yellow regions.
    bgr_img = original_img.copy()
    bgr_img = replace_yellow_with_white(bgr_img)

    gray = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY)

    thresh = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        blockSize=15,
        C=10
    )

    # NOTE(review): opening with a 1x1 kernel should leave the image
    # unchanged; kept as-is, but a larger kernel may have been intended.
    kernel = np.ones((1, 1), np.uint8)
    processed_img = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    processed_img = enhance_negative_signs(processed_img)

    return processed_img, original_img
# Preprocess the input image, then run docTR OCR on the binarized result.
processed_img, original_img = preprocess_image(original_img_path)

# preprocess_image returns (None, None) for an unreadable image; fail with a
# clear message instead of an obscure error inside Image.fromarray.
if processed_img is None:
    raise OSError(f"Could not read image: {original_img_path!r}")

pil_image = Image.fromarray(processed_img)

# Convert PIL image to PNG bytes, the in-memory form passed to docTR below.
buffer = BytesIO()
pil_image.save(buffer, format="PNG")
image_bytes = buffer.getvalue()

doc = DocumentFile.from_images([image_bytes])
page = doc[0]

# NOTE(review): assume_straight_pages=False enables docTR's handling of
# rotated text. If the inputs are known to be upright, setting it to True
# (or disabling crop orientation detection, if the installed docTR version
# offers that option) may avoid 6/9 confusion caused by crops being treated
# as upside down -- confirm against the docTR documentation.
model = ocr_predictor(
    det_arch='db_resnet50',
    reco_arch='crnn_vgg16_bn',
    pretrained=True,
    det_bs=4,
    assume_straight_pages=False,
    paragraph_break=0.8
)
content = model(doc)
转载请注明原文地址:http://anycun.com/QandA/1746027850a91545.html