I’m trying to extract data from an image using docTR for OCR and OpenCV to enhance the image quality by removing the background. However, I’ve encountered an issue where the OCR results are incorrectly converting the number 6 to 9 and vice versa. Here’s the code I’m using to enhance the image quality:
def enhance_negative_signs(image):
    """Black out short horizontal dark runs of *image*, in place.

    The image is inverse-thresholded at 127 (pixels at or below 127 become
    255), the result is morphologically opened with a 1x3 horizontal
    kernel, and every pixel of *image* where the opened mask is non-zero
    is set to 0.

    The input array itself is modified and then returned.
    Judging by the name, the intent is to reinforce minus signs before
    OCR -- TODO confirm against real inputs.
    """
    # Index [1] is the thresholded image; index [0] is the threshold used.
    inverted = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY_INV)[1]

    # One row high, three columns wide: only horizontal runs survive opening.
    horizontal_kernel = np.ones((1, 3), dtype=np.uint8)
    stroke_mask = cv2.morphologyEx(inverted, cv2.MORPH_OPEN, horizontal_kernel)

    image[stroke_mask.astype(bool)] = 0
    return image
def replace_yellow_with_white(bgr):
    """Paint pixels inside a fixed HSV range white, in place.

    *bgr* is converted from BGR to HSV, a mask is built for HSV values
    between [23, 104, 163] and [30, 246, 255] (both bounds inclusive), and
    every masked pixel of *bgr* is overwritten with [255, 255, 255].

    The input array itself is modified and then returned.
    """
    yellow_low = np.array([23, 104, 163])
    yellow_high = np.array([30, 246, 255])

    # inRange yields 255 where the HSV pixel is within bounds, 0 elsewhere.
    in_range = cv2.inRange(
        cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV),
        yellow_low,
        yellow_high,
    )

    bgr[in_range == 255] = [255, 255, 255]
    return bgr
def preprocess_image(image_path):
    """Load an image and produce a binarized version of it.

    Steps: read the file, upscale by 1.5x with bicubic interpolation,
    replace yellow regions with white, convert to grayscale, apply a
    Gaussian adaptive threshold, run a morphological opening, and finally
    pass the result through ``enhance_negative_signs``.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A tuple ``(processed_img, original_img)`` where ``processed_img``
        is the binarized image and ``original_img`` is the upscaled BGR
        image. Returns ``(None, None)`` when the file cannot be read.
    """
    loaded_img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, so
    # the check must happen before the image is handed to cv2.resize.
    if loaded_img is None:
        return None, None

    original_img = cv2.resize(
        loaded_img, None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC
    )

    # Work on a copy: replace_yellow_with_white mutates its argument and the
    # upscaled original is returned to the caller untouched.
    bgr_img = replace_yellow_with_white(original_img.copy())
    gray = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        blockSize=15,
        C=10,
    )

    # NOTE(review): a 1x1 structuring element should make this opening a
    # no-op; kept as in the original -- confirm whether a larger kernel
    # was intended.
    kernel = np.ones((1, 1), np.uint8)
    processed_img = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    processed_img = enhance_negative_signs(processed_img)
    return processed_img, original_img
# Run the preprocessing pipeline on the input image.
# `original_img_path` is not defined in this snippet; it must be set beforehand.
processed_img, original_img = preprocess_image(original_img_path)
# NOTE(review): preprocess_image has a (None, None) return path that is not
# handled here before the array is wrapped in a PIL image.
pil_image = Image.fromarray(processed_img)
# Encode the PIL image as PNG into an in-memory buffer and grab the raw bytes
buffer = BytesIO()
pil_image.save(buffer, format="PNG")
image_bytes = buffer.getvalue()
# Build the docTR document from the encoded PNG bytes (a one-image list).
doc = DocumentFile.from_images([image_bytes])
# First entry of the document; not referenced again in the visible code.
page = doc[0]
# Build the OCR predictor with pretrained detection and recognition models.
model = ocr_predictor(
det_arch='db_resnet50',
reco_arch='crnn_vgg16_bn',
pretrained=True,
det_bs=4,
# NOTE(review): presumably this makes docTR consider rotated text; that may be
# related to the reported 6 <-> 9 confusion -- confirm against the docTR docs.
assume_straight_pages=False,
paragraph_break=0.8
)
# Run OCR over the whole document.
content = model(doc)