What could improve the OCR result using pytesseract on schematic images for PCB?

39 Views Asked by At

I am trying to apply OCR using OpenCV and Python-tesseract to convert the following image to text:

image before ocr.

import cv2
import pytesseract
import argparse
import numpy as np

# Tokens Tesseract tends to emit for schematic line art (wires, pads, symbol
# outlines) rather than for real text. Exact matches are dropped from the
# results.
NOISE_TOKENS = frozenset({
    '-', '}', ',', '—', 'nnn', '#', ':', '=', '——', '*', '!', '°', '——=',
    ';', '+', '©',
})


def filter_ocr_results(texts, boxes):
    """Pair OCR words with their boxes and drop blanks, noise and duplicates.

    Args:
        texts: Sequence of recognised strings (``result['text']``).
        boxes: Sequence of hashable ``(left, top, width, height)`` tuples,
            parallel to ``texts``.

    Returns:
        A list of ``(text, box)`` tuples in first-seen order, without
        whitespace-only entries, entries listed in ``NOISE_TOKENS``, or
        repeated ``(text, box)`` pairs.
    """
    # dict.fromkeys de-duplicates like set() but keeps a stable order, so
    # repeated runs on the same image print and draw identically.
    unique_pairs = dict.fromkeys(zip(texts, boxes))
    return [
        (text, box)
        for text, box in unique_pairs
        if text.strip() and text.strip() not in NOISE_TOKENS
    ]


def preprocess_for_ocr(image):
    """Return a binarised, dark-text-on-light version of a BGR image.

    Args:
        image: Colour image in OpenCV's native BGR channel order, as
            returned by ``cv2.imread``.

    Returns:
        A single-channel 8-bit image containing only 0 and 255.
    """
    normalized = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)
    denoised = cv2.fastNlMeansDenoisingColored(normalized, None, 10, 10, 7, 15)

    # The image is still BGR here, so COLOR_BGR2GRAY applies the correct
    # per-channel weights.
    gray = cv2.cvtColor(denoised, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold is computed from the histogram and the
    # passed value is ignored, so pass 0 to make that explicit.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Tesseract 4+ expects dark text on a light background. Invert only when
    # the page is mostly dark (e.g. a white-on-black schematic export).
    if np.mean(binary) < 127:
        binary = cv2.bitwise_not(binary)
    return binary


def main():
    """Run OCR on the image given on the command line and show the boxes."""
    # Argument parsing
    parser = argparse.ArgumentParser(description="Process images for OCR")
    parser.add_argument("input_file", help="Input image file path")
    parser.add_argument(
        "--psm",
        type=int,
        default=6,
        help="Tesseract page segmentation mode (default: 6, a single uniform "
             "block of text; 11 or 12, sparse text, often suits schematics "
             "where labels are scattered)",
    )
    args = parser.parse_args()

    # cv2.imread signals failure by returning None instead of raising.
    image = cv2.imread(args.input_file)
    if image is None:
        parser.error(f"could not read image: {args.input_file}")

    # Annotate a copy that stays in BGR, which is what cv2.imshow expects.
    annotated = image.copy()
    binary = preprocess_for_ocr(image)

    # Config variables must be introduced with -c; without it Tesseract
    # treats the token as a config-file name and the blacklist is not applied.
    config = f'--psm {args.psm} --oem 3 -l eng -c tessedit_char_blacklist=,;:'
    result = pytesseract.image_to_data(
        binary, config=config, output_type=pytesseract.Output.DICT
    )

    bounding_boxes = zip(
        result['left'], result['top'], result['width'], result['height']
    )
    ocr_results = filter_ocr_results(result['text'], bounding_boxes)

    print(len(ocr_results))

    for text, (x, y, w, h) in ocr_results:
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (255, 0, 255), 2)
        # Draw the text above the box, clamped so it stays inside the image.
        cv2.putText(annotated, text, (x, max(y - 10, 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.namedWindow("image", cv2.WINDOW_NORMAL)
    cv2.imshow("image", annotated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()

The OCR result is attached below. I see a lot of scope for improvement in the results.

The attached image is a sample taken from Google just to illustrate the problem. The original images that I am using are at 300 DPI and have good quality. The main question is how to optimize pytesseract OCR to detect the text in schematics of embedded microcontroller boards.

image after ocr

1

There is 1 best solution below

3
Ajay Meena On

Can you try PaddleOCR? I tested it on the PCB image:

from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# PaddleOCR supports Chinese, English, French, German, Korean and Japanese.
# You can set the parameter `lang` as `ch`, `en`, `fr`, `german`, `korean`, `japan`
# to switch the language model in order.
# The engine only needs to be created once; doing so downloads and loads the model.
# NOTE(review): `det` is normally an argument of `ocr.ocr()`, not of the
# constructor. Confirm it has the intended effect here.
ocr = PaddleOCR(use_angle_cls=False, det=False, lang='en', use_gpu=False)
img_path = 'PCB.png'
result = ocr.ocr(img_path, cls=False)

# `result` holds one entry per input image. An entry may be empty or None
# when no text was detected (TODO confirm for the installed version), so
# fall back to an empty list.
for res in result:
    for line in res or []:
        print(line)

# draw result
detections = result[0] if result else None
if detections:
    image = Image.open(img_path).convert('RGB')
    # Each detection is [box_points, (text, score)].
    boxes = [line[0] for line in detections]
    txts = [line[1][0] for line in detections]
    scores = [line[1][1] for line in detections]
    im_show = draw_ocr(image, boxes, txts, scores, font_path='./arial.ttf')
    im_show = Image.fromarray(im_show)
    im_show.save('result.jpg')
else:
    print('No text detected in', img_path)

enter image description here