Image identification using Tesseract

113 Views Asked by At

I am trying to identify the numbers and their positions in this image: [image: enter image description here]

This is my code:

import cv2
import pytesseract


def round_to_nearest_10(number):
    """Snap ``number`` to a multiple of 10.

    The value is divided by 10, rounded with the built-in ``round`` and
    scaled back up. Exact halves follow ``round``'s half-to-even rule, so
    25 becomes 20 while 35 becomes 40.
    """
    tens = round(number / 10)
    return tens * 10


def parse_image_grid(filename):
    """Find four-cornered regions in an image and OCR the text inside each.

    Pipeline: grayscale -> Gaussian blur -> Canny edges -> external
    contours. Every contour whose polygon approximation has exactly four
    vertices is cropped by its bounding box and handed to Tesseract. Each
    hit is printed and also recorded in the returned mapping.

    Args:
        filename: Path of the image file to read.

    Returns:
        dict: ``(x, y, x + w, y + h)`` bounding box of each detected region
        mapped to the stripped text Tesseract returned for it. If two
        contours share a bounding box, the later one wins.

    Raises:
        FileNotFoundError: If OpenCV could not read ``filename``.
    """
    # Set the path to the Tesseract executable (update with your path)
    # NOTE(review): with the r-prefix the doubled backslashes are kept
    # literally; single backslashes were probably intended - confirm before
    # changing.
    pytesseract.pytesseract.tesseract_cmd = r'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

    # cv2.imread signals failure by returning None instead of raising, so
    # check here rather than let cvtColor fail with an opaque error.
    image = cv2.imread(filename)
    if image is None:
        raise FileNotFoundError(
            f"Could not read image (missing or unsupported file): {filename}"
        )

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply GaussianBlur to reduce noise before edge detection
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Use the Canny edge detector to find edges in the image
    edges = cv2.Canny(blurred, 50, 150)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; the contour list is always the
    # second-to-last item.
    contours = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Mapping of square coordinates to the text identified inside them
    square_dict = {}

    for contour in contours:
        # Approximate the contour to a polygon; tolerance is 4% of the perimeter
        epsilon = 0.04 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)

        # Only polygons with four corners are treated as candidate squares
        if len(approx) != 4:
            continue

        # Extract the region of interest (ROI) containing the square
        x, y, w, h = cv2.boundingRect(contour)
        square_roi = image[y:y + h, x:x + w]

        # Use OCR to extract numbers from the square
        square_text = pytesseract.image_to_string(square_roi, config="--psm 6").strip()

        # Print the square coordinates and extracted numbers
        print(f"Square at ({x}, {y}), Numbers: {square_text}")

        # Record the result so callers get more than console output
        square_dict[(x, y, x + w, y + h)] = square_text

    return square_dict

Output:

Square at (221, 71), Numbers: 4a
Square at (181, 61), Numbers: fi
Square at (31, 61), Numbers: 3 |
Square at (211, 31), Numbers: @
Square at (181, 31), Numbers: 2
Square at (121, 31), Numbers: ff
Square at (91, 31), Numbers: &
Square at (61, 31), Numbers: @
Square at (1, 31), Numbers: 
Square at (121, 1), Numbers: 5 |
Square at (91, 1), Numbers: Es
Square at (61, 1), Numbers: @
Square at (31, 0), Numbers: 9

It is identifying some blocks correctly. For others it is identifying numbers as @ or | characters.

I tried changing the psm settings, but that did not help.

Am I missing something here?

1

There is 1 best solution below

1
Tino D On BEST ANSWER

There were some problems in your code that generally hurt the detection of the numbers. Some improvements that can be made:

  • get the exact number of expected squares
  • generate an image where the numbers are the positive class (1) and the background is negative class (0)

So here is my approach:

import numpy as np  # needed for the dilation kernel below

pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # path to my tesseract (on linux)

# cv2.imread returns None instead of raising when the file cannot be read
image = cv2.imread("sudoku.jpg")
if image is None:
    raise FileNotFoundError("Could not read image: sudoku.jpg")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # turn to gray

# Inverse threshold: pixels brighter than 160 become 0, everything else 255
# (used here to pick out the grid).
_, threshold = cv2.threshold(gray, 160, 255, cv2.THRESH_BINARY_INV)

# Dilate with a 2x2 kernel to make sure there are no breaks in the contour
thresholdDilated = cv2.dilate(threshold, np.ones((2, 2), np.uint8))

# Flip the pixels back before looking for contours
thresholdDilated = cv2.bitwise_not(thresholdDilated)

# Outer contours only, keeping every boundary point
contours, _ = cv2.findContours(thresholdDilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# Remove contours with low area (anything under 100)
filteredContours = [cnt for cnt in contours if cv2.contourArea(cnt) >= 100]

This gave me exactly 24 contours after filtering. Without the area filter I got 26 contours, two of which had an area of 0 — I have no idea where those came from.

Otherwise, I did one small change to your code:

# Maps each region's bounding box (x, y, x + w, y + h) to the text OCR read from it
square_dict = {}
for contour in filteredContours:
    # Simplify the outline; the tolerance is 4% of the closed contour's perimeter
    perimeter = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, 0.04 * perimeter, True)

    # Anything that does not reduce to exactly four corners is skipped
    if len(approx) != 4:
        continue

    # Crop the bounding box of the contour out of the grayscale image
    x, y, w, h = cv2.boundingRect(contour)
    right, bottom = x + w, y + h
    square_roi = gray[y:bottom, x:right]

    # Binarize the crop before OCR: pixels above 127 become 255, the rest 0
    binaryThreshold = cv2.threshold(square_roi, 127, 255, cv2.THRESH_BINARY)[1]

    # Run Tesseract on the binarized crop and drop surrounding whitespace
    raw_text = pytesseract.image_to_string(binaryThreshold, config="--psm 6")
    square_text = raw_text.strip()

    # Report the hit and remember it under its bounding box
    print(f"Square at ({x}, {y}), Numbers: {square_text}")
    square_dict[(x, y, right, bottom)] = square_text

I did the detection on the binaryThreshold where the numbers are the positive class. Here are the results:

Square at (212, 62), Numbers: 4
Square at (182, 62), Numbers: 
Square at (152, 62), Numbers: 
Square at (122, 62), Numbers: 
Square at (92, 62), Numbers: 1
Square at (62, 62), Numbers: 
Square at (32, 62), Numbers: 3
Square at (2, 62), Numbers: 0
Square at (212, 32), Numbers: 
Square at (182, 32), Numbers: 2
Square at (152, 32), Numbers: 7
Square at (122, 32), Numbers: 
Square at (92, 32), Numbers: 
Square at (62, 32), Numbers: 
Square at (32, 32), Numbers: 
Square at (2, 32), Numbers: 8
Square at (212, 2), Numbers: 6
Square at (182, 2), Numbers: 
Square at (152, 2), Numbers: 
Square at (122, 2), Numbers: 5
Square at (92, 2), Numbers: 
Square at (62, 2), Numbers: 
Square at (32, 2), Numbers: 9
Square at (2, 2), Numbers: 

If I were you, I would sort the contours such that they start from top left to bottom right. Otherwise, hope this answer helped :D