Unable to read 7-segment displays on electrical meters correctly, even with specialized trained data

29 Views Asked by At

Below is my code using cv2 and pytesseract. I tried the different trained data sets available (7seg and letsgodigital) on my images.

enter image description here enter image description here

In the first image, the value is not read correctly and the decimal point is not recognized at all. In the second image, the values are read, but the decimal points are not recognized. Could you please look into the issue and help me with this?

import numpy as np
import pytesseract
import argparse
import imutils
import cv2

# Script: locate rows of text in a meter photo and OCR each row with several
# Tesseract configurations (default model and "letsgodigital" per row, "7seg"
# on the whole image) so their outputs can be compared.

# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

image = 'D:/OCR/DigMeter2.jpg'

# Padding (in pixels) added around each detected row before OCR, and the
# minimum width/height a bounding box must exceed to be considered text.
PAD = 10
MIN_BOX_SIZE = 10

# Read the input image as grayscale.
gray = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
if gray is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would otherwise fail later with an obscure error inside imshow.
    raise FileNotFoundError("Could not read image: {}".format(image))
cv2.imshow("gray", gray)
cv2.waitKey(1000)

# Erode the image.
# NOTE(review): the eroded image is only displayed; the thresholding below
# still runs on `gray`, not on `erd` - confirm whether that is intended.
erd = cv2.erode(gray, None, iterations=2)
cv2.imshow("erd", erd)
cv2.waitKey(1000)

# Convert to an inverted binary image using Otsu's automatic threshold.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
cv2.imshow("Otsu", thresh)
cv2.waitKey(1000)

# Dilate with a wide, short kernel to unite text areas into blocks of rows.
dilated_thresh = cv2.dilate(thresh, np.ones((3, 100)))

# Find contours on dilated_thresh.
# Index [-2] keeps this compatible with both OpenCV 3 and OpenCV 4.
cnts = cv2.findContours(dilated_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

# Bounding boxes of the row blocks, sorted from top to bottom.
bounding_boxes = sorted((cv2.boundingRect(c) for c in cnts), key=lambda b: b[1])

img_h, img_w = thresh.shape[:2]

for x, y, w, h in bounding_boxes:
    # Skip boxes too small to contain text.
    if h <= MIN_BOX_SIZE or w <= MIN_BOX_SIZE:
        continue

    # Crop the padded row (clamped to the image bounds) and invert black and
    # white, since Tesseract prefers black text on a white background.
    # Named `roi` rather than `slice` to avoid shadowing the builtin.
    roi = 255 - thresh[max(y - PAD, 0):min(y + h + PAD, img_h),
                       max(x - PAD, 0):min(x + w + PAD, img_w)]
    cv2.imshow("slice", roi)
    cv2.waitKey(1000)

    text = pytesseract.image_to_string(roi, config="-c tessedit"
                                                   "_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-:."
                                                   " --psm 6"
                                                   " ")

    text2 = pytesseract.image_to_string(roi, lang="letsgodigital", config="--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ12345678900123456789.")

    # Report the result for every row; previously the prints ran once after
    # the loop, showing only the last row and raising NameError when no
    # bounding box passed the size filter.
    print("[text] {}".format(text))
    print("[text2] {}".format(text2))

# The '7seg' pass is given the image file path, i.e. it OCRs the whole image
# rather than a single row, so it is run once instead of once per row.
# NOTE(review): if per-row '7seg' results were intended, pass the cropped row
# instead of `image` and move this call into the loop.
text1 = pytesseract.image_to_string(image, lang='7seg', config='--psm 6 --oem 3 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ12345678900123456789.')
print("[text1] {}".format(text1))
0

There are 0 best solutions below