Below is my code using cv2 and pytesseract. I tried the different trained data sets available (7seg / letsgodigital) on my images.
In the first image, the value is not read correctly and the decimal point is not recognized at all. In the second image, the digits are read, but the decimal points are still not recognized. Could you please look into this issue and help me with it?
import numpy as np
import pytesseract
import argparse
import imutils
import cv2
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Path of the input picture; it is loaded as a single-channel grayscale image.
image = 'D:/OCR/DigMeter2.jpg'
gray = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
if gray is None:
    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising, so fail here with a clear message instead of letting a
    # later OpenCV call crash with an unrelated-looking error.
    raise FileNotFoundError("Could not read image: {}".format(image))
cv2.imshow("gray", gray)
cv2.waitKey(1000)

# Erode the image.
# NOTE(review): `erd` is only displayed; the threshold below is still computed
# from `gray`, not from the eroded image -- confirm whether that is intended.
erd = cv2.erode(gray, None, iterations=2)
cv2.imshow("erd", erd)
cv2.waitKey(1000)

# Convert to an inverted binary image, letting Otsu's method pick the
# threshold automatically (the explicit threshold value 0 is ignored).
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
cv2.imshow("Otsu", thresh)
cv2.waitKey(1000)

# Dilate with a wide, flat kernel (3 rows x 100 columns) so that characters
# on the same row merge into one blob per text row.
dilated_thresh = cv2.dilate(thresh, np.ones((3, 100)))

# Find the outer contours of the merged rows.
# Index [-2] picks the contour list under both OpenCV 3 and OpenCV 4.
cnts = cv2.findContours(dilated_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

# One (x, y, w, h) bounding box per contour, ordered from top to bottom.
bounding_boxes = [cv2.boundingRect(c) for c in cnts]
bounding_boxes = sorted(bounding_boxes, key=lambda b: b[1])
# Tesseract configurations for the three models being compared. They do not
# change between rows, so they are built once, outside the loop.
default_config = ("-c tessedit"
                  "_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-:."
                  " --psm 6"
                  " ")
seven_seg_config = '--psm 6 --oem 3 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ12345678900123456789.'
lets_go_digital_config = "--psm 6 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ12345678900123456789."

# Margin (in pixels) added around every row before it is handed to Tesseract.
pad = 10

# Iterate bounding boxes (already sorted top to bottom) and OCR each text row.
for b in bounding_boxes:
    x, y, w, h = b

    # Skip boxes that are too small to contain readable text.
    if h <= 10 or w <= 10:
        continue

    # Crop the padded row, clamped to the image borders, and invert black and
    # white (tesseract prefers black text on a white background).
    top = max(y - pad, 0)
    bottom = min(y + h + pad, thresh.shape[0])
    left = max(x - pad, 0)
    right = min(x + w + pad, thresh.shape[1])
    # Named `row_img` rather than `slice` to avoid shadowing the builtin.
    row_img = 255 - thresh[top:bottom, left:right]
    cv2.imshow("slice", row_img)
    cv2.waitKey(1000)

    # Run every model on the same cropped row so the results are comparable.
    text = pytesseract.image_to_string(row_img, config=default_config)
    # The 7seg call previously received `image` (the file path), which made
    # Tesseract re-read the whole original picture on every iteration instead
    # of the preprocessed row.
    text1 = pytesseract.image_to_string(row_img, lang='7seg', config=seven_seg_config)
    text2 = pytesseract.image_to_string(row_img, lang="letsgodigital", config=lets_go_digital_config)

    print("[text] {}".format(text))
    print("[text1] {}".format(text1))
    print("[text2] {}".format(text2))

