3

I tried to detect text in images specially images with quotes using OpenCV Python. For that I first train some text images. I detect each characters of text in the image to train. For images with proper word style the characters are detect properly. But for some images the text(character) area can't be detect properly. I attached the code for this below. How can I modify the code so that the characters can be detected properly

import sys
import numpy as np
import cv2
import os

MIN_CONTOUR_AREA = 100

RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30

def main():
imgTrainingNumbers = cv2.imread("E:\God - Level 4 Research Project\Testings\Tharu\godd/jbpoetry.png") 

if imgTrainingNumbers is None:  
    print ("error: image not read from file \n\n") 
    os.system("pause") 
    return

imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY)
imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0)

imgThresh = cv2.adaptiveThreshold(imgBlurred,
                                  255,
                                  cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY_INV,
                                  11,
                                  2)

cv2.imshow("imgThresh", imgThresh)

imgThreshCopy = imgThresh.copy()

imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy,
                                             cv2.RETR_EXTERNAL, 
                                             cv2.CHAIN_APPROX_SIMPLE)


npaFlattenedImages =  np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))

intClassifications = []

intValidChars = [ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), ord('5'), ord('6'), ord('7'), ord('8'), ord('9'),
                 ord('A'), ord('B'), ord('C'), ord('D'), ord('E'), ord('F'), ord('G'), ord('H'), ord('I'), ord('J'),
                 ord('K'), ord('L'), ord('M'), ord('N'), ord('O'), ord('P'), ord('Q'), ord('R'), ord('S'), ord('T'),
                 ord('U'), ord('V'), ord('W'), ord('X'), ord('Y'), ord('Z'),ord('a'),ord('b'),ord('c'),ord('d'),
                 ord('e'),ord('f'),ord('g'),ord('h'),ord('i'),ord('j'),ord('k'),ord('l'),ord('m'),ord('n'),ord('o'),
                 ord('p'),ord('q'),ord('r'),ord('s'),ord('t'),ord('u'),ord('v'),ord('w'),ord('x'),ord('y'),ord('z') ]


for npaContour in npaContours:
    if cv2.contourArea(npaContour) > MIN_CONTOUR_AREA:
        [intX, intY, intW, intH] = cv2.boundingRect(npaContour)


        cv2.rectangle(imgTrainingNumbers,
                      (intX, intY), 
                      (intX+intW,intY+intH),
                      (0, 0, 255),
                      2)

        imgROI = imgThresh[intY:intY+intH, intX:intX+intW] 
        imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))

        cv2.imshow("imgROI", imgROI)               
        cv2.imshow("imgROIResized", imgROIResized)
        cv2.imshow("training_numbers.png", imgTrainingNumbers)

        intChar = cv2.waitKey(0) 

        if intChar == 27: 
            sys.exit()
        elif intChar in intValidChars:
            print(intChar)
            intClassifications.append(intChar)    
            print(intChar)
            npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT)) 
            npaFlattenedImages = np.append(npaFlattenedImages, npaFlattenedImage, 0) 

fltClassifications = np.array(intClassifications, np.float32) 

npaClassifications = fltClassifications.reshape((fltClassifications.size, 1))

print ("\n\ntraining complete !!\n")

np.savetxt("classificationsNEWG.txt", npaClassifications)
np.savetxt("flattened_imagesNEWG.txt", npaFlattenedImages)
cv2.destroyAllWindows()
return
if __name__ == "__main__":
main()

Inaccurate text detection

1
  • Can you give more details about training images an training process? Commented Apr 18, 2018 at 5:39

1 Answer 1

2

What you are trying to do is a very naive approach, just applying the threshold and detecting contours won't work here. A lot of research papers have been published around this task. You may refer those and try to implement or can use image_to_boxes function of the famous tesseract OCR. You can download it from here and as you are using python you can install pytesseract - python wrapper for tesseract from here and use the following code to achieve what you are expecting.

import pytesseract
import cv2

originalImg = cv2.imread('tp.png')
originalImg = cv2.resize(originalImg, None, fx=2.5, fy=2.5)
img = cv2.cvtColor(originalImg, cv2.COLOR_BGR2GRAY)
_,img = cv2.threshold(img,100,255,cv2.THRESH_BINARY)

h, w = img.shape

letters = pytesseract.image_to_boxes(img)
letters = letters.split('\n')
letters = [letter.split() for letter in letters]

for letter in letters:    
    cv2.rectangle(originalImg, (int(letter[1]), h - int(letter[2])), (int(letter[3]), h - int(letter[4])), (0,0,255), 1)

cv2.imshow('', originalImg)

The resultant image

enter image description here

Note that there are many false detections, you need to ignore them in your training process.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.