Question

I am making an application that uses OCR, and I am using OpenCV to threshold the image to improve the OCR results. I have gotten pretty good results, but I would like to know if anyone has suggestions for improvement.

Here is what I've done so far:

// Convert to grayscale.
// The conversion is done in place: cvMat is both input and output.
cv::cvtColor(cvMat, cvMat, CV_RGB2GRAY);
// Apply adaptive threshold.
// The last two arguments are 3 and 5 (presumably the neighbourhood block size and
// the constant subtracted from the local mean - confirm against the OpenCV docs).
cv::adaptiveThreshold(cvMat, cvMat, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 3, 5);
// Attempt to sharpen the image.
// NOTE(review): the blur overwrites cvMat, so both inputs of addWeighted below are
// the same blurred image and 1.5*x + (-0.5)*x == x; as written this step appears to
// leave the blurred image unchanged. An unsharp mask would need the blurred result
// in a separate matrix from the original.
cv::GaussianBlur(cvMat, cvMat, cv::Size(0, 0), 3);
cv::addWeighted(cvMat, 1.5, cvMat, -0.5, 0, cvMat);

Let me know if you have any suggestions to improve results, thanks.

Sample Images: enter image description here

After: enter image description here

Was it helpful?

Solution

One of the best algorithms for the thresholding problem in the OCR field is the Sauvola method. You can use the code below.

#ifndef _THRESHOLDER
#define _THRESHOLDER
#include <cv.h>
#include "type.h"
using namespace cv;

/// Binarization algorithms that a caller can request from the thresholder.
enum class BhThresholdMethod
{
    OTSU,       ///< Otsu thresholding.
    NIBLACK,    ///< Niblack local thresholding.
    SAUVOLA,    ///< Sauvola local thresholding.
    WOLFJOLION  ///< Wolf-Jolion local thresholding.
};


/// Entry point for image binarization; holds no data members.
class BhThresholder
{
public:
    /// Binarizes `src` into `dst` using the algorithm selected by `method`.
    /// @param src    image to binarize
    /// @param dst    receives the binarized image
    /// @param method which BhThresholdMethod to apply
    void doThreshold(InputArray src, OutputArray dst, const BhThresholdMethod &method);
};

#endif //_THRESHOLDER
thresholder.cpp

#include "stdafx.h"

// Pixel accessors meant to be written after a matrix object and a dot, e.g.
// `img.uget(x, y)`. They take coordinates in (x, y) order and forward them to
// at<T>(row, col), i.e. at<T>(y, x).
//   uget / uset : read / write an unsigned char element
//   fget / fset : read / write a float element
// The arguments are parenthesized so that expressions can be passed safely, and
// the setters deliberately do NOT end in ';' - the caller supplies it - so that
// `if (c) m.uset(x,y,v); else ...` parses as intended.
#define uget(x,y)    at<unsigned char>((y),(x))
#define uset(x,y,v)  at<unsigned char>((y),(x))=(v)
#define fget(x,y)    at<float>((y),(x))
#define fset(x,y,v)  at<float>((y),(x))=(v)

// *************************************************************
// glide a window across the image and
// create two maps: mean and standard deviation.
// *************************************************************
//#define BINARIZEWOLF_VERSION  "2.3 (February 26th, 2013)"


// Computes, for every position where a win_x-by-win_y window fits inside `im`,
// the mean and standard deviation of the pixels under that window.
//
//   im     : input image, read element-wise as unsigned char
//   map_m  : receives the window mean (written as float) at the window centre
//   map_s  : receives the window standard deviation (written as float) at the centre
//   win_x, win_y : window width and height in pixels
//
// Only centres (i + win_x/2, j) with 0 <= i <= im.cols - win_x and
// win_y/2 <= j <= im.rows - win_y/2 - 1 are written; other elements of the maps
// are left untouched. Returns the largest standard deviation encountered.
// NOTE(review): no bounds checks are made here - the caller is presumably
// expected to pass a window that fits inside the image.
double calcLocalStats (Mat &im, Mat &map_m, Mat &map_s, int win_x, int win_y) {

    const int halfW    = win_x / 2;
    const int halfH    = win_y / 2;
    const int firstRow = halfH;
    const int lastRow  = im.rows - halfH - 1;
    const double area  = win_x*win_y;

    double peakDev = 0;

    for (int row = firstRow; row <= lastRow; ++row)
    {
        const int top  = row - halfH;   // first image row covered by the window
        double total   = 0;             // running sum of pixel values
        double totalSq = 0;             // running sum of squared pixel values

        // Converts the running sums into mean / standard deviation, stores both
        // at (centerX, row) and keeps track of the largest deviation so far.
        auto emit = [&](int centerX) {
            const double mean = total / area;
            const double dev  = sqrt ((totalSq - (total*total)/area)/area);
            if (dev > peakDev) {
                peakDev = dev;
            }
            map_m.fset(centerX, row, mean);
            map_s.fset(centerX, row, dev);
        };

        // Full summation for the left-most window of this row.
        for (int dy = 0; dy < win_y; ++dy) {
            for (int dx = 0; dx < win_x; ++dx) {
                const double px = im.uget(dx, top + dy);
                total   += px;
                totalSq += px*px;
            }
        }
        emit(halfW);

        // Slide the window one column at a time: drop the column that leaves
        // on the left, add the column that enters on the right.
        const int lastLeft = im.cols - win_x;
        for (int left = 1; left <= lastLeft; ++left) {
            for (int dy = 0; dy < win_y; ++dy) {
                const double leaving = im.uget(left - 1, top + dy);
                total   -= leaving;
                totalSq -= leaving*leaving;

                const double entering = im.uget(left + win_x - 1, top + dy);
                total   += entering;
                totalSq += entering*entering;
            }
            emit(left + halfW);
        }
    }

    return peakDev;
}




// Binarizes `_src` into `_dst` with a locally adaptive threshold.
//
//   _src    : input image, read element-wise as unsigned char
//   _dst    : output image, written element-wise as unsigned char; it is not
//             created here, so it must already be allocated by the caller
//   version : NIBLACK, SAUVOLA or WOLFJOLION; any other value prints an error
//             and terminates the process with exit(1)
//   winx, winy : size of the sliding statistics window
//   k       : weight applied to the deviation term of each formula
//   dR      : divisor of the standard deviation in the Sauvola formula
//
// A per-pixel threshold surface is built from the local mean m and standard
// deviation s. Positions too close to the image edge for a full window copy
// the threshold of the nearest computed position in the same row/column band.
// Finally dst is set to 255 where src >= threshold and to 0 elsewhere.
void NiblackSauvolaWolfJolion (InputArray _src, OutputArray _dst,const BhThresholdMethod &version,int winx, int winy, double k, double dR) {

    Mat src = _src.getMat();
    Mat dst = _dst.getMat();

    const int halfW    = winx/2;
    const int halfH    = winy/2;
    const int firstCol = halfW;
    const int lastCol  = src.cols - halfW - 1;
    const int firstRow = halfH;
    const int lastRow  = src.rows - halfH - 1;

    // Local mean / standard deviation maps, plus the largest deviation found.
    Mat map_m = Mat::zeros (src.size(), CV_32FC1);
    Mat map_s = Mat::zeros (src.size(), CV_32FC1);
    const double max_s = calcLocalStats (src, map_m, map_s, winx, winy);

    // Only the minimum is used below (Wolf-Jolion formula).
    double min_I, max_I;
    minMaxLoc(src, &min_I, &max_I);

    Mat thsurf (src.size(), CV_32FC1);

    // Writes `value` into the half-open rectangle [x0, x1) x [y0, y1) of the surface.
    auto paint = [&thsurf](int x0, int x1, int y0, int y1, double value) {
        for (int y = y0; y < y1; ++y) {
            for (int x = x0; x < x1; ++x) {
                thsurf.fset(x, y, value);
            }
        }
    };

    // Build the threshold surface, including the border bands.
    // `th` deliberately lives outside the loops: the right-hand border reuses
    // the last threshold computed in the row.
    double th = 0;
    for (int row = firstRow; row <= lastRow; ++row) {

        const bool isTopRow    = (row == firstRow);
        const bool isBottomRow = (row == lastRow);

        // Interior: every column where a full window fits.
        for (int left = 0; left <= src.cols - winx; ++left) {

            const int col  = left + halfW;
            const double m = map_m.fget(col, row);
            const double s = map_s.fget(col, row);

            if (version == BhThresholdMethod::NIBLACK) {
                th = m + k*s;
            }
            else if (version == BhThresholdMethod::SAUVOLA) {
                th = m * (1 + k*(s/dR-1));
            }
            else if (version == BhThresholdMethod::WOLFJOLION) {
                th = m + k * (s/max_s-1) * (m-min_I);
            }
            else {
                cerr << "Unknown threshold type in ImageThresholder::surfaceNiblackImproved()\n";
                exit (1);
            }

            thsurf.fset(col, row, th);

            if (left == 0) {
                // Left border of this row, then the two left corners.
                paint(0, firstCol + 1, row, row + 1, th);
                if (isTopRow) {
                    paint(0, firstCol + 1, 0, firstRow, th);
                }
                if (isBottomRow) {
                    paint(0, firstCol + 1, lastRow + 1, src.rows, th);
                }
            }

            // Upper and lower borders directly above / below this column.
            if (isTopRow) {
                paint(col, col + 1, 0, firstRow, th);
            }
            if (isBottomRow) {
                paint(col, col + 1, lastRow + 1, src.rows, th);
            }
        }

        // Right border of this row, then the two right corners.
        paint(lastCol, src.cols, row, row + 1, th);
        if (isTopRow) {
            paint(lastCol, src.cols, 0, firstRow, th);
        }
        if (isBottomRow) {
            paint(lastCol, src.cols, lastRow + 1, src.rows, th);
        }
    }
    cerr << "surface created" << endl;

    // Apply the surface: at or above the local threshold -> 255, below -> 0.
    for (int y = 0; y < src.rows; ++y) {
        for (int x = 0; x < src.cols; ++x) {
            const bool atOrAbove = src.uget(x,y) >= thsurf.fget(x,y);
            const unsigned char out = atOrAbove ? 255 : 0;
            dst.uset(x, y, out);
        }
    }
}

// Binarizes `_src` into `_dst` with the requested method and then inverts the
// result with bitwise_not.
//
//   _src   : input image
//   _dst   : (re)allocated here as CV_8UC1 with the size of the input
//   method : OTSU runs a global Otsu threshold on a 5x5 Gaussian-blurred copy;
//            NIBLACK, SAUVOLA and WOLFJOLION run the local-threshold worker
//            with k = 0.5 and dR = 128 on a window derived from the image size.
void BhThresholder::doThreshold(InputArray _src ,OutputArray _dst,const BhThresholdMethod &method)
{
    Mat src = _src.getMat();

    // Window heuristic: about two thirds of the image height, no wider than
    // the image itself.
    int winy = static_cast<int>(2.0 * src.rows - 1) / 3;
    int winx = (src.cols - 1 < winy) ? src.cols - 1 : winy;

    // If the window is too big, then we assume that the image is not a single
    // text box but a document page: use a fixed window size instead.
    if (winx > 100)
        winx = winy = 40;

    // An image that is one pixel wide or high would otherwise produce a
    // zero-sized window and a division by zero in the local statistics.
    if (winx < 1)
        winx = 1;
    if (winy < 1)
        winy = 1;

    const float optK = 0.5f;

    // Threshold
    _dst.create(src.size(), CV_8UC1);
    Mat dst = _dst.getMat();

    // Smooth into dst. The local-threshold branch below is called with `src`,
    // so only the Otsu branch reads this blurred copy from `dst`.
    // (A median blur was tried here previously and left disabled.)
    GaussianBlur(src,dst,Size(5,5),0);
#ifdef _BH_DEBUG
    #define _BH_SHOW_IMAGE
#endif
    switch (method)
    {
    case BhThresholdMethod::OTSU :
        threshold(dst,dst,128,255,CV_THRESH_OTSU);
        break;
    // NIBLACK is routed to the same worker as the other local methods; without
    // this label the switch did nothing for it and the caller received the
    // inverted blurred image.
    case BhThresholdMethod::NIBLACK :
    case BhThresholdMethod::SAUVOLA :
    case BhThresholdMethod::WOLFJOLION :
        NiblackSauvolaWolfJolion (src, dst, method, winx, winy, optK, 128);
        break;
    }

    bitwise_not(dst,dst);

    // Debug-display hook: the guarded section is currently empty.
#ifdef _BH_SHOW_IMAGE

#undef _BH_SHOW_IMAGE
#endif
}

OTHER TIPS

Here is a comparison table for thresholding methods: http://clweb.csa.iisc.ernet.in/rahulsharma/binarize/set1.php?id=set1%2Fimage00b

A few thoughts:

  • Since you're starting with a rectangular object that may be viewed at a non-normal angle, use an affine transform to warp the image so that it appears rectangular with right angle corners.
  • Before the affine transform, you should probably remove barrel distortion (the curviness of the card edges).
  • Consider using an adaptive threshold rather than a simple global binarization threshold.
  • If you can find a proper OCR algorithm that doesn't require binary images, use that. Although binarization will work well for black text on a white background, in general binarization presents a lot of problems if you want to achieve high accuracy (i.e., character recognition approaching 98%+ for arbitrary strings of characters)
  • Try to sample with better resolution.
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top