Question

I am struggling to quickly and efficiently extract words that are enclosed in rectangles from a BufferedImage.
For example, I have the following page (edit: the image is scanned, so it can contain noise, skew and distortion):
enter image description here


How can I extract the following images without the rectangle? (Edit: I can use OpenCV or any other library, but I'm absolutely new to advanced image processing techniques.) enter image description here

EDIT

I've used the method suggested by karlphillip here and it works decently.
Here is the code :

    package ro.ubbcluj.detection;

import java.awt.FlowLayout;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.imageio.ImageIO;
import javax.swing.ImageIcon;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.WindowConstants;

import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.MatOfByte;
import org.opencv.core.MatOfPoint;
import org.opencv.core.Point;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.highgui.Highgui;
import org.opencv.imgproc.Imgproc;

/**
 * Command-line demo that looks for rectangular boxes in a scanned form using
 * OpenCV and shows the detected quadrilaterals in a Swing window.
 *
 * <p>The pipeline in {@link #main(String[])} is: load, grayscale, inverse
 * threshold, fill all contours, grayscale + threshold again, erode/dilate,
 * hand the result to {@code Rectangle.findSquares}, draw the squares and
 * display the image.
 */
public class RectangleDetection {

    /**
     * Runs the whole detection pipeline on the hard-coded input image.
     *
     * @param args ignored
     * @throws IOException if the result cannot be converted to a {@link BufferedImage}
     */
    public static void main(String[] args) throws IOException {
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
        Mat image = loadImage();
        Mat grayscale = convertToGrayscale(image);

        Mat treshold = tresholdImage(grayscale);
        List<MatOfPoint> contours = findContours(treshold);
        Mat contoursImage = fillCountours(contours, grayscale);
        Mat grayscaleWithContours = convertToGrayscale(contoursImage);
        Mat tresholdGrayscaleWithContours = tresholdImage(grayscaleWithContours);
        Mat eroded = erodeAndDilate(tresholdGrayscaleWithContours);
        List<MatOfPoint> squaresFound = findSquares(eroded);
        Mat squaresDrawn = Rectangle.drawSquares(grayscale, squaresFound);
        BufferedImage convertedImage = convertMatToBufferedImage(squaresDrawn);
        displayImage(convertedImage);
    }

    /** Delegates square detection to {@code Rectangle.findSquares}. */
    private static List<MatOfPoint> findSquares(Mat eroded) {
        return Rectangle.findSquares(eroded);
    }

    /**
     * Erodes and then dilates {@code input} with the same 11x11 rectangular
     * structuring element (a morphological opening).
     *
     * @param input image to filter; not modified
     * @return a new Mat holding the filtered image
     */
    private static Mat erodeAndDilate(Mat input) {
        int erosion_type = Imgproc.MORPH_RECT;
        int erosion_size = 5;
        Mat result = new Mat();
        Mat element = Imgproc.getStructuringElement(erosion_type, new Size(2 * erosion_size + 1, 2 * erosion_size + 1));
        Imgproc.erode(input, result, element);
        Imgproc.dilate(result, result, element);
        return result;
    }

    /**
     * Converts a 3-channel image to a single-channel one using
     * {@code COLOR_BGR2GRAY}.
     *
     * @param input colour image; not modified
     * @return a new grayscale Mat
     */
    private static Mat convertToGrayscale(Mat input) {
        Mat grayscale = new Mat();
        Imgproc.cvtColor(input, grayscale, Imgproc.COLOR_BGR2GRAY);
        return grayscale;
    }

    /**
     * Paints every contour, filled (thickness -1), onto a colour copy of
     * {@code image}.
     *
     * @param contours contours to fill
     * @param image    grayscale image used as background; not modified
     * @return a new 3-channel Mat with all contours filled
     */
    private static Mat fillCountours(List<MatOfPoint> contours, Mat image) {
        Mat result = image.clone();
        Imgproc.cvtColor(result, result, Imgproc.COLOR_GRAY2RGB);
        for (int i = 0; i < contours.size(); i++) {
            Imgproc.drawContours(result, contours, i, new Scalar(255, 0, 0), -1, 8, new Mat(), 0, new Point());
        }
        return result;
    }

    /**
     * Collects all contours of a binary image ({@code RETR_TREE},
     * {@code CHAIN_APPROX_NONE}). The hierarchy output is discarded.
     */
    private static List<MatOfPoint> findContours(Mat image) {
        List<MatOfPoint> contours = new ArrayList<>();
        Mat hierarchy = new Mat();
        Imgproc.findContours(image, contours, hierarchy, Imgproc.RETR_TREE, Imgproc.CHAIN_APPROX_NONE);
        return contours;
    }

    /**
     * Detects line segments with the probabilistic Hough transform and draws
     * them in green on the image.
     *
     * <p>Note that {@code img} is converted to 3 channels in place and is the
     * very Mat that is returned.
     *
     * @param img single-channel input image; modified in place
     * @return {@code img}, now 3-channel, with the detected segments drawn on it
     */
    private static Mat detectLinesHough(Mat img) {
        Mat lines = new Mat();
        int threshold = 80;
        int minLineLength = 10;
        int maxLineGap = 5;
        double rho = 0.4;
        Imgproc.HoughLinesP(img, lines, rho, Math.PI / 180, threshold, minLineLength, maxLineGap);
        Imgproc.cvtColor(img, img, Imgproc.COLOR_GRAY2RGB);
        System.out.println(lines.cols());
        for (int x = 0; x < lines.cols(); x++) {
            // Each column of row 0 holds one segment as {x1, y1, x2, y2}.
            double[] vec = lines.get(0, x);
            double x1 = vec[0], y1 = vec[1], x2 = vec[2], y2 = vec[3];
            Point start = new Point(x1, y1);
            Point end = new Point(x2, y2);
            // Draw on the image that is returned, not on the Hough result matrix.
            Core.line(img, start, end, new Scalar(0, 255, 0), 3);
        }
        return img;
    }

    /**
     * Converts a Mat to a {@link BufferedImage} by encoding it as JPEG in
     * memory and decoding the bytes with {@link ImageIO}.
     *
     * @param mat image to convert
     * @return the decoded image
     * @throws IOException if decoding the encoded bytes fails
     */
    static BufferedImage convertMatToBufferedImage(Mat mat) throws IOException {
        MatOfByte matOfByte = new MatOfByte();
        Highgui.imencode(".jpg", mat, matOfByte);
        byte[] byteArray = matOfByte.toArray();
        InputStream in = new ByteArrayInputStream(byteArray);
        return ImageIO.read(in);

    }

    /** Shows {@code image} in a new frame; closing the frame exits the JVM. */
    static void displayImage(BufferedImage image) {
        JFrame frame = new JFrame();
        frame.getContentPane().setLayout(new FlowLayout());
        frame.getContentPane().add(new JLabel(new ImageIcon(image)));
        frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
        frame.pack();
        frame.setVisible(true);

    }

    /**
     * Inverse binary threshold with a fixed cut-off of 225: pixels above the
     * cut-off become 0, all others 255.
     */
    private static Mat tresholdImage(Mat img) {
        Mat treshold = new Mat();
        Imgproc.threshold(img, treshold, 225, 255, Imgproc.THRESH_BINARY_INV);
        return treshold;
    }

    /**
     * Inverse binary threshold with the {@code THRESH_OTSU} flag set; the
     * explicit threshold argument (-1) is presumably ignored by OpenCV in that
     * mode.
     */
    private static Mat tresholdImage2(Mat img) {
        Mat treshold = new Mat();
        Imgproc.threshold(img, treshold, -1, 255, Imgproc.THRESH_BINARY_INV + Imgproc.THRESH_OTSU);
        return treshold;
    }

    /**
     * Loads the hard-coded sample form.
     *
     * @return the decoded image
     * @throws IllegalStateException if the file is missing or cannot be decoded
     */
    private static Mat loadImage() {
        String path = "E:\\Programs\\Eclipse Workspace\\LicentaWorkspace\\OpenCvRectangleDetection\\src\\img\\form3.jpg";
        Mat image = Highgui.imread(path);
        // imread does not throw on failure; it hands back an empty Mat.
        if (image.empty()) {
            throw new IllegalStateException("Could not read image: " + path);
        }
        return image;
    }

}


and the Rectangle class

    package ro.ubbcluj.detection;

import java.awt.image.BufferedImage;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.MatOfPoint2f;
import org.opencv.core.Point;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc;

/**
 * Helpers for finding and drawing roughly right-angled quadrilaterals in an
 * image with OpenCV.
 */
public class Rectangle {

    /**
     * Finds convex four-sided contours whose corners are close to 90 degrees.
     *
     * @param input image to search; not modified (work is done on a smoothed copy)
     * @return one {@link MatOfPoint} with four vertices per accepted contour;
     *         empty if nothing qualifies
     */
    static List<MatOfPoint> findSquares(Mat input) {
        Mat pyr = new Mat();
        Mat timg = new Mat();

        // Down-scale and up-scale the image to filter out small noises
        Imgproc.pyrDown(input, pyr, new Size(input.cols() / 2, input.rows() / 2));
        Imgproc.pyrUp(pyr, timg, input.size());
        // Edge detection with Canny (thresholds 0 and 50, aperture 5)
        Imgproc.Canny(timg, timg, 0, 50, 5, true);

        // Dilate canny output to remove potential holes between edge segments
        Imgproc.dilate(timg, timg, new Mat(), new Point(-1, -1), 1);

        // find contours and store them all as a list
        Mat hierarchy = new Mat();
        List<MatOfPoint> contours = new ArrayList<>();
        Imgproc.findContours(timg, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);
        List<MatOfPoint> squaresResult = new ArrayList<MatOfPoint>();
        for (int i = 0; i < contours.size(); i++) {

            // Approximate contour with accuracy proportional to the contour
            // perimeter (2% of the arc length)
            MatOfPoint2f contour = new MatOfPoint2f(contours.get(i).toArray());
            MatOfPoint2f approx = new MatOfPoint2f();
            double epsilon = Imgproc.arcLength(contour, true) * 0.02;
            boolean closed = true;
            Imgproc.approxPolyDP(contour, approx, epsilon, closed);
            List<Point> approxCurveList = approx.toList();

            // Square contours should have 4 vertices after approximation,
            // a relatively large area (to filter out noisy contours)
            // and be convex.
            // Note: absolute value of an area is used because
            // area may be positive or negative - in accordance with the
            // contour orientation
            boolean aproxSize = approx.rows() == 4;
            boolean largeArea = Math.abs(Imgproc.contourArea(approx)) > 200;
            boolean isConvex = Imgproc.isContourConvex(new MatOfPoint(approx.toArray()));
            if (aproxSize && largeArea && isConvex) {
                double maxCosine = 0;
                for (int j = 2; j < 5; j++) {
                    // Find the maximum cosine of the angle between joint edges
                    // (angle at vertex j-1, between vertices j%4 and j-2)
                    double cosine = Math.abs(getAngle(approxCurveList.get(j % 4), approxCurveList.get(j - 2),
                            approxCurveList.get(j - 1)));
                    maxCosine = Math.max(maxCosine, cosine);
                }
                // If cosines of all checked angles are small
                // (all angles are ~90 degree) then write the quadrangle
                // vertices to the resultant sequence
                if (maxCosine < 0.3) {
                    Point[] points = approx.toArray();
                    squaresResult.add(new MatOfPoint(points));
                }
            }
        }
        return squaresResult;
    }

    /**
     * Returns the cosine of the angle at {@code point0} between the vectors
     * point0-&gt;point1 and point0-&gt;point2. A tiny constant is added under
     * the square root so that degenerate (zero-length) vectors do not divide
     * by zero.
     */
    private static double getAngle(Point point1, Point point2, Point point0) {
        double dx1 = point1.x - point0.x;
        double dy1 = point1.y - point0.y;
        double dx2 = point2.x - point0.x;
        double dy2 = point2.y - point0.y;
        return (dx1 * dx2 + dy1 * dy2) / Math.sqrt((dx1 * dx1 + dy1 * dy1) * (dx2 * dx2 + dy2 * dy2) + 1e-10);
    }

    /**
     * Draws every polygon in {@code squares} as a closed green outline on a
     * colour copy of {@code image}.
     *
     * @param image   grayscale background image; not modified
     * @param squares polygons to draw
     * @return a new 3-channel Mat with the outlines drawn
     */
    public static Mat drawSquares(Mat image, List<MatOfPoint> squares) {
        Mat result = new Mat();
        Imgproc.cvtColor(image, result, Imgproc.COLOR_GRAY2RGB);
        int thickness = 2;
        // isClosed must be true, otherwise the edge from the last vertex back
        // to the first is omitted and each box is drawn with only three sides.
        Core.polylines(result, squares, true, new Scalar(0, 255, 0), thickness);
        return result;
    }
}

Example of result :

enter image description here enter image description here

... though it doesn't work so well for smaller images:
enter image description hereenter image description here

Maybe some enhancements can be suggested? Or how to make the algorithm faster in case I have a batch of images to process?

Was it helpful?

Solution

I wrote the following program in C++ using OpenCV (I'm not familiar with Java + OpenCV). I've included the output for the two sample images that you provided. You may have to adjust the thresholds in the contour-filtering section for other images.

#include "stdafx.h"

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>

using namespace cv;
using namespace std;

int _tmain(int argc, _TCHAR* argv[])
{
    // load image as grayscale
    Mat im = imread(INPUT_FILE, CV_LOAD_IMAGE_GRAYSCALE);

    Mat morph;
    // morphological closing with a column filter : retain only large vertical edges
    Mat morphKernelV = getStructuringElement(MORPH_RECT, Size(1, 7));
    morphologyEx(im, morph, MORPH_CLOSE, morphKernelV);

    Mat bwV;
    // binarize: will contain only large vertical edges
    threshold(morph, bwV, 0, 255.0, CV_THRESH_BINARY | CV_THRESH_OTSU);

    // morphological closing with a row filter : retain only large horizontal edges
    Mat morphKernelH = getStructuringElement(MORPH_RECT, Size(7, 1));
    morphologyEx(im, morph, MORPH_CLOSE, morphKernelH);

    Mat bwH;
    // binarize: will contain only large horizontal edges
    threshold(morph, bwH, 0, 255.0, CV_THRESH_BINARY | CV_THRESH_OTSU);

    // combine the virtical and horizontal edges
    Mat bw = bwV & bwH;
    threshold(bw, bw, 128.0, 255.0, CV_THRESH_BINARY_INV);

    // just for illustration
    Mat rgb;
    cvtColor(im, rgb, CV_GRAY2BGR);

    // find contours
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(bw, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
    // filter contours by area to obtain boxes
    double areaThL = bw.rows * .04 * bw.cols * .06;
    double areaThH = bw.rows * .7 * bw.cols * .7;
    double area = 0;
    for(int idx = 0; idx >= 0; idx = hierarchy[idx][0])
    {
        area = contourArea(contours[idx]); 
        if (area > areaThL && area < areaThH)
        {
            drawContours(rgb, contours, idx, Scalar(0, 0, 255), 2, 8, hierarchy);
            // take bounding rectangle. better to use filled countour as a mask
            // to extract the rectangle because then you won't get any stray elements
            Rect rect = boundingRect(contours[idx]);
            cout << "rect: (" << rect.x << ", " << rect.y << ") " << rect.width << " x " << rect.height << endl;
            Mat imRect(im, rect);
        }
    }

    return 0;
}

Result for the first image:

enter image description here

Result for the second image:

enter image description here

OTHER TIPS

I'm not sure whether "real" image processing skills are necessary.

Once you start tackling this problem with OpenCV, Sobel/Canny filters, edge detections and Hough transforms, it starts becoming rather involved. But maybe all this is not necessary here.

It all depends on how "predictable" the input is. That's why I asked in the comments whether the image can serve as a test case. IF the rectangles are always axis-aligned and don't have noise, distortions and interruptions, this can be solved with some trivial loops and pixel comparisons.

So IF you have potentially noisy or distorted input images, then ... good luck, you may have to acquire quite some image processing skills. If the image is not distorted or noisy, a solution like this one might be sufficient:

import java.awt.BorderLayout;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.GridLayout;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.imageio.ImageIO;
import javax.swing.ImageIcon;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.SwingUtilities;


/**
 * Finds axis-aligned rectangles drawn with dark, one-pixel-wide lines in a
 * clean (noise-free, undistorted) image using plain pixel comparisons, and
 * shows the source image together with the cut-out contents of every
 * rectangle found.
 */
public class RectangleInImageTest
{
    /**
     * Loads {@code gcnc2.jpg} from the working directory, extracts the
     * rectangle contents and displays them.
     *
     * @param args ignored
     * @throws IOException if the file cannot be read or decoded
     */
    public static void main(String[] args) throws IOException
    {
        File inputFile = new File("gcnc2.jpg");
        BufferedImage loaded = ImageIO.read(inputFile);
        if (loaded == null)
        {
            // ImageIO.read returns null (no exception) when no reader
            // understands the file format.
            throw new IOException("No image reader could decode " + inputFile);
        }
        final BufferedImage image = convertToARGB(loaded);
        final List<BufferedImage> subImages = scan(image);

        SwingUtilities.invokeLater(new Runnable()
        {
            @Override
            public void run()
            {
                createAndShowGUI(image, subImages);
            }
        });
    }

    /**
     * Builds the window: the full image in the centre and a scrollable row
     * with all extracted sub-images at the bottom.
     */
    private static void createAndShowGUI(
        BufferedImage image,
        List<BufferedImage> subImages)
    {
        JFrame f = new JFrame();
        f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        f.getContentPane().setLayout(new BorderLayout());

        f.getContentPane().add(new JLabel(new ImageIcon(image)),
            BorderLayout.CENTER);

        JPanel p = new JPanel(new GridLayout(1,0));
        for (BufferedImage subImage : subImages)
        {
            p.add(new JLabel(new ImageIcon(subImage)));
        }
        JPanel pp = new JPanel(new GridLayout(1,1));
        pp.setPreferredSize(new Dimension(800, 100));
        pp.add(new JScrollPane(p));
        f.getContentPane().add(pp, BorderLayout.SOUTH);
        f.setSize(800,800);
        f.setLocationRelativeTo(null);
        f.setVisible(true);
    }


    /**
     * Returns a copy of {@code image} with type
     * {@link BufferedImage#TYPE_INT_ARGB} and the same dimensions.
     *
     * @param image the image to copy; not modified
     * @return the new ARGB image
     */
    public static BufferedImage convertToARGB(BufferedImage image)
    {
        BufferedImage newImage = new BufferedImage(
            image.getWidth(), image.getHeight(),
            BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = newImage.createGraphics();
        g.drawImage(image, 0, 0, null);
        g.dispose();
        return newImage;
    }

    /**
     * Scans the image row by row for upper-left rectangle corners and, for
     * every valid rectangle found, copies its interior (the rectangle minus a
     * one-pixel border on each side) into a new image.
     *
     * @param image the image to scan
     * @return the interiors of all rectangles found, in scan order
     */
    private static List<BufferedImage> scan(BufferedImage image)
    {
        List<BufferedImage> result = new ArrayList<BufferedImage>();
        int w = image.getWidth();
        int h = image.getHeight();
        for (int y=0; y<h; y++)
        {
            for (int x=0; x<w; x++)
            {
                int rgb = image.getRGB(x, y);
                if (!isBlack(rgb))
                {
                    continue;
                }
                if (!isUpperLeftCorner(image, x, y))
                {
                    continue;
                }
                Rectangle rectangle = extractRectangle(image, x,y);
                if (!isValidRectangle(rectangle))
                {
                    continue;
                }
                System.out.println("Rectangle "+rectangle);

                // Copy the area strictly inside the border lines.
                BufferedImage part = new BufferedImage(
                    rectangle.width-2, rectangle.height-2, 
                    BufferedImage.TYPE_INT_ARGB);
                Graphics2D g = part.createGraphics();
                g.drawImage(image, 
                    0, 0, rectangle.width-2, rectangle.height-2,
                    x+1, y+1, x+rectangle.width-1, y+rectangle.height-1, null);
                g.dispose();
                result.add(part);
            }
        }
        return result;
    }

    /** Returns whether all three colour channels of {@code rgb} are below 128. */
    private static boolean isBlack(int rgb)
    {
        final int threshold = 128;
        int r = (rgb >> 16) & 0xFF;
        int g = (rgb >>  8) & 0xFF;
        int b = (rgb      ) & 0xFF;
        return 
            r < threshold &&
            g < threshold &&
            b < threshold;
    }

    /**
     * Returns whether the (black) pixel at (x, y) looks like the upper-left
     * corner of a rectangle outline: its left, upper and four diagonal
     * neighbours must all lie inside the image and be non-black.
     */
    private static boolean isUpperLeftCorner(BufferedImage image, int x, int y)
    {
        if (!isValidAndWhite(image, x-1, y  )) return false;
        if (!isValidAndWhite(image, x  , y-1)) return false;
        if (!isValidAndWhite(image, x-1, y-1)) return false;
        if (!isValidAndWhite(image, x+1, y-1)) return false;
        if (!isValidAndWhite(image, x-1, y+1)) return false;
        if (!isValidAndWhite(image, x+1, y+1)) return false;
        return true;
    }

    /**
     * Returns whether (x, y) lies inside the image and the pixel there is not
     * black according to {@link #isBlack(int)}.
     */
    private static boolean isValidAndWhite(
        BufferedImage image, int x, int y)
    {
        int w = image.getWidth();
        int h = image.getHeight();
        if (x < 0 || x >= w)
        {
            return false;
        }
        if (y < 0 || y >= h)
        {
            return false;
        }
        int rgb = image.getRGB(x, y);
        return !isBlack(rgb);
    }


    /**
     * Measures the rectangle whose upper-left corner is at (x0, y0) by
     * following the black run downwards along column x0 and rightwards along
     * row y0.
     *
     * @return a rectangle at (x0, y0) whose width and height are the lengths
     *         of the black runs, border pixels included
     */
    private static Rectangle extractRectangle(
        BufferedImage image, int x0, int y0)
    {
        int w = image.getWidth();
        int h = image.getHeight();

        // Default to the image edge: if a border line runs all the way to the
        // right or bottom edge no non-black pixel terminates the run, and the
        // run length must still be measured instead of collapsing to 0.
        int x1 = w;
        int y1 = h;
        for (int y=y0; y<h; y++)
        {
            int rgb = image.getRGB(x0, y);
            if (!isBlack(rgb))
            {
                y1 = y;
                break;
            }
        }
        for (int x=x0; x<w; x++)
        {
            int rgb = image.getRGB(x, y0);
            if (!isBlack(rgb))
            {
                x1 = x;
                break;
            }
        }
        return new Rectangle(x0, y0, x1-x0, y1-y0);  
    }


    /** Rejects rectangles narrower than 16 or lower than 8 pixels. */
    private static boolean isValidRectangle(Rectangle r)
    {
        final int minWidth = 16;
        final int minHeight = 8;
        return r.width >= minWidth && r.height >= minHeight;
    }

}

Here's an algorithm that I demonstrated on a similar project using OpenCV:

Most of these references are not in Java, but I assume you have the skills to convert C/C++ code to Java (btw, cv::Mat is equivalent to IplImage).

first of all I hope you are already aware of some image processing because you will need some of this to continue :)

Here is a link about ways to do it : https://dsp.stackexchange.com/questions/3324/how-to-detect-edges-and-rectangles

But to sum up, the most commonly used method is to apply Canny (an edge detector) and then Hough to detect the straight lines, and to find the rectangles from those results. Hough is usually used to detect straight lines, and a rectangle is just 4 straight lines with an angle of 90° between adjacent ones. So using all of this you may be able to improve your research ;)

Hope it will help ;)

One possible solution is to perform connected component analysis after binarization using an adaptive method. After that, calculate the median width of the connected components; if a connected component's width is more than 5 times the median width, then that component is one of the boxes we are looking for. The following code illustrates this idea:

    Mat im = imread(inputFileName,CV_LOAD_IMAGE_GRAYSCALE);
Mat outputIm(im.rows,im.cols,CV_8U, Scalar(0));


    Mat bi;



// step 1: adaptive thresholding 
adaptiveThreshold(im,bi,255,ADAPTIVE_THRESH_GAUSSIAN_C,THRESH_BINARY,7,50);

threshold(bi, bi, 128.0, 255.0, CV_THRESH_BINARY_INV);


    // step 2: connected component analysis
std::vector<std::vector<cv::Point> > contours;

findContours(bi, contours, CV_RETR_EXTERNAL , CV_CHAIN_APPROX_NONE);  

    // step 3: analyze these blobs
double area;
std::vector<double> areaArray;
for(int i=0; i<contours.size(); i++)
{
    cv::Rect rect = boundingRect(contours[i]);
    area = rect.width;
    areaArray.push_back(area);
}
std::vector<double> sortedAreaArray;
sortedAreaArray = areaArray;
size_t n = sortedAreaArray.size() / 2;
    nth_element(sortedAreaArray.begin(), sortedAreaArray.begin()+n, sortedAreaArray.end());

double medianArea = sortedAreaArray[n];

for(int i=0; i<contours.size(); i++)
{
    if(areaArray[i]>5*medianArea)
    {
        for(int j=0; j<contours[i].size(); j++)
        {
            int x = contours[i][j].x;
            int y = contours[i][j].y;
            int pos = x+y*bi.cols;
            outputIm.data[pos]=255;
        }
    }

}
imwrite(outputFileName,outputIm);

The output rectangles can be shown:

enter image description here enter image description here

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top