Question

I need to use boosted cascaded training in scikit-learn to classify some images according to their HoG features.

My code below is adapted from this example.

This part of the code is the only thing that I've really done:

from skimage import io, color
from skimage.feature import hog
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.externals.six.moves import xrange, zip
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
import numpy as np

f = open("PATH_TO_SAMPLES_LIST\\samples.txt", 'r')
out = f.read().splitlines()
f.close()

### THESE ARE THE MAIN CHANGES I MADE TO THE CODE
### THE CHANGES ARE ONLY TO EXTRACT HOG FEATURES FROM THE IMAGES TO PASS ON TO THE CLASSIFIERS

imgs = []
tmp_hogs = []

# I've omitted the code where I populate an array called "labels", but it's
# just a 1D array with 528 elements, each either 1 or 0.

filepath = "C:\\work_asaaki\\caltech\\cars_brad\\resized\\"
for fname in out:
    readfile = filepath + fname
    # Load each image and convert it to grayscale
    curr_img = color.rgb2gray(io.imread(readfile))
    imgs.append(curr_img)
    # fd is the flattened HoG feature vector; hog_image is only for visualisation
    fd, hog_image = hog(curr_img, orientations=8, pixels_per_cell=(8, 8),
                        cells_per_block=(1, 1), visualise=True, normalise=True)
    tmp_hogs.append(fd)

img_hogs = np.array(tmp_hogs, dtype=float)
print img_hogs.shape

n_split = 508
X_train, X_test = np.array(img_hogs[:n_split]), np.array(img_hogs[n_split:])
y_train, y_test = np.array(labels[:n_split]), np.array(labels[n_split:])
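
(Note: a fixed split like this assumes the samples list is already shuffled. If out lists all the positive images before the negative ones, one side of the split can end up containing only a single class. A minimal sketch of shuffling before the split, assuming img_hogs and labels as built above, with an arbitrary seed:)

# Shuffle features and labels together so both classes can appear
# on each side of the split
rng = np.random.RandomState(0)
perm = rng.permutation(len(labels))
img_hogs = img_hogs[perm]
labels = np.asarray(labels)[perm]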

The rest of the code is from the example on the link:

#### THE CODE BELOW IS TAKEN DIRECTLY FROM THE EXAMPLE


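# SAMME.R (the default algorithm) boosts on class probability estimates
# from each tree, while SAMME boosts on discrete class predictions,
# which is why the example fits both variants.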
bdt_real = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1)

bdt_discrete = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1.5,
    algorithm="SAMME")

bdt_real.fit(X_train, y_train)
bdt_discrete.fit(X_train, y_train)

real_test_errors = []
discrete_test_errors = []

for real_test_predict, discrete_test_predict in zip(
        bdt_real.staged_predict(X_test), bdt_discrete.staged_predict(X_test)):
    real_test_errors.append(
        1. - accuracy_score(y_test, real_test_predict))
    discrete_test_errors.append(
        1. - accuracy_score(y_test, discrete_test_predict))

n_trees_discrete = len(bdt_discrete)
n_trees_real = len(bdt_real)

# Boosting might terminate early, but the following arrays are always
# n_estimators long. We crop them to the actual number of trees here:
discrete_estimator_errors = bdt_discrete.estimator_errors_[:n_trees_discrete]
real_estimator_errors = bdt_real.estimator_errors_[:n_trees_real]
discrete_estimator_weights = bdt_discrete.estimator_weights_[:n_trees_discrete]

plt.figure(figsize=(15, 5))

plt.subplot(131)
plt.plot(xrange(1, n_trees_discrete + 1),
         discrete_test_errors, c='black', label='SAMME')
plt.plot(xrange(1, n_trees_real + 1),
         real_test_errors, c='black',
         linestyle='dashed', label='SAMME.R')
plt.legend()
plt.ylim(0.18, 0.62)
plt.ylabel('Test Error')
plt.xlabel('Number of Trees')
print "n trees"
print n_trees_discrete
print "discrete_test_errors"
print bdt_discrete.estimator_errors_.shape

plt.subplot(132)
plt.plot(xrange(1, n_trees_discrete + 1), discrete_estimator_errors,
         "b", label='SAMME', alpha=.5)
plt.plot(xrange(1, n_trees_real + 1), real_estimator_errors,
         "r", label='SAMME.R', alpha=.5)
plt.legend()
plt.ylabel('Error')
plt.xlabel('Number of Trees')
plt.ylim((.2,
         max(real_estimator_errors.max(),
             discrete_estimator_errors.max()) * 1.2))
plt.xlim((-20, len(bdt_discrete) + 20))

print  "plotting..."

plt.subplot(133)
plt.plot(xrange(1, n_trees_discrete + 1), discrete_estimator_weights,
         "b", label='SAMME')
plt.legend()
plt.ylabel('Weight')
plt.xlabel('Number of Trees')
plt.ylim((0, discrete_estimator_weights.max() * 1.2))
plt.xlim((-20, n_trees_discrete + 20))
# prevent overlapping y-axis labels
plt.subplots_adjust(wspace=0.25)
plt.show()

My question is: is this the right way to classify images according to HoG features? There are 528 images in all. Originally they were 240x360, and when I printed the shape of img_hogs, I got:

(528L, 2640L)

I was told that no graphs were being plotted because the boosting was terminating early, since there were many more features than images. So I reduced my images to 20x30.

Now when I print the shape of img_hogs, I get:

(528L, 48L)

But I'm still getting no results: in both cases the axes are drawn, but the graphs themselves are empty.
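
For reference, with cells_per_block=(1, 1) the length of each HoG vector should be (height // 8) * (width // 8) * orientations, which matches the 20x30 case. A rough sanity check, assuming the hog() parameters used in the loop above:

# Expected HoG vector length for an H x W image with 8x8 cells,
# 1x1 blocks and 8 orientations
def expected_hog_len(h, w, orientations=8, cell=8):
    return (h // cell) * (w // cell) * orientations

print expected_hog_len(20, 30)  # (20 // 8) * (30 // 8) * 8 = 2 * 3 * 8 = 48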


Solution

I would have deleted this question, but it occurred to me that another classification newbie might run into the same issue, so I'm answering it instead.

The problem was a very silly mistake: my training array, labels, had no negatives; it consisted entirely of labels for a single class.
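
With only one class in the training labels, the first tree presumably fits the data perfectly and boosting terminates after a single estimator, so the staged error curves have only one point and the graphs look empty. A check along these lines before fitting would have caught it; a minimal sketch, assuming labels is the array described above:

# Sanity-check the label array before fitting: both classes
# should be present, each with a non-zero count
labels = np.asarray(labels)
print np.unique(labels)                 # expect [0 1]; one value means one class
print np.bincount(labels.astype(int))   # per-class counts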

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow