Question

I was trying to implement the digit recognizer that is discussed in detail in the following blog post: http://www.markhneedham.com/blog/2012/10/27/kaggle-digit-recognizer-mahout-random-forest-attempt/

I receive the following error when I execute the Java program:

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/mahout/classifier/df/builder/TreeBuilder
Caused by: java.lang.ClassNotFoundException: org.apache.mahout.classifier.df.builder.TreeBuilder
    at java.net.URLClassLoader$1.run(URLClassLoader.java:217)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:294)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:266)
Could not find the main class: com.mawasthi.mahout.test.MahoutDigitRecognizer. Program will exit.

I have the following code for implementing a digit recognizer using Apache Mahout.

package com.mawasthi.mahout.test;

import java.lang.Math;
import java.util.ArrayList;
import java.util.Random;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.mahout.classifier.df.data.Data;
import org.apache.mahout.classifier.df.data.Instance;
import org.apache.mahout.classifier.df.data.DataLoader;
import org.apache.mahout.classifier.df.DecisionForest;
import org.apache.mahout.classifier.df.builder.DefaultTreeBuilder;
import org.apache.mahout.classifier.df.ref.SequentialBuilder;
import org.apache.mahout.common.RandomUtils;
import org.apache.commons.math3.util.FastMath;

/**
 * Trains a Mahout decision forest on {@code data/train.csv} and prints one predicted
 * label per row of {@code data/test.csv}.
 *
 * <p>Both CSV files are expected to start with a header row, which is skipped.
 */
public class MahoutDigitRecognizer {

    /**
     * Encoding used to decode the CSV files. Fixed explicitly so that parsing does not
     * depend on the platform default charset of the machine running the program.
     */
    private static final String CSV_ENCODING = "UTF-8";

    /**
     * Builds the forest from the training data, then classifies and prints every test row.
     *
     * @param args unused
     * @throws Exception if a data file cannot be read or Mahout rejects the data
     */
    public static void main(String[] args) throws Exception {

        // Build RF

        // Mahout dataset descriptor: "L" marks the label column, each "N" a numerical column.
        // NOTE(review): confirm the number of "N" entries matches the columns in train.csv.
        String descriptor = "L N N N N N N N N N N N N N N N N N N N";
        String[] trainDataValues = fileAsStringArray("data/train.csv");

        Data data = DataLoader.loadData(
                DataLoader.generateDataset(descriptor, false, trainDataValues), trainDataValues);

        int numberOfTrees = 100;
        DecisionForest forest = buildForest(numberOfTrees, data);

        // Test

        String[] testDataValues = testFileAsStringArray("data/test.csv");

        // Reuse the training dataset description so test rows are parsed the same way.
        Data testData = DataLoader.loadData(data.getDataset(), testDataValues);
        Random rng = RandomUtils.getRandom();

        for (int i = 0; i < testData.size(); i++) {
            Instance oneSample = testData.get(i);
            double classify = forest.classify(testData.getDataset(), rng, oneSample);
            // NOTE(review): this looks up the predicted value, rendered as a string, among the
            // values of attribute 0. Verify against the Mahout Dataset API that this is the
            // intended code-to-label conversion.
            int label = data.getDataset().valueOf(0, String.valueOf((int) classify));
            System.out.println("Label: " + label);
        }

    }

    /**
     * Builds a decision forest sequentially from a copy of the given data.
     *
     * @param numberOfTrees number of trees to grow
     * @param data training data; it is cloned before being handed to the builder
     * @return the trained forest
     */
    private static DecisionForest buildForest(int numberOfTrees, Data data) {
        // Attributes considered per split: floor(log2(number of attributes) + 1).
        int m = (int) Math.floor(FastMath.log(2.0, (double) data.getDataset().nbAttributes()) + 1);

        DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
        treeBuilder.setM(m);

        return new SequentialBuilder(RandomUtils.getRandom(), treeBuilder, data.clone())
                .build(numberOfTrees);
    }

    /**
     * Reads the training file: every line after the header row, unchanged.
     *
     * @param file path of the CSV file
     * @return the data rows in file order
     * @throws Exception if the file cannot be opened or read
     */
    private static String[] fileAsStringArray(String file) throws Exception {
        return readDataRows(file, "");
    }

    /**
     * Reads the test file: every line after the header row, each prefixed with {@code "-,"}
     * as a placeholder in the first (label) position.
     *
     * @param file path of the CSV file
     * @return the prefixed data rows in file order
     * @throws Exception if the file cannot be opened or read
     */
    private static String[] testFileAsStringArray(String file) throws Exception {
        return readDataRows(file, "-,");
    }

    /**
     * Reads all lines of a CSV file except the first (header) line, prepending
     * {@code rowPrefix} to each one.
     *
     * @param file path of the CSV file
     * @param rowPrefix text placed in front of every returned row; may be empty
     * @return the data rows in file order; empty if the file has no rows after the header
     * @throws Exception if the file cannot be opened or read
     */
    private static String[] readDataRows(String file, String rowPrefix) throws Exception {
        ArrayList<String> rows = new ArrayList<String>();

        FileInputStream in = new FileInputStream(file);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, CSV_ENCODING));

            reader.readLine(); // discard the header row
            String line;
            while ((line = reader.readLine()) != null) {
                rows.add(rowPrefix + line);
            }
        } finally {
            // Release the file handle even when reading fails part-way through.
            in.close();
        }

        return rows.toArray(new String[rows.size()]);
    }
}

The following is my pom.xml file:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Build for the MahoutDigitRecognizer example.

  "mvn package" writes mvn-mahout-test.jar into the project directory and copies the
  runtime dependencies into lib/ next to it. The jar manifest names the main class and
  lists the lib/ jars on its Class-Path, so the program can be started with:

      java -jar mvn-mahout-test.jar
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.mawasthi.mahout.test</groupId> 
    <artifactId>mvn-mahout-test</artifactId>
    <version>1.0-SNAPSHOT</version>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- Default (compile) scope: this is a standalone program, so no container supplies
         SLF4J at runtime and "provided" would keep it off the runtime classpath. -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.6.4</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.mahout</groupId>
      <artifactId>mahout-core</artifactId>
      <version>0.7</version>
    </dependency>
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-math3</artifactId>
        <version>3.0</version>
    </dependency>
  <!-- NOTE(review): 0.5 does not match mahout-core 0.7, and the Java source does not import
       anything from this artifact. Confirm whether it is needed at all. -->
  <dependency>
    <groupId>org.apache.mahout</groupId>
    <artifactId>mahout-utils</artifactId>
    <version>0.5</version>
  </dependency>
    <!-- Kept at the same version as mahout-core; a directly declared older version would
         override the one mahout-core itself depends on. -->
    <dependency>
        <groupId>org.apache.mahout</groupId>
        <artifactId>mahout-math</artifactId>
        <version>0.7</version>
    </dependency>  
    <!-- NOTE(review): not imported by the Java source; confirm this is still required
         alongside mahout-math 0.7. -->
    <dependency>
        <groupId>org.apache.mahout</groupId>
        <artifactId>mahout-collections</artifactId>
        <version>1.0</version>
    </dependency>

  </dependencies>

    <build>
        <finalName>mvn-mahout-test</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                  <source>1.6</source>
                  <target>1.6</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                  <outputDirectory>${basedir}</outputDirectory>
                  <archive>
                    <manifest>
                      <!-- Makes the jar runnable and points it at the jars copied to lib/. -->
                      <mainClass>com.mawasthi.mahout.test.MahoutDigitRecognizer</mainClass>
                      <addClasspath>true</addClasspath>
                      <classpathPrefix>lib/</classpathPrefix>
                    </manifest>
                  </archive>
                </configuration>
            </plugin>
            <!-- Copies every runtime dependency next to the jar so the manifest Class-Path
                 entries resolve; without this the JVM cannot find the Mahout classes. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <version>2.8</version>
                <executions>
                  <execution>
                    <id>copy-runtime-dependencies</id>
                    <phase>package</phase>
                    <goals>
                      <goal>copy-dependencies</goal>
                    </goals>
                    <configuration>
                      <outputDirectory>${basedir}/lib</outputDirectory>
                      <includeScope>runtime</includeScope>
                    </configuration>
                  </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>
Was it helpful?

Solution

Being new to Java as well, I had missed that the dependent JARs must be made available on the classpath at runtime. I finally bundled them into one runnable JAR (I know, not a good idea) and was able to run it!

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top