SGD vs SGD in mini batches

https://datascience.stackexchange.com/questions/46798

01-11-2019
|

Question

So I recently finished a mini-batch algorithm for a library I'm building in Java (an artificial neural network lib). I then went on to train my network on an XOR problem with mini-batch sizes of 2 or 3; for both I got worse accuracy than what I got with a size of 1 (which is basically just SGD). Now I understand that I need to train it for more epochs, but I'm not noticing any speed-up in runtime, which from what I read should happen. Why is this?

Here is my code (Java):

 /**
  * Trains the network with mini-batch stochastic gradient descent.
  *
  * For every epoch the training set is reshuffled, split into mini batches
  * by createMiniBatches, and each batch is handed to update_mini_batch.
  * Progress is reported through print before every epoch and once more
  * after the last one.
  *
  * @param inputs           input rows used to build the TrainingSet
  * @param expected_outputs target rows used to build the TrainingSet
  * @param mini_batch_size  batch size forwarded to createMiniBatches
  * @param epochs           number of passes over the training set
  * @param verbose          forwarded to setVerbose (presumably toggles the
  *                         progress output - confirm against print)
  */
 public void SGD(double[][] inputs, double[][] expected_outputs, int mini_batch_size, int epochs, boolean verbose) {
    setVerbose(verbose);

    TrainingSet samples = new TrainingSet(inputs, expected_outputs);

    int epoch = 0;
    while (epoch < epochs) {
        //Report how many epochs have completed so far
        print("\rTrained: " + epoch + "/" + epochs);

        //New sample order for this epoch, then split it into batches
        samples.shuffle();
        for (TrainingSet.Data[] batch : createMiniBatches(samples, mini_batch_size)) {
            update_mini_batch(batch);
        }

        epoch++;
    }

    //Final progress line once every epoch has run
    print("\rTrained: " + epochs + "/" + epochs);
    print("\nDone!");
}

   /**
    * Computes the per-layer deltas for a single training sample.
    *
    * Runs feedForward on the inputs, obtains the per-layer errors from
    * calculateError, and builds one weight delta per layer as
    * transpose(layer input) dot error. The fields I and H are read here
    * (presumably filled in by feedForward - confirm).
    *
    * @param inputs         the sample's input values
    * @param target_outputs the sample's expected output values
    * @return a Pair holding the weight deltas and the errors; the errors
    *         double as the bias deltas
    */
   private Pair backprop(double[] inputs, double[] target_outputs){
    //Wrap the targets in a single-row 2D array
    //(presumably a 1 x n matrix - confirm against Matrix.fromArray)
    Matrix expected = Matrix.fromArray(new double[][]{target_outputs});

    //Forward pass for this sample
    feedForward(inputs);

    //Per-layer errors, which are also used as the bias deltas
    Matrix[] layerErrors = calculateError(expected);

    //One weight delta per layer
    Matrix[] weightDeltas = new Matrix[layerErrors.length];

    //First layer is fed by the network input I
    Matrix inputTransposed = Matrix.transpose(I);
    weightDeltas[0] = Matrix.dot(inputTransposed, layerErrors[0]);

    //Every later layer is fed by the previous entry of H
    int layer = 1;
    while (layer < layerErrors.length) {
        Matrix previousTransposed = Matrix.transpose(H[layer - 1]);
        weightDeltas[layer] = Matrix.dot(previousTransposed, layerErrors[layer]);
        layer++;
    }

    return new Pair(weightDeltas, layerErrors);
}
/**
 * Applies one gradient-descent step from the averaged deltas of a mini batch.
 *
 * Each sample gets its own backprop call and the resulting deltas are
 * summed. The sum is scaled by learningRate / batch size (learning rate
 * times the mean delta) and subtracted from W and B.
 *
 * An empty mini batch carries no gradient, so it is skipped rather than
 * failing on the access to mini_batch[0].
 *
 * @param mini_batch the samples to average over; must not be null
 */
private void update_mini_batch(TrainingSet.Data[] mini_batch){
    //Nothing to average over: leave the weights and biases untouched
    if(mini_batch.length == 0){
        return;
    }

    //Get first deltas
    Pair deltas = backprop(mini_batch[0].input,mini_batch[0].output);

    //Loop through the rest of the mini batch and sum the deltas
    //(samples are processed one at a time, one backprop call each)
    for(int i = 1; i< mini_batch.length;i++){
        deltas.add(backprop(mini_batch[i].input,mini_batch[i].output));
    }

    //Multiply deltas by the learning rate
    //and divide by the mini batch size to get
    //the mean of the deltas
    deltas.multiply(learningRate/mini_batch.length);

    //Update Weights and Biases
    for(int i= 0; i<W.length;i++){
        W[i].subtract(deltas.dCdW[i]);
        B[i].subtract(deltas.dCdB[i]);
    }
}

No correct solution

Licensed under: CC-BY-SA with attribution

Not affiliated with datascience.stackexchange