Question

This is the open-source code that I am using:

import math
import random
import string

class NN:
  def __init__(self, NI, NH, NO):
    # number of nodes in layers
    self.ni = NI + 1 # +1 for bias
    self.nh = NH
    self.no = NO

    # initialize node-activations
    self.ai, self.ah, self.ao = [],[], []
    self.ai = [1.0]*self.ni
    self.ah = [1.0]*self.nh
    self.ao = [1.0]*self.no

    # create node weight matrices
    self.wi = makeMatrix (self.ni, self.nh)
    self.wo = makeMatrix (self.nh, self.no)
    # initialize node weights to random vals
    randomizeMatrix ( self.wi, -0.2, 0.2 )
    randomizeMatrix ( self.wo, -2.0, 2.0 )
    # create last change in weights matrices for momentum
    self.ci = makeMatrix (self.ni, self.nh)
    self.co = makeMatrix (self.nh, self.no)

  def runNN (self, inputs):
    if len(inputs) != self.ni-1:
      print 'incorrect number of inputs'

    for i in range(self.ni-1):
      self.ai[i] = inputs[i]

    for j in range(self.nh):
      sum = 0.0
      for i in range(self.ni):
        sum +=( self.ai[i] * self.wi[i][j] )
      self.ah[j] = sigmoid (sum)

    for k in range(self.no):
      sum = 0.0
      for j in range(self.nh):        
        sum +=( self.ah[j] * self.wo[j][k] )
      self.ao[k] = sigmoid (sum)

    return self.ao



  def backPropagate (self, targets, N, M):
    # calc output deltas
    # we want to find the instantaneous rate of change of ( error with respect to weight from node j to node k)
    # output_delta is defined as an attribute of each output node. It is not the final rate we need.
    # To get the final rate we must multiply the delta by the activation of the hidden layer node in question.
    # This multiplication is done according to the chain rule as we are taking the derivative of the activation function
    # of the output node.
    # dE/dw[j][k] = (t[k] - ao[k]) * s'( SUM( w[j][k]*ah[j] ) ) * ah[j]
    output_deltas = [0.0] * self.no
    for k in range(self.no):
      error = targets[k] - self.ao[k]
      output_deltas[k] =  error * dsigmoid(self.ao[k]) 

    # update output weights
    for j in range(self.nh):
      for k in range(self.no):
        # output_deltas[k] * self.ah[j] is the full derivative of dError/dweight[j][k]
        change = output_deltas[k] * self.ah[j]
        self.wo[j][k] += N*change + M*self.co[j][k]
        self.co[j][k] = change

    # calc hidden deltas
    hidden_deltas = [0.0] * self.nh
    for j in range(self.nh):
      error = 0.0
      for k in range(self.no):
        error += output_deltas[k] * self.wo[j][k]
      hidden_deltas[j] = error * dsigmoid(self.ah[j])

    #update input weights
    for i in range (self.ni):
      for j in range (self.nh):
        change = hidden_deltas[j] * self.ai[i]
        #print 'activation',self.ai[i],'synapse',i,j,'change',change
        self.wi[i][j] += N*change + M*self.ci[i][j]
        self.ci[i][j] = change

    # calc combined error
    # 1/2 for differential convenience & **2 for modulus
    error = 0.0
    for k in range(len(targets)):
      error = 0.5 * (targets[k]-self.ao[k])**2
    return error

  def weights(self):
    print 'Input weights:'
    for i in range(self.ni):
      print self.wi[i]
    print
    print 'Output weights:'
    for j in range(self.nh):
      print self.wo[j]
    print ''

  def test(self, patterns):
    for p in patterns:
      inputs = p[0]
      print 'Inputs:', p[0], '-->', self.runNN(inputs), '\tTarget', p[1]

  def train (self, patterns, max_iterations = 1000, N=0.5, M=0.1):
    for i in range(max_iterations):
      for p in patterns:
        inputs = p[0]
        targets = p[1]
        self.runNN(inputs)
        error = self.backPropagate(targets, N, M)
      if i % 50 == 0:
        print 'Combined error', error
    self.test(patterns)


def sigmoid (x):
  return math.tanh(x)

def dsigmoid (y):
  return 1 - y**2

def makeMatrix ( I, J, fill=0.0):
  m = []
  for i in range(I):
    m.append([fill]*J)
  return m

def randomizeMatrix ( matrix, a, b):
  for i in range ( len (matrix) ):
    for j in range ( len (matrix[0]) ):
      matrix[i][j] = random.uniform(a,b)

def main ():

    #print mylist
    pat = [
    [ [0.0,0.0], [0.0] ],
    [ [0.0,0.5], [2.0] ],
    [ [0.0,1.0], [0.0] ],

    [ [0.5,0.0], [3.0] ],
    [ [0.5,0.5], [0.0] ],
    [ [0.5,1.0], [5.0] ],

    [ [1.0,0.0], [0.0] ],
    [ [1.0,0.5], [89.0] ],
    [ [1.0,1.0], [0.0] ]
    ]

    myNN = NN ( 2, 10, 1)
    myNN.train(pat)

if __name__ == "__main__":
    main()

But when I run the code, I get almost the same output for every input, which is wrong:

Combined error 0.499991904422
Combined error 0.499996323964
Combined error 0.499997646742    
Combined error 0.499998277742
Combined error 0.499998645609
Combined error 0.499998885941
Combined error 0.499999054982
Combined error 0.49999918021
Combined error 0.499999276619    
Combined error 0.49999935308
Combined error 0.499999415171
Combined error 0.499999466571
Combined error 0.499999509808
Combined error 0.499999546673
Combined error 0.499999578468
Combined error 0.499999606167
Combined error 0.499999630508
Combined error 0.499999652063
Combined error 0.499999671282
Combined error 0.499999688523
Inputs: [0.0, 0.0] --> [0.9999971763261493]     Target [0.0]
Inputs: [0.0, 0.5] --> [0.9999991710833099]     Target [2.0]
Inputs: [0.0, 1.0] --> [0.9999996328965068]     Target [0.0]
Inputs: [0.5, 0.0] --> [0.9999976785687611]     Target [3.0]
Inputs: [0.5, 0.5] --> [0.9999992837399216]     Target [0.0]
Inputs: [0.5, 1.0] --> [0.9999996729737041]     Target [5.0]
Inputs: [1.0, 0.0] --> [0.9999980402687116]     Target [0.0]
Inputs: [1.0, 0.5] --> [0.9999993680567348]     Target [89.0]
Inputs: [1.0, 1.0] --> [0.9999997038262324]     Target [0.0]

Is there anything wrong with the code or with the way I am using it? Why am I always getting an output value less than 1?


Solution

You are using a sigmoid-type activation function (tanh in this code), whose output always lies between -1 and 1, yet you require your network to output values greater than 1, which is impossible. Scale all your target values down by the maximum target (89 in your case) so they fall within the activation's output range, and multiply the network's predictions back up by the same factor when you read them.
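A minimal sketch of that scaling, assuming the NN class and the pat list defined in the question; the MAX_TARGET constant and scale_targets helper are illustrative names, not part of the original code:

MAX_TARGET = 89.0  # largest target value in the training set

def scale_targets(patterns, max_target):
  # Divide every target by max_target so all targets lie in [0, 1],
  # inside tanh's output range of (-1, 1).
  return [[inputs, [t / max_target for t in targets]]
          for inputs, targets in patterns]

pat_scaled = scale_targets(pat, MAX_TARGET)

myNN = NN(2, 10, 1)
myNN.train(pat_scaled)

# To read a prediction in the original units, multiply back by the same factor:
prediction = myNN.runNN([1.0, 0.5])[0] * MAX_TARGET

Note that the largest scaled target (89/89 = 1.0) sits on the boundary of tanh's range, so the network can only approach it asymptotically; dividing by a value slightly larger than the maximum avoids that edge case.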
