I Give a Talk on Neural Network Fundamentals

When learning most things related to computer science, I think the most difficult part is the first few steps. I recently gave a talk intended to be an absolute introduction to neural networks. This topic forced me to think long and hard about what to include in the talk, and more importantly, what not to include.

I used a combination of pictures and code. I strongly believe you can’t understand neural networks without seeing code, but I also strongly believe you need diagrams to understand the code.

My canonical demo program illustrated the NN input-output mechanism. This included:

The idea of nodes and layers
Weights and biases
Weight initialization
Sum-of-products computation
Hidden layer activation (just tanh for now)
Output layer softmax activation

As I just mentioned, the important idea here is what I left out of the discussion. Things like back-propagation and stochastic gradient descent, data normalization and encoding, cross entropy error, and so on, are must-know topics, but based on my experience, presenting such topics too early does more harm than good.

Anyway, good fun for me. No matter how many times I present a topic, I always gain a new insight or two.

# nn_io.py
# Anaconda3 (Python 3.5.2, NumPy 1.11.1)

import numpy as np
import math

def show_vector(v, dec):
  """Print the values of v on one line, each with dec decimal places.

  Every value is followed by two spaces; positive values get a leading
  space so they line up with negative ones. The line ends with a newline.
  """
  template = "% .{}f".format(dec)  # e.g. '% .4f'
  for value in v:
    print(template % value, end='  ')
  print()
  
def show_matrix(m, dec):
  """Print each row of m on its own line via show_vector, with dec decimals."""
  for row in m:
    show_vector(row, dec)
  
# -----
	
class NeuralNetwork:
  """Fully connected input-hidden-output network (forward pass only).

  Hidden nodes use tanh activation and output nodes use softmax. Node
  values, weights and biases are all stored as float32 NumPy arrays.
  Weights and biases start as small random values drawn from a
  RandomState seeded with 1, so a new network is reproducible.
  """

  def __init__(self, num_input, num_hidden, num_output):
    """Allocate storage for the network and randomize its weights.

    num_input, num_hidden and num_output are the node counts of the
    input, hidden and output layers.
    """
    self.ni = num_input
    self.nh = num_hidden
    self.no = num_output

    self.i_nodes = np.zeros(shape=[self.ni], dtype=np.float32)
    self.h_nodes = np.zeros(shape=[self.nh], dtype=np.float32)
    self.o_nodes = np.zeros(shape=[self.no], dtype=np.float32)

    # ih_weights[i,j] connects input node i to hidden node j;
    # ho_weights[j,k] connects hidden node j to output node k.
    self.ih_weights = np.zeros(shape=[self.ni, self.nh],
      dtype=np.float32)
    self.ho_weights = np.zeros(shape=[self.nh, self.no],
      dtype=np.float32)

    self.h_biases = np.zeros(shape=[self.nh], dtype=np.float32)
    self.o_biases = np.zeros(shape=[self.no], dtype=np.float32)

    self.rnd = np.random.RandomState(1)
    self.initialize_weights()

  def set_weights(self, weights):
    """Copy a flat sequence of values into the weights and biases.

    Expected order: input-to-hidden weights (row by row), hidden biases,
    hidden-to-output weights (row by row), output biases.

    A length mismatch only prints a warning and the copy is still
    attempted: extra trailing values are ignored, while a sequence that
    is too short fails part-way through when indexing runs off its end.
    """
    if len(weights) != self.total_weights(self.ni, self.nh, self.no):
      print("Warning: len(weights) error in set_weights()")

    idx = 0  # points into weights
    for i in range(self.ni):
      for j in range(self.nh):
        self.ih_weights[i, j] = weights[idx]
        idx += 1

    for j in range(self.nh):
      self.h_biases[j] = weights[idx]
      idx += 1

    for j in range(self.nh):
      for k in range(self.no):
        self.ho_weights[j, k] = weights[idx]
        idx += 1

    for k in range(self.no):
      self.o_biases[k] = weights[idx]
      idx += 1

  def get_weights(self):
    """Return all weights and biases as a new flat float32 array.

    The order matches set_weights(): input-to-hidden weights (row by
    row), hidden biases, hidden-to-output weights (row by row), output
    biases.
    """
    # ravel() flattens row by row, the same order set_weights() fills in.
    flat = np.concatenate((
      self.ih_weights.ravel(),
      self.h_biases,
      self.ho_weights.ravel(),
      self.o_biases,
    ))
    return flat.astype(np.float32)

  def initialize_weights(self):
    """Set every weight and bias to a random value in [-0.01, 0.01)."""
    num_wts = self.total_weights(self.ni, self.nh, self.no)
    wts = self.rnd.uniform(-0.01, 0.01, num_wts).astype(np.float32)
    self.set_weights(wts)

  def compute_outputs(self, x_values):
    """Feed x_values through the network and return the output values.

    Prints the current weights and biases and the intermediate values
    (hidden sums, hidden activations, output sums) along the way. The
    first self.ni entries of x_values are copied into the input nodes.
    Returns a new float32 array holding the softmax outputs; the same
    values are also kept in self.o_nodes.
    """
    print("\n ih_weights: ")
    show_matrix(self.ih_weights, 2)

    print("\n h_biases: ")
    show_vector(self.h_biases, 2)

    print("\n ho_weights: ")
    show_matrix(self.ho_weights, 2)

    print("\n o_biases: ")
    show_vector(self.o_biases, 2)

    h_sums = np.zeros(shape=[self.nh], dtype=np.float32)
    o_sums = np.zeros(shape=[self.no], dtype=np.float32)

    for i in range(self.ni):
      self.i_nodes[i] = x_values[i]

    # The sum-of-products is spelled out with explicit loops so each
    # step of the input-to-hidden computation stays visible.
    for j in range(self.nh):
      for i in range(self.ni):
        h_sums[j] += self.i_nodes[i] * self.ih_weights[i, j]

    for j in range(self.nh):
      h_sums[j] += self.h_biases[j]

    print("\n pre-tanh activation hidden node values: ")
    show_vector(h_sums, 4)

    for j in range(self.nh):
      self.h_nodes[j] = self.hypertan(h_sums[j])

    print("\n after activation hidden node values: ")
    show_vector(self.h_nodes, 4)

    for k in range(self.no):
      for j in range(self.nh):
        o_sums[k] += self.h_nodes[j] * self.ho_weights[j, k]

    for k in range(self.no):
      o_sums[k] += self.o_biases[k]

    print("\n pre-softmax output values: ")
    show_vector(o_sums, 4)

    soft_out = self.softmax(o_sums)
    for k in range(self.no):
      self.o_nodes[k] = soft_out[k]

    # Hand back a copy so callers cannot alter the stored node values.
    return self.o_nodes.copy()

  @staticmethod
  def hypertan(x):
    """Return tanh(x), short-circuiting to -1.0 / 1.0 when |x| > 20."""
    if x < -20.0:
      return -1.0
    elif x > 20.0:
      return 1.0
    else:
      return math.tanh(x)

  @staticmethod
  def softmax(o_sums):
    """Return the softmax of o_sums as a float32 array of the same length.

    The largest value is subtracted from every entry before
    exponentiating. This leaves the result mathematically unchanged but
    keeps math.exp() from overflowing on large inputs. An empty input
    gives an empty result.
    """
    result = np.zeros(shape=[len(o_sums)], dtype=np.float32)
    if len(o_sums) == 0:
      return result

    largest = float(max(o_sums))
    exps = [math.exp(float(v) - largest) for v in o_sums]
    div = sum(exps)  # at least 1.0, because the largest entry maps to exp(0)
    for k, e in enumerate(exps):
      result[k] = e / div
    return result

  @staticmethod
  def total_weights(n_input, n_hidden, n_output):
    """Return the number of weights plus biases for the given layer sizes."""
    tw = (n_input * n_hidden) + (n_hidden * n_output) + \
      n_hidden + n_output
    return tw

# end class NeuralNetwork

def main():
  """Demo: build a 3-4-2 network, load fixed weights, show one forward pass."""
  print("\nBegin NN demo \n")

  n_in, n_hid, n_out = 3, 4, 2
  print("Creating a %d-%d-%d neural network " % (n_in, n_hid, n_out))
  net = NeuralNetwork(n_in, n_hid, n_out)

  print("\nSetting weights and biases ")
  wt_count = NeuralNetwork.total_weights(n_in, n_hid, n_out)
  # 26 cells holding 0.01, 0.02, . . 0.26
  fixed_wts = (np.arange(1, wt_count + 1) * 0.01).astype(np.float32)
  net.set_weights(fixed_wts)

  inputs = np.array([1.0, 2.0, 3.0], dtype=np.float32)
  print("\nInput values are: ")
  show_vector(inputs, 1)

  outputs = net.compute_outputs(inputs)
  print("\nOutput values are: ")
  show_vector(outputs, 4)

  print("\nEnd demo \n")
   
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
  main()
This entry was posted in Machine Learning. Bookmark the permalink.