## I Give a Talk on Neural Network Fundamentals

When learning most things related to computer science, I think the most difficult part is the first few steps. I recently gave a talk intended to be an absolute introduction to neural networks. This topic forced me to think long and hard about what to include in the talk, and more importantly, what not to include.

I used a combination of pictures and code. I strongly believe you can’t understand neural networks without seeing code, but I also strongly believe you need diagrams to understand the code.

My canonical demo program illustrated the NN input-output mechanism. This included:

The idea of nodes and layers
Weights and biases
Weight initialization
Sum-of-products computation
Hidden layer activation (just tanh for now)
Output layer softmax activation

As I just mentioned, the important idea here is what I left out of the discussion. Things like back-propagation and stochastic gradient descent, data normalization and encoding, cross-entropy error, and so on, are must-know topics, but based on my experience, presenting such topics too early does more harm than good.

Anyway, good fun for me. No matter how many times I present a topic, I always gain a new insight or two.

```python
# nn_io.py
# Anaconda3 (Python 3.5.2, NumPy 1.11.1)

import numpy as np
import math

def show_vector(v, dec):
    """Print the values of vector v on one line, dec decimals each,
    space-padded for alignment (two trailing spaces per value)."""
    fmt = "% ." + str(dec) + "f"  # like '% .4f'
    for val in v:
        print(fmt % val + '  ', end='')
    print('')

def show_matrix(m, dec):
    """Print matrix m one row per line via show_vector()."""
    for row in m:
        show_vector(row, dec)

# -----

class NeuralNetwork:
    """A fully-connected feed-forward neural network with one hidden layer.

    Demonstrates only the input-output mechanism: tanh activation on the
    hidden layer and softmax on the output layer. There is no training
    code (no back-propagation) -- this is an introductory demo.
    """

    def __init__(self, num_input, num_hidden, num_output):
        """Allocate node/weight/bias arrays and randomly initialize weights."""
        self.ni = num_input
        self.nh = num_hidden
        self.no = num_output

        # node values for each layer
        self.i_nodes = np.zeros(shape=[self.ni], dtype=np.float32)
        self.h_nodes = np.zeros(shape=[self.nh], dtype=np.float32)
        self.o_nodes = np.zeros(shape=[self.no], dtype=np.float32)

        # input-to-hidden and hidden-to-output weight matrices
        self.ih_weights = np.zeros(shape=[self.ni, self.nh],
                                   dtype=np.float32)
        self.ho_weights = np.zeros(shape=[self.nh, self.no],
                                   dtype=np.float32)

        self.h_biases = np.zeros(shape=[self.nh], dtype=np.float32)
        self.o_biases = np.zeros(shape=[self.no], dtype=np.float32)

        self.rnd = np.random.RandomState(1)  # fixed seed -> reproducible
        self.initialize_weights()

    def set_weights(self, weights):
        """Copy a flat array of weights and biases into the network.

        Serial order: ih_weights (row-major), h_biases, ho_weights
        (row-major), o_biases. Prints a warning (does not raise) if the
        length doesn't match total_weights().
        """
        if len(weights) != self.total_weights(self.ni,
                                              self.nh, self.no):
            print("Warning: len(weights) error in set_weights()")

        idx = 0
        for i in range(self.ni):
            for j in range(self.nh):
                self.ih_weights[i, j] = weights[idx]
                idx += 1

        for j in range(self.nh):
            self.h_biases[j] = weights[idx]
            idx += 1

        for j in range(self.nh):
            for k in range(self.no):
                self.ho_weights[j, k] = weights[idx]
                idx += 1

        for k in range(self.no):
            self.o_biases[k] = weights[idx]
            idx += 1

    def get_weights(self):
        """Return all weights and biases as one flat float32 array,
        in the same serial order that set_weights() expects."""
        tw = self.total_weights(self.ni, self.nh, self.no)
        result = np.zeros(shape=[tw], dtype=np.float32)
        idx = 0  # points into result

        for i in range(self.ni):
            for j in range(self.nh):
                result[idx] = self.ih_weights[i, j]
                idx += 1

        for j in range(self.nh):
            result[idx] = self.h_biases[j]
            idx += 1

        for j in range(self.nh):
            for k in range(self.no):
                result[idx] = self.ho_weights[j, k]
                idx += 1

        for k in range(self.no):
            result[idx] = self.o_biases[k]
            idx += 1

        return result

    def initialize_weights(self):
        """Set all weights and biases to small uniform random values
        in [-0.01, +0.01)."""
        num_wts = NeuralNetwork.total_weights(self.ni,
                                              self.nh, self.no)
        wts = np.float32(self.rnd.uniform(-0.01, 0.01,
                                          (num_wts)))
        self.set_weights(wts)

    def compute_outputs(self, x_values):
        """Run one forward pass: x_values -> hidden (tanh) -> output
        (softmax). Prints intermediate values for demo purposes and
        returns a copy of the output node values."""
        print("\n ih_weights: ")
        show_matrix(self.ih_weights, 2)

        print("\n h_biases: ")
        show_vector(self.h_biases, 2)

        print("\n ho_weights: ")
        show_matrix(self.ho_weights, 2)

        print("\n o_biases: ")
        show_vector(self.o_biases, 2)

        h_sums = np.zeros(shape=[self.nh], dtype=np.float32)
        o_sums = np.zeros(shape=[self.no], dtype=np.float32)

        for i in range(self.ni):
            self.i_nodes[i] = x_values[i]

        # hidden pre-activation: sum of (input * weight) ...
        for j in range(self.nh):
            for i in range(self.ni):
                h_sums[j] += self.i_nodes[i] * self.ih_weights[i, j]

        # ... plus the hidden bias
        for j in range(self.nh):
            h_sums[j] += self.h_biases[j]

        print("\n pre-tanh activation hidden node values: ")
        show_vector(h_sums, 4)

        for j in range(self.nh):
            self.h_nodes[j] = self.hypertan(h_sums[j])

        print("\n after activation hidden node values: ")
        show_vector(self.h_nodes, 4)

        # output pre-activation: sum of (hidden * weight) plus bias
        for k in range(self.no):
            for j in range(self.nh):
                o_sums[k] += self.h_nodes[j] * self.ho_weights[j, k]

        for k in range(self.no):
            o_sums[k] += self.o_biases[k]

        print("\n pre-softmax output values: ")
        show_vector(o_sums, 4)

        soft_out = self.softmax(o_sums)
        for k in range(self.no):
            self.o_nodes[k] = soft_out[k]

        # return a copy so callers can't mutate internal state
        result = np.zeros(shape=self.no, dtype=np.float32)
        for k in range(self.no):
            result[k] = self.o_nodes[k]

        return result

    @staticmethod
    def hypertan(x):
        """Hyperbolic tangent, clamped for large |x| where tanh has
        saturated anyway; avoids needless work on extreme inputs.

        BUG FIX: the original text read 'if x  20.0:' -- a garbled
        comparison with a missing operator and a missing negative-
        saturation branch. Restored the standard clamped form.
        """
        if x < -20.0:
            return -1.0
        elif x > 20.0:
            return 1.0
        else:
            return math.tanh(x)

    @staticmethod
    def softmax(o_sums):
        """Return softmax of o_sums (values sum to 1.0).

        NOTE: naive form -- no max-subtraction, so very large sums could
        overflow math.exp(); fine for this small demo.
        """
        result = np.zeros(shape=[len(o_sums)], dtype=np.float32)
        div = 0.0
        for k in range(len(o_sums)):
            div += math.exp(o_sums[k])
        for k in range(len(result)):
            result[k] = math.exp(o_sums[k]) / div
        return result

    @staticmethod
    def total_weights(n_input, n_hidden, n_output):
        """Number of weights + biases for an n_input-n_hidden-n_output net."""
        tw = (n_input * n_hidden) + (n_hidden * n_output) + \
             n_hidden + n_output
        return tw

# end class NeuralNetwork

def main():
    """Build a tiny 3-4-2 net, hand-set its 26 weights/biases to
    0.01 .. 0.26, and run a single forward pass on [1, 2, 3]."""
    print("\nBegin NN demo \n")

    num_input = 3
    num_hidden = 4
    num_output = 2
    print("Creating a %d-%d-%d neural network " \
      % (num_input, num_hidden, num_output) )
    nn = NeuralNetwork(num_input, num_hidden, num_output)

    print("\nSetting weights and biases ")
    num_wts = NeuralNetwork.total_weights(num_input,
                                          num_hidden, num_output)
    # [0.01, 0.02, ... 0.26] -- 26 cells
    wts = np.array([(i + 1) * 0.01 for i in range(num_wts)],
                   dtype=np.float32)
    nn.set_weights(wts)

    x_values = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    print("\nInput values are: ")
    show_vector(x_values, 1)

    y_values = nn.compute_outputs(x_values)
    print("\nOutput values are: ")
    show_vector(y_values, 4)

    print("\nEnd demo \n")

if __name__ == "__main__":
    main()
```
This entry was posted in Machine Learning. Bookmark the permalink.