Logistic Regression with CNTK v2.0 RC2

In my opinion, the two most powerful code libraries for deep neural networks are Google’s TensorFlow and Microsoft’s CNTK. TensorFlow has a huge lead on CNTK — TF has been fully released for over a year while CNTK v2.0 is currently (like today as I write this) only in Release Candidate 2.

But I’m making a bet on CNTK, in part because I prefer CNTK’s API (both libraries use Python to access the underlying C++ code), and in part because CNTK is much, much better on Windows where I do most of my work.

First, in a command shell, I upgraded my CNTK v2.0 RC1 to RC2 with the command:

> pip install --upgrade --no-deps <url>

where <url> is a reference to a Python .whl file (think binary installer):

https://cntk.ai/PythonWheel/CPU-Only/cntk-2.0rc2-cp35-cp35m-win_amd64.whl

I went to the CNTK Python API site (which wasn't all that easy to find) and refactored the basic Logistic Regression example from a Jupyter notebook into a normal Python script. The example code was very, very long; much too long, in my opinion, for what should be a simple Hello World. A lot of the code created nice-looking and useful graphs, but that code obscured the main ideas of Logistic Regression.

If I get a chance, I intend to walk through the CNTK example LR code and extract just the bare minimum that performs LR so I can make sure I know exactly what's going on.
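As a preview, here is my guess at what that bare-minimum core looks like, distilled from the full script below. This is a rough, untested sketch: the make_data helper and the 800-minibatch count are my own stand-ins, not part of the official example, though every CNTK call in it is one the full script also uses.

# lr_core_sketch.py -- bare-minimum logistic regression (my sketch)
from __future__ import print_function
import numpy as np
from cntk import *

def make_data(n):
  # two classes; feature values are shifted by class index, as in the demo
  cls = np.random.randint(0, 2, size=(n, 1))
  x = ((np.random.randn(n, 2) + 3) * (cls + 1)).astype(np.float32)
  onehot = np.asarray(np.hstack([cls == c for c in range(2)]),
    dtype=np.float32)
  return x, onehot

X = input(2, np.float32)             # feature variable (2 predictors)
Y = input(2, np.float32)             # one-hot label variable (2 classes)
W = parameter(shape=(2, 2))          # weights to learn
b = parameter(shape=2)               # biases to learn
z = times(X, W) + b                  # the entire LR model

loss = cross_entropy_with_softmax(z, Y)
err = classification_error(z, Y)
lr = learning_rate_schedule(0.5, UnitType.minibatch)
trainer = Trainer(z, (loss, err), [sgd(z.parameters, lr)])

for i in range(800):                 # 800 minibatches of 25 = 20,000 items
  feats, labels = make_data(25)
  trainer.train_minibatch({X: feats, Y: labels})
  if i % 50 == 0:
    print(i, trainer.previous_minibatch_loss_average)

The full refactored script, graphs and all, is: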

# log_reg_cntk.py
# logistic regression using CNTK v2.0 RC2
# May 1, 2017

from __future__ import print_function
import numpy as np
import sys
import os
from cntk import *
import matplotlib.pyplot as plt

def generate_random_data_sample(sample_size, \
  feature_dim, num_classes):
  Y = np.random.randint(size=(sample_size, 1), \
    low=0, high=num_classes)
  X = (np.random.randn(sample_size, \
    feature_dim)+3) * (Y+1)
  X = X.astype(np.float32)    
  class_ind = [Y==class_number for class_number \
    in range(num_classes)]
  Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
  return X, Y

mydict = {"w":None,"b":None} # a global object

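# build the model z = x*W + b; the weight and bias parameter objects are
# also stashed in mydict so they can be inspected after training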
def linear_layer(input_var, output_dim):
  input_dim = input_var.shape[0]
  weight_param = parameter(shape=(input_dim, output_dim))
  bias_param = parameter(shape=(output_dim))
  mydict['w'], mydict['b'] = weight_param, bias_param
  return times(input_var, weight_param) + bias_param

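# smooth a list of values with a simple trailing average over a window of w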
def moving_average(a, w=10):
  if len(a) < w: 
    return a[:]    
  return [val if idx < w else sum(a[(idx-w):idx])/w \
    for idx, val in enumerate(a)]

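# fetch (and optionally print) the loss and error for the latest minibatch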
def print_training_progress(trainer, mb, frequency, \
  verbose=1):
  training_loss, eval_error = "NA", "NA"
  if mb % frequency == 0:
    training_loss = \
      trainer.previous_minibatch_loss_average
    eval_error = \
      trainer.previous_minibatch_evaluation_average
    if verbose: 
      print ("Minibatch: {0}, Loss: {1:.4f}, \
        Error: {2:.2f}".format(mb, training_loss, \
        eval_error))        
  return mb, training_loss, eval_error


def main():
  print("\nBegin logistic regression demo ")
  print("Using CNTK RC2 \n")

  np.random.seed(0)
  input_dim = 2
  num_output_classes = 2
  mysamplesize = 32
  features, labels = \
    generate_random_data_sample(mysamplesize, \
      input_dim, num_output_classes)

  colors = ['r' if l == 0 else 'b' for l in labels[:,0]]
  plt.scatter(features[:,0], features[:,1], c=colors)
  plt.xlabel("Scaled age (in yrs)")
  plt.ylabel("Tumor size (in cm)")
  plt.show()

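  # create the feature variable and the linear model; note that input()
  # here is CNTK's input function (from the star import), not the builtin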
  feature = input(input_dim, np.float32)
  output_dim = num_output_classes
  z = linear_layer(feature, output_dim)

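  # define the label variable, the training loss, the error metric,
  # and a Trainer that uses plain SGD with a fixed learning rate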
  label = input((num_output_classes), np.float32)
  loss = cross_entropy_with_softmax(z, label)
  eval_error = classification_error(z, label)
  learning_rate = 0.5
  lr_schedule = learning_rate_schedule(learning_rate, \
    UnitType.minibatch) 
  learner = sgd(z.parameters, lr_schedule)
  trainer = Trainer(z, (loss, eval_error), [learner])
  minibatch_size = 25
  num_samples_to_train = 20000
  num_minibatches_to_train = int(num_samples_to_train \
    / minibatch_size)

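  # train on a stream of freshly generated random minibatches,
  # recording loss and error every 50 minibatches for plotting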
  training_progress_output_freq = 50
  plotdata = {"batchsize":[], "loss":[], "error":[]}
  for i in range(0, num_minibatches_to_train):
    features, labels = \
      generate_random_data_sample(minibatch_size, \
      input_dim, num_output_classes)
    trainer.train_minibatch({feature : features, \
      label : labels})
    batchsize, loss, error = \
      print_training_progress(trainer, i, \
      training_progress_output_freq, verbose=1)    
    if not (loss == "NA" or error == "NA"):
      plotdata["batchsize"].append(batchsize)
      plotdata["loss"].append(loss)
      plotdata["error"].append(error)

  plotdata["avgloss"] = moving_average(plotdata["loss"])
  plotdata["avgerror"] = moving_average(plotdata["error"])
  plt.figure(1)
  plt.subplot(211)
  plt.plot(plotdata["batchsize"], \
    plotdata["avgloss"], 'b--')
  plt.xlabel('Minibatch number')
  plt.ylabel('Loss')
  plt.title('Minibatch run vs. Training loss')

  plt.subplot(212)
  plt.plot(plotdata["batchsize"], \
    plotdata["avgerror"], 'r--')
  plt.xlabel('Minibatch number')
  plt.ylabel('Label Prediction Error')
  plt.title('Minibatch run vs. Label Prediction Error')
  plt.show()  # a single show() so both subplots render in one figure
              # (calling show() between subplots only works in Jupyter)

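  # evaluate the trained model on a new test minibatch and
  # compare predicted classes against the true labels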
  test_minibatch_size = 25
  features, labels = \
    generate_random_data_sample(test_minibatch_size, \
    input_dim, num_output_classes)
  avg_error = trainer.test_minibatch({feature : features, \
    label : labels})  # returns the average classification error
  print("Average test error: {0:.2f}".format(avg_error))
  out = softmax(z)
  result = out.eval({feature : features})

  print("Label    :", [np.argmax(label) \
    for label in labels])
  print("Predicted:", [np.argmax(result[i,:]) \
    for i in range(len(result))])

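  # retrieve the learned bias and weights saved in the global mydict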
  print(mydict['b'].value)
  bias_vector   = mydict['b'].value
  weight_matrix = mydict['w'].value

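  # plot the test data along with the learned decision boundary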
  colors = ['r' if l == 0 else 'b' for l in labels[:,0]]
  plt.scatter(features[:,0], features[:,1], c=colors)
  plt.plot([0, bias_vector[0]/weight_matrix[0][1]], \
    [ bias_vector[1]/weight_matrix[0][0], 0], \
    c = 'g', lw = 3)
  plt.xlabel("Scaled age (in yrs)")
  plt.ylabel("Tumor size (in cm)")
  plt.show()


  print("\nEnd demo \n")

if __name__ == "__main__":
  main()