Keras Autoencoder Dimensionality Reduction for UCI Digits Visualization

One Sunday morning, after I walked my dogs, I decided to code up an example of using a Keras library autoencoder for dimensionality reduction for visualization of the UCI Digits dataset.

The UCI Digits dataset consists of crude 8×8 images of handwritten ‘0’ to ‘9’ digits. Each pixel is a greyscale value between 0 and 16. I used the 1797-item test dataset.


Examples of ten typical UCI digits.
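
If you don't have the data on hand as a text file, essentially the same 1797-item dataset ships with scikit-learn as load_digits(). Here's a minimal sketch (assuming scikit-learn is installed; the output path is my choice, made to match the loading code in the full program below) that writes the data in the 64-pixels-then-label comma-delimited format the program expects:

import os
import numpy as np
from sklearn.datasets import load_digits

# hedged sketch: write the 1797 UCI digits to a comma-delimited
# file with 64 pixel values (0-16) followed by the class label
digits = load_digits()                     # 1797 items, 8x8 images
pixels = digits.data                       # shape (1797, 64)
labels = digits.target.reshape(-1, 1)      # shape (1797, 1)
combined = np.hstack((pixels, labels))     # shape (1797, 65)

os.makedirs(".\\Data", exist_ok=True)
np.savetxt(".\\Data\\digits_uci_test_1797.txt", combined,
  fmt="%d", delimiter=",")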

My autoencoder architecture was 64-32-2-32-64, so each 64-pixel image is condensed down to just 2 values, which means each image can be plotted as a point on an x-y graph.


According to the autoencoder mapping, 0s and 3s are quite similar, which makes sense.

My biggest takeaway was recalling that there are many, many ways to design an autoencoder using Keras. My design ended up being:

class Autoencoder(K.models.Model):
  # 64-32-2-32-64
  def __init__(self):
    super(Autoencoder, self).__init__()
    my_init = K.initializers.glorot_uniform(seed=1)
    self.encoder = K.models.Sequential()
    self.encoder.add(K.layers.Dense(input_dim=64, 
      units=32, activation='tanh', 
      kernel_initializer=my_init))
    self.encoder.add(K.layers.Dense(units=2,
      activation='tanh', kernel_initializer=my_init))

    self.decoder = K.models.Sequential()
    self.decoder.add(K.layers.Dense(input_dim=2, 
      units=32, activation='tanh', 
      kernel_initializer=my_init))
    self.decoder.add(K.layers.Dense(units=64,
      kernel_initializer=my_init))  # no activation = identity

  def encode(self, x):
    encoded = self.encoder(x)
    return encoded

  def decode(self, z):
    decoded = self.decoder(z)
    return decoded

  def call(self, x):
    z = self.encode(x)
    z = self.decode(z)
    return z

A complete explanation of this definition would take many pages, but using the autoencoder is simple:

auto = Autoencoder()
# compile and train the autoencoder
# set up some input values
reduced = auto.encode(inpt)  # each image reduced to 2 values
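
The commented placeholders correspond to the compile, fit, and data-setup code in the full program at the bottom of this post. Filled in (data_x is the normalized 1797 x 64 pixel array), the pattern looks roughly like:

auto = Autoencoder()
auto.compile(loss='mean_squared_error',
  optimizer=K.optimizers.Adam())
auto.fit(data_x, data_x, batch_size=8,
  epochs=80, verbose=0)
reduced = auto.encode(data_x)  # shape (1797, 2)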

For dimensionality reduction, the decode() method isn't needed, but you would use decode() for tasks such as denoising and anomaly detection.
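
For example, here's a hedged sketch of the anomaly detection idea, assuming a trained autoencoder named auto and the normalized (1797, 64) data array data_x from the program below. Items with the largest reconstruction error are the most anomalous:

import numpy as np

# hedged sketch: anomaly detection via reconstruction error
reconstructed = auto.predict(data_x)   # encode then decode, (1797, 64)
errors = np.mean((data_x - reconstructed)**2, axis=1)  # per-item MSE
worst = np.argsort(errors)[::-1][:5]   # indexes of 5 largest errors
print("Most anomalous item indexes:", worst)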

Good fun.



Minimalist art is a form of dimensionality reduction. Three examples, from least reduced to most reduced. Left: By Rokas Aleliunas. Center: By Pablo Romero. Right: By Mads Berg.


Code:

# uci_auto_viz_tfk.py

# autoencoder dim reduction visualization for UCI Digits
# Anaconda3-2020.02  (Python 3.7.6)
# TensorFlow 2.6.0 (includes KerasTF 2.6.0)
# Windows 10

import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras as K

class Autoencoder(K.models.Model):
  # 64-32-2-32-64
  def __init__(self):
    super(Autoencoder, self).__init__()
    my_init = K.initializers.glorot_uniform(seed=1)
    self.encoder = K.models.Sequential()
    self.encoder.add(K.layers.Dense(input_dim=64, 
      units=32, activation='tanh', kernel_initializer=my_init))
    self.encoder.add(K.layers.Dense(units=2,
      activation='tanh', kernel_initializer=my_init))

    self.decoder = K.models.Sequential()
    self.decoder.add(K.layers.Dense(input_dim=2, 
      units=32, activation='tanh', kernel_initializer=my_init))
    self.decoder.add(K.layers.Dense(units=64,
      kernel_initializer=my_init))  # no activation = identity

  def encode(self, x):
    encoded = self.encoder(x)
    return encoded

  def decode(self, z):
    decoded = self.decoder(z)
    return decoded

  def call(self, x):
    z = self.encode(x)
    z = self.decode(z)
    return z

class MyLogger(K.callbacks.Callback):
  def __init__(self, n):
    super(MyLogger, self).__init__()
    self.n = n   # print loss every n epochs

  def on_epoch_end(self, epoch, logs={}):
    if epoch % self.n == 0:
      curr_loss = logs.get('loss')
      print("epoch = %4d loss = %0.6f" % (epoch, curr_loss))

def main():
  # 0. get started
  print("\nBegin UCI digits viz using Keras autoencoder")
  np.random.seed(1)
  tf.random.set_seed(1)

  # 1. load data into memory
  print("\nLoading UCI digits (1797 test images) ")
  data_file = ".\\Data\\digits_uci_test_1797.txt"
  data_x = np.loadtxt(data_file, delimiter=",",
    usecols=range(0,64), dtype=np.float32)
  labels = np.loadtxt(data_file, delimiter=",",
    usecols=[64], dtype=np.float32)  # labels not used for train
  data_x = data_x / 16

  # 2. create and compile the autoencoder
  print("\nCreating 64-32-2-32-64 autoencoder ")
  autoencoder = Autoencoder()
  simple_adam = K.optimizers.Adam()  
  autoencoder.compile(loss='mean_squared_error',
    optimizer=simple_adam)

  # 3. train the autoencoder
  print("\nStarting training")
  max_epochs = 80
  my_logger = MyLogger(n=10)
  h = autoencoder.fit(data_x, data_x, batch_size=8, 
    epochs=max_epochs, verbose=0, callbacks=[my_logger])
  print("Training complete") 

  # 4. generate (x,y) point pairs for each digit 
  reduced = autoencoder.encode(data_x).numpy()  # 2 values per image
  # print(reduced.shape)  # (1797, 2)
  
  # 5. graph the digits in 2D - crude
  # plt.scatter(reduced[:, 0], reduced[:, 1],
  #           c=labels, edgecolor='none', alpha=0.9,
  #           cmap=plt.cm.get_cmap('nipy_spectral', 11),
  #           s=16)
  # plt.xlabel('component 1'); plt.ylabel('component 2')
  # plt.colorbar(); plt.show()

  # 5. graph the digits in 2D - a bit more refined
  fig, ax = plt.subplots()
  colors = ['red', 'blue', 'green', 'yellow', 'orange',
            'black', 'brown', 'purple', 'silver', 'cyan']
  digits = [' 0 ', ' 1 ', ' 2 ', ' 3 ', ' 4 ',
            ' 5 ', ' 6 ', ' 7 ', ' 8 ', ' 9 ']

  # process by each color
  for i in range(len(colors)):  # 0 to 9 each color
    color = colors[i]
    rows = []                   # select rows for curr color
    for j in range(len(reduced)):    # 0 to 1796
      if labels[j] == i:
        rows.append(True)       # to extract rows, must use bool
      else:
        rows.append(False)

    rows = np.array(rows, dtype=bool)  # list to array
    selected = reduced[rows,:]                 # like (178,2)

    x = selected[:,0]  # like (178,)
    y = selected[:,1]
    ax.scatter(x, y, c=color, s=10, alpha=0.9)
    # and continue on to next color/class

  txt = "\n" + "0=red \n" + "1=blue \n" + "2=green \n" + \
    "3=yellow \n" + "4=orange \n" + "5=black \n" + \
    "6=brown \n" + "7=purple \n" + "8=silver \n" + "9=cyan \n"

  props = dict(boxstyle='round', facecolor='wheat', alpha=0.95)
  ax.text(0.95, 0.95, txt, transform=ax.transAxes, fontsize=8,
    verticalalignment='top', bbox=props)
  ax.grid(True)
  plt.xlabel('component 1')
  plt.ylabel('component 2')
  plt.show()

if __name__ == "__main__":
  main()