Regression (People Income) Using PyTorch 1.12 on Windows 10/11

A regression problem is one where the goal is to predict a single numeric value. I decided to check my current PyTorch version (1.12.1-CPU) to make sure there were no breaking changes.

I used one of my standard examples where the goal is to predict a person’s annual income from their sex, age, state, and political leaning. My data looks like:

 1   0.24   1   0   0   0.2950   0   0   1
-1   0.39   0   0   1   0.5120   0   1   0
 1   0.63   0   1   0   0.7580   1   0   0
-1   0.36   1   0   0   0.4450   0   1   0
 1   0.27   0   1   0   0.2860   0   0   1
. . .

The fields (tab-delimited in the actual data files) are sex (male = -1, female = +1), age (divided by 100), state (Michigan = 100, Nebraska = 010, Oklahoma = 001), income (divided by 100,000), politics (conservative = 100, moderate = 010, liberal = 001). The data is synthetic. There are 200 training items and 40 test items.

For my demo, I created an 8-(10-10)-1 neural network with tanh() activation on the hidden nodes. I used explicit weight and bias initialization.

For training, I used Adam optimization with a fixed learning rate of 0.01, and mean squared error.

I implemented a program-defined accuracy() function where a correct income prediction is one that’s within a specified percentage of the true income. After training, using a 10% closeness percentage, my model scored 91.00% accuracy on the training data (182 of 200 correct), and 85.00% accuracy on the test data (34 of 40 correct).

Good fun.



Left: The board game “Careers” was first published in 1955. Players accumulate fame, happiness, and money. An unusual feature is that players start by setting their own victory conditions, such as 20 fame points, 10 happiness points, and 30 money points. The game is fun and interesting. Right: “Catan” was first published in 1995 and is wildly popular. The goal is to create wealth by building settlements and roads. I enjoy playing Catan a lot.


Demo code. Replace “lt” with the less-than operator symbol (my lame blog editor chokes on symbols).

# people_income.py
# predict income from sex, age, city, politics
# PyTorch 1.12.1-CPU Anaconda3-2020.02  Python 3.7.6
# Windows 10/11 

import numpy as np
import torch as T

device = T.device('cpu')  # apply to Tensor or Module

# -----------------------------------------------------------

class PeopleDataset(T.utils.data.Dataset):
  # Tab-delimited rows:
  #   sex  age   state(3)  income  politics(3)
  #   -1   0.27  0 1 0     0.7610  0 0 1
  # Column 5 (income) is the target; the other 8 columns are predictors.

  def __init__(self, src_file):
    raw = np.loadtxt(src_file, usecols=range(0, 9),
      delimiter="\t", comments="#", dtype=np.float32)
    preds = raw[:, [0, 1, 2, 3, 4, 6, 7, 8]]  # all columns except income
    trgts = raw[:, 5].reshape(-1, 1)          # 2D required by MSELoss

    self.x_data = T.tensor(preds, dtype=T.float32).to(device)
    self.y_data = T.tensor(trgts, dtype=T.float32).to(device)

  def __len__(self):
    return self.x_data.shape[0]

  def __getitem__(self, idx):
    # (predictors, income) tuple for one person
    return (self.x_data[idx], self.y_data[idx])

# -----------------------------------------------------------

class Net(T.nn.Module):
  # 8-(10-10)-1 regression network, tanh() on the hidden layers,
  # no activation on the output node.

  def __init__(self):
    super().__init__()
    self.hid1 = T.nn.Linear(8, 10)
    self.hid2 = T.nn.Linear(10, 10)
    self.oupt = T.nn.Linear(10, 1)

    # explicit init; loop visits layers in the same order as before,
    # so RNG consumption (and therefore the weights) is unchanged
    for layer in (self.hid1, self.hid2, self.oupt):
      T.nn.init.xavier_uniform_(layer.weight)
      T.nn.init.zeros_(layer.bias)

  def forward(self, x):
    z = T.tanh(self.hid1(x))
    z = T.tanh(self.hid2(z))
    return self.oupt(z)  # regression: no output activation

# -----------------------------------------------------------

def accuracy(model, ds, pct_close):
  # Item-by-item accuracy: a prediction is correct when it is within
  # pct_close (a fraction, e.g. 0.10) of the true income.
  # Assumes caller has already set model.eval().
  n_correct = 0; n_wrong = 0

  for i in range(len(ds)):
    X = ds[i][0]   # predictors (1-d tensor)
    Y = ds[i][1]   # true income (1-d tensor)
    with T.no_grad():
      oupt = model(X)         # computed income

    # BUG FIX: the blog-editor placeholder "lt" replaced with the
    # actual less-than operator
    if T.abs(oupt - Y) < T.abs(pct_close * Y):
      n_correct += 1
    else:
      n_wrong += 1
  acc = (n_correct * 1.0) / (n_correct + n_wrong)
  return acc

# -----------------------------------------------------------

def accuracy_x(model, ds, pct_close):
  # All-at-once (vectorized) version of accuracy() -- much quicker.
  # Assumes caller has already set model.eval().
  X = ds.x_data  # all inputs
  Y = ds.y_data  # all targets
  n_items = len(X)
  with T.no_grad():
    pred = model(X)  # all predicted incomes

  # BUG FIX: the blog-editor placeholder "lt" replaced with the
  # actual less-than operator
  n_correct = T.sum(T.abs(pred - Y) < T.abs(pct_close * Y))
  result = (n_correct.item() / n_items)  # scalar
  return result

# -----------------------------------------------------------

def train(model, ds, bs, lr, me, le):
  # dataset, bat_size, lrn_rate, max_epochs, log interval
  train_ldr = T.utils.data.DataLoader(ds, batch_size=bs,
    shuffle=True)
  loss_func = T.nn.MSELoss()
  optimizer = T.optim.Adam(model.parameters(), lr=lr)

  for epoch in range(0, me):
    epoch_loss = 0.0  # for one full epoch

    for (b_idx, batch) in enumerate(train_ldr):
      X = batch[0]  # predictors
      y = batch[1]  # target income
      optimizer.zero_grad()
      oupt = model(X)
      loss_val = loss_func(oupt, y)  # a tensor
      epoch_loss += loss_val.item()  # accumulate
      loss_val.backward()  # compute gradients
      optimizer.step()     # update weights

    if epoch % le == 0:
      print("epoch = %4d  |  loss = %0.4f" % (epoch, epoch_loss)) 

# -----------------------------------------------------------

def main():
  # 0. get started: fixed seeds for reproducibility
  print("\nBegin People predict income ")
  T.manual_seed(0)
  np.random.seed(0)

  # 1. load training (200 rows) and test (40 rows) data
  print("\nCreating People Dataset objects ")
  train_ds = PeopleDataset(".\\Data\\people_train.txt")
  test_ds = PeopleDataset(".\\Data\\people_test.txt")

  # 2. create the 8-(10-10)-1 network
  print("\nCreating 8-(10-10)-1 neural network ")
  net = Net().to(device)

# -----------------------------------------------------------

  # 3. train the model
  print("\nbat_size = 10 ")
  print("loss = MSELoss() ")
  print("optimizer = Adam ")
  print("lrn_rate = 0.01 ")

  print("\nStarting training")
  net.train()
  train(net, train_ds, bs=10, lr=0.01, me=1000, le=100)
  print("Done ")

# -----------------------------------------------------------

  # 4. evaluate: a prediction within 10% of true income is "correct"
  print("\nComputing model accuracy (within 0.10 of true) ")
  net = net.eval()
  acc_train = accuracy(net, train_ds, 0.10)   # item-by-item version
  print("Accuracy on train data = %0.4f" % acc_train)

  acc_test = accuracy_x(net, test_ds, 0.10)   # vectorized version
  print("Accuracy on test data = %0.4f" % acc_test)

# -----------------------------------------------------------

  # 5. use the trained model for a single prediction
  print("\nPredicting income for M 34 Oklahoma moderate: ")
  unk = np.array([[-1, 0.34, 0,0,1,  0,1,0]],
    dtype=np.float32)
  unk = T.tensor(unk, dtype=T.float32).to(device)

  with T.no_grad():
    pred_inc = net(unk)
  # income was normalized by 100,000 -- un-normalize for display
  print("$%0.2f" % (pred_inc.item() * 100_000))

# -----------------------------------------------------------

  # 6. save trained weights (state_dict approach)
  print("\nSaving trained model state")
  fn = ".\\Models\\people_income_model.pt"
  T.save(net.state_dict(), fn)

  # to reload later:
  #   model = Net()
  #   model.load_state_dict(T.load(fn))

  print("\nEnd People income demo")

if __name__ == "__main__":
  main()

Training data. Replace comma characters with tabs.

# people_train.txt
#
# sex (-1 = male, 1 = female), age / 100,
# state (michigan = 100, nebraska = 010, oklahoma = 001),
# income / 100_000,
# politics (conservative = 100, moderate = 010, liberal = 001)
#
1,0.24,1,0,0,0.2950,0,0,1
-1,0.39,0,0,1,0.5120,0,1,0
1,0.63,0,1,0,0.7580,1,0,0
-1,0.36,1,0,0,0.4450,0,1,0
1,0.27,0,1,0,0.2860,0,0,1
1,0.50,0,1,0,0.5650,0,1,0
1,0.50,0,0,1,0.5500,0,1,0
-1,0.19,0,0,1,0.3270,1,0,0
1,0.22,0,1,0,0.2770,0,1,0
-1,0.39,0,0,1,0.4710,0,0,1
1,0.34,1,0,0,0.3940,0,1,0
-1,0.22,1,0,0,0.3350,1,0,0
1,0.35,0,0,1,0.3520,0,0,1
-1,0.33,0,1,0,0.4640,0,1,0
1,0.45,0,1,0,0.5410,0,1,0
1,0.42,0,1,0,0.5070,0,1,0
-1,0.33,0,1,0,0.4680,0,1,0
1,0.25,0,0,1,0.3000,0,1,0
-1,0.31,0,1,0,0.4640,1,0,0
1,0.27,1,0,0,0.3250,0,0,1
1,0.48,1,0,0,0.5400,0,1,0
-1,0.64,0,1,0,0.7130,0,0,1
1,0.61,0,1,0,0.7240,1,0,0
1,0.54,0,0,1,0.6100,1,0,0
1,0.29,1,0,0,0.3630,1,0,0
1,0.50,0,0,1,0.5500,0,1,0
1,0.55,0,0,1,0.6250,1,0,0
1,0.40,1,0,0,0.5240,1,0,0
1,0.22,1,0,0,0.2360,0,0,1
1,0.68,0,1,0,0.7840,1,0,0
-1,0.60,1,0,0,0.7170,0,0,1
-1,0.34,0,0,1,0.4650,0,1,0
-1,0.25,0,0,1,0.3710,1,0,0
-1,0.31,0,1,0,0.4890,0,1,0
1,0.43,0,0,1,0.4800,0,1,0
1,0.58,0,1,0,0.6540,0,0,1
-1,0.55,0,1,0,0.6070,0,0,1
-1,0.43,0,1,0,0.5110,0,1,0
-1,0.43,0,0,1,0.5320,0,1,0
-1,0.21,1,0,0,0.3720,1,0,0
1,0.55,0,0,1,0.6460,1,0,0
1,0.64,0,1,0,0.7480,1,0,0
-1,0.41,1,0,0,0.5880,0,1,0
1,0.64,0,0,1,0.7270,1,0,0
-1,0.56,0,0,1,0.6660,0,0,1
1,0.31,0,0,1,0.3600,0,1,0
-1,0.65,0,0,1,0.7010,0,0,1
1,0.55,0,0,1,0.6430,1,0,0
-1,0.25,1,0,0,0.4030,1,0,0
1,0.46,0,0,1,0.5100,0,1,0
-1,0.36,1,0,0,0.5350,1,0,0
1,0.52,0,1,0,0.5810,0,1,0
1,0.61,0,0,1,0.6790,1,0,0
1,0.57,0,0,1,0.6570,1,0,0
-1,0.46,0,1,0,0.5260,0,1,0
-1,0.62,1,0,0,0.6680,0,0,1
1,0.55,0,0,1,0.6270,1,0,0
-1,0.22,0,0,1,0.2770,0,1,0
-1,0.50,1,0,0,0.6290,1,0,0
-1,0.32,0,1,0,0.4180,0,1,0
-1,0.21,0,0,1,0.3560,1,0,0
1,0.44,0,1,0,0.5200,0,1,0
1,0.46,0,1,0,0.5170,0,1,0
1,0.62,0,1,0,0.6970,1,0,0
1,0.57,0,1,0,0.6640,1,0,0
-1,0.67,0,0,1,0.7580,0,0,1
1,0.29,1,0,0,0.3430,0,0,1
1,0.53,1,0,0,0.6010,1,0,0
-1,0.44,1,0,0,0.5480,0,1,0
1,0.46,0,1,0,0.5230,0,1,0
-1,0.20,0,1,0,0.3010,0,1,0
-1,0.38,1,0,0,0.5350,0,1,0
1,0.50,0,1,0,0.5860,0,1,0
1,0.33,0,1,0,0.4250,0,1,0
-1,0.33,0,1,0,0.3930,0,1,0
1,0.26,0,1,0,0.4040,1,0,0
1,0.58,1,0,0,0.7070,1,0,0
1,0.43,0,0,1,0.4800,0,1,0
-1,0.46,1,0,0,0.6440,1,0,0
1,0.60,1,0,0,0.7170,1,0,0
-1,0.42,1,0,0,0.4890,0,1,0
-1,0.56,0,0,1,0.5640,0,0,1
-1,0.62,0,1,0,0.6630,0,0,1
-1,0.50,1,0,0,0.6480,0,1,0
1,0.47,0,0,1,0.5200,0,1,0
-1,0.67,0,1,0,0.8040,0,0,1
-1,0.40,0,0,1,0.5040,0,1,0
1,0.42,0,1,0,0.4840,0,1,0
1,0.64,1,0,0,0.7200,1,0,0
-1,0.47,1,0,0,0.5870,0,0,1
1,0.45,0,1,0,0.5280,0,1,0
-1,0.25,0,0,1,0.4090,1,0,0
1,0.38,1,0,0,0.4840,1,0,0
1,0.55,0,0,1,0.6000,0,1,0
-1,0.44,1,0,0,0.6060,0,1,0
1,0.33,1,0,0,0.4100,0,1,0
1,0.34,0,0,1,0.3900,0,1,0
1,0.27,0,1,0,0.3370,0,0,1
1,0.32,0,1,0,0.4070,0,1,0
1,0.42,0,0,1,0.4700,0,1,0
-1,0.24,0,0,1,0.4030,1,0,0
1,0.42,0,1,0,0.5030,0,1,0
1,0.25,0,0,1,0.2800,0,0,1
1,0.51,0,1,0,0.5800,0,1,0
-1,0.55,0,1,0,0.6350,0,0,1
1,0.44,1,0,0,0.4780,0,0,1
-1,0.18,1,0,0,0.3980,1,0,0
-1,0.67,0,1,0,0.7160,0,0,1
1,0.45,0,0,1,0.5000,0,1,0
1,0.48,1,0,0,0.5580,0,1,0
-1,0.25,0,1,0,0.3900,0,1,0
-1,0.67,1,0,0,0.7830,0,1,0
1,0.37,0,0,1,0.4200,0,1,0
-1,0.32,1,0,0,0.4270,0,1,0
1,0.48,1,0,0,0.5700,0,1,0
-1,0.66,0,0,1,0.7500,0,0,1
1,0.61,1,0,0,0.7000,1,0,0
-1,0.58,0,0,1,0.6890,0,1,0
1,0.19,1,0,0,0.2400,0,0,1
1,0.38,0,0,1,0.4300,0,1,0
-1,0.27,1,0,0,0.3640,0,1,0
1,0.42,1,0,0,0.4800,0,1,0
1,0.60,1,0,0,0.7130,1,0,0
-1,0.27,0,0,1,0.3480,1,0,0
1,0.29,0,1,0,0.3710,1,0,0
-1,0.43,1,0,0,0.5670,0,1,0
1,0.48,1,0,0,0.5670,0,1,0
1,0.27,0,0,1,0.2940,0,0,1
-1,0.44,1,0,0,0.5520,1,0,0
1,0.23,0,1,0,0.2630,0,0,1
-1,0.36,0,1,0,0.5300,0,0,1
1,0.64,0,0,1,0.7250,1,0,0
1,0.29,0,0,1,0.3000,0,0,1
-1,0.33,1,0,0,0.4930,0,1,0
-1,0.66,0,1,0,0.7500,0,0,1
-1,0.21,0,0,1,0.3430,1,0,0
1,0.27,1,0,0,0.3270,0,0,1
1,0.29,1,0,0,0.3180,0,0,1
-1,0.31,1,0,0,0.4860,0,1,0
1,0.36,0,0,1,0.4100,0,1,0
1,0.49,0,1,0,0.5570,0,1,0
-1,0.28,1,0,0,0.3840,1,0,0
-1,0.43,0,0,1,0.5660,0,1,0
-1,0.46,0,1,0,0.5880,0,1,0
1,0.57,1,0,0,0.6980,1,0,0
-1,0.52,0,0,1,0.5940,0,1,0
-1,0.31,0,0,1,0.4350,0,1,0
-1,0.55,1,0,0,0.6200,0,0,1
1,0.50,1,0,0,0.5640,0,1,0
1,0.48,0,1,0,0.5590,0,1,0
-1,0.22,0,0,1,0.3450,1,0,0
1,0.59,0,0,1,0.6670,1,0,0
1,0.34,1,0,0,0.4280,0,0,1
-1,0.64,1,0,0,0.7720,0,0,1
1,0.29,0,0,1,0.3350,0,0,1
-1,0.34,0,1,0,0.4320,0,1,0
-1,0.61,1,0,0,0.7500,0,0,1
1,0.64,0,0,1,0.7110,1,0,0
-1,0.29,1,0,0,0.4130,1,0,0
1,0.63,0,1,0,0.7060,1,0,0
-1,0.29,0,1,0,0.4000,1,0,0
-1,0.51,1,0,0,0.6270,0,1,0
-1,0.24,0,0,1,0.3770,1,0,0
1,0.48,0,1,0,0.5750,0,1,0
1,0.18,1,0,0,0.2740,1,0,0
1,0.18,1,0,0,0.2030,0,0,1
1,0.33,0,1,0,0.3820,0,0,1
-1,0.20,0,0,1,0.3480,1,0,0
1,0.29,0,0,1,0.3300,0,0,1
-1,0.44,0,0,1,0.6300,1,0,0
-1,0.65,0,0,1,0.8180,1,0,0
-1,0.56,1,0,0,0.6370,0,0,1
-1,0.52,0,0,1,0.5840,0,1,0
-1,0.29,0,1,0,0.4860,1,0,0
-1,0.47,0,1,0,0.5890,0,1,0
1,0.68,1,0,0,0.7260,0,0,1
1,0.31,0,0,1,0.3600,0,1,0
1,0.61,0,1,0,0.6250,0,0,1
1,0.19,0,1,0,0.2150,0,0,1
1,0.38,0,0,1,0.4300,0,1,0
-1,0.26,1,0,0,0.4230,1,0,0
1,0.61,0,1,0,0.6740,1,0,0
1,0.40,1,0,0,0.4650,0,1,0
-1,0.49,1,0,0,0.6520,0,1,0
1,0.56,1,0,0,0.6750,1,0,0
-1,0.48,0,1,0,0.6600,0,1,0
1,0.52,1,0,0,0.5630,0,0,1
-1,0.18,1,0,0,0.2980,1,0,0
-1,0.56,0,0,1,0.5930,0,0,1
-1,0.52,0,1,0,0.6440,0,1,0
-1,0.18,0,1,0,0.2860,0,1,0
-1,0.58,1,0,0,0.6620,0,0,1
-1,0.39,0,1,0,0.5510,0,1,0
-1,0.46,1,0,0,0.6290,0,1,0
-1,0.40,0,1,0,0.4620,0,1,0
-1,0.60,1,0,0,0.7270,0,0,1
1,0.36,0,1,0,0.4070,0,0,1
1,0.44,1,0,0,0.5230,0,1,0
1,0.28,1,0,0,0.3130,0,0,1
1,0.54,0,0,1,0.6260,1,0,0

Test data.

# people_test.txt
#
-1,0.51,1,0,0,0.6120,0,1,0
-1,0.32,0,1,0,0.4610,0,1,0
1,0.55,1,0,0,0.6270,1,0,0
1,0.25,0,0,1,0.2620,0,0,1
1,0.33,0,0,1,0.3730,0,0,1
-1,0.29,0,1,0,0.4620,1,0,0
1,0.65,1,0,0,0.7270,1,0,0
-1,0.43,0,1,0,0.5140,0,1,0
-1,0.54,0,1,0,0.6480,0,0,1
1,0.61,0,1,0,0.7270,1,0,0
1,0.52,0,1,0,0.6360,1,0,0
1,0.3,0,1,0,0.3350,0,0,1
1,0.29,1,0,0,0.3140,0,0,1
-1,0.47,0,0,1,0.5940,0,1,0
1,0.39,0,1,0,0.4780,0,1,0
1,0.47,0,0,1,0.5200,0,1,0
-1,0.49,1,0,0,0.5860,0,1,0
-1,0.63,0,0,1,0.6740,0,0,1
-1,0.3,1,0,0,0.3920,1,0,0
-1,0.61,0,0,1,0.6960,0,0,1
-1,0.47,0,0,1,0.5870,0,1,0
1,0.3,0,0,1,0.3450,0,0,1
-1,0.51,0,0,1,0.5800,0,1,0
-1,0.24,1,0,0,0.3880,0,1,0
-1,0.49,1,0,0,0.6450,0,1,0
1,0.66,0,0,1,0.7450,1,0,0
-1,0.65,1,0,0,0.7690,1,0,0
-1,0.46,0,1,0,0.5800,1,0,0
-1,0.45,0,0,1,0.5180,0,1,0
-1,0.47,1,0,0,0.6360,1,0,0
-1,0.29,1,0,0,0.4480,1,0,0
-1,0.57,0,0,1,0.6930,0,0,1
-1,0.2,1,0,0,0.2870,0,0,1
-1,0.35,1,0,0,0.4340,0,1,0
-1,0.61,0,0,1,0.6700,0,0,1
-1,0.31,0,0,1,0.3730,0,1,0
1,0.18,1,0,0,0.2080,0,0,1
1,0.26,0,0,1,0.2920,0,0,1
-1,0.28,1,0,0,0.3640,0,0,1
-1,0.59,0,0,1,0.6940,0,0,1
This entry was posted in PyTorch. Bookmark the permalink.

3 Responses to Regression (People Income) Using PyTorch 1.12 on Windows 10/11

  1. Bravissimo!!!!!!
    πŸ‘πŸ‘πŸ‘πŸ‘πŸ‘πŸ‘πŸ‘πŸ‘πŸ‘πŸ‘

  2. Pingback: Regression Using PyTorch, Part 1: New Best Practices -- Visual Studio Magazine

  3. Pingback: Regression Using PyTorch, Part 1: New Best Practices - Visual Studio Magazine - BeatzzShopp Beats

Leave a comment