#### Wine Dataset Example Dec 1, 2023

- https://archive.ics.uci.edu/dataset/109/wine
- select only classes 1 and 2
- standardize the data
- apply nn1 single-layer neural net  

In [22]:
import pandas as pd
import numpy as np

# Read the raw table; the file has no header row, so columns are numbered.
df = pd.read_csv('wine.txt', header=None)

# The first column is the class; keep only rows whose class is below 3
# (i.e. classes 1 and 2).
df = df.loc[df.iloc[:, 0] < 3]

# Features: every column after the class column, as a plain ndarray.
X = df.iloc[:, 1:].to_numpy()

# Labels: shift classes 1, 2 down to 0, 1.
y = df.iloc[:, 0].to_numpy() - 1

# Standardize each feature column: subtract its mean, divide by its
# (population, ddof=0) standard deviation.
col_mean = X.mean(axis=0)
col_std = X.std(axis=0)
X = (X - col_mean) / col_std

# Sanity checks: eyeball the arrays for anything obviously wrong.
print(X)
print(y)

# Number of observations with label 1 (that is, original class 2).
print(y.sum())

# Total number of observations and features.
print(X.shape, y.shape)

[[ 1.452455   -0.29441432  0.30247764 ... -0.11242761  2.04002469
   0.782868  ]
 [ 0.28906609 -0.21455828 -0.67719651 ... -0.05291788  0.94143735
   0.74015178]
 [ 0.24388594  0.4471061   1.11324246 ... -0.17193734  0.45552372
   1.12459784]
 ...
 [-1.30353427  0.18472195  1.484843   ... -0.52899571 -1.0867239
  -0.92293343]
 [-0.64842207 -0.38567837 -0.13668663 ... -1.00507355 -0.36841679
  -1.27605426]
 [-1.02115832  2.66025935  0.13356831 ... -1.60017084 -0.81207706
  -0.59829008]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
71
(130, 13) (130,)


Single-layer neural net unchanged:

In [23]:
def f(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise.

    Accepts a scalar or a NumPy array and returns values in [0, 1].
    """
    decay = np.exp(-x)
    return 1.0 / (1.0 + decay)

def acc(a, y):
    """Fraction of activations that match their target after thresholding.

    An activation strictly greater than 0.5 counts as a prediction of 1,
    anything else as 0. `a` is indexed at every position of `y`.

    Raises ZeroDivisionError when `y` is empty.
    """
    total = len(y)
    hits = 0
    for i in range(total):
        predicted = int(a[i] > 0.5)
        hits += predicted == y[i]
    return hits / total

def nn1(X, y, l=0.01, epochs=100):
    """Train a single-layer sigmoid network with full-batch gradient descent.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Input features.
    y : ndarray, shape (n_samples,)
        Targets; `acc` compares them against 0/1 thresholded activations.
    l : float
        Learning rate.
    epochs : int
        Number of full passes over the data.

    Returns
    -------
    (w, sse)
        `w` is the weight vector after the last update. `sse` is the sum of
        squared errors from the last epoch's forward pass, i.e. measured
        just before that epoch's weight update. With `epochs == 0` no
        update happens and `sse` is the error of the initial weights.

    Progress (first three weights, squared error, accuracy) is printed about
    ten times per run.
    """
    # NOTE: this re-seeds NumPy's *global* generator, so the initial weights
    # are reproducible but the caller's random state is reset as well.
    np.random.seed(1)
    w = np.random.rand(X.shape[1]) - 0.5

    # Evaluate the initial error so `e` is defined even if the loop never
    # runs (epochs == 0 previously raised UnboundLocalError on return).
    e = f(np.dot(X, w)) - y

    # Integer reporting interval: float modulo (ep % (epochs / 10)) silently
    # skipped reports whenever epochs was not a multiple of 10.
    log_every = max(1, epochs // 10)

    for ep in range(epochs):
        a = f(np.dot(X, w))
        e = a - y
        # Gradient step on 0.5 * sum(e**2); a * (1 - a) is the sigmoid
        # derivative expressed through the activation itself.
        w += np.dot(-l * e * a * (1 - a), X)
        if ep % log_every == 0:
            # Weights shown are post-update; error/accuracy are pre-update.
            print(w[:3], sum(e**2), acc(a, y))
    return w, sum(e**2)


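Decent performance out of the box: training accuracy reaches 100% by epoch 50. Note that accuracy is measured on the training data itself; there is no held-out test set here.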

In [24]:
# Train on the standardized data with the same values as nn1's defaults;
# as the cell's last expression, the returned (weights, squared error)
# tuple is displayed below the progress lines.
nn1(X, y, l=0.01, epochs=100)

[-0.15483301  0.20297137 -0.49884981] 19.508949646246474 0.8153846153846154
[-0.48178972  0.07091579 -0.4839369 ] 7.660006129037332 0.9461538461538461
[-0.6413397  -0.03405067 -0.47485542] 5.2636785435757805 0.9692307692307692
[-0.75635231 -0.11506279 -0.4630825 ] 3.8240043470349425 0.9846153846153847
[-0.84709687 -0.17697496 -0.45944595] 2.88164711332596 0.9846153846153847
[-0.91817971 -0.22440738 -0.47364194] 2.331051689012861 1.0
[-0.9749989  -0.26101163 -0.49926342] 1.9902345235545975 1.0
[-1.02228567 -0.28979791 -0.52883951] 1.7517003229519756 1.0
[-1.06294193 -0.31301628 -0.55876192] 1.5714000248584568 1.0
[-1.09872332 -0.33223505 -0.58761573] 1.4288042249757342 1.0


(array([-1.12769882, -0.34700508, -0.61226988,  0.83368444, -0.1558362 ,
        -0.11327274, -0.30044025,  0.08650578,  0.15932211, -0.55075162,
         0.15204778, -0.33284124, -1.3462867 ]),
 1.3232322972714838)