# NeuralNetwork.py
#Written by Matteo Bjornsson and Nick Stone
#################################################################### MODULE COMMENTS ##############################################################################
# This file is the neural network class. This file has all of the functionality of a neural network that will handle either classification or regression data sets.#
# This program takes in a series of hyperparameters that should be tuned for each different neural network, and assumes that all data being inputted has been      #
# normalized. Additionally, this program uses sigmoid as the hidden layer activation function, softmax and cross entropy for classification, and sigmoid and MSE   #
# for regression. This program will calculate both the forward pass and back propagation for the neural network.                                                   #
#################################################################### MODULE COMMENTS ##############################################################################
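# A minimal usage sketch (illustrative only; the shapes and hyperparameter values
# below are assumptions, not taken from any particular data set):
#   net = NeuralNetwork(input_size=9, hidden_layers=[9], regression=False, output_size=2)
#   X = np.random.rand(9, 50)                      # 9 features, 50 examples stored as columns
#   labels = np.zeros((2, 50)); labels[0, :] = 1   # one-hot ground truth, one column per example
#   net.set_input_data(X, labels)
#   error = net.forward_pass()                     # cross entropy error for classification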
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
class NeuralNetwork:
#On creation of a Neural Network object do the following
def __init__(self, input_size: int, hidden_layers: list, regression: bool,
output_size: int) -> None:
"""
:param input_size: int. dimension of the data set (number of features in x).
:param hidden_layers: list. [n1, n2, n3..]. List of number of nodes in
each hidden layer. empty list == no hidden layers.
:param regression: bool. Is this network estimating a regression output?
:param output_size: int. Number of output nodes (1 for regression, otherwise 1 for each class)
"""
self.input_size = input_size
self.hidden_layers = hidden_layers
self.regression = regression
self.output_size = output_size
self.layer_node_count = [input_size] + hidden_layers + [output_size]
self.layers = len(self.layer_node_count)
# weights, biases, and layer outputs are lists with a length corresponding to
# the number of hidden layers + 1. Therefore weights for layer 0 are found in
# weights[0], weights for the output layer are weights[-1], etc.
self.weights = self.generate_weight_matrices()
self.biases = self.generate_bias_matrices()
# activation_outputs[0] is the input values X, where activation_outputs[1] is the
# activation values output from layer 1. activation_outputs[-1] represents
# the final output of the neural network
self.activation_outputs = [None] * self.layers
self.layer_derivatives = [None] * self.layers
self.data_labels = None
#following is used to plot error
self.error_y = []
self.error_x = []
self.pass_count = 0
################# INITIALIZATION HELPERS ###################################
#Function generates the weights and sets the object variable initial_weights to the newly generated weight values
def generate_weight_matrices(self):
# initialize weights randomly, close to 0
# generate the matrices that hold the input weights for each layer, returned as a list of matrices.
# will need 1 weight matrix for 0 hidden layers, 2 for 1 hidden layer, 3 for 2 hidden layers.
weights = []
counts = self.layer_node_count
for i in range(self.layers):
if i == 0:
weights.append([])
else:
# initialize a (nodes, inputs) dimension matrix for each layer.
# layer designated by order of append (position in weights list)
layer_nodes = counts[i]
layer_inputs = counts[i-1]
weights.append(np.random.randn(layer_nodes, layer_inputs) * 1/layer_inputs) # or * 0.01
self.initial_weights = weights
return weights
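# For example, with layer_node_count = [9, 5, 2] this produces weights[0] = [],
# weights[1] of shape (5, 9) and weights[2] of shape (2, 5), so weights[i] maps
# the activations of layer i-1 onto the nodes of layer i.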
#Generate the bias for the given neural network
def generate_bias_matrices(self):
# initialize biases as 0
# generate the matrices that hold the bias value for each layer, returned as a list.
# will need 1 bias matrix for 0 hidden layers, 2 for 1 hidden layer, 3 for 2 hidden layers.
biases = []
counts = self.layer_node_count
for i in range(self.layers):
if i == 0:
biases.append([])
else:
# initialize a (nodes, 1) dimension matrix of zeros for each layer.
# layer designated by order of append (position in biases list)
layer_nodes = counts[i]
biases.append(np.zeros((layer_nodes, 1)))
return biases
#Set the object's input data and labels to the data set and labels being passed in
def set_input_data(self, X: np.ndarray, labels: np.ndarray) -> None:
''' Public method used to set the data input to the network and save the
ground truth labels for error evaluation.
Return: None
'''
self.activation_outputs[0] = X
self.data_labels = labels
############################################################
################# ACTIVATION FUNCTIONS #####################
############################################################
#function to calculate the sigmoid value
def sigmoid(self, z: np.ndarray) -> np.ndarray:
''' Returns sigmoid function of z: s(z) = (1 + e^(-z))^-1
:param z: weighted sum of layer, to be passed through sigmoid fn
Return: matrix
'''
# trim the matrix to prevent overflow
z[z < -700] = -700
# return the sigmoid
return 1 / (1 + np.exp(-z))
#Function to calculate the derivative of the sigmoid function
def d_sigmoid(self, z):
""" Derivative of the sigmoid function: d/dz s(z) = s(z)(1 - s(z))
Input: real number or numpy matrix
Return: real number or numpy matrix.
"""
return self.sigmoid(z) * (1-self.sigmoid(z))
#Function to calculate the soft max value
# source: https://stackoverflow.com/questions/34968722/how-to-implement-the-softmax-function-in-python
def SoftMax(self,Values):
# trim matrix to prevent overflow
Values[Values > 700] = 700
Values[Values < -700] = -700
# return softmax calculation
numerator = np.exp(Values)
denom = np.sum(np.exp(Values), axis=0)
return numerator/denom
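# Illustrative example (values chosen for this comment, not from the program):
# SoftMax(np.array([[0.0], [np.log(3.0)]])) yields approximately [[0.25], [0.75]],
# since exp is applied elementwise and each column is normalized to sum to 1.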
################# Error functions #####################
#Generates the mean squared error for a given ground truth and estimate
def mean_squared_error(self, ground_truth: np.ndarray, estimate:np.ndarray) -> float:
""" takes in matrices, calculates the mean squared error w.r.t. target.
Input matrices must be the same size.
:param ground_truth: matrix holding ground truth for each training example
:param estimate: matrix holding network estimate for each training example
"""
m = ground_truth.shape[1]
return (1/m)* np.sum(np.square(ground_truth - estimate))
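# Worked example (illustrative values): ground_truth = [[1, 0]] and
# estimate = [[0.5, 0.5]] gives m = 2 and (1/2) * (0.25 + 0.25) = 0.25.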
#Function to calculate the cross entropy value
def CrossEntropy(self,Ground_Truth,Estimate):
#Calculate the number of samples (columns) in the estimate
Num_Samples = Estimate.shape[1]
# output = self.SoftMax(Ground_Truth)
#Take the log of the estimate (make sure it's not 0 by adding a small value) and then multiply by the ground truth
Logarithmic = Ground_Truth * np.log(Estimate + .000000000000001)
#Return the negative sum of the logs divided by the number of samples
return - np.sum(Logarithmic) / Num_Samples
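# Worked example (illustrative values): for a single one-hot column
# Ground_Truth = [[1], [0]] and Estimate = [[0.8], [0.2]], Num_Samples = 1 and the
# result is -log(0.8), approximately 0.22.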
##################################################################
################ FORWARD PASS ###################################
##################################################################
#Function that generates the net input
def calculate_net_input(self, W: np.ndarray, X: np.ndarray, b: np.ndarray) -> np.ndarray:
""" Return Z = W*X + b
:param W: matrix of weights of input values incident to the layer
:param X: matrix input values incident to the layer
:param b: matrix of bias for the layer
Return: Z, the matrix of weighted sums for the layer
"""
Z = np.dot(W, X) + b
return Z
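# Shape sketch (illustrative sizes): with W of shape (5, 9), X of shape (9, 50) and a
# broadcastable bias b, Z has shape (5, 50), one column of weighted sums per example.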
#Function: responsible for calculating the sigmoid activation based on the net input value
def calculate_sigmoid_activation(self, W: np.ndarray, X: np.ndarray, b: np.ndarray) -> np.ndarray:
""" Return A = sigmoid(W*X + b)
:param W: matrix of weights of input values incident to the layer
:param X: matrix input values incident to the layer
:param b: matrix of bias for the layer
Return: A, the matrix of activation outputs for the layer
"""
Z = self.calculate_net_input(W, X, b)
A = self.sigmoid(Z)
return A
#Function will calculate the forward pass and will update the activation function outputs
def forward_pass(self) -> float:
""" Starting from the input layer propagate the inputs through to the output
layer, storing each layer's activation output along the way.
Return: float. The error of the network output w.r.t. the ground truth labels.
"""
# iterate through each layer, starting at inputs
for i in range(self.layers):
# the activation output is known for the first layer (input data)
if i == 0:
continue
# weights into layer i
W = self.weights[i]
# outputs of previous layer into layer i
A = self.activation_outputs[i-1]
# bias of layer i
b = self.biases[i]
# Calculate the activation output for the layer, store for later access
#if this is a classification network and i is the output layer, calculate softmax
if self.regression == False and i == self.layers -1:
self.activation_outputs[i] = (
#Calculate the softmax function
self.SoftMax(self.calculate_net_input(W, A, b))
)
# otherwise activation is always sigmoid
else:
self.activation_outputs[i] = (
self.calculate_sigmoid_activation(W, A, b)
)
# output of the network is the activation output of the last layer
final_estimate = self.activation_outputs[-1]
#calculate the error w.r.t. the ground truth
if self.regression == False:
error = self.CrossEntropy(self.data_labels,final_estimate)
else:
error = self.mean_squared_error(self.data_labels, final_estimate)
self.pass_count += 1
# save the error to be plotted over time
# if self.pass_count > 1:
# self.error_y.append(error)
# self.error_x.append(self.pass_count)
return error
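# Note: the error returned here is also what fitness() (below) reports after a flat
# weight vector is loaded into the network, so a single forward pass doubles as the
# fitness evaluation for population-based weight search.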
##################### CLASSIFICATION #######################################
#Given a numpy array of data and labels return a classification guess for the data set
def classify(self, X: np.ndarray, Labels: np.ndarray) -> list:
""" Starting from the input layer propogate the inputs through to the output
layer.
:param X: test data to be classified
Return: a list of [ground truth, estimate] pairs.
"""
#Set the input data from the parameters
self.set_input_data(X,Labels)
#Run the forward pass
self.forward_pass()
#Return the labels from the activation outputs
return self.activation_outputs[-1]
########################## pick class value ##################################
#Given an array of probabilities, pick the index with the highest probability in each column
def PickLargest(self, Probabilities):
# print("Pick largest input:", type(Probabilities), Probabilities.shape, '\n', Probabilities)
Estimation = list()
#For every column in the probability matrix
for i in range(Probabilities.shape[1]):
#Initialize the index variable to 0
Index = 0
#Set the value based on the first probability position
Value = Probabilities[0][i]
#For each of the rows in the probability matrix
for j in range(len(Probabilities)):
#If the probability value is greater than the value above
if Probabilities[j][i] > Value:
#Set the new value
Value = Probabilities[j][i]
#Record the new index position
Index = j
#Append the index of the value to the array
Estimation.append(Index)
#Return the array
return Estimation
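# Note: this is equivalent to a column-wise argmax, e.g. (illustrative values)
# PickLargest(np.array([[0.1, 0.7], [0.9, 0.3]])) returns [1, 0], the same as
# list(np.argmax(Probabilities, axis=0)).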
####################### FITNESS ############################################
def fitness(self, position: np.ndarray) -> float:
""" Load a flat vector of weights (e.g. the position of a candidate solution
from a population-based optimizer) into the network and return the forward
pass error as the fitness value.
"""
weights = self.weight_transform(position)
self.weights = weights
return self.forward_pass()
def weight_transform(self, position) -> list:
layers = [self.input_size] + self.hidden_layers + [self.output_size]
weights = [None] * len(layers)
weights[0] = []
# transform the flat position vector into a list of weight matrices
# for the neural network
for i in range(len(layers)-1):
l = layers[i] * layers[i+1]
w = position[:l]
position = position[l:]
weights[i+1] = w.reshape(layers[i+1], layers[i])
return weights
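# Worked example (illustrative sizes): with layers = [3, 2, 1] the position vector
# must have length 3*2 + 2*1 = 8; the first 6 entries become weights[1] with shape
# (2, 3) and the last 2 become weights[2] with shape (1, 2).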
if __name__ == '__main__':
# TD = TestData.TestData()
# X , labels = TD.classification()
# this code is for testing many points at once from real data
df = pd.read_csv("./NormalizedData/Cancer.csv")
D = df.to_numpy()
labels = D[:, -1]
labels = labels.reshape(1, labels.shape[0]).T
D = np.delete(D, -1, 1)
D = D.T
X = D
labels = labels.T
#labels = labels.T
input_size = X.shape[0]
hidden_layers = [input_size]
regression = False
output_size = 3
NN = NeuralNetwork(
input_size, hidden_layers, regression, output_size
)
NN.set_input_data(X, labels)
# print(vars(NN))
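# A hedged follow-up sketch (commented out, mirroring the line above): run the
# randomly initialized network on the loaded data and inspect a few predictions.
# Note that classify() expects labels shaped to match the network output, so the
# one-hot encoding step is an assumption not shown here.
# estimates = NN.classify(X, labels)
# predicted_classes = NN.PickLargest(estimates)
# print(predicted_classes[:10])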