-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRNN.py
59 lines (45 loc) · 1.79 KB
/
RNN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import pandas as pd
# Training settings handed to `model.fit` at the bottom of this script.
num_epochs = 100
num_batch = 128
# Number of consecutive rows per sliding window; also the sequence length in
# the LSTM layer's `input_shape`.
time_step = 10
# First argument of the LSTM layer (its number of units).
hidden_dim = 100
# CSV file read by `get_training_testing_data_from_csv` when the script runs.
_file_path = "cleaned.csv"
def get_training_testing_data_from_csv(csv_file):
    """Load the feature matrix and the label matrix from a CSV file.

    Rows with a missing value in *any* column of the file are dropped before
    the columns of interest are selected, so the two returned arrays always
    have the same number of rows.

    NOTE: despite the function's name, no train/test split happens here.
    Exactly two arrays are returned; callers that need held-out data must
    split them themselves.

    Args:
        csv_file: Path or file-like object accepted by ``pandas.read_csv``.
            The file must provide the columns "NUM. OF PROVIDERS",
            "PERCENT POSITIVE", "TOTAL PATIENTS" and "TOTAL SPECIMENS".

    Returns:
        A tuple ``(X, Y)`` of numpy ndarrays:
            X -- shape ``[n_rows, 3]``, the three feature columns in the
                 order listed above (per the original notes, presumably one
                 row per week of data).
            Y -- shape ``[n_rows, 1]``, the "TOTAL SPECIMENS" column.

    Raises:
        KeyError: if one of the required columns is absent from the file.
    """
    feature_columns = ["NUM. OF PROVIDERS", "PERCENT POSITIVE", "TOTAL PATIENTS"]
    label_columns = ["TOTAL SPECIMENS"]

    frame = pd.read_csv(csv_file)
    # Drop incomplete rows first so features and labels stay row-aligned.
    complete_rows = frame.dropna(axis=0, how='any')
    return complete_rows[feature_columns].values, complete_rows[label_columns].values
# Load the feature matrix X and the label matrix Y from the CSV at `_file_path`.
# This runs as a module-level statement, i.e. as soon as the file is executed.
X, Y = get_training_testing_data_from_csv(_file_path)
def transform_training_data(X, window_size=None):
    """Cut a 2-D matrix into overlapping sliding windows along its rows.

    Window ``i`` holds rows ``i`` through ``i + window_size - 1`` of ``X``,
    so consecutive windows overlap by ``window_size - 1`` rows.

    Args:
        X: 2-D array-like of shape ``[n_rows, feature_dim]``.
        window_size: Number of consecutive rows per window. Defaults to the
            module-level ``time_step`` when omitted, which is what the
            original single-argument call relies on.

    Returns:
        A float64 ndarray of shape
        ``[n_rows - window_size + 1, window_size, feature_dim]``.
        When ``n_rows == window_size - 1`` the result has zero windows
        (kept for backward compatibility with the previous behaviour).

    Raises:
        ValueError: if ``X`` is not 2-D, if ``window_size`` is smaller than 1,
            or if ``X`` has fewer than ``window_size - 1`` rows.
    """
    if window_size is None:
        # Fall back to the window length the rest of the script is built on.
        window_size = time_step
    if window_size < 1:
        raise ValueError(
            "window_size must be at least 1, got {}".format(window_size)
        )

    data = np.asarray(X)
    if data.ndim != 2:
        raise ValueError(
            "X must be 2-D [n_rows, feature_dim], got {} dimension(s)".format(data.ndim)
        )

    num_windows = data.shape[0] - window_size + 1
    if num_windows < 0:
        # Previously surfaced as numpy's "negative dimensions" error.
        raise ValueError(
            "X has {} rows, too few for windows of {} rows".format(
                data.shape[0], window_size
            )
        )

    # row_index[i, j] == i + j: the j-th row of the i-th window.
    row_index = np.arange(num_windows)[:, None] + np.arange(window_size)[None, :]
    # Integer-array indexing gathers every window in one step; the cast keeps
    # the float64 dtype of the zero-filled buffer the loop version returned.
    return data[row_index].astype(np.float64)
# Number of feature columns per time step, taken from the loaded matrix.
feature_dim = X.shape[1]

# Turn the weekly rows into overlapping windows of `time_step` rows each.
x_windows = transform_training_data(X)

# Window i ends at row i + time_step - 1, and that row's label is its target.
# The first `time_step - 1` labels therefore have no window and are dropped so
# inputs and targets have the same length.
y_aligned = Y[time_step - 1:]

# Chronological hold-out: the most recent 20% of the windows are used for
# validation, so the model is never validated on data older than what it
# was trained on.
test_fraction = 0.2
split_at = int(len(x_windows) * (1 - test_fraction))
x_train, x_test = x_windows[:split_at], x_windows[split_at:]
y_train, y_test = y_aligned[:split_at], y_aligned[split_at:]

model = Sequential()
# Only the final hidden state is needed (one prediction per window), so the
# LSTM must not return the whole sequence.
model.add(LSTM(hidden_dim, input_shape=(time_step, feature_dim)))
# Single linear output unit: the target is one continuous value per sample,
# which is what the mean-squared-error loss below expects.
model.add(Dense(1))
# Mean absolute error is reported instead of accuracy, which carries no
# meaning for a regression target.
model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=['mae'])
# Keyword arguments on purpose: positionally, `fit` expects batch_size before
# epochs, which is the opposite of the order these two were passed in before.
model.fit(
    x_train,
    y_train,
    batch_size=num_batch,
    epochs=num_epochs,
    validation_data=(x_test, y_test),
)