With fake data the shape of my input, in the following minimum working example, I illustrate how I create my autoencoder network, and the problem I'm facing with numpy ndarray making the prediction.
import numpy as np
import pandas as pd
import random
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import mean_squared_error
class SingleEncoder:
def __init__(self, train, test):
self.x_train = train
self.x_test = test
self.first_dim = 1
self.second_dim = 100
self.channels = 4
self.input_dim = (self.first_dim, self.second_dim,
self.channels) #(1, 100, 4)
def buildModel(self):
input_layer = self.input_dim
autoencoder = Sequential()
activ='relu'
# encoder
autoencoder.add(Dense(200, activation='relu', input_shape=input_layer))
autoencoder.add(Dense(100, activation='relu'))
autoencoder.add(Dense(80, activation='linear'))
#decoder
autoencoder.add(Dense(80, activation='linear'))
autoencoder.add(Dense(100, activation='relu'))
autoencoder.add(Dense(200, activation='relu'))
autoencoder.add(Dense(self.channels, activation='relu'))
autoencoder.compile(optimizer='adam', loss='mae',
metrics=['mean_squared_error'])
autoencoder.summary()
filepath = "weights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss',
verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
autoencoder.fit(self.x_train, self.x_train, epochs=10, batch_size=32,
shuffle=True,callbacks=callbacks_list)
return autoencoder
#
Network loss:
def LosScore2(x_pred, x_test):
mse = []
for i in range(len(x_test)):
mse.append(mean_squared_error(pd.DataFrame(x_pred[i]), pd.DataFrame(x_test[i])))
return mse
Generating fake data:
#Generate sample data, the shape of expected input
# X: ND-Array containing fixed-length segments of shape (1,100,4)
X = np.random.randn(2000, 1, 100, 4)
a,b,c = np.repeat(0, 700), np.repeat(1, 700), np.repeat(2, 600)
y = np.hstack((a,b,c))
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=7)
LABELS= list(set(np.ndarray.flatten(y_train)))
Train the network:
models = []
for i in range(len(LABELS)):
print(LABELS[i])
sub_train = x_train[y_train == i]
sub_test = x_test[y_test == i]
autoencoder = SingleEncoder(sub_train, sub_test)
autoencoder = autoencoder.buildModel()
models.append(autoencoder)
This MWE works perfectly, but when I try to evaluate on the test set I encountered an error due to input shape as below:
print("Evaluating on test set -> ")
x_pred = []
# for each model
# predition
for e in range(len(models)):
x_pred.append(models[e].predict(x_test))
scored0 = (LosScore2(x_pred[0], x_test))
scored1 = (LosScore2(x_pred[1], x_test))
scored2 = (LosScore2(x_pred[2], x_test))
Here goes the error:
Evaluating on test set ->
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-11-989c507cfd63> in <module>()
9 #scored0 = (LosScore2(x_pred[0][np.newaxis], x_test))
10
---> 11 scored0 = (LosScore2(x_pred[0], x_test))
12 scored1 = (LosScore2(x_pred[1], x_test))
13 scored2 = (LosScore2(x_pred[2], x_test))
3 frames
<ipython-input-9-356737ea1f97> in LosScore2(x_pred, x_test)
2 mse = []
3 for i in range(len(x_test)):
----> 4 mse.append(mean_squared_error(pd.DataFrame(x_pred[i]), pd.DataFrame(x_test[i])))
5 return mse
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
462 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
463 else:
--> 464 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
465
466 # For data is list-like, or Iterable (will consume into list)
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in init_ndarray(values, index, columns, dtype, copy)
167 # by definition an array here
168 # the dtypes will be coerced to a single dtype
--> 169 values = prep_ndarray(values, copy=copy)
170
171 if dtype is not None:
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in prep_ndarray(values, copy)
293 values = values.reshape((values.shape[0], 1))
294 elif values.ndim != 2:
--> 295 raise ValueError("Must pass 2-d input")
296
297 return values
ValueError: Must pass 2-d input
I understand this has to do with test data shape, but I can't figure out how to fix it.