I am getting some data from a pandas dataframe with the following shape
df.head()
>>>
Value USD Drop 7 Up 7 Mean Change 7 Change Predict
0.06480 2.0 4.0 -0.000429 -0.00420 4
0.06900 1.0 5.0 0.000274 0.00403 2
0.06497 1.0 5.0 0.000229 0.00007 2
0.06490 1.0 5.0 0.000514 0.00200 2
0.06290 2.0 4.0 0.000229 -0.00050 3
The first 5 columns are intended to be the X
and predict the y
. This is how I preprocess the data for the model
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score
from keras.layers import LSTM
from sklearn import preprocessing# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):result = []for x in df.columns:if x != target:result.append(x)# find out the type of the target column. Is it really this hard? :(target_type = df[target].dtypestarget_type = target_type[0] if hasattr(target_type, '__iter__') else target_type# Encode to int for classification, float otherwise. TensorFlow likes 32 bits.if target_type in (np.int64, np.int32):# Classificationdummies = pd.get_dummies(df[target])return df.as_matrix(result).astype(np.float32), dummies.as_matrix().astype(np.float32)else:# Regressionreturn df.as_matrix(result).astype(np.float32), df.as_matrix([target]).astype(np.float32)# Encode text values to indexes(i.e. [1],[2],[3] for red,green,blue).
def encode_text_index(df, name):le = preprocessing.LabelEncoder()df[name] = le.fit_transform(df[name])return le.classes_df['Predict'].value_counts()
>>>
4 1194
3 664
2 623
0 405
1 14
Name: Predict, dtype: int64predictions = encode_text_index(df, "Predict")
predictions
>>>
array([0, 1, 2, 3, 4], dtype=int64)X,y = to_xy(df,"Predict")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, shuffle=False)X_train
>>>
array([[ 6.4800002e-02, 2.0000000e+00, 4.0000000e+00, -4.2857142e-04,-4.1999999e-03],[ 6.8999998e-02, 1.0000000e+00, 5.0000000e+00, 2.7414286e-04,4.0300000e-03],[ 6.4970002e-02, 1.0000000e+00, 5.0000000e+00, 2.2857143e-04,7.0000002e-05],...,[ 9.5987000e+02, 5.0000000e+00, 2.0000000e+00, -1.5831429e+01,-3.7849998e+01],[ 9.9771997e+02, 5.0000000e+00, 2.0000000e+00, -1.6948572e+01,-1.8250000e+01],[ 1.0159700e+03, 5.0000000e+00, 2.0000000e+00, -1.3252857e+01,-7.1700001e+00]], dtype=float32)y_train
>>>
array([[0., 0., 0., 0., 1.],[0., 0., 1., 0., 0.],[0., 0., 1., 0., 0.],...,[0., 0., 0., 0., 1.],[0., 0., 0., 0., 1.],[0., 0., 0., 0., 1.]], dtype=float32)X_train[1]
>>>
array([6.8999998e-02, 1.0000000e+00, 5.0000000e+00, 2.7414286e-04,4.0300000e-03], dtype=float32)X_train.shape
>>>
(2320, 5)X_train[1].shape
>>>
(5,)
and finally the LSTM model (also it might look like not the best way to write one so will appreciate a rewrite of the inner layers as well if that's the case)
model = Sequential()
#model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2, input_shape=(None, 1)))
model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape=X_train.shape))
model.add(LSTM(50, dropout=0.2, return_sequences=True))
model.add(LSTM(50, dropout=0.2, return_sequences=True))
model.add(LSTM(50, dropout=0.2, return_sequences=True))
#model.add(Dense(50, activation='relu'))
model.add(Dense(y_train.shape[1], activation='softmax'))#model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#model.fit(X_train, y_train, epochs=1000)model.compile(loss='categorical_crossentropy', optimizer='adam')
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-2, patience=15, verbose=1, mode='auto')
checkpointer = ModelCheckpoint(filepath="best_weights.hdf5", verbose=0, save_best_only=True) # save best modelmodel.fit(X_train, y_train, validation_data=(X_test, y_test), callbacks=[monitor,checkpointer], verbose=2, epochs=1000)
model.load_weights('best_weights.hdf5') # load weights from best model
running this throws this error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-67-a17835a382f6> in <module>()15 checkpointer = ModelCheckpoint(filepath="best_weights.hdf5", verbose=0, save_best_only=True) # save best model16
---> 17 model.fit(X_train, y_train, validation_data=(X_test, y_test), callbacks=[monitor,checkpointer], verbose=2, epochs=1000)18 model.load_weights('best_weights.hdf5') # load weights from best modelc:\users\samuel\appdata\local\programs\python\python35\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)948 sample_weight=sample_weight,949 class_weight=class_weight,
--> 950 batch_size=batch_size)951 # Prepare validation data.952 do_validation = Falsec:\users\samuel\appdata\local\programs\python\python35\lib\site-packages\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)747 feed_input_shapes,748 check_batch_axis=False, # Don't enforce the batch size.
--> 749 exception_prefix='input')750 751 if y is not None:c:\users\samuel\appdata\local\programs\python\python35\lib\site-packages\keras\engine\training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)125 ': expected ' + names[i] + ' to have ' +126 str(len(shape)) + ' dimensions, but got array '
--> 127 'with shape ' + str(data_shape))128 if not check_batch_axis:129 data_shape = data_shape[1:]ValueError: Error when checking input: expected lstm_48_input to have 3 dimensions, but got array with shape (2320, 5)
I've tried a lot of variations of the X_train input shape but every single one throws some error, I also checked the Keras docs but it wasn't clear on how the data should be fed to the model
Attempt No. 1 from Suggestions
First is reshaping X_train
data = np.resize(X_train,(X_train.shape[0],1,X_train.shape[1]))
model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape=data.shape))
this fails with an error
ValueError: Input 0 is incompatible with layer lstm_52: expected ndim=3, found ndim=4
suggested I feed it in as
model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape=X_train.shape[1:]))
that throws the same error
ValueError: Input 0 is incompatible with layer lstm_63: expected ndim=3, found ndim=2
Sugestion 2
use the default X,y from pandas
y = df['Predict']
X = df[['Value USD', 'Drop 7', 'Up 7', 'Mean Change 7', 'Change']]X = np.array(X)
y = np.array(y)X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, shuffle=False)
also that LSTM expect input in the following way (batch_size, timesteps, input_dim)
so I tried this
model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape=(100, 100, X_train.shape)))
which throws this error
TypeError: Error converting shape to a TensorShape: int() argument must be a string, a bytes-like object or a number, not 'tuple'.
and a different way
model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape=(100, 100, X_train[1].shape)))
returns the same error
TypeError: Error converting shape to a TensorShape: int() argument must be a string, a bytes-like object or a number, not 'tuple'.