Consider a time series problem with datasets x_train, x_test of shape (n_steps, window_size, n_features) and y_train, y_test of shape (n_steps, 1). I have fitted an LSTM model using the following code:
import numpy as np
import tensorflow as tf
from tensorflow import keras
window_size=10
x_train, x_test, y_train, y_test = load_data(window_size=window_size)
keras.utils.set_random_seed(42)
np.random.seed(42)
tf.random.set_seed(42)
model = keras.Sequential([
    keras.layers.LSTM(64, input_shape=(window_size, x_train.shape[-1]), return_sequences=True),
    keras.layers.Dropout(0.1),
    keras.layers.LSTM(8),
    keras.layers.Dropout(0.1),
    keras.layers.Dense(y_train.shape[1], activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(x_train, y_train, epochs=10, batch_size=2000, validation_split=0.2, verbose=2, callbacks=[early_stopping])
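For reference, a quick way to verify which weights belong to which layer is to print the per-layer weight shapes (a minimal sketch; the Dropout layers hold no weights, so their lists come back empty):
# Minimal sketch: list each layer's weight shapes to check that the
# get_weights() unpacking below targets the intended layers.
for idx, layer in enumerate(model.layers):
    print(idx, layer.name, [w.shape for w in layer.get_weights()])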
I am trying to exactly replicate the computation behind Keras's model.predict() in order to produce the same prediction for one given time-series step:
i = 0
# KERAS PREDICTION
datapoint = x_test[i:i+1]
keras_prediction = model.predict(datapoint)
from tensorflow.keras.models import Model
intermediate_layer_model = Model(
    inputs=model.input,
    outputs=[layer.output for layer in model.layers if 'dropout' not in layer.name.lower()]
)
x_sample = np.expand_dims(x_test[0], axis=0)
intermediate_outputs = intermediate_layer_model.predict(x_sample)
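As a sanity check (a minimal sketch, relying on the fact that the filtered output list keeps the Dense layer last and that Dropout is a no-op at inference time), the last intermediate output should match the direct prediction:
# Minimal sketch: the final non-dropout output should equal model.predict().
print("predict matches last intermediate output:",
      np.allclose(keras_prediction, intermediate_outputs[-1], atol=1e-6))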
# MANUAL PREDICTION
# Sequential layer indices: 0 = LSTM(64), 1 = Dropout, 2 = LSTM(8), 3 = Dropout, 4 = Dense
lstm1_weights, lstm1_recurrent_weights, lstm1_bias = model.layers[0].get_weights()
lstm2_weights, lstm2_recurrent_weights, lstm2_bias = model.layers[2].get_weights()
dense_weights, dense_bias = model.layers[4].get_weights()
def lstm_step(x, h_prev, c_prev, W, U, b):
    # Flatten a single-timestep slice of shape (1, 1, n_features) to (1, n_features)
    if x.shape[1] == 1:
        x = np.reshape(x, [1, -1])
    # Fused affine transform for all four gates
    z = np.dot(x, W) + np.dot(h_prev, U) + b
    z0, z1, z2, z3 = np.split(z, 4, axis=1)
    i = 1 / (1 + np.exp(-z0))  # input gate
    f = 1 / (1 + np.exp(-z1))  # forget gate
    o = 1 / (1 + np.exp(-z2))  # output gate
    g = np.tanh(z3)            # candidate cell state
    c_next = f * c_prev + i * g
    h_next = o * np.tanh(c_next)
    return h_next, c_next
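For reference, Keras concatenates the LSTM gates in the fused kernel in the order input gate, forget gate, cell candidate, output gate, so a step following that convention would split z differently (a minimal sketch mirroring lstm_step above; lstm_step_keras_order is an illustrative helper, not part of the original code):
# Minimal sketch: same step, but splitting z in Keras's (i, f, c~, o) gate order.
def lstm_step_keras_order(x, h_prev, c_prev, W, U, b):
    if x.shape[1] == 1:
        x = np.reshape(x, [1, -1])
    z = np.dot(x, W) + np.dot(h_prev, U) + b
    z_i, z_f, z_c, z_o = np.split(z, 4, axis=1)
    i = 1 / (1 + np.exp(-z_i))  # input gate
    f = 1 / (1 + np.exp(-z_f))  # forget gate
    g = np.tanh(z_c)            # candidate cell state
    o = 1 / (1 + np.exp(-z_o))  # output gate
    c_next = f * c_prev + i * g
    h_next = o * np.tanh(c_next)
    return h_next, c_next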
def dense_step(x, W, b):
    # Affine transform followed by sigmoid, matching the Dense layer's activation
    return 1 / (1 + np.exp(-(np.dot(x, W) + b)))
def manual_prediction(x, window_size,
                      lstm1_weights, lstm1_recurrent_weights, lstm1_bias,
                      lstm2_weights, lstm2_recurrent_weights, lstm2_bias,
                      dense_weights, dense_bias):
    # First LSTM layer: unroll over the window, keeping every hidden state
    # (mirrors return_sequences=True)
    outputs1 = []
    h1 = np.zeros((1, 64))
    c1 = np.zeros((1, 64))
    for t in range(window_size):
        h1, c1 = lstm_step(x[:, t:t+1, :], h1, c1,
                           lstm1_weights, lstm1_recurrent_weights, lstm1_bias)
        outputs1.append(h1)
    outputs1 = np.stack(outputs1, axis=1)  # shape (1, window_size, 64)
    # Second LSTM layer: a single step on the last hidden state of layer 1
    h2 = np.zeros((1, 8))
    c2 = np.zeros((1, 8))
    h2, c2 = lstm_step(outputs1[:, -1, :], h2, c2,
                       lstm2_weights, lstm2_recurrent_weights, lstm2_bias)
    # Dense layer
    output = dense_step(h2, dense_weights, dense_bias)
    # Collect all outputs for comparison against the Keras intermediate outputs
    intermediate_outputs = [(outputs1, outputs1.shape), (h2, h2.shape), (output, output.shape)]
    return output, intermediate_outputs
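One thing worth noting: because the first layer uses return_sequences=True, the second LSTM in the Keras model iterates over all window_size hidden states of layer 1, rather than taking a single step on the last one. A sketch of that second layer unrolled the same way as the first, reusing the lstm_step helper above:
# Minimal sketch: unroll the second LSTM over the full sequence from layer 1,
# as Keras does, instead of a single step on the last hidden state.
h2 = np.zeros((1, 8))
c2 = np.zeros((1, 8))
for t in range(window_size):
    h2, c2 = lstm_step(outputs1[:, t, :], h2, c2,
                       lstm2_weights, lstm2_recurrent_weights, lstm2_bias)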
prediction, debug_info = manual_prediction(datapoint, window_size,
                                           lstm1_weights, lstm1_recurrent_weights, lstm1_bias,
                                           lstm2_weights, lstm2_recurrent_weights, lstm2_bias,
                                           dense_weights, dense_bias)
print("Comparative Output Analysis:")
for idx, (keras_output, manual_output_info) in enumerate(zip(intermediate_outputs, debug_info)):
keras_shape, keras_data = keras_output.shape, keras_output
manual_shape, manual_data = manual_output_info[1], manual_output_info[0]
print(f"Layer {idx+1}:")
print(f" Keras Output: Shape = {keras_shape} Data = {keras_data}")
print(f" Manual Output: Shape = {manual_shape} Data = {manual_data}")
However, I am obtaining differences in the output at each layer/step, and I cannot figure out what I am doing wrong. Maybe I am wrongly assigning the fitted weights/biases? Any help? :)