I am playing around with Tensorflow+Keras and I'm trying to build a custom layer that feeds preprocessed data into the rest of the model. The input is an array of floating point values representing a time series and I want to compute on-the-fly deltas, ratios and mean values of slices.
For example, if the input is [1, 1.5, 3.2, 4] the output should be:
[
1, 1.5, 3.2, 4, # the inputs get copied
0.5, 1.7, 0.8, # deltas: d[i] - d[i-1]
1.5, 2.13, 1.25, # ratios: d[i] / d[i-1]
1.25, 1.9, 2.425 # means: d[0:i] / i
]
For n inputs there will be:
n-1deltasn-1ratiosn-2means
Therefore, if the input size is n, the output size must be 4*n - 4.
I managed to create a working example by forcing eager execution when compiling my model with model.compile(..., run_eagerly=True) and here it is.
import time
from typing import Final
import tensorflow as tf
from tensorflow import keras
class Enricher(keras.layers.Layer):
def __init__(self, input_dim: int, **kwargs):
super(Enricher, self).__init__(name="enricher", **kwargs)
if input_dim < 2:
raise ValueError("input_dim < 2 not allowed")
self.input_dim = input_dim
def _mymean(self, ll: list[float]) -> float:
return float(sum(ll) / len(ll))
def call(self, inputs: tf.Tensor) -> tf.Tensor:
dtype = self._compute_dtype
result = [[] for _ in range(inputs.shape[0])]
# iterating over the first dimension which is the batch size
for row_idx in range(inputs.shape[0]):
result[row_idx] = [
inputs[row_idx][i] for i in range(inputs.shape[1])
] # copying the inputs
# appending the deltas
result[row_idx].extend(
[
inputs[row_idx][i] - inputs[row_idx][i - 1]
for i in range(1, self.input_dim)
]
)
# appending the ratios
result[row_idx].extend(
[
inputs[row_idx][i] / inputs[row_idx][i - 1]
for i in range(1, self.input_dim)
]
)
# appending the "last k values" means
result[row_idx].extend(
[self._mymean(inputs[row_idx][-i:]) for i in range(2, self.input_dim)]
)
return tf.stack(result)
def compute_output_shape(self, input_shape):
# n inputs
# n-1 deltas
# n-1 ratios
# n-2 means
# n + (n-1) + (n-1) + (n-2) = 4*n - 4
return 4 * input_shape - 4
Unfortunately, eager execution is really slow, so slow that a batch of 32x101 gets preprocessed in roughly 20 seconds. So I tried converting the call method into a tf.function like so (and I also removed the run_eagerly=True from the model compilatio):
def call(self, inputs: tf.Tensor) -> tf.Tensor:
dtype = self._compute_dtype
print(inputs.shape)
# result = [[] for _ in range(inputs.shape[0])]
result = tf.TensorArray(dtype, size=4 * self.input_dim - 4)
idx: int = int(0)
for i in range(self.input_dim):
result = result.write(idx, inputs[i])
idx += 1
# appending the deltas
for i in range(1, self.input_dim):
result = result.write(idx, tf.math.subtract(inputs[i], inputs[i - 1]))
idx += 1
# appending the ratios
for i in range(1, self.input_dim):
result = result.write(idx, tf.math.divide(inputs[i], inputs[i - 1]))
idx += 1
# appending the "last k values" means
for i in range(2, self.input_dim):
result = result.write(idx, tf.math.reduce_mean(inputs[-i:], axis=1))
idx += 1
return result.stack()
But I always end up with this error:
File "/.../training/layers.py", line 68, in call
for i in range(self.input_dim):
File "/.../training/layers.py", line 70, in call
result = result.write(idx, inputs[i])
Node: 'model_0/enricher/strided_slice_100'
slice index 100 of dimension 0 out of bounds.
[[{{node model_0/enricher/strided_slice_100}}]] [Op:__inference_predict_function_11030]