UserWarning: Input dict contained keys which did not match any model input. They will be ignored by the model


I am trying to convert my code to use tf.data.Dataset. I'm not far off, but I still have a problem with my features and the model's input layer that I never had before using tf.data.Dataset.

I load a lot of .csv files with many feature columns; the CSV files have a header row with string column names.

My simple test code is:

import tensorflow as tf
import pandas as pd

bd_path = 'C:/Users/my doc/Python/mini_test/'
keep_columns = ['precipitation', 'temperature_min', 'temperature_max',
                'snow_depth_water_equivalent_max', 'streamflow']
name_columns = pd.read_csv(bd_path + 'camels_01022500+attributs_mensuels.csv').columns

# Force tf.functions to run eagerly (easier debugging)
tf.config.run_functions_eagerly(True)

# Load a single CSV file and preprocess it
def load_and_preprocess_csv(filename):
    columns = name_columns
    dataset = tf.data.experimental.make_csv_dataset(
        file_pattern=filename,
        num_parallel_reads=2,
        batch_size=32,
        num_epochs=1,
        label_name='streamflow',
        column_names=columns,
        select_columns=keep_columns,
        shuffle_buffer_size=10000,
        header=True,
        field_delim=','
    )

    # Apply preprocessing to the dataset 
    def preprocess_fn(features, label):
        # Normalize the features (example: scaling to [0, 1])
        features['precipitation'] /= 100.0
        features['temperature_min'] /= 100.0
        features['temperature_max'] /= 100.0
        features['snow_depth_water_equivalent_max'] /= 100.0
        # Latest attempt: create a 'main_inputs' feature by stacking the selected columns
        features['main_inputs'] = tf.stack([
            features['precipitation'],
            features['temperature_min'],
            features['temperature_max'],
            features['snow_depth_water_equivalent_max']
        ], axis=-1)

        # Another attempt, without success: rename the columns to match the model's input layer
        #features['main_inputs'] = tf.cast(features['main_inputs'], tf.float32)  # Ensure the dtype is correct
        #features['main_inputs'] = tf.identity(features['main_inputs'], name='main_inputs')  # Rename the feature
    
        return features, label

    dataset = dataset.map(preprocess_fn)

    return dataset

# Create a list of file paths matching pattern
file_paths = tf.io.gfile.glob(bd_path + '*.csv')

# Load and preprocess CSV files in parallel
building_datasets = []
for file_path in file_paths:
    dataset = load_and_preprocess_csv(file_path)
    building_datasets.append(dataset)

# Combine the individual datasets into a single dataset
combined_dataset = tf.data.Dataset.sample_from_datasets(building_datasets)

# Optionally, further transform, shuffle, and batch the dataset as needed
# For example:
combined_dataset = combined_dataset.shuffle(buffer_size=10000)
#combined_dataset = combined_dataset.batch(64)

# model
tensor_input = tf.keras.layers.Input(shape=(4,), name='main_inputs')
xy = tf.keras.layers.Dense(10, activation='linear')(tensor_input)
xy = tf.keras.layers.Dropout(rate=0.2)(xy)
out = tf.keras.layers.Dense(1, activation='linear')(xy)

model = tf.keras.Model(inputs=tensor_input, outputs=out)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse')

# Train the model
history = model.fit(combined_dataset, epochs=1)

The warning I get is:

... \keras\engine\functional.py:637: UserWarning: Input dict contained keys ['temperature_min', 'snow_depth_water_equivalent_max', 'temperature_max', 'precipitation'] which did not match any model input. They will be ignored by the model.
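
For reference, a quick way to see what the model actually receives is to look at one element of the combined dataset (a small diagnostic sketch, not part of the training script above); the features dict still contains the four column keys in addition to 'main_inputs':

# Each element is a (features, label) pair; features is a dict keyed by
# column name (plus the added 'main_inputs'), already batched by
# make_csv_dataset (batch_size=32).
for features, label in combined_dataset.take(1):
    print({name: tensor.shape for name, tensor in features.items()})
    print(label.shape)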

My experience is with passing arrays directly to the model. Does the input layer need to be modified, or is it my dataset that needs more changes?


1 Answer

Priyanshu Jha:

In preprocess_fn, make sure the dict you return uses the same key string as the model's input layer name, and return only that key -

def preprocess_fn(features, label):
    # Normalize the features (example: scaling to [0, 1])
    features['precipitation'] /= 100.0
    features['temperature_min'] /= 100.0
    features['temperature_max'] /= 100.0
    features['snow_depth_water_equivalent_max'] /= 100.0

    # Create a 'main_inputs' feature by stacking the selected columns
    features['main_inputs'] = tf.stack([
        features['precipitation'],
        features['temperature_min'],
        features['temperature_max'],
        features['snow_depth_water_equivalent_max']
    ], axis=-1)

    # return features, label
    return {'main_inputs': features['main_inputs']}, label    # edited: return only the key the model expects

dataset = dataset.map(preprocess_fn)

return dataset
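
With this change the mapped dataset yields a dict whose only key, 'main_inputs', matches the name of the model's Input layer, so Keras no longer ignores the features. As a quick sanity check (a minimal sketch, assuming the rest of the question's script is unchanged), you can inspect the element_spec of one preprocessed file:

dataset = load_and_preprocess_csv(file_paths[0])
# The features part of the spec should now contain a single 'main_inputs'
# entry whose last dimension is 4 (the four stacked columns); the second
# part is the 'streamflow' label.
print(dataset.element_spec)

Alternatively, you can keep the per-column dict and give the model one named Input per CSV column; Keras matches dict keys to Input layer names, so no stacking is needed in preprocess_fn. A rough sketch of that variant (the to_multi_input and multi_input_model names are just for illustration, and it starts from the question's original combined_dataset, whose preprocess_fn still returns the full per-column dict):

feature_names = ['precipitation', 'temperature_min',
                 'temperature_max', 'snow_depth_water_equivalent_max']

def to_multi_input(features, label):
    # Add a trailing feature dimension so each column is (batch, 1),
    # matching Input(shape=(1,)); make_csv_dataset yields columns as (batch,).
    return {name: tf.expand_dims(features[name], -1) for name in feature_names}, label

multi_input_dataset = combined_dataset.map(to_multi_input)

inputs = {name: tf.keras.layers.Input(shape=(1,), name=name) for name in feature_names}
concat = tf.keras.layers.Concatenate()(list(inputs.values()))   # shape (batch, 4)
xy = tf.keras.layers.Dense(10, activation='linear')(concat)
out = tf.keras.layers.Dense(1, activation='linear')(xy)
multi_input_model = tf.keras.Model(inputs=inputs, outputs=out)
multi_input_model.compile(optimizer='adam', loss='mse')
multi_input_model.fit(multi_input_dataset, epochs=1)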