ANN Fuel Consumption Prediction Model


I am writing code for a multi-layer perceptron neural network model to predict brake-specific fuel consumption (BSFC) from four variables (torque, speed, volumetric efficiency, and fuel mass flow rate), and to conduct a global sensitivity analysis on this model. Can someone verify whether this is correct?

### Read the data ###

import pandas as pd
import numpy as np
import tensorflow as tf

data = pd.read_excel(r"C:\Users\Irish\Desktop\TEST BED DATA\Scaled Gasoline Data.xlsx")
data.head()
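
Before modelling, it can be worth a quick check that the columns loaded as expected and that there are no missing values; a minimal sketch:

### Optional: quick check of the loaded data (sketch) ###
print(data.shape)         # number of rows and columns
print(data.isna().sum())  # count of missing values per column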


### Separate the features (input) and target (output) ###

X = data[['Torque', 'EngSpd', 'FuelMassFlwRate', 'VE']]
y = data['BSFC']

### Splitting the data without random state ###

from sklearn.model_selection import train_test_split

# Determine the number of samples for each set
total_data = X.shape[0]
train_data = int(total_data * 0.7)   # 70% for training (truncated to an integer)
test_data = int(total_data * 0.15)   # 15% for testing
val_data = total_data - train_data - test_data  # remaining ~15% for validation

# Split the data manually (sequential split, no shuffling)
X_train = X.iloc[:train_data]
X_test = X.iloc[train_data:train_data + test_data]
X_val = X.iloc[train_data + test_data:]

y_train = y.iloc[:train_data]
y_test = y.iloc[train_data:train_data + test_data]
y_val = y.iloc[train_data + test_data:]

print("X_train:")
print(X_train)
print("\nX_test:")
print(X_test)
print("\nX_val:")
print(X_val)
print("\ny_train:")
print(y_train)
print("\ny_test:")
print(y_test)
print("\ny_val:")
print(y_val)
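
If a shuffled split is acceptable for this data, an alternative is to let scikit-learn's train_test_split produce the 70/15/15 split in two calls; a sketch, with a fixed random_state assumed purely for reproducibility (the _alt names are just illustrative):

### Optional: shuffled 70/15/15 split with scikit-learn (sketch) ###
X_train_alt, X_rest, y_train_alt, y_rest = train_test_split(X, y, train_size=0.7, random_state=42)
X_val_alt, X_test_alt, y_val_alt, y_test_alt = train_test_split(X_rest, y_rest, test_size=0.5, random_state=42)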


### Build the ANN regression model ###
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()

# Defining the 1st hidden layer
model.add(Dense(units=8, input_dim=4, kernel_initializer='normal', activation='relu'))

# Defining the 2nd hidden layer
model.add(Dense(units=16, kernel_initializer='normal', activation='relu'))

# Defining the 3rd hidden layer
model.add(Dense(units=32, kernel_initializer='normal', activation='relu'))

# Defining the output layer
model.add(Dense(1, kernel_initializer='normal', activation='linear'))
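
As a sketch, the same architecture can also be written with an explicit Input layer, which newer Keras versions prefer over passing input_dim to the first Dense layer (alt_model is just an illustrative name):

### Equivalent model with an explicit Input layer (sketch) ###
from tensorflow.keras.layers import Input

alt_model = Sequential([
    Input(shape=(4,)),  # 4 input features
    Dense(8, kernel_initializer='normal', activation='relu'),
    Dense(16, kernel_initializer='normal', activation='relu'),
    Dense(32, kernel_initializer='normal', activation='relu'),
    Dense(1, kernel_initializer='normal', activation='linear'),
])
alt_model.summary()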


### Define custom R-squared metric ###
from sklearn.metrics import r2_score
from tensorflow.keras import backend as K

def r_squared(y_true, y_pred):
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - SS_res / (SS_tot + K.epsilon())
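
To sanity-check the custom metric, it can be compared against sklearn's r2_score on a small hand-made example (the numbers below are purely illustrative; the two values should agree up to K.epsilon()):

### Quick sanity check of the custom R-squared metric (sketch) ###
y_true_demo = tf.constant([0.1, 0.4, 0.35, 0.8], dtype=tf.float32)
y_pred_demo = tf.constant([0.15, 0.38, 0.30, 0.75], dtype=tf.float32)
print(float(r_squared(y_true_demo, y_pred_demo)))           # Keras backend version
print(r2_score(y_true_demo.numpy(), y_pred_demo.numpy()))   # sklearn version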

### Compile the model with the Adam optimizer and R-squared as a metric ###
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[r_squared])

# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val))
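
If overfitting is a concern, an EarlyStopping callback can halt training once the validation loss stops improving; a sketch that would replace the fit call above (the patience value is an assumption):

### Optional: early stopping on validation loss (sketch) ###
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=100, batch_size=32,
                    validation_data=(X_val, y_val), callbacks=[early_stop])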

### Plot training and validation loss ###
import matplotlib.pyplot as plt

plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Mean Squared Error')
plt.legend()
plt.show()
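
The R-squared history can be plotted the same way; Keras names the history keys after the metric function, so they should be 'r_squared' and 'val_r_squared' (worth confirming with print(history.history.keys())):

### Plot training and validation R-squared (sketch) ###
plt.plot(history.history['r_squared'], label='Training R-squared')
plt.plot(history.history['val_r_squared'], label='Validation R-squared')
plt.xlabel('Epochs')
plt.ylabel('R-squared')
plt.legend()
plt.show()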

### Evaluate the model on the test set ###
test_results = model.evaluate(X_test, y_test, verbose=0)

# Print the Mean Squared Error and R-squared on the test set
print(f'Mean Squared Error on Test Set: {test_results[0]}')
print(f'R-squared on Test Set: {test_results[1]}')

# Print final training and validation loss (MSE)
print(f'Mean Squared Error on Training Set: {history.history["loss"][-1]}')
print(f'Mean Squared Error on Validation Set: {history.history["val_loss"][-1]}')

# Predictions on training set
y_train_pred = model.predict(X_train)
r2_train = r2_score(y_train, y_train_pred)

# Predictions on validation set
y_validate_pred = model.predict(X_val)
r2_validate = r2_score(y_val, y_validate_pred)

# Print R-squared values
print(f'R-squared on training set: {r2_train}')
print(f'R-squared on validation set: {r2_validate}')
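
For consistency, the test-set R-squared can also be cross-checked with sklearn's r2_score, which should closely match the value reported by model.evaluate:

# Cross-check: R-squared on the test set via sklearn (sketch)
y_test_pred = model.predict(X_test)
print(f'R-squared on test set (sklearn): {r2_score(y_test, y_test_pred)}')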

### Sensitivity Analysis (Sobol indices with SALib) ###
from SALib.sample import saltelli
from SALib.analyze import sobol

# 1. Define the problem (input parameters and their ranges)
parameter_dict = {
    'num_vars': 4,  # number of input parameters
    'names': ['Torque', 'EngSpd', 'FuelMassFlwRate', 'VE'],
    'bounds': [[0, 1], [0, 1], [0, 1], [0, 1]]  # ranges of the (already normalized) inputs
}
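
Since the inputs are already normalized, the [0, 1] bounds are reasonable; if they were not, the bounds could instead be derived from the observed range of each feature, as in this sketch (the column order of X matches the 'names' list above):

# Optional: derive bounds from the data instead of hard-coding [0, 1] (sketch)
data_bounds = [[float(X[c].min()), float(X[c].max())] for c in X.columns]
print(data_bounds)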

# 2. Generate Saltelli samples
model_input = saltelli.sample(parameter_dict, 1024)

# 3. Run the trained ANN on the sampled inputs
# (the SALib docs example uses Ishigami.evaluate here; the trained model is used instead)
model_output = model.predict(model_input).flatten()

# 4. Analyze the Sobol indices
Si = sobol.analyze(parameter_dict, model_output)

# First-order
print(Si['S1'])

# Total-order indices
print(Si['ST'])

# Second-order indices (variable order follows parameter_dict['names'])
print("Torque-EngSpd:", Si['S2'][0, 1])
print("Torque-FuelMassFlwRate:", Si['S2'][0, 2])
print("Torque-VE:", Si['S2'][0, 3])
print("EngSpd-FuelMassFlwRate:", Si['S2'][1, 2])
print("EngSpd-VE:", Si['S2'][1, 3])
print("FuelMassFlwRate-VE:", Si['S2'][2, 3])

# Plot the Sobol indices
total_Si, first_Si, second_Si = Si.to_df()
Si.plot()
plt.show()

I did not scale my datasets in the code because the values are already normalized (between 0 and 1). However, I'm not certain about my MSE and R-squared results or about my sensitivity analysis. How can I make my code clearer or better?
