why running following code says, "['Hatch'] not in index"

8 Views Asked by At

I have three input parameters,including Power,Velocity and Hatch and I want to predict surface roughness with regression, that I have amount of all surface roughness. But, when I run it says "Hatch not in index" and makes an error.

# Step 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')
# Step 2: Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
# Step 3: Read the Excel file
file_path = '/content/drive/MyDrive/Colab Notebooks/colab/BBBc.xlsx'
df = pd.read_excel(file_path)


# Print the columns of the DataFrame
print("Columns in DataFrame:")
print(df.columns)
# Step 4: Normalize the data
scaler = MinMaxScaler()
# Verify that the columns 'Power', 'Velocity', and 'Hatch' exist in the DataFrame
if all(col in df.columns for col in ['Power', 'Velocity', 'Hatch']):
    normalized_data = scaler.fit_transform(df[['Power', 'Velocity', 'Hatch']])
    
    # Add the normalized data back to the DataFrame
    df[['Power', 'Velocity', 'Hatch']] = normalized_data
else:
    print("Some or all of the specified columns are not present in the DataFrame.")
    print("Check the column names in your DataFrame and adjust the code accordingly.")
# Display the first few rows of the DataFrame
print("Original Data:")
print(df.head())
# Step 4: Normalize the data
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(df[['Power', 'Velocity', 'Hatch']])
# Add the normalized data back to the DataFrame
df[['Power', 'Velocity', 'Hatch']] = normalized_data
# Display the first few rows of the DataFrame with normalized data
print("\nNormalized Data:")
print(df.head())
# Step 5: Split the data into training and testing sets
X = df[['Power', 'Velocity', 'Hatch']]
y = df['SurfaceRoughness']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Step 6: Train a Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
# Step 7: Evaluate the model
y_pred = model.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(f'\nRoot Mean Squared Error: {rmse}')
# Step 8: Make predictions on new data if needed
# new_data = pd.DataFrame({'Power': [your_values], 'Velocity': [your_values], 'Hatch': [your_values]})
# normalized_new_data = scaler.transform(new_data)
# prediction = model.predict(normalized_new_data)```

0

There are 0 best solutions below