Model Prediction with New Data
- Load the saved files containing the trained model, encoders and scalers.
- Ask the user for the input file containing the data for which a prediction should be made.
- Apply the same validation checks and preprocessing steps as in the training phase: encode all categorical data as necessary and scale the features with the saved scalers.
- Add any missing columns with the default value 0 and remove extra columns.
- Pass the processed data through the model to generate predictions.
- Present the final predictions and store them in a CSV file.
import os
import sys

import joblib
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
# Default CSV path offered at the input prompt; used when the user enters nothing.
default_csv_file = "data/regression.csv"
def _encode_categorical_column(values, encoder, column):
    """Label-encode one column, mapping categories the encoder has not seen to 0.

    Args:
        values: pandas Series holding the raw column values.
        encoder: Fitted encoder exposing ``transform`` and ``classes_``.
        column: Column name, used only in the warning message.

    Returns:
        The encoded values (array-like of integer codes).
    """
    # Fill missing values once so the fast path and the fallback agree on
    # how NaN is treated.
    filled = values.fillna("Unknown")
    try:
        return encoder.transform(filled)
    except ValueError:
        print(f"Warning: Unseen categories in column '{column}'. Replacing with default value.")
        # Build the class -> code lookup once instead of calling
        # encoder.transform() for every single row.
        code_by_class = {label: code for code, label in enumerate(encoder.classes_)}
        return filled.map(lambda value: code_by_class.get(value, 0))


def predict_with_new_data(dataInput):
    """Run the saved regression model on the rows of a CSV file.

    Loads the trained Keras model, the feature/target scalers, the label
    encoders and the list of training feature columns from the ``model/``
    directory, aligns the new data with the training columns, encodes and
    scales it, and returns the predictions in the target's original scale.

    Args:
        dataInput: Path (or anything ``pandas.read_csv`` accepts) of the CSV
            file containing the rows to predict.

    Returns:
        The inverse-transformed predictions as returned by the target scaler
        (presumably a 2-D array with one row per input row -- confirm
        against the trained model's output shape).

    Note:
        Terminates the process via ``sys.exit(1)`` when the feature scaler
        rejects the prepared data with a ``ValueError``.
    """
    # Load the model, scalers, encoders and the saved feature column list.
    loaded_model = tf.keras.models.load_model('model/fcnn_reg_trained_model.keras')
    feature_scaler = joblib.load('model/fcnn_reg_feature_scaler.pkl')
    target_scaler = joblib.load('model/fcnn_reg_target_scaler.pkl')
    loaded_label_encoders = joblib.load('model/fcnn_reg_label_encoders.pkl')
    expected_columns = joblib.load('model/fcnn_reg_training_features.pkl')

    # Load the new data.
    new_data = pd.read_csv(dataInput)

    # Align with the training features in one step: columns missing from the
    # file are added with the default value 0, extra columns are dropped, and
    # the training column order is restored. reindex returns a new frame, so
    # the column assignments below never write into a view of the input.
    new_data = new_data.reindex(columns=expected_columns, fill_value=0)

    # Encode every categorical column that has a saved encoder.
    for column in new_data.columns:
        if column in loaded_label_encoders:
            new_data[column] = _encode_categorical_column(
                new_data[column], loaded_label_encoders[column], column
            )

    # Scale the features with the scaler fitted during training.
    try:
        new_data_scaled = feature_scaler.transform(new_data)
    except ValueError:
        print("\nError: The new data's feature dimensions do not match the training data.")
        sys.exit(1)

    # Predict, then map the predictions back to the original target scale.
    predictions_scaled = loaded_model.predict(new_data_scaled)
    predictions_original = target_scaler.inverse_transform(predictions_scaled)
    return predictions_original
# Ask the user for the CSV data source; a blank answer selects the default.
csv_file = input(f"Enter name of CSV data source (default: {default_csv_file}): ").strip()
csv_file = csv_file if csv_file else default_csv_file

# Load the file once up front so it can be validated before predicting.
try:
    data = pd.read_csv(csv_file)
except FileNotFoundError:
    print("\nError: File not found. Please check the file name and path.")
    sys.exit(1)
except pd.errors.EmptyDataError:
    # read_csv raises instead of returning an empty frame when the file has
    # no content at all, so the data.empty check below would never see it.
    print("\nError: The provided CSV file is empty. Please provide a valid file with data.")
    sys.exit(1)

# Reject a file that has a header but no rows.
if data.empty:
    print("\nError: The provided CSV file is empty. Please provide a valid file with data.")
    sys.exit(1)

# Reject a file with fewer than two columns.
if len(data.columns) < 2:
    print("\nError: The CSV file must contain at least two columns (features and target column).")
    sys.exit(1)

# Reject a file whose cells are all null.
if data.isnull().all().all():
    print("\nError: The CSV file contains only null or invalid data. Please provide a valid file.")
    sys.exit(1)

# Run the prediction on the validated CSV file.
predictions = predict_with_new_data(csv_file)

# Show the predictions, one line per input row.
print("\nPredictions:")
for i, pred in enumerate(predictions):
    print(f"Row {i + 1}: Predicted Value = {pred[0]}")

# Save the predictions to a CSV file. to_csv does not create missing
# directories, so make sure the output folder exists first.
output_file = "predictions/predictionsFcnnRegression.csv"
os.makedirs(os.path.dirname(output_file), exist_ok=True)
pd.DataFrame(predictions, columns=["Predicted Value"]).to_csv(output_file, index=False)
print(f"\nPredictions have been saved to {output_file}.")