Project 1: Implementation of a Fully Connected Neural Network (FCNN)

Model Prediction with New Data

Load the saved files containing the trained model, encoders and scalers.
Ask the user for the input file containing the data for which a prediction should be made.
Apply validation checks and preprocessing steps similar to those in the training phase, encoding all categorical data as necessary and using the correct scalers for scaling.
Add any missing columns, filled with the default value 0, and remove extra columns.
Pass the processed data through the model to generate predictions.
Present the final predictions and store them in a CSV file.


import os
import sys

import joblib
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# Default CSV path, used when the user submits an empty answer at the
# file-name prompt further down in this script.
default_csv_file = "data/regression.csv"

def _align_columns(new_data, expected_columns):
    """Return a new frame holding exactly the training feature columns.

    Columns listed in *expected_columns* but absent from *new_data* are
    created and filled with 0; columns not listed are dropped. The result
    follows the order of *expected_columns*.
    """
    # reindex builds a fresh frame, so the column assignments done later do
    # not write into a slice of another frame (avoids SettingWithCopyWarning).
    return new_data.reindex(columns=list(expected_columns), fill_value=0)


def _encode_categoricals(new_data, label_encoders):
    """Encode, in place, every column of *new_data* that has a saved encoder.

    Missing values are replaced by the string "Unknown" before encoding.
    Values the encoder does not know are encoded as 0 and a warning is
    printed for the affected column.
    """
    for column in new_data.columns:
        if column not in label_encoders:
            continue
        encoder = label_encoders[column]

        # Fill gaps once so that the fast path and the fallback below treat
        # missing values identically.
        filled = new_data[column].fillna("Unknown")
        try:
            new_data[column] = encoder.transform(filled)
        except ValueError:
            print(f"Warning: Unseen categories in column '{column}'. Replacing with default value.")
            # Build the label -> code table once instead of calling
            # encoder.transform for every single cell.
            codes = {label: code for code, label in enumerate(encoder.classes_)}
            new_data[column] = filled.apply(lambda value: codes.get(value, 0))


def predict_with_new_data(dataInput):
    """Run the saved regression model on the rows of a CSV file.

    The trained model, the feature and target scalers, the per-column label
    encoders and the list of training feature columns are loaded from the
    ``model/`` directory. The CSV is aligned to the training columns,
    categorical columns are encoded, features are scaled, and the model's
    scaled predictions are mapped back with the target scaler.

    Args:
        dataInput: Path (or other source accepted by ``pd.read_csv``) of the
            CSV file to predict on.

    Returns:
        The output of ``target_scaler.inverse_transform`` applied to the
        model predictions (presumably a 2-D array with one row per input
        row -- the calling script reads ``pred[0]`` from each row).

    Exits:
        Calls ``sys.exit(1)`` if the feature scaler rejects the prepared
        data with a ``ValueError``.
    """
    # load the model, scalers and encoder files
    # NOTE(review): joblib.load unpickles its input; only load artifacts that
    # come from a trusted training run.
    loaded_model = tf.keras.models.load_model('model/fcnn_reg_trained_model.keras')
    feature_scaler = joblib.load('model/fcnn_reg_feature_scaler.pkl')
    target_scaler = joblib.load('model/fcnn_reg_target_scaler.pkl')
    loaded_label_encoders = joblib.load('model/fcnn_reg_label_encoders.pkl')
    expected_columns = joblib.load('model/fcnn_reg_training_features.pkl')  # Load saved feature columns

    # load the new data and align its columns with the training features
    new_data = _align_columns(pd.read_csv(dataInput), expected_columns)

    # encode all categorical columns
    _encode_categoricals(new_data, loaded_label_encoders)

    # scale features with scaler
    try:
        new_data_scaled = feature_scaler.transform(new_data)
    except ValueError as err:
        print("\nError: The new data's feature dimensions do not match the training data.")
        # The scaler also raises ValueError for other problems (e.g. values
        # it cannot convert to numbers), so surface the underlying message.
        print(f"Details: {err}")
        sys.exit(1)

    # make predictions
    predictions_scaled = loaded_model.predict(new_data_scaled)

    # inverse-transform predictions back to original scale
    return target_scaler.inverse_transform(predictions_scaled)

# Ask the user for the CSV file name; an empty answer selects the default.
csv_file = input(f"Enter name of CSV data source (default: {default_csv_file}): ").strip()
csv_file = csv_file if csv_file else default_csv_file

# Load the file up front so that obvious problems are reported as readable
# messages before any model artifacts are loaded.
try:
    data = pd.read_csv(csv_file)
except FileNotFoundError:
    print("\nError: File not found. Please check the file name and path.")
    sys.exit(1)
except pd.errors.EmptyDataError:
    # read_csv raises this for a file without any content, so the
    # data.empty check below would never be reached for such a file.
    print("\nError: The provided CSV file is empty. Please provide a valid file with data.")
    sys.exit(1)

# Reject a file that has a header but no data rows.
if data.empty:
    print("\nError: The provided CSV file is empty. Please provide a valid file with data.")
    sys.exit(1)

# Reject a file with fewer than two columns.
if len(data.columns) < 2:
    print("\nError: The CSV file must contain at least two columns (features and target column).")
    sys.exit(1)

# Reject a file in which every single cell is null.
if data.isnull().all().all():
    print("\nError: The CSV file contains only null or invalid data. Please provide a valid file.")
    sys.exit(1)

# Run the prediction on the validated CSV file.
predictions = predict_with_new_data(csv_file)

# Show one line per input row; rows are numbered starting at 1.
print("\nPredictions:")
for row_number, pred in enumerate(predictions, start=1):
    print(f"Row {row_number}: Predicted Value = {pred[0]}")

# Save the predictions to a CSV file.
output_file = "predictions/predictionsFcnnRegression.csv"
# to_csv does not create missing directories; make sure the target folder
# exists so the finished predictions are not lost to an OSError.
os.makedirs(os.path.dirname(output_file), exist_ok=True)
pd.DataFrame(predictions, columns=["Predicted Value"]).to_csv(output_file, index=False)
print(f"\nPredictions have been saved to {output_file}.")