Project 1: Implementation of a Fully Connected Neural Network (FCNN)

Model Prediction with New Data

  • Load the saved files containing the trained model, encoders and scalers.
  • Ask the user for the input file containing the data for which a prediction should be made.
  • Apply similar validation checks and preprocessing steps as in the training phase, encoding all categorical data as necessary and using the saved scalers for scaling.
  • Add any missing columns, filled with the default value 0, and remove extra columns.
  • Pass the processed data through the model to generate predictions.
  • Present the final predictions and store them in a CSV file.

import sys
from pathlib import Path

import joblib
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

#default CSV path, used when the user submits an empty answer at the file-name prompt
default_csv_file = "data/regression.csv"

def _encode_categorical_columns(frame, label_encoders):
    """Label-encode every column of *frame* that has a saved encoder.

    Missing values are replaced with the string "Unknown" before encoding.
    If a column contains values the encoder was not fitted on, a warning is
    printed and those values are mapped to the default code 0, while known
    values keep the code the encoder assigns them.

    Args:
        frame: DataFrame whose columns are encoded in place.
        label_encoders: mapping of column name -> fitted encoder exposing
            ``transform`` and ``classes_``.

    Returns:
        The same *frame*, for convenience.
    """
    for column in frame.columns:
        if column not in label_encoders:
            continue
        encoder = label_encoders[column]
        #fill once so the fast path and the fallback see identical values
        filled = frame[column].fillna("Unknown")
        try:
            frame[column] = encoder.transform(filled)
        except ValueError:
            print(f"Warning: Unseen categories in column '{column}'. Replacing with default value.")
            #position in classes_ is the code transform() would return;
            #a dict lookup avoids one transform() call per row
            code_by_class = {label: code for code, label in enumerate(encoder.classes_)}
            frame[column] = filled.map(lambda value: code_by_class.get(value, 0))
    return frame


def predict_with_new_data(dataInput):
    """Predict target values for a CSV file with the saved FCNN regression model.

    Loads the trained model, feature/target scalers, label encoders and the
    list of training feature columns from the ``model/`` directory, aligns the
    CSV's columns with the training features (missing columns are added with
    the value 0, extra columns are dropped), encodes categorical columns,
    scales the features, runs the model and converts the predictions back to
    the original target scale.

    Args:
        dataInput: path (or other source accepted by ``pd.read_csv``) of the
            CSV file holding the rows to predict.

    Returns:
        The output of ``target_scaler.inverse_transform`` applied to the model
        predictions - presumably a 2-D array with one row per input row
        (the calling script reads ``pred[0]``); confirm against the scaler used.

    Exits:
        Calls ``sys.exit(1)`` if the feature scaler rejects the prepared data
        with a ``ValueError``.
    """
    #load the model, scalers and encoder files
    #NOTE: joblib files are pickles - only load artifacts from a trusted source
    loaded_model = tf.keras.models.load_model('model/fcnn_reg_trained_model.keras')
    feature_scaler = joblib.load('model/fcnn_reg_feature_scaler.pkl')
    target_scaler = joblib.load('model/fcnn_reg_target_scaler.pkl')
    loaded_label_encoders = joblib.load('model/fcnn_reg_label_encoders.pkl')
    expected_columns = list(joblib.load('model/fcnn_reg_training_features.pkl'))  # saved feature columns

    #load the new data
    new_data = pd.read_csv(dataInput)

    #tell the user which features are being defaulted instead of doing it silently
    missing_columns = [col for col in expected_columns if col not in new_data.columns]
    if missing_columns:
        print(f"Warning: Missing columns filled with default value 0: {missing_columns}")

    #align with the training features in one step: missing columns are created
    #with 0, extra columns are dropped, and the training column order is applied.
    #reindex returns a new frame, so the column assignments below are safe.
    new_data = new_data.reindex(columns=expected_columns, fill_value=0)

    #encode all categorical columns
    new_data = _encode_categorical_columns(new_data, loaded_label_encoders)

    #scale features with scaler
    try:
        new_data_scaled = feature_scaler.transform(new_data)
    except ValueError:
        print("\nError: The new data's feature dimensions do not match the training data.")
        sys.exit(1)

    #make predictions
    predictions_scaled = loaded_model.predict(new_data_scaled)

    #inverse-transform predictions back to original scale
    predictions_original = target_scaler.inverse_transform(predictions_scaled)

    return predictions_original

#get CSV file name by user with default option (empty answer -> default file)
csv_file = input(f"Enter name of CSV data source (default: {default_csv_file}): ").strip()
csv_file = csv_file or default_csv_file

#load file and include error handling
try:
    data = pd.read_csv(csv_file)
except FileNotFoundError:
    print("\nError: File not found. Please check the file name and path.")
    sys.exit(1)
except pd.errors.EmptyDataError:
    #read_csv raises for a file without any content instead of returning an
    #empty frame, so the data.empty check below would never be reached
    print("\nError: The provided CSV file is empty. Please provide a valid file with data.")
    sys.exit(1)
except pd.errors.ParserError as err:
    print(f"\nError: The CSV file could not be parsed: {err}")
    sys.exit(1)

#validate CSV and provide an error if empty (header present but no rows)
if data.empty:
    print("\nError: The provided CSV file is empty. Please provide a valid file with data.")
    sys.exit(1)

#validate CSV and provide an error if not at least 2 columns
if len(data.columns) < 2:
    print("\nError: The CSV file must contain at least two columns (features and target column).")
    sys.exit(1)

#validate CSV and provide an error if all null or invalid data
if data.isnull().all().all():
    print("\nError: The CSV file contains only null or invalid data. Please provide a valid file.")
    sys.exit(1)

#prediction on new CSV file
predictions = predict_with_new_data(csv_file)

#create prediction output
print("\nPredictions:")
for row_number, pred in enumerate(predictions, start=1):
    print(f"Row {row_number}: Predicted Value = {pred[0]}")

#save predictions to a CSV file
output_file = "predictions/predictionsFcnnRegression.csv"
#to_csv does not create directories, so make sure the target folder exists
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
pd.DataFrame(predictions, columns=["Predicted Value"]).to_csv(output_file, index=False)
print(f"\nPredictions have been saved to {output_file}.")