Project 1: Implementation of a Fully Connected Neural Network (FCNN)
Data Import, Validation, and Model Creation
- Prompt the user for the source file location and offer a default value.
- Validate the CSV file by checking its existence, ensuring it’s not empty, verifying it has at least two columns (features plus the target), and confirming not all entries are empty.
- Prompt the user to specify which column represents the target value, and provide a default option.
- Remove all entries with an empty target value and ensure there is still sufficient data remaining.
- Encode all labels and create a class mapping, allowing the numerical values to be translated back to their readable classes when available.
- Use a scaler to scale data into the right format and store the scaler information.
- Prompt the user for the relevant algorithm parameters, providing default values for each.
- Split the data into training and validation sets.
- Run the machine learning algorithm using the specified data splits and parameters.
- Validate the run by providing the user a metrics report.
- Store the model, encoders, scaler, feature mapping, and parameters in separate files.
- Create an architecture overview/image of the created model.
Note: The goal of this project is to make this pipeline usable for various data sources. I used a bike sharing data file consisting of around 17,000 entries, each with 16 features and a target value (target = how many bikes have been rented on a certain day). The data is the publicly available bike sharing dataset (UCI Machine Learning Repository). All screenshots and examples are based on this data.
import os
import sys

import joblib
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model
# ---- fallback values used whenever the user presses Enter at a prompt ----
# data source and target column
default_csv_file = "dataTraining/trainRegression.csv"
default_target_value_column = "target_value"
# train/test split configuration
random_state_default = 1234
test_size_default = 0.3
# network architecture and training schedule
hidden_layers_default = 16
neurons_per_layer_default = 64
epochs_default = 50
batch_size_default = 32
validation_split_default = 0.2
#ask user for the csv file location (empty input keeps the default)
csv_file = input(f"Enter name of CSV data source (default: {default_csv_file}): ").strip() or default_csv_file
#load the file with error handling so the user gets a readable message
#instead of a raw traceback
try:
    data = pd.read_csv(csv_file)
except FileNotFoundError:
    print("\nError: File not found. Please check the file name and path.")
    sys.exit(1)
except pd.errors.EmptyDataError:
    #a zero-byte file makes read_csv raise before any later data.empty check
    #could ever run, so it has to be handled here with the same message
    print("\nError: The provided CSV file is empty. Please provide a valid file with data.")
    sys.exit(1)
#structural sanity checks on the loaded frame, in order of severity;
#the first failing check prints its message and aborts the run
validation_checks = (
    #parsed fine but holds no rows at all
    (data.empty,
     "\nError: The provided CSV file is empty. Please provide a valid file with data."),
    #need at least one feature column plus the target column
    (len(data.columns) < 2,
     "\nError: The CSV file must contain at least two columns (features and target_value column)."),
    #every single cell is null -> nothing to learn from
    (data.isnull().all().all(),
     "\nError: The CSV file contains only null or invalid data. Please provide a valid file."),
)
for failed, message in validation_checks:
    if failed:
        print(message)
        sys.exit(1)
#ask which column holds the target; empty input keeps the default
target_value_column = input(f"Please provide the name of the column containing the target_value_column (default: {default_target_value_column}): ").strip() or default_target_value_column
#the chosen column must exist before anything else can proceed
if target_value_column not in data.columns:
    print(f"\nError: The target column '{target_value_column}' does not exist in the dataset.")
    sys.exit(1)
#discard rows lacking a target value and warn so the user knows data shrank
if data[target_value_column].isnull().any():
    data = data.dropna(subset=[target_value_column])
    print(f"\nWarning: Missing values detected in the '{target_value_column}' column. Rows with missing values have been dropped.")
#dropping may have emptied the frame entirely -- bail out if so
if data.empty:
    print("\nError: All rows have been dropped due to missing values in the target_value_column column. Please provide a valid file.")
    sys.exit(1)
#encode every categorical (object-dtype) column as integer codes, keeping
#each fitted encoder so numeric predictions can be mapped back later
label_encoders = {}
for column in data.select_dtypes(include='object').columns:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder
#separate the target vector y from the feature matrix X
y = data[target_value_column]
X = data.drop(target_value_column, axis=1)
#one scaler per role so features and target are normalised independently
feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler(feature_range=(0, 1))
#helpers that read a numeric value from stdin, fall back to the default on
#empty input, and exit with a clear message instead of a raw ValueError
#traceback on bad input
def _prompt_int(prompt, default, minimum=None):
    """Ask for an integer; empty input returns *default*; enforce *minimum* if given."""
    raw = input(prompt).strip()
    if not raw:
        return default
    try:
        value = int(raw)
    except ValueError:
        print(f"\nError: '{raw}' is not a valid integer.")
        sys.exit(1)
    if minimum is not None and value < minimum:
        print(f"\nError: value must be at least {minimum}.")
        sys.exit(1)
    return value

def _prompt_float(prompt, default, lo=None, hi=None):
    """Ask for a float; empty input returns *default*; enforce lo < value < hi if given."""
    raw = input(prompt).strip()
    if not raw:
        return default
    try:
        value = float(raw)
    except ValueError:
        print(f"\nError: '{raw}' is not a valid number.")
        sys.exit(1)
    if lo is not None and not (lo < value < hi):
        print(f"\nError: value must be strictly between {lo} and {hi}.")
        sys.exit(1)
    return value

#ask the user for all training parameters (prompts unchanged); the counts
#must be >= 1, and test_size must honour the range the prompt promises
hidden_layers = _prompt_int(f"Number of hidden layers (default={hidden_layers_default}): ", hidden_layers_default, minimum=1)
neurons_per_layer = _prompt_int(f"Number of neurons per hidden layer (default={neurons_per_layer_default}): ", neurons_per_layer_default, minimum=1)
epochs = _prompt_int(f"Number of epochs (default={epochs_default}): ", epochs_default, minimum=1)
batch_size = _prompt_int(f"Batch size (default={batch_size_default}): ", batch_size_default, minimum=1)
validation_split = _prompt_float(f"Provide test validation split (default={validation_split_default}): ", validation_split_default)
random_state = _prompt_int(f"Provide a random state seed (default={random_state_default}): ", random_state_default)
test_size = _prompt_float(f"Provide test size must be a float between 0.0 and 1.0, (default={test_size_default}): ", test_size_default, lo=0.0, hi=1.0)
#bundle the run configuration for reporting and persistence
params = {
    "hidden_layers": hidden_layers,
    "neurons_per_layer": neurons_per_layer,
    "epochs": epochs,
    "batch_size": batch_size,
    "validation_split": validation_split,
    "random_state": random_state,
    "test_size": test_size,
}
#split data into training and test sets using the USER-CHOSEN test_size and
#random_state (previously the *_default constants were passed here, silently
#ignoring the values the user just entered and recorded in params)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
#fit the feature scaler on the training split only, then apply to both splits
#so no information from the test set leaks into the scaling
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)
#scale the target the same way, reshaped to the (n, 1) column MinMaxScaler expects
y_train = target_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_test = target_scaler.transform(y_test.values.reshape(-1, 1))
#assemble the FCNN: input layer, `hidden_layers` relu layers of
#`neurons_per_layer` units each, and a single linear output for regression
layer_stack = [Input(shape=(X_train.shape[1],))]
layer_stack.extend(Dense(neurons_per_layer, activation='relu') for _ in range(hidden_layers))
layer_stack.append(Dense(1))
model = Sequential(layer_stack)
#adam + MSE is the standard setup for a scaled regression target
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
#fit using the user-supplied training schedule
print("\nTraining the Neural Network...")
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split, verbose=1)
#score the held-out split, then report metrics in the target's original units
print("\nModel evaluations starting")
loss = model.evaluate(X_test, y_test, verbose=0)
#predict and undo the MinMax scaling so the numbers are interpretable
y_pred = model.predict(X_test)
y_pred_original = target_scaler.inverse_transform(y_pred)
y_test_original = target_scaler.inverse_transform(y_test)
#compute and print the standard regression metrics
metric_report = {
    "Mean Squared Error": mean_squared_error(y_test_original, y_pred_original),
    "Mean Absolute Error": mean_absolute_error(y_test_original, y_pred_original),
    "R^2 Score": r2_score(y_test_original, y_pred_original),
}
print("\nEvaluation Metrics:")
for metric_name, metric_value in metric_report.items():
    print(f"{metric_name}: {metric_value}")
#persist every artifact needed to reproduce predictions later; create the
#output folders first so the dumps don't fail on a fresh checkout
os.makedirs('model', exist_ok=True)
os.makedirs('modelImages', exist_ok=True)
#save feature column names, scalers and encoders
feature_columns = X.columns.tolist()
joblib.dump(feature_scaler, 'model/fcnn_reg_feature_scaler.pkl')
joblib.dump(target_scaler, 'model/fcnn_reg_target_scaler.pkl')
joblib.dump(label_encoders, 'model/fcnn_reg_label_encoders.pkl')
joblib.dump(feature_columns, 'model/fcnn_reg_training_features.pkl')
#also store the run parameters, as promised by the completion message below
#(previously params was printed but never written to disk)
joblib.dump(params, 'model/fcnn_reg_params.pkl')
#save the ml model in keras format
model.save('model/fcnn_reg_trained_model.keras')
#confirm completion to user
print("Model, encoders, scalers, and parameters have been saved.")
print(f"\nModel parameters used: {params}")
#create a plot that shows the model architecture
plot_model(model, to_file='modelImages/model_structure_fcnn_regression.png', show_shapes=True, show_layer_names=True)
#NOTES FOR MYSELF
#could adjust the learning rate of the adam optimizer to finetune the model and get even better results- > tbd