Project 1: Implementation of a Fully Connected Neural Network (FCNN)
Data Import, Validation, and Model Creation
- Prompt the user for the source file location and offer a default value.
- Validate the CSV file by checking its existence, ensuring it’s not empty, verifying it has at least two columns (features plus the target), and confirming not all entries are empty.
- Prompt the user to specify which column represents the target value, and provide a default option.
- Remove all entries with an empty target value and ensure there is still sufficient data remaining.
- Encode all labels and create a class mapping, allowing the numerical values to be translated back to their readable classes when available.
- Use a scaler to scale data into the right format and store the scaler information.
- Prompt the user for the relevant algorithm parameters, providing default values for each.
- Split the data into training and validation sets.
- Run the machine learning algorithm using the specified data splits and parameters.
- Validate the run by providing the user a metrics report.
- Store the model, encoders, scaler, feature mapping, and parameters in separate files.
- Create an architecture overview/image of the created model.
Note: The goal of this project is to make this pipeline usable for various data sources. I used a bike sharing data file consisting of around 17,000 entries, each with 16 features and a target value (target = how many bikes have been rented on a certain day). The data is the publicly available bike sharing dataset (UCI Machine Learning Repository). All screenshots and examples are based on this data.
import os
import sys

import joblib
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model
# ---- fallback values used whenever the user presses Enter at a prompt ----
# data source and target column
default_csv_file = "dataTraining/trainRegression.csv"
default_target_value_column = "target_value"
# train/test split configuration
random_state_default = 1234
test_size_default = 0.3
# network architecture and training schedule
hidden_layers_default = 16
neurons_per_layer_default = 64
epochs_default = 50
batch_size_default = 32
validation_split_default = 0.2
#ask user for the csv file location (empty input keeps the default)
csv_file = input(f"Enter name of CSV data source (default: {default_csv_file}): ").strip() or default_csv_file
#load the file with error handling so the user gets a readable message
#instead of a raw traceback
try:
    data = pd.read_csv(csv_file)
except FileNotFoundError:
    print("\nError: File not found. Please check the file name and path.")
    sys.exit(1)
except pd.errors.EmptyDataError:
    #a zero-byte file makes read_csv raise before any later data.empty check
    #could ever run, so it has to be handled here with the same message
    print("\nError: The provided CSV file is empty. Please provide a valid file with data.")
    sys.exit(1)
#structural sanity checks on the loaded frame, in order of severity;
#the first failing check prints its message and aborts the run
validation_checks = (
    #parsed fine but holds no rows at all
    (data.empty,
     "\nError: The provided CSV file is empty. Please provide a valid file with data."),
    #need at least one feature column plus the target column
    (len(data.columns) < 2,
     "\nError: The CSV file must contain at least two columns (features and target_value column)."),
    #every single cell is null -> nothing to learn from
    (data.isnull().all().all(),
     "\nError: The CSV file contains only null or invalid data. Please provide a valid file."),
)
for failed, message in validation_checks:
    if failed:
        print(message)
        sys.exit(1)
#ask which column holds the target; empty input keeps the default
target_value_column = input(f"Please provide the name of the column containing the target_value_column (default: {default_target_value_column}): ").strip() or default_target_value_column
#the chosen column must exist before anything else can proceed
if target_value_column not in data.columns:
    print(f"\nError: The target column '{target_value_column}' does not exist in the dataset.")
    sys.exit(1)
#discard rows lacking a target value and warn so the user knows data shrank
if data[target_value_column].isnull().any():
    data = data.dropna(subset=[target_value_column])
    print(f"\nWarning: Missing values detected in the '{target_value_column}' column. Rows with missing values have been dropped.")
#dropping may have emptied the frame entirely -- bail out if so
if data.empty:
    print("\nError: All rows have been dropped due to missing values in the target_value_column column. Please provide a valid file.")
    sys.exit(1)
#encode every categorical (object-dtype) column as integer codes, keeping
#each fitted encoder so numeric predictions can be mapped back later
label_encoders = {}
for column in data.select_dtypes(include='object').columns:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder
#separate the target vector y from the feature matrix X
y = data[target_value_column]
X = data.drop(target_value_column, axis=1)
#one scaler per role so features and target are normalised independently
feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler(feature_range=(0, 1))
#helpers that read a numeric value from stdin, fall back to the default on
#empty input, and exit with a clear message instead of a raw ValueError
#traceback on bad input
def _prompt_int(prompt, default, minimum=None):
    """Ask for an integer; empty input returns *default*; enforce *minimum* if given."""
    raw = input(prompt).strip()
    if not raw:
        return default
    try:
        value = int(raw)
    except ValueError:
        print(f"\nError: '{raw}' is not a valid integer.")
        sys.exit(1)
    if minimum is not None and value < minimum:
        print(f"\nError: value must be at least {minimum}.")
        sys.exit(1)
    return value

def _prompt_float(prompt, default, lo=None, hi=None):
    """Ask for a float; empty input returns *default*; enforce lo < value < hi if given."""
    raw = input(prompt).strip()
    if not raw:
        return default
    try:
        value = float(raw)
    except ValueError:
        print(f"\nError: '{raw}' is not a valid number.")
        sys.exit(1)
    if lo is not None and not (lo < value < hi):
        print(f"\nError: value must be strictly between {lo} and {hi}.")
        sys.exit(1)
    return value

#ask the user for all training parameters (prompts unchanged); the counts
#must be >= 1, and test_size must honour the range the prompt promises
hidden_layers = _prompt_int(f"Number of hidden layers (default={hidden_layers_default}): ", hidden_layers_default, minimum=1)
neurons_per_layer = _prompt_int(f"Number of neurons per hidden layer (default={neurons_per_layer_default}): ", neurons_per_layer_default, minimum=1)
epochs = _prompt_int(f"Number of epochs (default={epochs_default}): ", epochs_default, minimum=1)
batch_size = _prompt_int(f"Batch size (default={batch_size_default}): ", batch_size_default, minimum=1)
validation_split = _prompt_float(f"Provide test validation split (default={validation_split_default}): ", validation_split_default)
random_state = _prompt_int(f"Provide a random state seed (default={random_state_default}): ", random_state_default)
test_size = _prompt_float(f"Provide test size must be a float between 0.0 and 1.0, (default={test_size_default}): ", test_size_default, lo=0.0, hi=1.0)
#bundle the run configuration for reporting and persistence
params = {
    "hidden_layers": hidden_layers,
    "neurons_per_layer": neurons_per_layer,
    "epochs": epochs,
    "batch_size": batch_size,
    "validation_split": validation_split,
    "random_state": random_state,
    "test_size": test_size,
}
#split data into training and test sets using the USER-CHOSEN test_size and
#random_state (previously the *_default constants were passed here, silently
#ignoring the values the user just entered and recorded in params)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
#fit the feature scaler on the training split only, then apply to both splits
#so no information from the test set leaks into the scaling
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)
#scale the target the same way, reshaped to the (n, 1) column MinMaxScaler expects
y_train = target_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_test = target_scaler.transform(y_test.values.reshape(-1, 1))
#assemble the FCNN: input layer, `hidden_layers` relu layers of
#`neurons_per_layer` units each, and a single linear output for regression
layer_stack = [Input(shape=(X_train.shape[1],))]
layer_stack.extend(Dense(neurons_per_layer, activation='relu') for _ in range(hidden_layers))
layer_stack.append(Dense(1))
model = Sequential(layer_stack)
#adam + MSE is the standard setup for a scaled regression target
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
#fit using the user-supplied training schedule
print("\nTraining the Neural Network...")
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split, verbose=1)
#score the held-out split, then report metrics in the target's original units
print("\nModel evaluations starting")
loss = model.evaluate(X_test, y_test, verbose=0)
#predict and undo the MinMax scaling so the numbers are interpretable
y_pred = model.predict(X_test)
y_pred_original = target_scaler.inverse_transform(y_pred)
y_test_original = target_scaler.inverse_transform(y_test)
#compute and print the standard regression metrics
metric_report = {
    "Mean Squared Error": mean_squared_error(y_test_original, y_pred_original),
    "Mean Absolute Error": mean_absolute_error(y_test_original, y_pred_original),
    "R^2 Score": r2_score(y_test_original, y_pred_original),
}
print("\nEvaluation Metrics:")
for metric_name, metric_value in metric_report.items():
    print(f"{metric_name}: {metric_value}")
#persist every artifact needed to reproduce predictions later; create the
#output folders first so the dumps don't fail on a fresh checkout
os.makedirs('model', exist_ok=True)
os.makedirs('modelImages', exist_ok=True)
#save feature column names, scalers and encoders
feature_columns = X.columns.tolist()
joblib.dump(feature_scaler, 'model/fcnn_reg_feature_scaler.pkl')
joblib.dump(target_scaler, 'model/fcnn_reg_target_scaler.pkl')
joblib.dump(label_encoders, 'model/fcnn_reg_label_encoders.pkl')
joblib.dump(feature_columns, 'model/fcnn_reg_training_features.pkl')
#also store the run parameters, as promised by the completion message below
#(previously params was printed but never written to disk)
joblib.dump(params, 'model/fcnn_reg_params.pkl')
#save the ml model in keras format
model.save('model/fcnn_reg_trained_model.keras')
#confirm completion to user
print("Model, encoders, scalers, and parameters have been saved.")
print(f"\nModel parameters used: {params}")
#create a plot that shows the model architecture
plot_model(model, to_file='modelImages/model_structure_fcnn_regression.png', show_shapes=True, show_layer_names=True)
#NOTES FOR MYSELF
#could adjust the learning rate of the adam optimizer to finetune the model and get even better results- > tbd