| | |
| | |
| | """ |
| | Corrected and upgraded by the Martial Terran, from |
| | https://github.com/spiderPan/Google-Machine-Learning-Crash-Course/blob/master/multi-class_classfication_of_handwritten_digits.py |
| | |
| | The architecture of this model is not a CNN (Convolutional Neural Network). |
| | It is a Dense Neural Network (DNN), also commonly known as a Multilayer Perceptron (MLP). |
| | Let's break down why and look at the specific architecture. |
| | Why it's a DNN and Not a CNN |
| | The defining characteristic of a CNN is its use of convolutional layers (Conv2D). These layers are specifically designed to work with grid-like data, such as images. They use filters (or kernels) to slide across the input image, detecting spatial patterns like edges, textures, and shapes. |
| | This model does not use any convolutional layers. Instead, its core components are Dense layers (tf.keras.layers.Dense). |
| | DNN Approach: The 28x28 pixel image is flattened into a single vector of 784 numbers. The Dense layers treat these numbers as a simple list, with no inherent understanding that pixel #29 is directly below pixel #1. It learns patterns from the pixel values themselves, but loses all the spatial relationships between them. |
| | CNN Approach: A CNN would take the input as a 2D grid (e.g., shape=(28, 28, 1)) and use Conv2D layers to analyze neighboring pixels, preserving the spatial structure of the image. |
| | The Specific Architecture of this Model |
| | You can see the exact architecture from the code or by printing the model's summary (model.summary()). |
| | Based on the code with hidden_units = [100, 100], the architecture is as follows: |
| | Layers, listed as: Layer # - Layer Type - Description - Output Shape |
| | 1 - Input - A flat vector of 784 pixel values (28x28). - Output shape: (None, 784) |
| | 2 - Dense - First fully-connected hidden layer. Every one of its 100 neurons is connected to all 784 input pixels. - Output shape: (None, 100) |
| | 3 - Dense - Second fully-connected hidden layer. Every one of its 100 neurons is connected to all 100 neurons before it. - Output shape: (None, 100) |
| | 4 - Dropout - Regularization layer. Randomly sets 20% of neuron activations to zero during training to prevent overfitting. - Output shape: (None, 100) |
| | 5 - Dense - The final output layer. It has 10 neurons, one for each class (digits 0-9). - Output shape: (None, 10) |
| | (part of layer 5) - Softmax - The activation function on the output layer that converts the outputs into a probability distribution. - Output shape: (None, 10) |
| | (Note: "None" in the output shape refers to the batch size, which can vary.) |
| | In summary: |
| | It's a DNN/MLP: It uses stacked Dense (fully-connected) layers. |
| | It's not a CNN: It lacks Conv2D and MaxPooling2D layers, and it flattens the image data, discarding the crucial 2D spatial information that CNNs are built to exploit. |
| | |
| | Model Summary: |
| | /usr/local/lib/python3.11/dist-packages/keras/src/layers/core/input_layer.py:27: UserWarning: Argument `input_shape` is deprecated. Use `shape` instead. |
| | warnings.warn( |
| | Model: "sequential_1" |
| | ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ |
| | ┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃ |
| | ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ |
| | │ dense_3 (Dense)                 │ (None, 100)            │        78,500 │ |
| | ├─────────────────────────────────┼────────────────────────┼───────────────┤ |
| | │ dense_4 (Dense)                 │ (None, 100)            │        10,100 │ |
| | ├─────────────────────────────────┼────────────────────────┼───────────────┤ |
| | │ dropout_1 (Dropout)             │ (None, 100)            │             0 │ |
| | ├─────────────────────────────────┼────────────────────────┼───────────────┤ |
| | │ dense_5 (Dense)                 │ (None, 10)             │         1,010 │ |
| | └─────────────────────────────────┴────────────────────────┴───────────────┘ |
| | Total params: 89,610 (350.04 KB) |
| | Trainable params: 89,610 (350.04 KB) |
| | Non-trainable params: 0 (0.00 B) |
| | |
| | Final accuracy (on validation data): 0.96 |
| | |
| | Evaluating on test data... |
| | Accuracy on test data: 0.96 |
| | |
| | """ |
| |
|
| | import glob |
| | import math |
| | import os |
| |
|
| | import numpy as np |
| | import pandas as pd |
| | import seaborn as sns |
| | import tensorflow as tf |
| | from IPython.display import display |
| | from matplotlib import pyplot as plt |
| | from sklearn import metrics |
| |
|
| | |
# Keep DataFrame previews compact: show at most 10 rows and format floats
# with a single decimal place.
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format
| |
|
| |
|
def parse_labels_and_features(dataset):
    """Split a raw digit DataFrame into labels and scaled pixel features.

    Args:
      dataset: A Pandas DataFrame whose column labelled ``0`` holds the class
        label and whose columns labelled ``1`` through ``784`` hold the pixel
        values.

    Returns:
      A tuple ``(labels, features)``. ``labels`` is the Series taken from
      column ``0``; ``features`` is the DataFrame of columns ``1``..``784``
      with every value divided by 255.
    """
    labels = dataset[0]

    # ``.loc`` slices by column *label* and includes both endpoints, so this
    # picks the columns labelled 1 through 784 (the 28x28 pixels).
    features = dataset.loc[:, 1:784]

    # Scale the pixel values; assumes raw intensities lie in 0..255, which
    # would put the features in [0, 1].
    features = features / 255
    return labels, features
| |
|
| |
|
def _plot_loss_curves(training_loss, validation_loss):
    """Plot the per-epoch training and validation loss on a single figure."""
    # Number the epochs from 1 and size the axis from the recorded history.
    epochs_range = range(1, len(training_loss) + 1)

    plt.figure(figsize=(10, 5))
    plt.ylabel("Loss (Sparse Categorical Crossentropy)")
    plt.xlabel("Epochs")
    plt.title("Loss vs. Epochs")
    plt.plot(epochs_range, training_loss, label="Training")
    plt.plot(epochs_range, validation_loss, label="Validation")
    plt.legend()
    plt.show()


def _plot_confusion_matrix(true_labels, predicted_labels):
    """Draw a heatmap of the confusion matrix, normalized per true label."""
    # normalize="true" divides each row by its total, so every cell is the
    # fraction of that true class that received the given prediction.
    cm_normalized = metrics.confusion_matrix(
        true_labels, predicted_labels, normalize="true")

    plt.figure(figsize=(8, 8))
    ax = sns.heatmap(cm_normalized, cmap="bone_r", annot=True, fmt=".2f")
    ax.set_aspect(1)
    plt.title("Confusion Matrix")
    plt.ylabel("True Label")
    plt.xlabel("Predicted Label")
    plt.show()


def create_and_train_nn_model(
        learning_rate,
        epochs,
        batch_size,
        hidden_units,
        training_examples,
        training_targets,
        validation_examples,
        validation_targets):
    """Create, train, and evaluate a dense neural network using tf.keras.

    The network is a stack of ReLU ``Dense`` layers (one per entry of
    ``hidden_units``), followed by a single ``Dropout(0.2)`` layer and a
    10-unit softmax output layer. It is trained with the Adagrad optimizer
    on sparse categorical cross-entropy. After training, the loss curves and
    a normalized confusion matrix for the validation set are plotted and the
    final validation accuracy is printed.

    Args:
      learning_rate: The learning rate for the optimizer.
      epochs: The number of times to iterate through the training data.
      batch_size: The number of examples to use in each training step.
      hidden_units: A list of integers, where each integer is the number of
        nodes in a hidden layer.
      training_examples: DataFrame of training features.
      training_targets: Series of training labels.
      validation_examples: DataFrame of validation features.
      validation_targets: Series of validation labels.

    Returns:
      A tuple ``(model, history)``: the trained ``tf.keras`` model and the
      ``History`` object returned by ``model.fit``.
    """
    model = tf.keras.models.Sequential()

    # Declare the input with ``tf.keras.Input(shape=...)`` rather than
    # ``InputLayer(input_shape=...)``, whose ``input_shape`` argument is
    # deprecated. The width is taken from the data (784 for 28x28 images).
    model.add(tf.keras.Input(shape=(training_examples.shape[1],)))

    # One fully-connected ReLU layer per requested hidden size.
    for units in hidden_units:
        model.add(tf.keras.layers.Dense(units, activation='relu'))

    # A single dropout layer after the hidden stack, for regularization.
    model.add(tf.keras.layers.Dropout(0.2))

    # Output layer: 10 units with softmax, giving a probability per class.
    model.add(tf.keras.layers.Dense(10, activation='softmax'))

    # The "sparse" loss takes integer class labels directly, so the targets
    # do not need to be one-hot encoded.
    model.compile(
        optimizer=tf.keras.optimizers.Adagrad(learning_rate=learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=['accuracy']
    )

    print("Model Summary:")
    model.summary()
    print("\nTraining Model...")

    history = model.fit(
        x=training_examples.values,
        y=training_targets.values,
        batch_size=batch_size,
        epochs=epochs,
        shuffle=True,
        validation_data=(validation_examples.values, validation_targets.values),
        verbose=2
    )
    print("Model training finished.")

    _plot_loss_curves(history.history["loss"], history.history["val_loss"])

    # Turn the per-class probabilities into hard class predictions.
    validation_probabilities = model.predict(validation_examples.values)
    validation_predictions = np.argmax(validation_probabilities, axis=1)
    _plot_confusion_matrix(validation_targets, validation_predictions)

    final_validation_accuracy = history.history["val_accuracy"][-1]
    print(f"Final accuracy (on validation data): {final_validation_accuracy:.2f}")

    return model, history
| |
|
| |
|
| | |
| |
|
| | |
# Load the training and test CSV files. They have no header row, so pandas
# labels the columns 0, 1, 2, ...
mnist_dataframe = pd.read_csv('sample_data/mnist_train_small.csv', sep=",", header=None)
mnist_test_dataframe = pd.read_csv('sample_data/mnist_test.csv', sep=',', header=None)

# Keep the first 10,000 rows and shuffle them so the train/validation split
# below does not depend on the order of the file.
mnist_dataframe = mnist_dataframe.head(10000)
mnist_dataframe = mnist_dataframe.reindex(np.random.permutation(mnist_dataframe.index))
display(mnist_dataframe.head())

# First 7,500 shuffled rows for training, the remaining 2,500 for
# validation; the separate test file is held out for the final evaluation.
training_targets, training_examples = parse_labels_and_features(mnist_dataframe[:7500])
validation_targets, validation_examples = parse_labels_and_features(mnist_dataframe[7500:10000])
testing_targets, testing_examples = parse_labels_and_features(mnist_test_dataframe)

display(training_examples.describe())
display(validation_examples.describe())

# Show one randomly chosen training example as a 28x28 image with its label.
rand_example_idx = np.random.choice(training_examples.index)
_, ax = plt.subplots()
ax.matshow(training_examples.loc[rand_example_idx].values.reshape(28, 28))
ax.set_title(f"Label: {training_targets.loc[rand_example_idx]}")
ax.grid(False)
plt.show()
| |
|
| | |
| | |
| | |
| | |
| | |
# Hyperparameters for the training run.
LEARNING_RATE = 0.05
EPOCHS = 25
BATCH_SIZE = 30
HIDDEN_UNITS = [100, 100]  # two hidden layers of 100 units each

# Build and train the model; this also plots the loss curves and the
# validation confusion matrix.
trained_model, history = create_and_train_nn_model(
    learning_rate=LEARNING_RATE,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    hidden_units=HIDDEN_UNITS,
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets
)

# Measure accuracy on the held-out test set, which was not used for
# training or validation.
print("\nEvaluating on test data...")
loss, accuracy = trained_model.evaluate(testing_examples.values, testing_targets.values, verbose=0)
print(f"Accuracy on test data: {accuracy:.2f}")
| |
|
| | |
print("\nVisualizing weights of the first hidden layer...")

# get_weights() returns [kernel, bias]; index 0 is the kernel matrix.
# layers[0] is expected to be the first Dense layer, so the kernel should be
# (784, units) - the printed shape below confirms it.
weights0 = trained_model.layers[0].get_weights()[0]
print('Weights 0 shape:', weights0.shape)

# Lay the hidden units out on a grid that is 10 panels wide.
num_nodes = weights0.shape[1]
num_rows = int(math.ceil(num_nodes / 10.0))
fig, axes = plt.subplots(num_rows, 10, figsize=(20, 2 * num_rows))
axes_flat = axes.ravel()

# Each column of the kernel holds one unit's incoming weights; reshaping it
# to 28x28 shows which pixels that unit responds to.
for coef, ax in zip(weights0.T, axes_flat):
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.pink)
    ax.set_xticks(())
    ax.set_yticks(())

# Switch off any leftover panels when the unit count is not a multiple
# of 10, so the grid does not end in empty framed axes.
for unused_ax in axes_flat[num_nodes:]:
    unused_ax.axis("off")

plt.suptitle("First Hidden Layer Weights", fontsize=20)
plt.show()
| |
|