Standardization with Simple Python

We will:

  1. Generate data whose features have very different ranges.
  2. Train a small network on the raw data and observe the loss.
  3. Apply standardization.
  4. Train again on the standardized data and compare.

Step-by-step Implementation (No external libraries)

# 1. Sample raw dataset: X1 (0-1), X2 (100-200), Y as label
X = [
    [0.1, 120],
    [0.4, 150],
    [0.2, 130],
    [0.9, 200],
    [0.5, 160]
]

Y = [1, 0, 1, 0, 0]
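
# Quick sanity check (illustrative addition, not one of the original steps):
# print each column's range to see how lopsided the raw features are.
# X1 spans roughly 0.1-0.9 while X2 spans 120-200.
for col in range(2):
    values = [row[col] for row in X]
    print(f"Feature {col}: min={min(values)}, max={max(values)}")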

# 2. Calculate mean and std for standardization
def mean_std(data, col):
    values = [row[col] for row in data]
    mean = sum(values) / len(values)
    std = (sum([(x - mean) ** 2 for x in values]) / len(values)) ** 0.5
    return mean, std
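
# Illustrative check (added here, not part of the original walkthrough):
# mean_std divides by len(values), i.e. it computes the population standard
# deviation. For the second feature this gives mean 152 and std ~27.9.
m2, s2 = mean_std(X, 1)
print(f"Feature 2: mean={m2:.1f}, std={s2:.1f}")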

# 3. Apply standardization
def standardize(X):
    means_stds = [mean_std(X, col) for col in range(len(X[0]))]
    std_X = []
    for row in X:
        new_row = []
        for i, val in enumerate(row):
            mean, std = means_stds[i]
            new_val = (val - mean) / std
            new_row.append(new_val)
        std_X.append(new_row)
    return std_X
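
# Sanity check (illustrative addition): after standardization every column
# should have mean ~0 and standard deviation ~1.
X_demo = standardize(X)
for col in range(2):
    m, s = mean_std(X_demo, col)
    print(f"Standardized feature {col}: mean={m:.4f}, std={s:.4f}")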

# 4. Mini Neural Network with 1 hidden layer
import math

def sigmoid(x):
    # Logistic activation; math.exp(-x) replaces the hard-coded 2.71828**(-x)
    return 1 / (1 + math.exp(-x))

def train_nn(X, Y, epochs=1000, lr=0.1):
    # Architecture: 2 inputs → 1 hidden neuron → 1 output
    w1 = [0.5, -0.3]   # weights for input to hidden
    w2 = 0.7           # weight from hidden to output
    bias1 = 0.1
    bias2 = 0.2

    for epoch in range(epochs):
        total_loss = 0
        for i in range(len(X)):
            # Forward Pass
            z1 = X[i][0] * w1[0] + X[i][1] * w1[1] + bias1
            a1 = sigmoid(z1)
            z2 = a1 * w2 + bias2
            a2 = sigmoid(z2)
            pred = a2

            # Loss (Mean Squared Error)
            loss = (pred - Y[i]) ** 2
            total_loss += loss

            # Backpropagation (simplified)
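            # Chain rule, spelled out (explanatory comments added for clarity):
            #   loss = (pred - y)^2        -> d(loss)/d(pred)  = 2*(pred - y)
            #   pred = sigmoid(z2)         -> d(pred)/d(z2)    = pred*(1 - pred)
            #   z2   = a1*w2 + bias2       -> d(z2)/d(w2) = a1, d(z2)/d(a1) = w2
            #   a1   = sigmoid(z1)         -> d(a1)/d(z1)      = a1*(1 - a1)
            #   z1   = x1*w1[0] + x2*w1[1] + bias1 -> d(z1)/d(w1[k]) = x_k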
            d_pred = 2 * (pred - Y[i])
            d_z2 = d_pred * pred * (1 - pred)
            d_w2 = d_z2 * a1
            d_bias2 = d_z2

            d_a1 = d_z2 * w2
            d_z1 = d_a1 * a1 * (1 - a1)
            d_w1_0 = d_z1 * X[i][0]
            d_w1_1 = d_z1 * X[i][1]
            d_bias1 = d_z1

            # Update weights
            w1[0] -= lr * d_w1_0
            w1[1] -= lr * d_w1_1
            bias1 -= lr * d_bias1
            w2 -= lr * d_w2
            bias2 -= lr * d_bias2

        if epoch % 200 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss:.4f}")

    print("\nFinal Weights and Biases:")
    print("w1:", w1)
    print("w2:", w2)
    print("bias1:", bias1)
    print("bias2:", bias2)

# 5. Run with raw data
print("Training with RAW data:")
train_nn(X, Y)

# 6. Run with standardized data
print("\nTraining with STANDARDIZED data:")
X_std = standardize(X)
train_nn(X_std, Y)
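
# Optional (a sketch, not part of the original script): to inspect actual
# predictions rather than only the loss, a small helper can repeat the
# forward pass. This assumes train_nn is modified to end with
# "return w1, w2, bias1, bias2" so the learned parameters are available.
def predict(x, w1, w2, bias1, bias2):
    z1 = x[0] * w1[0] + x[1] * w1[1] + bias1
    a1 = sigmoid(z1)
    return sigmoid(a1 * w2 + bias2)

# Hypothetical usage:
# params = train_nn(X_std, Y)
# for x, y in zip(X_std, Y):
#     print(f"target={y}, prediction={predict(x, *params):.3f}")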

Neural Network with Standardization (Train Story Version)

#  STORY: Train Station Balance Problem
# We have data from two cities: one with small luggage, another with heavy luggage.
# Our goal is to predict whether the passengers arrive on time (label = 1) or late (label = 0).
# The raw data is unfair because luggage weights have a large range.
# Standardization helps "balance" both luggage and passenger count for learning.

# 1. Sample raw dataset: Passenger count (1-5), Luggage weight (120-200 kg)
X = [
    [5, 120],   # 5 passengers, 120 kg of luggage
    [2, 150],
    [4, 130],
    [1, 200],
    [3, 160]
]

Y = [1, 0, 1, 0, 0]  # 1 = on time, 0 = late

# 2. Calculate mean and std for each feature (column)
def mean_std(data, col):
    values = [row[col] for row in data]
    mean = sum(values) / len(values)
    std = (sum([(x - mean) ** 2 for x in values]) / len(values)) ** 0.5
    return mean, std

# 3. Apply Standardization: (x - mean) / std
def standardize(X):
    means_stds = [mean_std(X, col) for col in range(len(X[0]))]
    std_X = []
    for row in X:
        new_row = []
        for i, val in enumerate(row):
            mean, std = means_stds[i]
            new_val = (val - mean) / std
            new_row.append(new_val)
        std_X.append(new_row)
    return std_X

# 4. Sigmoid activation (used for both hidden & output layers)
import math

def sigmoid(x):
    # Logistic activation; math.exp(-x) replaces the hard-coded 2.71828**(-x)
    return 1 / (1 + math.exp(-x))

# 5. Simple 1-hidden-layer Neural Network (No library)
def train_nn(X, Y, epochs=1000, lr=0.1):
    # Inputs → Hidden (2 weights) → Output (1 weight)
    w1 = [0.5, -0.3]
    w2 = 0.7
    bias1 = 0.1
    bias2 = 0.2

    for epoch in range(epochs):
        total_loss = 0
        for i in range(len(X)):
            # Forward Pass
            z1 = X[i][0] * w1[0] + X[i][1] * w1[1] + bias1
            a1 = sigmoid(z1)
            z2 = a1 * w2 + bias2
            a2 = sigmoid(z2)
            pred = a2

            # Loss (Mean Squared Error)
            loss = (pred - Y[i]) ** 2
            total_loss += loss

            # Backpropagation (manually coded)
            d_pred = 2 * (pred - Y[i])
            d_z2 = d_pred * pred * (1 - pred)
            d_w2 = d_z2 * a1
            d_bias2 = d_z2

            d_a1 = d_z2 * w2
            d_z1 = d_a1 * a1 * (1 - a1)
            d_w1_0 = d_z1 * X[i][0]
            d_w1_1 = d_z1 * X[i][1]
            d_bias1 = d_z1

            # Weight Updates
            w1[0] -= lr * d_w1_0
            w1[1] -= lr * d_w1_1
            bias1 -= lr * d_bias1
            w2 -= lr * d_w2
            bias2 -= lr * d_bias2

        if epoch % 200 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss:.4f}")

    print("\nFinal Weights and Biases:")
    print("w1:", w1)
    print("w2:", w2)
    print("bias1:", bias1)
    print("bias2:", bias2)

# 6. Train with RAW data (unfair, one feature dominates)
print("Training with RAW data (no standardization):")
train_nn(X, Y)

# 7. Train with STANDARDIZED data (fair input!)
print("\n Training with STANDARDIZED data:")
X_std = standardize(X)
train_nn(X_std, Y)

What We’ll Observe

  • RAW data: the loss decreases slowly and often stalls. The second feature (values around 120-200) dwarfs the first (values around 1-5), so it dominates the weighted sum and the gradients and pushes the sigmoid into its flat, saturated region (see the sketch after this list).
  • STANDARDIZED data: training is smoother and faster because both features are rescaled to mean 0 and standard deviation 1, so neither one dominates the updates.
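
To make the imbalance concrete, here is a small illustrative sketch (reusing X, Y, sigmoid, and standardize from the script above; the helper name gradient_ratio is made up for this illustration). Because the gradient of each input weight is d_z1 * x_k, with raw inputs the gradient on w1[1] for the first sample is 120/5 = 24 times the gradient on w1[0], and the saturated sigmoid makes both tiny; with standardized inputs the two gradients come out comparable in size.

def gradient_ratio(data):
    # One forward/backward step for the first sample, using the same initial
    # parameters as train_nn (w1 = [0.5, -0.3], w2 = 0.7, bias1 = 0.1, bias2 = 0.2)
    x, y = data[0], Y[0]
    z1 = x[0] * 0.5 + x[1] * -0.3 + 0.1
    a1 = sigmoid(z1)
    pred = sigmoid(a1 * 0.7 + 0.2)
    d_z1 = 2 * (pred - y) * pred * (1 - pred) * 0.7 * a1 * (1 - a1)
    return abs(d_z1 * x[0]), abs(d_z1 * x[1])

print("Raw data gradients (|d_w1_0|, |d_w1_1|):", gradient_ratio(X))
print("Standardized gradients:", gradient_ratio(standardize(X)))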

Standardization in Neural Networks – Basic Math Concepts