111-TensorFlow Gradient Descent Training Linear Regression

jupyter-notebook command

$ nohup jupyter-notebook &

TensorFlow-BasicTrainingLoop

Solving a machine learning problem

    1. Obtain training data
    2. Define the model
    3. Define a loss function
    4. Run through the training data, calculating loss from the ideal value
    5. Calculate the gradients for that loss and use an optimizer to adjust the variables to fit the data
    6. Evaluate your results
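
Taken together, these steps boil down to a short loop around tf.GradientTape. The snippet below is only a minimal sketch of that shape; model, loss, x, y, and learning_rate are placeholders for the objects the later sections actually define.

# minimal sketch of one gradient-descent training step in TensorFlow
with tf.GradientTape() as tape:
    predicted_y = model(x)                # forward pass
    current_loss = loss(y, predicted_y)   # compare predictions with the ideal values
gradients = tape.gradient(current_loss, model.variables)
for var, grad in zip(model.variables, gradients):
    var.assign_sub(learning_rate * grad)  # step each variable against its gradient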

Gradient Descent

  • Batch Gradient Descent
  • Stochastic Gradient Descent
  • Mini-batch Gradient Descent
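
The three variants above differ only in how many samples feed each gradient estimate: the full training set (batch), a random subset of size B (mini-batch), or a single sample (stochastic). All of them apply the same update to the parameters; with learning rate η and MSE loss L, as a sketch:

$$w \leftarrow w - \eta\,\frac{\partial L}{\partial w}, \qquad b \leftarrow b - \eta\,\frac{\partial L}{\partial b}$$

This is exactly what the assign_sub calls in the train function later implement.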

Linear Regression Example

1. Generate Training Data

import tensorflow as tf
import matplotlib.pyplot as plt
import random

# generate training data
TRUE_W = 3.1
TRUE_B = 2.0
LOSS_RATIO = 0.8
NUM_EXAMPLES = 1000

def generate_samples():
    x = tf.random.normal(shape=[NUM_EXAMPLES])
    noise = tf.random.normal(shape=[NUM_EXAMPLES])

    y = x * TRUE_W + TRUE_B + LOSS_RATIO * noise
    # print("type(x)=", type(x), "numpy=", type(x.numpy()))
    return x, y

def shuffle_samples(x, y):
    data = list(zip(x, y))
    # print("before shuffle: x={}".format([(a.numpy(), b.numpy()) for (a, b) in data[:5]]))

    random.shuffle(data)
    xx, yy = zip(*data)
    # print("after shuffle: x={}".format([(xx[i].numpy(), yy[i].numpy()) for i in range(5)]))
    return tf.convert_to_tensor(xx), tf.convert_to_tensor(yy)

def random_select_samples(x, y, batch=50):
    data = list(zip(x, y))
    # print("before shuffle: x={}".format([(a.numpy(), b.numpy()) for (a, b) in data[:5]]))

    random.shuffle(data)
    data2 = data[:batch]
    xx, yy = zip(*data2)
    # print("after shuffle: x={}".format([(xx[i].numpy(), yy[i].numpy()) for i in range(5)]))
    return tf.convert_to_tensor(xx), tf.convert_to_tensor(yy)


sx, sy = generate_samples()
plt.scatter(sx, sy, c="r")
plt.show()

print("\n------------------------after shuffle-----------------------------------\n")

rx, ry = random_select_samples(sx, sy, 10)
plt.scatter(rx, ry, c="r")
plt.show()

GenerateSamples

------------------------after shuffle-----------------------------------

RandomSelectSamples
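
The helpers above shuffle by zipping tensors into Python lists and calling random.shuffle. An alternative (not used in the rest of this post) is to let tf.data do the shuffling and batching; a sketch against the same sx, sy:

# sketch: shuffle and batch the generated samples with tf.data instead of the helpers above
dataset = tf.data.Dataset.from_tensor_slices((sx, sy))
for x_batch, y_batch in dataset.shuffle(NUM_EXAMPLES).batch(10).take(1):
    plt.scatter(x_batch, y_batch, c="r")
    plt.show()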

2. Define A Model

# define a model
class LinearModel(tf.Module):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.w = tf.Variable(1.0)
        self.b = tf.Variable(0.0)

    def __call__(self, x):
        return self.w * x + self.b

model = LinearModel()
print("LinearModel variables=", model.variables, "--->>>>>model(5.0)=", model(5.0).numpy())
LinearModel variables= (<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>, <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.0>) --->>>>>model(5.0)= 5.0
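
For comparison only (the rest of the post keeps this raw tf.Module version), the same straight-line model can be written with a Keras Dense layer; a sketch, keeping in mind that Dense initializes its weight randomly (and its bias at zero) rather than at 1.0 and 0.0:

# sketch: an equivalent linear model built from a single Keras Dense layer
keras_model = tf.keras.Sequential([tf.keras.layers.Dense(units=1, input_shape=[1])])
print("Keras model prediction for 5.0:", keras_model(tf.constant([[5.0]])).numpy())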

3. Define Loss Function

# define loss function
def loss(target_y, predicted_y):
    return tf.reduce_mean(tf.square(target_y - predicted_y))

plt.scatter(sx, sy, c="b")
plt.scatter(sx, model(sx), c="r")
plt.show()

print("current loss is: %1.6f" % loss(sy, model(sx)).numpy())

OriginalModel

current loss is: 8.540572
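
This is plain mean squared error, so it should agree with Keras' built-in loss; a small cross-check sketch, not part of the original notebook:

# sketch: the same value via Keras' built-in MSE
mse = tf.keras.losses.MeanSquaredError()
print("keras MSE: %1.6f" % mse(sy, model(sx)).numpy())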

4. Define Gradient Descent Procedure

  • Batch Gradient Descent
  • Mini-batch Gradient Descent
  • Stochastic Gradient Descent
import random
from datetime import datetime

# define training procedure
def train(model, sx, sy, learning_rate):
    with tf.GradientTape() as tape:
        current_loss = loss(sy, model(sx))

    dw, db = tape.gradient(current_loss, [model.w, model.b])
    # print("gradient loss=", current_loss.numpy(), ", dw=", dw.numpy(), ", db=", db.numpy())
    model.w.assign_sub(dw * learning_rate)
    model.b.assign_sub(db * learning_rate)

def batch_training_loop(model, sx, sy):
    for epoch in epochs:
        x_samples, y_samples = shuffle_samples(sx, sy)
        train(model, x_samples, y_samples, learning_rate=0.1)
        ws.append(model.w.numpy())
        bs.append(model.b.numpy())
        current_loss = loss(sy, model(sx))
        los.append(current_loss)
        print("batch>>>Epoch-%d: w=%1.2f, b=%1.2f, loss=%2.5f" % (epoch, ws[-1], bs[-1], current_loss))

def mini_batch_training_loop(model, x, y, batch=50):
    for epoch in epochs:
        total_batch = int(NUM_EXAMPLES / batch)
        for batch_no in range(total_batch):
            x_samples, y_samples = random_select_samples(x, y, batch)
            train(model, x_samples, y_samples, learning_rate=0.1)
            ws.append(model.w.numpy())
            bs.append(model.b.numpy())
            current_loss = loss(y, model(x))
            los.append(current_loss)
            print("mini_batch[%s]>>>Epoch-%d-%d: w=%1.2f, b=%1.2f, loss=%2.5f" % (str(datetime.today()), epoch, batch_no, ws[-1], bs[-1], current_loss))

def stochastic_training_loop(model, x, y):
    for epoch in epochs:
        x, y = shuffle_samples(x, y)
        for i in range(999):
            x_sample = tf.convert_to_tensor(x.numpy()[i:i+1])
            y_sample = tf.convert_to_tensor(y.numpy()[i:i+1])
            train(model, x_sample, y_sample, learning_rate=0.1)

            if i % 300 == 0:
                ws.append(model.w.numpy())
                bs.append(model.b.numpy())
                # current_loss = loss(y_sample, model(x_sample))
                current_loss = loss(y, model(x))
                los.append(current_loss)
                print("stochastic[%s]>>>Epoch-%d-%d: w=%1.2f, b=%1.2f, loss=%2.5f" % (str(datetime.today()), epoch, i, ws[-1], bs[-1], current_loss))

5. Batch Gradient Descent Training

# 1. Batch Gradient Descent Training
ws, bs, los = [], [], []  # history of w & b
epochs = range(20)

# note: this prints the current model's state before a fresh model and data are created below
print("Starting Training: w=%1.2f, b=%1.2f, loss=%2.5f" % (model.w.numpy(), model.b.numpy(), loss(sy, model(sx))))

model = LinearModel()
sx, sy = generate_samples()
batch_training_loop(model, sx, sy)

plt.plot(range(len(ws)), ws, "r", range(len(bs)), bs, "b", range(len(los)), los)
plt.plot([TRUE_W] * len(ws), "r--", [TRUE_B] * len(bs), "b--", [LOSS_RATIO] * len(los), "g--")

plt.legend(["W", "b", "loss", "True W", "True b", "Loss Ratio"])
plt.show()
Starting Training: w=3.08, b=1.92, loss=0.59176
batch>>>Epoch-0: w=1.41, b=0.39, loss=5.98690
batch>>>Epoch-1: w=1.75, b=0.70, loss=4.10916
batch>>>Epoch-2: w=2.01, b=0.95, loss=2.89469
batch>>>Epoch-3: w=2.23, b=1.15, loss=2.10920
batch>>>Epoch-4: w=2.40, b=1.32, loss=1.60116
batch>>>Epoch-5: w=2.54, b=1.45, loss=1.27257
batch>>>Epoch-6: w=2.65, b=1.55, loss=1.06005
batch>>>Epoch-7: w=2.74, b=1.64, loss=0.92259
batch>>>Epoch-8: w=2.81, b=1.70, loss=0.83369
batch>>>Epoch-9: w=2.87, b=1.76, loss=0.77619
batch>>>Epoch-10: w=2.92, b=1.80, loss=0.73900
batch>>>Epoch-11: w=2.96, b=1.84, loss=0.71495
batch>>>Epoch-12: w=2.99, b=1.87, loss=0.69939
batch>>>Epoch-13: w=3.01, b=1.89, loss=0.68933
batch>>>Epoch-14: w=3.03, b=1.91, loss=0.68282
batch>>>Epoch-15: w=3.05, b=1.92, loss=0.67861
batch>>>Epoch-16: w=3.06, b=1.93, loss=0.67589
batch>>>Epoch-17: w=3.07, b=1.94, loss=0.67413
batch>>>Epoch-18: w=3.08, b=1.95, loss=0.67299
batch>>>Epoch-19: w=3.09, b=1.96, loss=0.67225

BatchGradientDescent
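
A quick sanity check on where this curve bottoms out (my arithmetic, not output from the notebook): each target carries irreducible noise LOSS_RATIO * noise with noise drawn from N(0, 1), so even the true line w=3.1, b=2.0 leaves a mean squared error of roughly

$$\mathbb{E}\big[(0.8\,\varepsilon)^2\big] = 0.8^2 = 0.64,$$

which is why the loss flattens out near 0.67 instead of approaching zero.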

6. Mini-batch Gradient Descent Training

# 2. Mini-batch Gradient Descent Training
ws, bs, los = [], [], []  # history of w & b
epochs = range(2)

# note: this prints the current model's state before a fresh model and data are created below
print("Starting Training: w=%1.2f, b=%1.2f, loss=%2.5f" % (model.w.numpy(), model.b.numpy(), loss(sy, model(sx))))

model = LinearModel()
sx, sy = generate_samples()
mini_batch_training_loop(model, sx, sy, 50)

plt.plot(range(len(ws)), ws, "r", range(len(bs)), bs, "b", range(len(los)), los)
plt.plot([TRUE_W] * len(ws), "r--", [TRUE_B] * len(bs), "b--", [LOSS_RATIO] * len(los), "g--")

plt.legend(["W", "b", "loss", "True W", "True b", "Loss Ratio"])
plt.show()
Starting Training: w=3.15, b=2.01, loss=0.63184
mini_batch[2020-10-19 10:53:20.027708]>>>Epoch-0-0: w=1.37, b=0.34, loss=5.89021
mini_batch[2020-10-19 10:53:20.130732]>>>Epoch-0-1: w=1.70, b=0.63, loss=4.09933
mini_batch[2020-10-19 10:53:20.232158]>>>Epoch-0-2: w=1.92, b=0.86, loss=3.04879
mini_batch[2020-10-19 10:53:20.333324]>>>Epoch-0-3: w=2.10, b=1.03, loss=2.33524
mini_batch[2020-10-19 10:53:20.433431]>>>Epoch-0-4: w=2.28, b=1.16, loss=1.81701
mini_batch[2020-10-19 10:53:20.533221]>>>Epoch-0-5: w=2.54, b=1.37, loss=1.21532
mini_batch[2020-10-19 10:53:20.636450]>>>Epoch-0-6: w=2.69, b=1.49, loss=0.95628
mini_batch[2020-10-19 10:53:20.737715]>>>Epoch-0-7: w=2.76, b=1.58, loss=0.83697
mini_batch[2020-10-19 10:53:20.837584]>>>Epoch-0-8: w=2.80, b=1.65, loss=0.76705
mini_batch[2020-10-19 10:53:20.941710]>>>Epoch-0-9: w=2.87, b=1.69, loss=0.71031
mini_batch[2020-10-19 10:53:21.041694]>>>Epoch-0-10: w=2.92, b=1.76, loss=0.65902
mini_batch[2020-10-19 10:53:21.144872]>>>Epoch-0-11: w=2.95, b=1.76, loss=0.64952
mini_batch[2020-10-19 10:53:21.246754]>>>Epoch-0-12: w=2.95, b=1.81, loss=0.63193
mini_batch[2020-10-19 10:53:21.353544]>>>Epoch-0-13: w=3.04, b=1.79, loss=0.62284
mini_batch[2020-10-19 10:53:21.457569]>>>Epoch-0-14: w=3.05, b=1.84, loss=0.60707
mini_batch[2020-10-19 10:53:21.558734]>>>Epoch-0-15: w=3.05, b=1.86, loss=0.60416
mini_batch[2020-10-19 10:53:21.661324]>>>Epoch-0-16: w=3.06, b=1.88, loss=0.59755
mini_batch[2020-10-19 10:53:21.761637]>>>Epoch-0-17: w=3.10, b=1.93, loss=0.59159
mini_batch[2020-10-19 10:53:21.864016]>>>Epoch-0-18: w=3.11, b=1.95, loss=0.59041
mini_batch[2020-10-19 10:53:21.966481]>>>Epoch-0-19: w=3.09, b=1.95, loss=0.58959
mini_batch[2020-10-19 10:53:22.073395]>>>Epoch-1-0: w=3.14, b=1.92, loss=0.59537
mini_batch[2020-10-19 10:53:22.178235]>>>Epoch-1-1: w=3.14, b=1.93, loss=0.59440
mini_batch[2020-10-19 10:53:22.280586]>>>Epoch-1-2: w=3.12, b=1.95, loss=0.59117
mini_batch[2020-10-19 10:53:22.385605]>>>Epoch-1-3: w=3.17, b=1.95, loss=0.59736
mini_batch[2020-10-19 10:53:22.487125]>>>Epoch-1-4: w=3.15, b=1.94, loss=0.59528
mini_batch[2020-10-19 10:53:22.589749]>>>Epoch-1-5: w=3.16, b=1.95, loss=0.59559
mini_batch[2020-10-19 10:53:22.690940]>>>Epoch-1-6: w=3.16, b=1.96, loss=0.59544
mini_batch[2020-10-19 10:53:22.793476]>>>Epoch-1-7: w=3.18, b=1.96, loss=0.59917
mini_batch[2020-10-19 10:53:22.895063]>>>Epoch-1-8: w=3.17, b=1.94, loss=0.59721
mini_batch[2020-10-19 10:53:22.997368]>>>Epoch-1-9: w=3.14, b=1.94, loss=0.59431
mini_batch[2020-10-19 10:53:23.096649]>>>Epoch-1-10: w=3.14, b=1.94, loss=0.59425
mini_batch[2020-10-19 10:53:23.197975]>>>Epoch-1-11: w=3.14, b=1.92, loss=0.59560
mini_batch[2020-10-19 10:53:23.300306]>>>Epoch-1-12: w=3.11, b=1.90, loss=0.59585
mini_batch[2020-10-19 10:53:23.399488]>>>Epoch-1-13: w=3.08, b=1.89, loss=0.59648
mini_batch[2020-10-19 10:53:23.501171]>>>Epoch-1-14: w=3.08, b=1.91, loss=0.59391
mini_batch[2020-10-19 10:53:23.604648]>>>Epoch-1-15: w=3.08, b=1.91, loss=0.59288
mini_batch[2020-10-19 10:53:23.706784]>>>Epoch-1-16: w=3.08, b=1.94, loss=0.59042
mini_batch[2020-10-19 10:53:23.806523]>>>Epoch-1-17: w=3.07, b=1.93, loss=0.59156
mini_batch[2020-10-19 10:53:23.907185]>>>Epoch-1-18: w=3.06, b=1.95, loss=0.59035
mini_batch[2020-10-19 10:53:24.006649]>>>Epoch-1-19: w=3.08, b=1.92, loss=0.59176

Mini-batchGradientDescent
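
Note that random_select_samples draws an independent random batch for every step, so an "epoch" here is 20 random draws rather than one strict pass over the data. A pass that visits every sample exactly once could be sketched with plain tensor slices (an alternative, not what mini_batch_training_loop does):

# sketch: one epoch of mini-batches covering every sample exactly once
xx, yy = shuffle_samples(sx, sy)
for batch_no in range(NUM_EXAMPLES // 50):
    x_batch = xx[batch_no * 50:(batch_no + 1) * 50]
    y_batch = yy[batch_no * 50:(batch_no + 1) * 50]
    train(model, x_batch, y_batch, learning_rate=0.1)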

7. Stochastic Gradient Descent (SGD)

# 3. Stochastic Gradient Descent (SGD)
ws, bs, los = [], [], []  # history of w & b
epochs = range(5)

# note: this prints the current model's state before a fresh model and data are created below
print("Starting Training: w=%1.2f, b=%1.2f, loss=%2.5f" % (model.w.numpy(), model.b.numpy(), loss(sy, model(sx))))

model = LinearModel()
sx, sy = generate_samples()
stochastic_training_loop(model, sx, sy)

plt.plot(range(len(ws)), ws, "r", range(len(bs)), bs, "b", range(len(los)), los)
plt.plot([TRUE_W] * len(ws), "r--", [TRUE_B] * len(bs), "b--", [LOSS_RATIO] * len(los), "g--")

plt.legend(["W", "b", "loss", "True W", "True b", "Loss Ratio"])
plt.show()
Starting Training: w=3.11, b=1.92, loss=0.64565
stochastic[2020-10-19 10:56:06.594091]>>>Epoch-0-0: w=2.13, b=1.07, loss=2.41372
stochastic[2020-10-19 10:56:06.816926]>>>Epoch-0-300: w=3.05, b=1.71, loss=0.77622
stochastic[2020-10-19 10:56:07.034266]>>>Epoch-0-600: w=2.82, b=2.09, loss=0.77356
stochastic[2020-10-19 10:56:07.252596]>>>Epoch-0-900: w=3.27, b=1.87, loss=0.73822
stochastic[2020-10-19 10:56:07.323392]>>>Epoch-1-0: w=3.60, b=2.10, loss=0.93773
stochastic[2020-10-19 10:56:07.539220]>>>Epoch-1-300: w=3.05, b=1.71, loss=0.77622
stochastic[2020-10-19 10:56:07.752638]>>>Epoch-1-600: w=2.82, b=2.09, loss=0.77356
stochastic[2020-10-19 10:56:07.965720]>>>Epoch-1-900: w=3.27, b=1.87, loss=0.73822
stochastic[2020-10-19 10:56:08.038803]>>>Epoch-2-0: w=3.60, b=2.10, loss=0.93773
stochastic[2020-10-19 10:56:08.255674]>>>Epoch-2-300: w=3.05, b=1.71, loss=0.77622
stochastic[2020-10-19 10:56:08.472008]>>>Epoch-2-600: w=2.82, b=2.09, loss=0.77356
stochastic[2020-10-19 10:56:08.688382]>>>Epoch-2-900: w=3.27, b=1.87, loss=0.73822
stochastic[2020-10-19 10:56:08.759993]>>>Epoch-3-0: w=3.60, b=2.10, loss=0.93773
stochastic[2020-10-19 10:56:08.977329]>>>Epoch-3-300: w=3.05, b=1.71, loss=0.77622
stochastic[2020-10-19 10:56:09.193560]>>>Epoch-3-600: w=2.82, b=2.09, loss=0.77356
stochastic[2020-10-19 10:56:09.410611]>>>Epoch-3-900: w=3.27, b=1.87, loss=0.73822
stochastic[2020-10-19 10:56:09.481097]>>>Epoch-4-0: w=3.60, b=2.10, loss=0.93773
stochastic[2020-10-19 10:56:09.700040]>>>Epoch-4-300: w=3.05, b=1.71, loss=0.77622
stochastic[2020-10-19 10:56:09.913498]>>>Epoch-4-600: w=2.82, b=2.09, loss=0.77356
stochastic[2020-10-19 10:56:10.127604]>>>Epoch-4-900: w=3.27, b=1.87, loss=0.73822

StochasticGradientDescent
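
The per-sample updates at learning_rate=0.1 are noisy enough that w and b keep bouncing around the optimum (w swings between roughly 2.8 and 3.6 across the epochs above). One common remedy, sketched here with a hypothetical decay schedule rather than anything from the original notebook, is to shrink the step size over time:

# sketch: SGD with a decaying learning rate to damp the per-sample noise
for epoch in epochs:
    lr = 0.1 / (1 + epoch)  # hypothetical schedule
    x, y = shuffle_samples(sx, sy)
    for i in range(NUM_EXAMPLES):
        train(model, x[i:i+1], y[i:i+1], learning_rate=lr)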

8. Model Validation

# validate on freshly generated data
test_x, test_y = generate_samples()
plt.scatter(test_x, test_y, c='b')
plt.scatter(test_x, model(test_x), c='r')
plt.show()
# compare predictions against the targets test_y; comparing against test_x by mistake
# inflates the result to around 9.7 (as in the output below)
print("current loss is : %1.2f" % (loss(test_y, model(test_x)).numpy()))

ModelValidation

current loss is : 9.70
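
As one last check (a small sketch, not part of the original notebook), the fitted parameters can be compared directly against the constants that generated the data:

# compare the fitted parameters with the generating constants
print("w=%1.2f (true %1.2f), b=%1.2f (true %1.2f)" % (model.w.numpy(), TRUE_W, model.b.numpy(), TRUE_B))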