Use Gradient Descent In Linear Regression

April 6, 2019

在线性回归模型中使用梯度下降法

import numpy as np
import matplotlib.pyplot as plt

# Fix the seed so the synthetic data set is reproducible.
np.random.seed(666)
# 100 feature values drawn uniformly from [0, 2).
x = 2 * np.random.random(size=100)
# Targets follow y = 3x + 4 plus standard-normal noise.
y = 4. + 3. * x + np.random.normal(size=100)

# Column-vector view of x: one row per sample, a single feature column.
X = x.reshape(-1, 1)

X.shape

(100, 1)

y.shape

(100,)

# Scatter plot of the samples: x on the horizontal axis, y on the vertical axis.
plt.scatter(x, y)
plt.show()

（图：样本点 (x, y) 的散点图）

使用梯度下降法训练

目标：使 \(\frac{1}{m}\sum_{i=1}^m (y^{(i)} - \hat{y}^{(i)})^2\) 尽可能小，其中 \(\hat{y}^{(i)} = X_b^{(i)}\theta\)

\[J(\theta) = MSE(y, \hat{y})\]

\[\nabla J(\theta) = \frac{2}{m} \cdot \begin{pmatrix} \sum_{i=1}^m (X_b^{(i)}\theta - y^{(i)}) \\ \sum_{i=1}^m (X_b^{(i)}\theta - y^{(i)}) \cdot X^{(i)}_1 \\ \sum_{i=1}^m (X_b^{(i)}\theta - y^{(i)}) \cdot X^{(i)}_2 \\ \ldots \\ \sum_{i=1}^m (X_b^{(i)}\theta - y^{(i)}) \cdot X^{(i)}_n \end{pmatrix}\]

def J(theta, x_b, y):
    """Return the mean squared error of the linear model ``x_b.dot(theta)``.

    Args:
        theta: Parameter vector, multiplied with ``x_b`` via ``x_b.dot``.
        x_b: Design matrix; ``len(x_b)`` is used as the sample count.
        y: Target values, subtracted element-wise from the predictions.

    Returns:
        The sum of squared residuals divided by ``len(x_b)``, or
        ``float('inf')`` if the computation raises an ordinary exception.
    """
    try:
        return np.sum((y - x_b.dot(theta)) ** 2) / len(x_b)
    except Exception:
        # A failed evaluation counts as an infinitely bad cost, but
        # KeyboardInterrupt / SystemExit must still propagate to the caller.
        return float('inf')

def dJ(theta, x_b, y):
    """Return the gradient of the mean squared error with respect to theta.

    Args:
        theta: Current parameter vector; ``len(theta)`` fixes the length of
            the returned gradient.
        x_b: Design matrix. Column 0 is treated as the intercept column: its
            gradient component is the plain sum of residuals, so it is
            expected to hold ones.
        y: Target values.

    Returns:
        A float array of length ``len(theta)`` holding
        ``2 / len(x_b)`` times the per-parameter residual sums.
    """
    # The residual is the same for every component, so compute it only once.
    residual = x_b.dot(theta) - y

    grad = np.empty(len(theta))
    # Intercept component: sum of residuals (no multiplication by column 0).
    grad[0] = np.sum(residual)
    # All remaining components in one vector-matrix product.
    grad[1:] = residual.dot(x_b[:, 1:])
    return grad * 2 / len(x_b)

def gradient_descent(x_b, y, initial_theta, eta, n_iters=10000, epsilon=1e-8):
    """Minimise the cost ``J`` by batch gradient descent.

    Args:
        x_b: Design matrix passed through to ``J`` and ``dJ``.
        y: Target values passed through to ``J`` and ``dJ``.
        initial_theta: Starting parameter vector; it is not modified in
            place because every step builds a new array.
        eta: Learning rate multiplying the gradient in each step.
        n_iters: Maximum number of update steps.
        epsilon: Stop once the absolute change of the cost between two
            consecutive steps is smaller than this value.

    Returns:
        The parameter vector after the last performed step.
    """
    theta = initial_theta
    # Cache the cost of the current theta so each iteration evaluates J once
    # instead of re-evaluating the previous point as well.
    previous_cost = J(theta, x_b, y)

    i_iter = 0
    # A counter-based loop (rather than range) keeps float n_iters working.
    while i_iter < n_iters:
        theta = theta - eta * dJ(theta, x_b, y)
        i_iter += 1

        current_cost = J(theta, x_b, y)
        if abs(current_cost - previous_cost) < epsilon:
            break
        previous_cost = current_cost

    return theta

# Prepend a column of ones to X so theta[0] acts as the intercept.
x_b = np.hstack((np.ones((X.shape[0], 1)), X))
# Start the search from the all-zero parameter vector.
initial_theta = np.zeros(x_b.shape[1])
eta = 0.01

theta = gradient_descent(x_b, y, initial_theta, eta=eta)

theta

array([4.02145786, 3.00706277])

# Execute the external LinearRegression.py script in this session (IPython
# magic); the LinearRegression name used below is presumably defined there.
%run ../LinearRegression/LinearRegression.py

# Fit on the same X and y through fit_gd.
# NOTE(review): presumably gradient-descent training - confirm in LinearRegression.py.
lr = LinearRegression()
lr.fit_gd(X, y)

LinearRegression()

lr.interception_

4.021457858204859

lr.coef_

array([3.00706277])