Multiple Linear Regression
March 29, 2019
\[y = \theta_0 + \theta_1x_1 + \theta_2x_2 + \ldots + \theta_nx_n\]
\[\hat{y}^{(i)} = \theta_0 + \theta_1X_1^{(i)} + \theta_2X_2^{(i)} + \ldots + \theta_nX_n^{(i)}\]
Goal: find \(\theta_0, \theta_1, \theta_2, \ldots, \theta_n\) that make
\[\sum_{i=1}^m\left(y^{(i)} - \hat{y}^{(i)}\right)^2\]
as small as possible.
Rewriting the expression in vector form
\[\hat{y}^{(i)} = \theta_0 + \theta_1X_1^{(i)} + \theta_2X_2^{(i)} + \ldots + \theta_nX_n^{(i)}\]
\[\theta = (\theta_0, \theta_1, \theta_2, \ldots, \theta_n)^T\]
\[\hat{y}^{(i)} = \theta_0X_0^{(i)} + \theta_1X_1^{(i)} + \theta_2X_2^{(i)} + \ldots + \theta_nX_n^{(i)}, \quad X_0^{(i)} \equiv 1\]
\[X^{(i)} = (X_0^{(i)}, X_1^{(i)}, X_2^{(i)}, \ldots, X_n^{(i)})\]
\[\hat{y}^{(i)} = X^{(i)} \cdot \theta\]
\[\mathbf{X}_b = \left(\begin{array}{ccccc} 1 & X_1^{(1)} & X_2^{(1)} & \ldots & X_n^{(1)}\\ 1 & X_1^{(2)} & X_2^{(2)} & \ldots & X_n^{(2)}\\ \ldots & & & & \ldots \\ 1 & X_1^{(m)} & X_2^{(m)} & \ldots & X_n^{(m)} \end{array}\right)\]
\[\theta = \left(\begin{array}{c} \theta_0\\ \theta_1\\ \theta_2\\ \ldots\\ \theta_n \end{array}\right)\]
The prediction vector then simplifies to
\[\hat{y} = \mathbf{X}_b \cdot \theta\]
and the objective becomes: make \((y - \mathbf{X}_b\cdot\theta)^T(y - \mathbf{X}_b\cdot\theta)\) as small as possible.
From this one can derive \(\theta = (X_b^TX_b)^{-1}X_b^Ty\), the Normal Equation solution for multiple linear regression.
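The derivation is short: set the gradient of the objective with respect to \(\theta\) to zero (assuming \(X_b^TX_b\) is invertible):
\[\nabla_\theta\,(y - X_b\theta)^T(y - X_b\theta) = -2X_b^T(y - X_b\theta) = 0 \;\Rightarrow\; X_b^TX_b\,\theta = X_b^Ty \;\Rightarrow\; \theta = (X_b^TX_b)^{-1}X_b^Ty\]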
Problem: the time complexity is high, \(O(n^3)\) (around \(O(n^{2.4})\) with optimized algorithms).
Advantage: the data does not need to be normalized first.
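As a minimal standalone sketch (the toy data here is invented for illustration, not from the example below), the normal equation can be evaluated directly with NumPy; np.linalg.solve is numerically preferable to forming the inverse explicitly:

import numpy as np

np.random.seed(666)
m, n = 100, 3                                  # toy data: m samples, n features
X = np.random.random((m, n))
true_theta = np.array([3.0, 2.0, -1.0, 0.5])   # intercept plus 3 coefficients
y = true_theta[0] + X.dot(true_theta[1:]) + 0.01 * np.random.randn(m)

X_b = np.hstack([np.ones((m, 1)), X])          # prepend the column of ones
# Solve (X_b^T X_b) theta = X_b^T y rather than inverting X_b^T X_b
theta = np.linalg.solve(X_b.T.dot(X_b), X_b.T.dot(y))
print(theta)                                   # close to true_theta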
Implementing the multiple linear regression model
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

boston = datasets.load_boston()
X = boston.data
y = boston.target
# Drop samples whose target is capped at 50 (censored values)
X = X[y < 50]
y = y[y < 50]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
class LinearRegression:

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None
        self._theta = None

    def fit_normal(self, X_train, y_train):
        """Fit the model by solving the Normal Equation."""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to y_train"
        # Prepend a column of ones so theta_0 acts as the intercept
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)
        self.coef_ = self._theta[1:]
        self.intercept_ = self._theta[0]
        return self
    def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4):
        """Fit the model using batch gradient descent."""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to y_train"

        def J(theta, x_b, y):
            # Mean squared error; fall back to inf on numeric trouble
            try:
                return np.sum((y - x_b.dot(theta)) ** 2) / len(x_b)
            except Exception:
                return float('inf')

        def dJ(theta, x_b, y):
            # Vectorized gradient; equivalent to the component-wise loop:
            # res = np.empty(len(theta))
            # res[0] = np.sum(x_b.dot(theta) - y)
            # for i in range(1, len(theta)):
            #     res[i] = (x_b.dot(theta) - y).dot(x_b[:, i])
            # return res * 2 / len(x_b)
            return x_b.T.dot(x_b.dot(theta) - y) * 2. / len(y)

        def gradient_descent(x_b, y, initial_theta, eta, n_iters=10000, epsilon=1e-8):
            theta = initial_theta
            i_iter = 0
            while i_iter < n_iters:
                gradient = dJ(theta, x_b, y)
                last_theta = theta
                theta = theta - eta * gradient
                i_iter += 1
                # Stop early once the loss no longer decreases noticeably
                if abs(J(theta, x_b, y) - J(last_theta, x_b, y)) < epsilon:
                    break
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
        self.coef_ = self._theta[1:]
        self.intercept_ = self._theta[0]
        return self
    def fit_sgd(self, X_train, y_train, n_iters=5, t0=5, t1=50):
        """Fit the model using stochastic gradient descent.

        n_iters is the number of full passes (epochs) over the training set.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to y_train"
        assert n_iters >= 1, "n_iters must be >= 1"

        def dJ_sgd(theta, x_b_i, y_i):
            # Gradient estimated from a single sample
            return x_b_i * (x_b_i.dot(theta) - y_i) * 2.

        def sgd(X_b, y, initial_theta, n_iters, t0=5, t1=50):
            def learning_rate(t):
                # Decaying step size (simulated-annealing style)
                return t0 / (t + t1)

            theta = initial_theta
            m = len(X_b)
            for cur_iter in range(n_iters):
                # Shuffle the samples once per epoch
                indexes = np.random.permutation(m)
                X_b_new = X_b[indexes]
                y_new = y[indexes]
                for i in range(m):
                    gradient = dJ_sgd(theta, X_b_new[i], y_new[i])
                    theta = theta - learning_rate(cur_iter * m + i) * gradient
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = sgd(X_b, y_train, initial_theta, n_iters, t0, t1)
        self.coef_ = self._theta[1:]
        self.intercept_ = self._theta[0]
        return self
    def predict(self, X_predict):
        assert self.coef_ is not None and self.intercept_ is not None, \
            "must fit before predict"
        assert X_predict.shape[1] == len(self.coef_), \
            "the number of features of X_predict must be equal to X_train"
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Return the R^2 score of the model on the test set."""
        y_predict = self.predict(X_test)
        return self._r2_score(y_test, y_predict)

    def _r2_score(self, y_true, y_predict):
        assert len(y_true) == len(y_predict), \
            "the size of y_true must be equal to the size of y_predict"
        mean_squared_error = np.sum((y_true - y_predict) ** 2) / len(y_true)
        return 1 - mean_squared_error / np.var(y_true)

    def __repr__(self):
        return "LinearRegression()"
reg = LinearRegression()
reg.fit_normal(X_train, y_train)
LinearRegression()
reg.coef_
array([-1.20354261e-01, 3.64423279e-02, -3.61493155e-02, 5.12978140e-02, -1.15775825e+01, 3.42740062e+00, -2.32311760e-02, -1.19487594e+00, 2.60101728e-01, -1.40219119e-02, -8.35430488e-01, 7.80472852e-03, -3.80923751e-01])
reg.intercept_
34.11739972322438
reg.score(X_test, y_test)
0.8129794056212711
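The run above uses the normal equation. Gradient descent, by contrast, is sensitive to feature scale (the advantage noted earlier cuts the other way), so a sketch of using fit_gd and fit_sgd would standardize the features first; the exact scores depend on the split and hyperparameters:

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

reg_gd = LinearRegression()
reg_gd.fit_gd(X_train_std, y_train)
reg_gd.score(X_test_std, y_test)

reg_sgd = LinearRegression()
reg_sgd.fit_sgd(X_train_std, y_train, n_iters=50)
reg_sgd.score(X_test_std, y_test)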