手动Python实现逻辑回归LR

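逻辑回归（LR）的参数通过极大似然估计求解，即寻找使对数似然 $\ell(w,b)=\sum_i\left[y_i\log\sigma(w^\top x_i+b)+(1-y_i)\log\bigl(1-\sigma(w^\top x_i+b)\bigr)\right]$ 最大的 $w$ 和 $b$，其中 $\sigma(z)=1/(1+e^{-z})$。代码如下：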
# 导入相应的库
import numpy as np
import matplotlib.pyplot as plt

# Build a reproducible synthetic dataset for a binary classification problem:
# 5000 points per class, each class drawn from its own 2-D Gaussian. Both
# classes use the same covariance matrix and differ only in their mean.
np.random.seed(12)
num_observations = 5000
x1 = np.random.multivariate_normal(
    mean=[0, 0],
    cov=[[1, .75], [.75, 1]],
    size=num_observations,
)
x2 = np.random.multivariate_normal(
    mean=[1, 4],
    cov=[[1, .75], [.75, 1]],
    size=num_observations,
)
# Class-0 rows come first, followed by class-1 rows; features are float32.
X = np.concatenate([x1, x2], axis=0).astype(np.float32)
# Labels in the same row order: 0.0 for every x1 row, 1.0 for every x2 row.
y = np.repeat([0.0, 1.0], num_observations)
# NOTE(review): `a` is not referenced by any code visible in this section.
a = 0
def sigmoid(X):
    """Return the logistic function 1 / (1 + exp(-X)), element-wise.

    The exponential is only ever taken of a non-positive number, so inputs
    with a large magnitude cannot overflow ``np.exp`` (the direct formula
    overflows and emits a RuntimeWarning for large negative ``X``).

    Args:
        X: Scalar or NumPy array of real values.

    Returns:
        The sigmoid of ``X``, with values in [0, 1] and the same shape as
        ``X``.
    """
    z = np.exp(-np.abs(X))  # z lies in [0, 1], so it can never overflow
    # For X >= 0 this evaluates 1 / (1 + exp(-X)); for X < 0 it evaluates the
    # algebraically equal exp(X) / (1 + exp(X)).
    numerator = np.where(X >= 0, np.ones_like(z), z)
    return numerator / (1 + z)



def max_likelihood(X, y, w, b):
    """Return the log-likelihood of a logistic-regression model on (X, y).

    Despite its name, this function does not maximise anything: it evaluates
    the sum of ``log(sigmoid(z))`` over samples labelled 1 and of
    ``log(1 - sigmoid(z))`` over samples labelled 0, where ``z = X @ w + b``.
    Samples whose label is neither 0 nor 1 contribute nothing.

    The two log terms are computed with ``np.logaddexp`` using the identities
    ``log(sigmoid(z)) = -log(1 + exp(-z))`` and
    ``log(1 - sigmoid(z)) = -log(1 + exp(z))``, so the result stays finite
    even when the sigmoid would round to exactly 0 or 1.

    Args:
        X: Feature matrix with one sample per row.
        y: Labels, one per row of ``X``; 1 marks the positive class and 0
            the negative class.
        w: Weight vector with one entry per column of ``X``.
        b: Scalar bias.

    Returns:
        The log-likelihood (always <= 0; closer to 0 means a better fit).
    """
    labels = np.asarray(y)
    logits = np.dot(X, w) + b
    pos_sum = -np.sum(np.logaddexp(0, -logits[labels == 1]))
    neg_sum = -np.sum(np.logaddexp(0, logits[labels == 0]))

    return pos_sum + neg_sum


def logistic_regression_minibatch(X, y, num_steps, learning_rate, batch_size=100):
    """Fit logistic regression with mini-batch gradient descent.

    Each step draws ``batch_size`` row indices with ``np.random.choice``
    (sampling with replacement, using NumPy's global random state) and moves
    the parameters against the gradient of the negative log-likelihood on
    that batch. The gradient is summed over the batch, not averaged, so the
    effective step size grows with ``batch_size``.

    Every 10000 steps (including step 0) the log-likelihood of the current
    mini-batch -- not of the full dataset -- is printed.

    Args:
        X: Feature matrix with one sample per row.
        y: Array of 0/1 labels, one per row of ``X``.
        num_steps: Number of gradient updates to perform.
        learning_rate: Step size applied to the summed batch gradient.
        batch_size: Number of samples drawn per step. Defaults to 100, the
            value that was previously hard-coded.

    Returns:
        A tuple ``(w, b)``: the weight vector (one entry per column of ``X``)
        and the scalar bias.
    """
    n_samples = X.shape[0]
    # Starting from zeros is fine here because the objective is convex.
    w, b = np.zeros(X.shape[1]), 0.0
    for t in range(num_steps):
        batch = np.random.choice(n_samples, batch_size)
        x_batch, y_batch = X[batch], y[batch]

        # sigmoid(z) - y is the derivative of the negative log-likelihood
        # with respect to each sample's logit z = x @ w + b.
        errors = sigmoid(np.dot(x_batch, w) + b) - y_batch

        # Chain rule: dz/dw = x and dz/db = 1, summed over the batch.
        grad_w = np.matmul(x_batch.T, errors)
        grad_b = np.sum(errors)

        w -= learning_rate * grad_w
        b -= learning_rate * grad_b

        if t % 10000 == 0:
            print("似然函数为：", max_likelihood(x_batch, y_batch, w, b))
    return w, b
# Fit the hand-written model with 500000 mini-batch steps, then print the
# learned weights and bias.
w, b = logistic_regression_minibatch(X, y, num_steps = 500000, learning_rate = 5e-4)
print ("(自己写的)逻辑回归的参数w, b分别为: ", w, b)

# Train scikit-learn's LogisticRegression on the same data as a cross-check:
# if its parameters match the hand-written ones, the implementation above is
# presumably correct.
from sklearn.linear_model import LogisticRegression

# C is set to a very large value so that effectively no regularization term is
# applied, which keeps the comparison with the unregularized hand-written
# model fair.
clf = LogisticRegression(fit_intercept=True, C = 1e15)
clf.fit(X, y)
print ("(sklearn)逻辑回归的参数w, b分别为: ", clf.coef_, clf.intercept_, )
手动Python实现逻辑回归LR

Python相关栏目本月热门文章