chapter3 逻辑回归手动+sklearn版本

时间：2022-02-10 作者：BlairGrowing

1 导入numpy包

import numpy as np

2 sigmoid函数

def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    Args:
        x: A number or a NumPy array. Plain Python lists are not supported
            because unary minus is undefined for them.

    Returns:
        A value of the same shape as ``x`` with every entry in (0, 1).
    """
    return 1 / (1 + np.exp(-x))
# Quick check of sigmoid on a small NumPy array.
demox = np.array([1, 2, 3])
print(sigmoid(demox))
# NOTE: a plain Python list does not work here, because unary minus is not
# defined for lists; the following would raise TypeError:
#   sigmoid([1, 2, 3])

结果：

[0.73105858 0.88079708 0.95257413]

3 定义逻辑回归模型主体

### 定义逻辑回归模型主体
def logistic(x, y, w, b):
    """Run one forward/backward pass of logistic regression.

    Args:
        x: Feature matrix; rows are samples (``x.shape[0]`` is the sample count).
        y: Labels, broadcast-compatible with the model output ``sigmoid(x @ w + b)``.
        w: Weight array that ``x`` is multiplied with via ``np.dot``.
        b: Bias term added to every sample's linear score.

    Returns:
        A tuple ``(y_hat, cost, dW, db)``: the predicted probabilities, the
        mean cross-entropy loss, and the gradients of that loss with respect
        to ``w`` and ``b``.
    """
    # Number of training samples.
    num_train = x.shape[0]
    # Model output: predicted probability for each sample.
    y_hat = sigmoid(np.dot(x, w) + b)
    # Mean cross-entropy loss.
    cost = -1 / num_train * np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
    # Gradient with respect to the weights.
    dW = np.dot(x.T, (y_hat - y)) / num_train
    # Gradient with respect to the bias.
    db = np.sum(y_hat - y) / num_train
    # Drop any singleton dimensions from the loss.
    cost = np.squeeze(cost)
    return y_hat, cost, dW, db

4 初始化函数

def init_parm(dims):
    """Create zero-initialised logistic regression parameters.

    Args:
        dims: Number of input features.

    Returns:
        A tuple ``(w, b)`` where ``w`` is a ``(dims, 1)`` array of zeros and
        ``b`` is the scalar ``0``.
    """
    w = np.zeros((dims, 1))
    b = 0
    return w, b

5 定义逻辑回归模型训练过程

### 定义逻辑回归模型训练过程
def logistic_train(X, y, learning_rate, epochs):
    """Fit logistic regression with batch gradient descent.

    Args:
        X: Feature matrix; ``X.shape[1]`` is the number of features.
        y: Labels passed unchanged to ``logistic``.
        learning_rate: Step size of each gradient update.
        epochs: Number of gradient-descent iterations.

    Returns:
        A tuple ``(cost_list, params, grads)``: the loss recorded every 100
        iterations, the final parameters ``{'W', 'b'}`` and the gradients
        ``{'dW', 'db'}`` from the last iteration (``None`` when ``epochs``
        is 0).
    """
    # Initialise the model parameters.
    W, b = init_parm(X.shape[1])
    cost_list = []
    # Defined up front so the returned grads are well-formed even if the
    # loop body never runs.
    dW, db = None, None
    for i in range(epochs):
        # Current predictions (unused here), loss and parameter gradients.
        _, cost, dW, db = logistic(X, y, W, b)
        # Gradient-descent update.
        W = W - learning_rate * dW
        b = b - learning_rate * db
        # Record and report the loss every 100 iterations.
        if i % 100 == 0:
            cost_list.append(cost)
            print('epoch %d cost %f' % (i, cost))
    params = {
        'W': W,
        'b': b,
    }
    grads = {
        'dW': dW,
        'db': db,
    }
    return cost_list, params, grads

6 定义预测函数

def predict(X, params):
    """Predict binary class labels with trained parameters.

    Args:
        X: Feature matrix to classify, one sample per row.
        params: Dict with the weights under ``'W'`` and the bias under ``'b'``.

    Returns:
        A list of ints, ``1`` where the predicted probability is greater
        than 0.5 and ``0`` otherwise.
    """
    y_pred = sigmoid(np.dot(X, params['W']) + params['b'])
    # Threshold in one vectorised step; ravel() flattens the column of
    # probabilities so the result is a flat list of Python ints.
    y_preds = (np.ravel(y_pred) > 0.5).astype(int).tolist()
    return y_preds

7 生成数据

# Plotting library.
import matplotlib.pyplot as plt
# Generator for synthetic classification data.
from sklearn.datasets import make_classification

# Build a 100 x 2 synthetic binary classification data set.
x, label = make_classification(
    n_samples=100,           # number of samples
    n_classes=2,             # number of classes
    n_features=2,            # number of features
    n_redundant=0,           # redundant features (random combinations of informative ones)
    n_informative=2,         # informative features
    n_repeated=0,            # repeated features (drawn from informative and redundant ones)
    n_clusters_per_class=2,  # clusters per class
    random_state=1,
)
print("x.shape =", x.shape)
print("label.shape = ", label.shape)
print("np.unique(label) =", np.unique(label))
print(set(label))
# Seeded random generator so the noise is reproducible.
rng = np.random.RandomState(2)
# Add uniformly distributed noise to the features
# (reference, host name presumed: https://blog.csdn.net/vicdd/article/details/52667709).
x += 2 * rng.uniform(size=x.shape)
# Distinct class labels.
unique_label = set(label)
# One colour per class, spread evenly over the Spectral colormap.
print(np.linspace(0, 1, len(unique_label)))
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_label)))
print(colors)
# Scatter plot of the simulated data, one colour per class.
for k, col in zip(unique_label, colors):
    x_k = x[label == k]
    plt.plot(x_k[:, 0], x_k[:, 1], 'o', markerfacecolor=col, markeredgecolor="k",
             markersize=14)
plt.title('Simulated binary data set')
plt.show()

结果：

x.shape = (100, 2)
label.shape =  (100,)
np.unique(label) = [0 1]
{0, 1}
[0. 1.]
[[0.61960784 0.00392157 0.25882353 1.        ]
 [0.36862745 0.30980392 0.63529412 1.        ]]

复习

# Review: turn the labels into a column vector and append it to the features.
mylabel = label.reshape((-1, 1))
data = np.concatenate((x, mylabel), axis=1)
print(data.shape)

结果：

(100, 3)

8 划分数据集

# Use the first 70% of the samples for training and the rest for testing.
offset = int(x.shape[0] * 0.7)
# Labels are reshaped to column vectors to match the (n, 1) model output.
x_train, y_train = x[:offset], label[:offset].reshape((-1, 1))
x_test, y_test = x[offset:], label[offset:].reshape((-1, 1))
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

结果：

(70, 2)
(70, 1)
(30, 2)
(30, 1)

9 训练

# Train for 1000 iterations.
# NOTE(review): the learning rate literal was lost when this page was
# extracted; 0.01 is assumed here - confirm against the original post.
cost_list, params, grads = logistic_train(x_train, y_train, 0.01, 1000)
print(params['b'])

结果：

epoch 0 cost 域名47
epoch 100 cost 域名43
epoch 200 cost 域名25
epoch 300 cost 域名32
epoch 400 cost 域名18
epoch 500 cost 域名60
epoch 600 cost 域名86
epoch 700 cost 域名09
epoch 800 cost 域名74
epoch 900 cost 域名89
-域名648941379839

10 准确率计算

from sklearn.metrics import accuracy_score, classification_report

y_pred = predict(x_test, params)
print("y_pred = ", y_pred)
print(y_pred)
print(y_test.shape)
# The inputs need not both be 1-D; the (n, 1) column of labels appears to be
# squeezed automatically.
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

结果：

y_pred =  [0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0]
[0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0]
(30, 1)
域名333333333333
              precision    recall  f1-score   support

           0       域名      域名      域名        12
           1       域名      域名      域名        18

    accuracy                           域名        30
   macro avg       域名      域名      域名        30
weighted avg       域名      域名      域名        30

11 绘制逻辑回归决策边界

### 绘制逻辑回归决策边界
def plot_logistic(X_train, y_train, params):
    """Plot the training points and the learned linear decision boundary.

    Args:
        X_train: Feature matrix; columns 0 and 1 are used as the two axes.
        y_train: Labels; samples equal to 1 are drawn in red, all others
            in green.
        params: Dict with the weights under ``'W'`` and the bias under ``'b'``.
    """
    X_train = np.asarray(X_train)
    # Boolean mask of the positive class, flattened so it indexes rows.
    positive = np.asarray(y_train).ravel() == 1
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(X_train[positive, 0], X_train[positive, 1], s=30, c='red')
    ax.scatter(X_train[~positive, 0], X_train[~positive, 1], s=30, c='green')
    # Range of x1 values over which the boundary is drawn.
    x = np.arange(-1.5, 3, 0.1)
    # Decision boundary: W[0] * x1 + W[1] * x2 + b = 0, solved for x2.
    y = (-params['b'] - params['W'][0] * x) / params['W'][1]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
# Draw the training samples together with the fitted decision boundary.
plot_logistic(x_train, y_train, params)

结果：

12 sklearn实现

from sklearn.linear_model import LogisticRegression

# fit() expects 1-D labels; ravel() flattens the (n, 1) column so that
# scikit-learn does not emit a DataConversionWarning.
clf = LogisticRegression(random_state=0).fit(x_train, y_train.ravel())
y_pred = clf.predict(x_test)
print(y_pred)
# Printed explicitly so the score also shows when run as a plain script.
print(accuracy_score(y_test, y_pred))

结果：

[0 0 1 1 1 1 0 0 0 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 1 1 0 0 1 0]
域名333333333333

chapter3 逻辑回归手动+sklearn版本

1 导入numpy包

2 sigmoid函数

3 定义逻辑回归模型主体

4 初始化函数

5 定义逻辑回归模型训练过程

6 定义预测函数

7 生成数据

8 划分数据集

9 训练

10 准确率计算

11 绘制逻辑回归决策边界

12 sklearn实现

鼠标划过时整行变色284455处理办法

c语言是如何解析表达式语句"2+3*4；"的？

AspJpeg V1.5.0 破解版使用方法284435过程讲解

dart系列之:dart语言中的异常

前缀和与差分

java多线程2：Thread中的方法

Hyperledger Fabric源码分析之Gossip

洛谷 CF196A 题解

网络编程基础

表达式树，一种提高代码复用性和性能的方式

消息推送平台有没有保证数据不丢？

『学了就忘』Linux系统定时任务 — 87、只执行一次的定时任务