邏輯回歸 breast cancer 資料集處理

2021-10-24 15:28:05 字數 4008 閱讀 4278

# -*- coding: utf-8 -*-

import pandas as pd

import numpy as np

from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split

def init():
    """Train and evaluate logistic regression on the breast-cancer CSV.

    Reads ``./breast-cancer.csv``, drops the columns that are not features,
    encodes the ``diagnosis`` label as 0/1, splits the rows 70/30 into
    train and test sets, fits the model with ``model``, plots the recorded
    costs and prints the train/test accuracy.
    """
    df = pd.read_csv("./breast-cancer.csv")

    # Drop the non-feature columns. The match is case-insensitive because
    # pandas names the empty trailing column "Unnamed: 32"; `columns=` is
    # used because the positional `axis` argument of DataFrame.drop was
    # removed in pandas 2.0.
    unused = [col for col in df.columns if col.lower() in ("id", "unnamed: 32")]
    df = df.drop(columns=unused)

    # Encode the label column numerically.
    # NOTE(review): the mapping literal was missing from the source; this
    # assumes the usual 'M' (malignant) / 'B' (benign) labels - confirm
    # against the CSV.
    df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})

    # Split into training (70%) and test (30%) sets.
    train, test = train_test_split(df, test_size=0.3, random_state=1)

    # Feature columns are the contiguous label range radius_mean ..
    # fractal_dimension_worst; the target is kept as a single-column frame
    # so it becomes a 2-D array below.
    train_x = train.loc[:, 'radius_mean':'fractal_dimension_worst']
    train_y = train.loc[:, ['diagnosis']]
    test_x = test.loc[:, 'radius_mean':'fractal_dimension_worst']
    test_y = test.loc[:, ['diagnosis']]

    # Convert everything to NumPy arrays.
    train_x = np.asarray(train_x)
    train_y = np.asarray(train_y)
    test_x = np.asarray(test_x)
    test_y = np.asarray(test_y)

    # Fit the model. The arrays are transposed so that each column is one
    # sample, which is the layout the helper functions index with
    # ``x.shape[1]``.
    d = model(train_x.T, train_y.T, num_of_iterations=10000, alpha=0.000001)
    costs = d["costs"]
    w = d["w"]
    b = d["b"]

    # Plot the recorded costs.
    plt.plot(costs)
    plt.title("損失-迭代次數")
    plt.xlabel("迭代次數(x100)")
    plt.ylabel("損失")

    # Accuracy = 100 - mean absolute error (in percent) between the 0/1
    # predictions and the 0/1 labels.
    y_prediction_train = predict(train_x.T, w, b)
    y_prediction_test = predict(test_x.T, w, b)
    print("\n訓練資料測試精確度: {}%".format(
        100 - np.mean(np.abs(y_prediction_train - train_y.T)) * 100))
    print("\n測試資料測試精確度: {}%".format(
        100 - np.mean(np.abs(y_prediction_test - test_y.T)) * 100))

    plt.show()


# Initialize the weights

def initialize(m):
    """Return zero-initialised parameters for ``m`` features.

    Args:
        m: Number of rows of the weight vector.

    Returns:
        A tuple ``(w, b)`` where ``w`` is an ``(m, 1)`` array of zeros and
        ``b`` is the scalar ``0``.
    """
    w = np.zeros((m, 1))
    b = 0
    return w, b

# sigmoid函式

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    Works element-wise on NumPy arrays as well as on plain scalars.
    """
    return 1 / (1 + np.exp(-x))


# Forward and backward propagation

def propogate(x, y, w, b):
    """Run one forward/backward pass and return the gradients and the cost.

    Args:
        x: Input array; ``x.shape[1]`` is taken as the number of samples.
        y: Labels, combined element-wise with the activations.
        w: Weight array; ``w.T`` is multiplied with ``x``.
        b: Bias added to the linear term.

    Returns:
        A tuple ``(grads, cost)`` where ``grads`` is a dict with keys
        ``"dw"`` and ``"db"`` and ``cost`` is the cross-entropy cost
        averaged over the samples.
    """
    # Number of samples.
    m = x.shape[1]

    # Forward pass: activations and cross-entropy cost.
    z = np.dot(w.T, x) + b
    a = sigmoid(z)
    cost = -(1 / m) * np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))

    # Backward pass: gradients of the cost with respect to w and b.
    dw = (1 / m) * np.dot(x, (a - y).T)
    db = (1 / m) * np.sum(a - y)

    grads = {"dw": dw, "db": db}
    return grads, cost

# 執行梯度下降

def optimize(x, y, w, b, num_of_iterations, alpha):
    """Fit ``w`` and ``b`` by batch gradient descent.

    Args:
        x: Inputs forwarded to ``propogate``.
        y: Labels forwarded to ``propogate``.
        w: Initial weights.
        b: Initial bias.
        num_of_iterations: Number of gradient-descent steps.
        alpha: Learning rate applied to every step.

    Returns:
        A tuple ``(parameters, grads, costs)``: ``parameters`` is a dict
        with the final ``"w"`` and ``"b"``, ``grads`` is a dict with the
        last ``"dw"`` and ``"db"``, and ``costs`` is the list of costs
        recorded every 100 iterations.
    """
    costs = []
    # Defined up front so a run with zero iterations still returns a
    # well-formed grads dict instead of raising NameError.
    dw = np.zeros_like(w)
    db = 0.0

    for i in range(num_of_iterations):
        grads, cost = propogate(x, y, w, b)
        dw = grads["dw"]
        db = grads["db"]

        # Gradient-descent step.
        w = w - alpha * dw
        b = b - alpha * db

        # Record and report the cost once every 100 iterations.
        # NOTE(review): the append and the leading "%i" were missing from
        # the source; without them the cost plot is empty and the
        # two-argument format raises TypeError - confirm against the
        # original.
        if i % 100 == 0:
            costs.append(cost)
            print("%i次迭代後的損失度: %f" % (i, cost))

    parameters = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return parameters, grads, costs

# 對資料集進行預測

def predict(x, w, b):
    """Predict a 0/1 label for every column of ``x``.

    Args:
        x: Input array; each column is treated as one sample.
        w: Weights, reshaped to ``(x.shape[0], 1)`` before use.
        b: Bias added to the linear term.

    Returns:
        A ``(1, x.shape[1])`` float array holding ``0`` where the sigmoid
        activation is below 0.5 and ``1`` otherwise.
    """
    w = w.reshape(x.shape[0], 1)
    a = sigmoid(np.dot(w.T, x) + b)
    # Threshold at 0.5; written as `a < 0.5 -> 0, else 1` so that any
    # non-comparable value (NaN) falls into the `1` branch, exactly as the
    # element-by-element if/else did.
    y_prediction = np.where(a < 0.5, 0.0, 1.0)
    return y_prediction

# 計算邏輯回歸模型

defmodel

(xtrain, ytrain, num_of_iterations, alpha)

:# 獲取特徵數量

機器學習 邏輯回歸 Python實現邏輯回歸

coding utf 8 author 蔚藍的天空tom import numpy as np import os import matplotlib.pyplot as plt from sklearn.datasets import make blobs global variable path...

邏輯回歸模型 SAS邏輯回歸模型訓練

邏輯回歸模型是金融信貸行業製作各類評分卡模型的核心,幾乎 80% 的機器學習 統計學習模型演算法都是邏輯回歸模型,按照邏輯美國金融公司總結的 SAS 建模過程,大致總結如下 一般通用模型訓練過程 a 按照指定需求和模型要求製作driver資料集,包含欄位有user id,dep b 其中,空值賦預設值即 c...

線性回歸與邏輯回歸

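Cost function:$J(\theta)=\frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)^2$;hypothesis:$h_\theta(x)=\theta^{T}x$。梯度下降求解:為了最小化 $J(\theta)$,$\frac{\partial J(\theta)}{\partial\theta_j}=\frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)x_j^{(i)}$;每一次迭代更新 $\theta_j:=\theta_j-\alpha\frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)x_j^{(i)}$。正規方程求解(最小二乘法):$\theta=(X^{T}X)^{-1}X^{T}y$...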