信用卡違約率分析

2021-09-23 08:02:09 字數 3585 閱讀 5343

使用網格搜尋的方式:

# -*- coding: utf-8 -*-

# 信用卡違約率分析

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, learning_curve, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Load the data set.
# NOTE(review): the path is kept exactly as found; confirm its casing matches
# the file on disk when running on a case-sensitive file system.
data = pd.read_csv('./uci_credit_card.csv')

# Explore the data.
print(data.shape)  # size of the data set
print(data.describe())  # overview of the data set

# Look at how the next-month default label is distributed.
next_month = data['default.payment.next.month'].value_counts()
print(next_month)

# Put the label counts into a frame so seaborn can address them by column.
# NOTE(review): the frame's contents were missing from the source; the two
# columns are reconstructed from the names the barplot call below reads.
df = pd.DataFrame({
    'default.payment.next.month': next_month.index,
    'values': next_month.values,
})
plt.rcParams['font.sans-serif'] = ['simhei']  # so Chinese labels render correctly
plt.figure(figsize=(6, 6))
plt.title('信用卡違約率客戶\n (違約:1,守約:0)')
sns.set_color_codes("pastel")
sns.barplot(x='default.payment.next.month', y="values", data=df)
locs, labels = plt.xticks()
plt.show()

# Feature selection: drop the id column and the final (result) column.

# The id field is of no use as a feature, so remove it.
# NOTE(review): the published data set appears to spell this column 'ID';
# match it case-insensitively so either spelling is dropped — confirm
# against the CSV header.
id_columns = [name for name in data.columns if str(name).lower() == 'id']
data.drop(id_columns, inplace=True, axis=1)

# The label column is the target; every remaining column is a feature.
target = data['default.payment.next.month'].values
columns = data.columns.tolist()
columns.remove('default.payment.next.month')
features = data[columns].values

# Hold out 30% as the test set; the rest is the training set.
train_x, test_x, train_y, test_y = train_test_split(
    features, target, test_size=0.30, stratify=target, random_state=1
)

# Build the candidate classifiers.
classifiers = [
    SVC(random_state=1, kernel='rbf'),
    DecisionTreeClassifier(random_state=1, criterion='gini'),
    RandomForestClassifier(random_state=1, criterion='gini'),
    KNeighborsClassifier(metric='minkowski'),
]

# Classifier names, in the same order as `classifiers`. Each name is also
# used as the pipeline step name for its classifier.
classifier_names = [
    'svc',
    'decisiontreeclassifier',
    'randomforestclassifier',
    'kneighborsclassifier',
]

# Classifier parameter grids, in the same order as `classifiers`.
# Keys follow the pipeline convention `<step name>__<parameter>`.
# NOTE(review): the grid contents were missing from the source (only empty
# list slots remained). The values below are a reconstruction — confirm them
# before relying on the results.
classifier_param_grid = [
    {'svc__C': [1], 'svc__gamma': [0.01]},
    {'decisiontreeclassifier__max_depth': [6, 9, 11]},
    {'randomforestclassifier__n_estimators': [3, 5, 6]},
    {'kneighborsclassifier__n_neighbors': [4, 6, 8]},
]

# Tune the parameters of one specific classifier with GridSearchCV.
def gridsearchcv_work(pipeline, train_x, train_y, test_x, test_y, param_grid, score='accuracy'):
    """Grid-search *pipeline* on the training data and evaluate it on the test data.

    Prints the best parameters, the best search score and the test accuracy.

    Args:
        pipeline: Estimator handed to ``GridSearchCV`` as ``estimator``.
        train_x: Training features used to fit the search.
        train_y: Training labels used to fit the search.
        test_x: Test features to predict on.
        test_y: Test labels the predictions are compared against.
        param_grid: Parameter grid handed to ``GridSearchCV``.
        score: Value handed to ``GridSearchCV`` as ``scoring``.

    Returns:
        A dict with ``'predict_y'`` (the predictions for ``test_x``) and
        ``'accuracy_score'`` (accuracy of those predictions against ``test_y``).
    """
    gridsearch = GridSearchCV(estimator=pipeline, param_grid=param_grid, scoring=score)

    # Find the best parameters and the best score.
    search = gridsearch.fit(train_x, train_y)
    print("gridsearch最優引數:", search.best_params_)
    print("gridsearch最優分數: %0.4lf" % search.best_score_)

    predict_y = gridsearch.predict(test_x)
    # Compute the accuracy once and reuse it for both the report and the result.
    accuracy = accuracy_score(test_y, predict_y)
    print("準確率 %0.4lf" % accuracy)

    return {'predict_y': predict_y, 'accuracy_score': accuracy}

# Tune and evaluate every classifier with its own name and parameter grid.
for model, model_name, model_param_grid in zip(classifiers, classifier_names, classifier_param_grid):
    # Scale the features first, then apply the classifier under its step name
    # so the `<step name>__<parameter>` grid keys resolve.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        (model_name, model),
    ])
    result = gridsearchcv_work(
        pipeline, train_x, train_y, test_x, test_y, model_param_grid, score='accuracy'
    )

python信用卡違約 Python信用卡驗證

以下是 Luhn 演算法(mod 10 check)的步驟:1. 從右到左,將每隔一位的數字翻倍;如果翻倍的結果是兩位數,則將這兩位數字相加,得到一個一位數。2. 將步驟1中得到的所有一位數相加。3. 將信用卡號碼中從右到左位於奇數位的數字相加。4. 將步驟2和步驟3的結果相加。5. 如果步驟4的結果可被10整除,則卡號有效;否則無效。我的輸出應...

python信用卡管理 python信用卡操作

python python開發 python語言 python信用卡操作 import datetime user dict dict 用來儲存使用者的賬號 註冊系統 def registered try print welcome to register atm system registered...

python信用卡管理 python信用卡操作

import datetime user dict dict 用來儲存使用者的賬號 註冊系統 def registered try print welcome to register atm system registered user input 請輸入您的賬號 registered pwd in...