600字范文,内容丰富有趣,生活中的好帮手!
600字范文 > 基于逻辑回归的金融风控模型评分卡

基于逻辑回归的金融风控模型评分卡

时间:2020-12-14 13:07:29

相关推荐

基于逻辑回归的金融风控模型评分卡

1月报名了单位组织参加的传智教育线上Python数据分析培训班,一共五天。前三天主要介绍了一些Python和机器学习的基础知识、包括pandas库的使用、若干机器学习经典算法、基于机器学习的Python数据分析简单模型。第4、5天介绍了金融风控的相关业务知识并完成了客户评分卡的制作项目。培训总体难度中等,适合有一定Python和机器学习基础、较缺乏项目实践经验、想在Python数据分析方向更进一步的老铁。最后的项目实现用了若干种方法,我只练习了其中最基础的“基于逻辑回归的评分卡”,现将练习代码分享如下。

中间遇到一些问题上网搜,发现好多这个风控模型项目的资料,这个培训很火吗?还是说这个培训中的项目是用了很久的经典资料?

# Third-party and standard-library imports for the scorecard project.
import math
import random

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn.model_selection import train_test_split

# Load the behaviour-card dataset and take a first look at it.
data = pd.read_csv('C:/Users/liulan/Desktop/jupyternotebook_work/day4/data/Bcard.txt')
data.head()

data.shape
data.columns
data.obs_mth.unique()

# Hold out one observation month as an out-of-time validation set.
# NOTE(review): the month label '-11-30' looks truncated in the scraped
# post (presumably something like '2018-11-30') -- confirm against the
# raw data file before reusing this split.
train = data[data.obs_mth != '-11-30'].reset_index().copy()
val = data[data.obs_mth == '-11-30'].reset_index().copy()

train.obs_mth.unique()

# Candidate features: four derived info features plus six external scores.
feature_lst = ['person_info', 'finance_info', 'credit_info', 'act_info',
               'td_score', 'jxl_score', 'mj_score', 'rh_score',
               'zzc_score', 'zcx_score']

x = train[feature_lst]
y = train['bad_ind']
val_x = val[feature_lst]
val_y = val['bad_ind']

# L2-regularised logistic regression; C=0.1 means fairly strong regularisation.
lr_model = LogisticRegression(C=0.1)
lr_model.fit(x, y)

# Evaluate the model on the training set and on the out-of-time
# validation set: ROC curves plus the KS statistic (max |TPR - FPR|).
y_pred = lr_model.predict_proba(x)[:, 1]
fpr_lr_train, tpr_lr_train, _ = roc_curve(y, y_pred)
train_ks = abs(fpr_lr_train - tpr_lr_train).max()
print('train_ks : ', train_ks)

val_y_pred = lr_model.predict_proba(val_x)[:, 1]
fpr_lr_valtrain, tpr_lr_valtrain, _ = roc_curve(val_y, val_y_pred)
val_ks = abs(fpr_lr_valtrain - tpr_lr_valtrain).max()
print('val_ks : ', val_ks)

plt.plot(fpr_lr_train, tpr_lr_train, label='train LR')   # training-set ROC
plt.plot(fpr_lr_valtrain, tpr_lr_valtrain, label='evl LR')
plt.plot([0, 1], [0, 1], 'k--')   # diagonal reference line
plt.show()

# Rank the candidate features with a small LightGBM model so the weaker
# ones can be dropped before the final logistic regression.
import lightgbm as lgb
from sklearn.model_selection import train_test_split

train_x, test_x, train_y, test_y = train_test_split(
    x, y, random_state=0, test_size=0.2)


def lgb_test(train_x, test_x, train_y, test_y):
    # Fit a small GBDT classifier and return it with its validation AUC.
    clf = lgb.LGBMClassifier(boosting_type='gbdt',
                             objective='binary',
                             metric='auc',
                             learning_rate=0.1,
                             n_estimators=24,
                             max_depth=5,
                             num_leaves=20,
                             max_bin=45,
                             min_data_in_leaf=6,
                             bagging_fraction=0.6,
                             bagging_freq=0,
                             feature_fraction=0.8)
    clf.fit(train_x, train_y,
            eval_set=[(train_x, train_y), (test_x, test_y)],
            eval_metric='auc')
    return clf, clf.best_score_['valid_1']['auc']


lgb_model, lgb_auc = lgb_test(train_x, test_x, train_y, test_y)

# Feature importances, most important first.
feature_importance = pd.DataFrame(
    {'name': lgb_model.booster_.feature_name(),
     'importance': lgb_model.feature_importances_}
).sort_values(by=['importance'], ascending=False)

feature_importance

# Keep only the four strongest features and refit the logistic regression,
# then re-evaluate with ROC / KS on train and out-of-time validation.
feature_lst = ['person_info', 'finance_info', 'credit_info', 'act_info']

x = train[feature_lst]
x.head()
y = train['bad_ind']
val_x = val[feature_lst]
val_y = val['bad_ind']

lr_model = LogisticRegression(C=0.1)
lr_model.fit(x, y)

y_pred = lr_model.predict_proba(x)[:, 1]
fpr_lr_train, tpr_lr_train, _ = roc_curve(y, y_pred)
train_ks = abs(fpr_lr_train - tpr_lr_train).max()
print('train_ks:', train_ks)

val_y_pred = lr_model.predict_proba(val_x)[:, 1]
fpr_lr_valtrain, tpr_lr_valtrain, _ = roc_curve(val_y, val_y_pred)
val_ks = abs(fpr_lr_valtrain - tpr_lr_valtrain).max()
print('val_ks:', val_ks)

plt.plot(fpr_lr_train, tpr_lr_train, label='train LR')   # training-set ROC
plt.plot(fpr_lr_valtrain, tpr_lr_valtrain, label='evl LR')
plt.plot([0, 1], [0, 1], 'k--')
plt.show()

# Inspect the fitted model: selected features, coefficients and intercept.
# (These are the values later hard-coded into the score() function.)
print('变量名单', feature_lst)
print('系数:', lr_model.coef_)
print('截距:', lr_model.intercept_)

# Score the validation set and slice it into 20 equal-frequency bins,
# ordered from the highest to the lowest predicted bad rate.
bins = 20
temp_ = pd.DataFrame()
temp_['bad_rate_predict'] = lr_model.predict_proba(val_x)[:, 1]
temp_['real_bad'] = val_y
temp_ = temp_.sort_values('bad_rate_predict', ascending=False)
temp_['num'] = list(range(temp_.shape[0]))
temp_['num'] = pd.cut(temp_.num, bins=bins, labels=list(range(bins)))

# Build a gains table per bin: bad/good counts, cumulative counts,
# cumulative bad percentage, per-bin bad rate and the KS statistic.
report = pd.DataFrame()
grouped = temp_.groupby('num').real_bad
report['BAD'] = grouped.sum().astype(int)
report['GOOD'] = grouped.count().astype(int) - report['BAD']
report['BAD_CNT'] = report['BAD'].cumsum()
report['GOOD_CNT'] = report['GOOD'].cumsum()
good_total = report.GOOD_CNT.max()
bad_total = report.BAD_CNT.max()
report['BAD_PCTG'] = round(report.BAD_CNT / bad_total, 3)
report['BADRATE'] = report.apply(
    lambda r: round(r.BAD / (r.BAD + r.GOOD), 3), axis=1)


def cal_ks(row):
    # KS at this bin: cumulative bad share minus cumulative good share.
    ks = (row.BAD_CNT / bad_total) - (row.GOOD_CNT / good_total)
    return round(math.fabs(ks), 3)


report['KS'] = report.apply(cal_ks, axis=1)
report

# NOTE(review): this cell is an exact duplicate of the previous gains-table
# computation in the original post -- presumably a copy-paste repeat. It
# recomputes the same table and produces identical output.
report = pd.DataFrame()
by_bin = temp_.groupby('num').real_bad
report['BAD'] = by_bin.sum().astype(int)
report['GOOD'] = by_bin.count().astype(int) - report['BAD']
report['BAD_CNT'] = report['BAD'].cumsum()
report['GOOD_CNT'] = report['GOOD'].cumsum()
good_total = report.GOOD_CNT.max()
bad_total = report.BAD_CNT.max()
report['BAD_PCTG'] = round(report.BAD_CNT / bad_total, 3)
report['BADRATE'] = report.apply(
    lambda rec: round(rec.BAD / (rec.BAD + rec.GOOD), 3), axis=1)


def cal_ks(row):
    # Per-bin KS: |cumulative bad share - cumulative good share|.
    bad_share = row.BAD_CNT / bad_total
    good_share = row.GOOD_CNT / good_total
    return round(abs(bad_share - good_share), 3)


report['KS'] = report.apply(cal_ks, axis=1)
report

# Convert the logistic-regression log-odds into a scorecard score.
# Features used: ['person_info', 'finance_info', 'credit_info', 'act_info'].
# NOTE(review): the coefficients and intercept below are hard-coded copies
# of the fitted lr_model's values -- if the model is refit they must be
# updated to match lr_model.coef_ / lr_model.intercept_.
def score(person_info, finance_info, credit_info, act_info,
          base_score=900, pdo=50):
    """Return the scorecard score for one applicant.

    score = base_score - pdo * xbeta / ln(2), where xbeta is the model's
    log-odds of being bad, so every `pdo` points on the score halve the
    predicted odds. The defaults (base_score=900, pdo=50) reproduce the
    original fixed-constant behaviour; they are exposed as parameters so
    the scale can be recalibrated without editing the formula.
    """
    xbeta = (person_info * 2.48386162
             + finance_info * 4.44901224
             + credit_info * 1.88254182
             + act_info * (-1.43356854)
             - 3.90631899)
    return base_score - pdo * xbeta / math.log(2)


val['score'] = val.apply(
    lambda r: score(r.person_info, r.finance_info, r.credit_info, r.act_info),
    axis=1)

# KS computed directly on the score; abs() makes it insensitive to the
# score being inversely related to the bad probability.
fpr_lr, tpr_lr, _ = roc_curve(val_y, val['score'])
val_ks = abs(fpr_lr - tpr_lr).max()
print('val_ks : ', val_ks)

# Map scores to rating grades: A (>680), B (641-680], C (601-640], D (<=600).
def level(score):
    """Return the letter grade for a scorecard score."""
    if score <= 600:
        return "D"
    if score <= 640:
        return "C"
    if score <= 680:
        return "B"
    return "A"


val['level'] = val.score.map(lambda x: level(x))

# Share of the validation population falling into each grade.
val.level.groupby(val.level).count() / len(val)

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。