# -*- coding: utf-8 -*-
"""
Created on Fri Nov 27 15:31:06 2020
"""
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Synthetic scoring data: one row per user with a binary label, a score
# drawn uniformly from [0, 900) and a term drawn from 20201..20205.
n_sample = 100000
df_score = pd.DataFrame({
    # range() is consumed directly; no need to copy it through a comprehension.
    'user_id': range(n_sample),
    'label': np.random.randint(2, size=n_sample),
    'score': 900 * np.random.random(size=n_sample),
    'term': 20201 + np.random.randint(5, size=n_sample),
})

# Per-term row count, number of label==1 rows and their share.
# Bound to a name so the summary is not silently discarded when the file
# runs as a script rather than in an interactive cell.
df_term_stats = df_score.groupby('term').agg(
    total=('label', 'count'),
    bad=('label', 'sum'),
    bad_rate=('label', 'mean'),
)
#KS,GINI,AUC
from sklearn.metrics import roc_auc_score,roc_curve
def get_auc(ytrue, yprob):
    """Return the ROC AUC of ``yprob`` against ``ytrue``, never below 0.5.

    A raw AUC under 0.5 is reflected to ``1 - auc``, so the result does not
    depend on whether high or low scores indicate the positive class.
    """
    raw_auc = roc_auc_score(ytrue, yprob)
    # For raw_auc < 0.5 the mirrored value is the larger one; otherwise the
    # raw value is kept.
    return max(raw_auc, 1 - raw_auc)
def get_ks(ytrue, yprob):
    """Return the KS statistic of ``yprob`` against ``ytrue``.

    The value is the largest absolute gap between the true-positive and
    false-positive rates returned by ``roc_curve``.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(ytrue, yprob)
    gaps = abs(true_pos_rate - false_pos_rate)
    return max(gaps)
def get_gini(ytrue, yprob):
    """Return the Gini coefficient, computed as ``2 * get_auc(...) - 1``."""
    return 2 * get_auc(ytrue, yprob) - 1
# AUC, KS and Gini of score vs. label, computed separately for each term
# (rows: term, columns: metric).
_metric_funcs = (('auc', get_auc), ('ks', get_ks), ('gini', get_gini))
_by_term = df_score.groupby('term')
df_metrics = pd.DataFrame({
    metric: _by_term.apply(lambda grp, fn=fn: fn(grp['label'], grp['score']))
    for metric, fn in _metric_funcs
})
# PSI (population stability index): shift of the score-bin distribution of
# each term relative to the previous term.
df_score['score_bin'] = pd.cut(df_score['score'], [0, 500, 700, 800, 900])
# User counts per (score_bin, term); margins=True adds an 'All' row and column.
df_total = pd.pivot_table(df_score,
                          values='user_id',
                          index='score_bin',
                          columns=['term'],
                          aggfunc="count",
                          margins=True)
# Divide every column by its 'All' (last) row to get per-term bin shares.
df_ratio = df_total.div(df_total.iloc[-1, :], axis=1)
# Small constant that keeps log() and the division finite for empty bins.
eps = np.finfo(np.float32).eps
lst_psi = list()
# Columns 0..n-2 are terms and the last column is 'All'; compare each term
# column with the one before it.
for idx in range(1, len(df_ratio.columns) - 1):
    # Take every bin row (excluding the trailing 'All' row) of the previous
    # and the current term column. The earlier `iloc[0, -1:idx]` form picked
    # row 0 with an empty column slice, so every PSI collapsed to 0.
    # A bin with no users contributes a share of 0 before smoothing.
    last = df_ratio.iloc[:-1, idx - 1].fillna(0) + eps
    cur = df_ratio.iloc[:-1, idx].fillna(0) + eps
    psi = ((cur - last) * np.log(cur / last)).sum()
    lst_psi.append(psi)
# The first term has no predecessor and 'All' is not a term, hence the NaNs.
psi_row = pd.Series([np.nan] + lst_psi + [np.nan],
                    index=df_ratio.columns,
                    name='psi')
# DataFrame.append no longer exists in pandas 2.x and its result used to be
# thrown away; build the table with concat and keep it under a name.
# df_ratio itself is left unchanged.
df_psi = pd.concat([df_ratio, psi_row.to_frame().T])
# Share of the total population and bad-customer rate per score bin and term
def _count_users(frame):
    """Count user_id per (score_bin, term) cell, including 'All' margins."""
    return pd.pivot_table(
        frame,
        values='user_id',
        index='score_bin',
        columns=['term'],
        aggfunc='count',
        margins=True,
    )


df_total = _count_users(df_score)
# Each column divided by its last ('All') row.
df_ratio = df_total.div(df_total.iloc[-1], axis=1)
# Same count restricted to rows with label == 1.
df_bad = _count_users(df_score.loc[df_score['label'] == 1])
df_bad_rate = df_bad.div(df_total)
# Plotting: stacked bars of bin shares per term, then bad-rate lines per bin.
for _frame, _plot_kwargs in (
    (df_ratio, {'kind': 'bar', 'stacked': True}),
    (df_bad_rate, {'kind': 'line'}),
):
    colormap = sns.diverging_palette(130, 20, as_cmap=True)
    # Drop the 'All' margin row and transpose so terms run along the x axis.
    _frame.drop('All').T.plot(colormap=colormap, **_plot_kwargs)
    # Anchor the legend just outside the right edge of the axes.
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()
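# Six core metrics for risk-control models
# © Copyright belongs to the author; contact the author for reprints or content partnerships.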
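# NOTE: the lines below appear to be unrelated article teasers picked up with the page; they are not part of the script.
# - 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
# - 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
# - 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...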