Amount Withdrawn Model Part 5

TASK 3

# Name of the column used as the classification label in this task.
target = "ATM RATING"
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import roc_auc_score,accuracy_score


from sklearn.preprocessing import  MinMaxScaler
from sklearn.cluster import KMeans

from sklearn.decomposition import PCA
# Scorer object handed to cross_val_score so that folds are scored by accuracy.
evaluate_accuracy = make_scorer(accuracy_score)

# Candidate classifiers, each constructed with the library's default settings.
log_reg, decision_tree_clf, rf_clf = (
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
)
gbm_clf, xgb_clf, lgb_clf = (
    GradientBoostingClassifier(),
    XGBClassifier(),
    LGBMClassifier(),
)
# dataset[FSET],dataset[target]

# Dummy-encode the columns listed as possible categorical features and place
# them next to the continuous columns to form the modelling table.
dataset_cat = pd.get_dummies(
    dataset[exploration_dict['possible_categorical_features']]
)
dataset_continuos = dataset[continuos_columns]
dataset_fin = dataset_continuos.join(dataset_cat)

# Every column of the modelling table other than the label is a feature.
FSET = [col for col in dataset_fin.columns if col != target]

X = dataset_fin[FSET].copy()
y = dataset[target]

# Hold out 33% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Correlation of each training column with the label, largest first.
(
    X_train
    .join(y_train)
    .corr()[target]
    .sort_values(ascending=False)
)
ATM RATING                                          1.000000
Avg Withdrawls Per Hour                             0.672877
Avg No of Withdrawls Per Week                       0.387591
Holiday Sequence_Long Weekend                       0.339350
ATM Prox_Attached to Branch                         0.273523
ATM Prox_Other Bank ATM                             0.193117
Day Type_Working                                    0.180383
No of Other ATMs in 1 KM radius                     0.166138
ATM Zone_FV                                         0.158193
ATM TYPE_Town                                       0.148302
ATM Zone_RL                                         0.122217
ATM Attached to_Petrol Bunk                         0.105171
Estimated Number of Houses in 1 KM Radius           0.066883
ATM Placement_Facing Road                           0.063644
ATM TYPE_Semi Urban                                 0.061452
ATM Location TYPE_Only WIthdraw                     0.047002
ATM Location TYPE_Passbook Printing and Withdraw    0.044917
ATM looks_Normal                                    0.029628
ATM TYPE_Bigger Towns                               0.009193
ATM Since                                           0.004059
ATM Attached to_Shed                                0.000008
ATM Location TYPE_Checkdrop and Withdraw           -0.002357
ATM Attached to_Shop                               -0.010299
Number of Shops Around ATM                         -0.011378
ATM looks_New                                      -0.013827
ATM Attached to_Flat                               -0.017295
Day Type_National Holiday                          -0.020809
Average Wait Time                                  -0.021180
ATM Attached to_House                              -0.024414
Day Type_Strike                                    -0.028450
ATM looks_Old and Dull                             -0.040530
Holiday Sequence_HH                                -0.042008
Holiday Sequence_HW                                -0.047163
ATM Prox_2 ATMS in same Building                   -0.057838
ATM Prox_Non Closed ATM                            -0.058048
Holiday Sequence_WH                                -0.059719
ATM Placement_Little Inside                        -0.063644
ATM Zone_RH                                        -0.080320
ATM Zone_C                                         -0.087497
ATM Attached to_Building                           -0.090750
ATM Location TYPE_Deposit and Withdraw             -0.108344
ATM TYPE_Urban                                     -0.167517
Day Type_Festival                                  -0.176559
ATM Zone_RM                                        -0.187803
Holiday Sequence_WW                                -0.201102
ATM Prox_2+ ATMS Very Close                        -0.213559
ATM Prox_2 ATM Machines (Same Bank)                -0.269406
Name: ATM RATING, dtype: float64
# Baseline comparison: score every candidate classifier using only the single
# feature that correlates most strongly with the target.
model_name = 'log_reg,decision_tree_clf,rf_clf,gbm_clf,xgb_clf,lgb_clf'

# Explicit name -> estimator lookup, used instead of eval() on the name strings.
candidate_models = {
    'log_reg': log_reg,
    'decision_tree_clf': decision_tree_clf,
    'rf_clf': rf_clf,
    'gbm_clf': gbm_clf,
    'xgb_clf': xgb_clf,
    'lgb_clf': lgb_clf,
}

# Fitting, cross-validation and test prediction must all see the same columns;
# cross-validating on the full feature table would score a different model from
# the one that is evaluated on the test set.
selected_features = ['Avg Withdrawls Per Hour']
X_train_selected = X_train[selected_features].values
X_test_selected = X_test[selected_features].values

for m in model_name.split(','):
    print(m)
    model = candidate_models[m]

    model.fit(X_train_selected, y_train.values)

    # cross_val_score works on clones of `model`, so the estimator fitted above
    # is left as it is for the test-set prediction below.
    cv_results = cross_val_score(
        model,
        X_train_selected,
        y_train.values,
        cv=10,
        scoring=evaluate_accuracy,
    )

    preds_y = model.predict(X_test_selected)

    print(f'Test Results of model {m}')
    # accuracy_score takes the true labels first, then the predictions.
    print(f'Insample Accuracy:{cv_results.mean()}, Test Accuracy :{accuracy_score(y_test, preds_y)}')
log_reg
Test Results of model log_reg
Insample Accuracy:0.7456810232460609, Test Accuracy :0.7209060984903763
decision_tree_clf
Test Results of model decision_tree_clf
Insample Accuracy:1.0, Test Accuracy :0.7495257090042321
rf_clf
Test Results of model rf_clf
Insample Accuracy:1.0, Test Accuracy :0.7495257090042321
gbm_clf
Test Results of model gbm_clf
Insample Accuracy:0.8626136542309855, Test Accuracy :0.7495257090042321
xgb_clf
Test Results of model xgb_clf
Insample Accuracy:0.825667445991068, Test Accuracy :0.7495257090042321
lgb_clf
Test Results of model lgb_clf
Insample Accuracy:0.9880280092542281, Test Accuracy :0.7495257090042321
# Second comparison: score every candidate classifier on the two features that
# correlate most strongly with the target.
model_name = 'log_reg,decision_tree_clf,rf_clf,gbm_clf,xgb_clf,lgb_clf'

# Explicit name -> estimator lookup, used instead of eval() on the name strings.
candidate_models = {
    'log_reg': log_reg,
    'decision_tree_clf': decision_tree_clf,
    'rf_clf': rf_clf,
    'gbm_clf': gbm_clf,
    'xgb_clf': xgb_clf,
    'lgb_clf': lgb_clf,
}

# Fitting, cross-validation and test prediction must all see the same columns;
# cross-validating on the full feature table would score a different model from
# the one that is evaluated on the test set.
selected_features = ['Avg Withdrawls Per Hour', 'Avg No of Withdrawls Per Week']
X_train_selected = X_train[selected_features].values
X_test_selected = X_test[selected_features].values

for m in model_name.split(','):
    print(m)
    model = candidate_models[m]

    model.fit(X_train_selected, y_train.values)

    # cross_val_score works on clones of `model`, so the estimator fitted above
    # is left as it is for the test-set prediction below.
    cv_results = cross_val_score(
        model,
        X_train_selected,
        y_train.values,
        cv=10,
        scoring=evaluate_accuracy,
    )

    preds_y = model.predict(X_test_selected)

    print(f'Test Results of model {m}')
    # accuracy_score takes the true labels first, then the predictions.
    print(f'Insample Accuracy:{cv_results.mean()}, Test Accuracy :{accuracy_score(y_test, preds_y)}')
log_reg
Test Results of model log_reg
Insample Accuracy:0.7456810232460609, Test Accuracy :0.7223005950933178
decision_tree_clf
Test Results of model decision_tree_clf
Insample Accuracy:1.0, Test Accuracy :0.9420959608243745
rf_clf
Test Results of model rf_clf
Insample Accuracy:1.0, Test Accuracy :0.9424851226670559
gbm_clf
Test Results of model gbm_clf
Insample Accuracy:0.8626136542309855, Test Accuracy :0.8095863533913833
xgb_clf
Test Results of model xgb_clf
Insample Accuracy:0.825667445991068, Test Accuracy :0.7849880819185678
lgb_clf
Test Results of model lgb_clf
Insample Accuracy:0.9880280092542281, Test Accuracy :0.8499132493392356
# Frame pairing the most recent test-set predictions with the true labels.
result_df = pd.DataFrame(preds_y, columns=['preds'])
result_df['actuals'] = y_test.values

# Rank every training column by random-forest importance. rf_clf was last
# fitted on a two-column subset, so its feature_importances_ cannot be paired
# with X_train.columns (the lengths differ); a forest fitted on all training
# columns is used for the ranking instead.
importance_model = RandomForestClassifier()
importance_model.fit(X_train.values, y_train.values)

feat_importance = pd.DataFrame(
    importance_model.feature_importances_,
    columns=['feature_importance'],
)
feat_importance['features'] = X_train.columns.values
feat_importance.sort_values('feature_importance', ascending=False)

feature_importance features
1 0.229098 Avg Withdrawls Per Hour
3 0.151150 Avg No of Withdrawls Per Week
2 0.132929 Estimated Number of Houses in 1 KM Radius
6 0.098502 No of Other ATMs in 1 KM radius
5 0.055319 Number of Shops Around ATM
4 0.049528 ATM Since
0 0.039271 Average Wait Time
43 0.020507 ATM Prox_Attached to Branch
31 0.019383 Holiday Sequence_Long Weekend
40 0.017415 ATM Prox_2 ATM Machines (Same Bank)
34 0.015416 ATM Attached to_Building
25 0.014983 ATM Zone_FV
15 0.013845 ATM TYPE_Urban
14 0.012845 ATM TYPE_Town
37 0.011269 ATM Attached to_Petrol Bunk
28 0.008970 ATM Zone_RM
33 0.008907 Holiday Sequence_WW
27 0.008863 ATM Zone_RL
18 0.008844 ATM Location TYPE_Only WIthdraw
45 0.007995 ATM Prox_Other Bank ATM
20 0.007241 Day Type_Festival
17 0.006592 ATM Location TYPE_Deposit and Withdraw
29 0.006165 Holiday Sequence_HH
42 0.005072 ATM Prox_2+ ATMS Very Close
23 0.004871 Day Type_Working
10 0.004856 ATM looks_Normal
19 0.004796 ATM Location TYPE_Passbook Printing and Withdraw
9 0.004381 ATM looks_New
35 0.004278 ATM Attached to_Flat
13 0.004117 ATM TYPE_Semi Urban
44 0.004089 ATM Prox_Non Closed ATM
16 0.003192 ATM Location TYPE_Checkdrop and Withdraw
26 0.002621 ATM Zone_RH
30 0.002191 Holiday Sequence_HW
41 0.001815 ATM Prox_2 ATMS in same Building
11 0.001667 ATM looks_Old and Dull
39 0.001509 ATM Attached to_Shop
12 0.001336 ATM TYPE_Bigger Towns
24 0.001053 ATM Zone_C
36 0.000903 ATM Attached to_House
32 0.000775 Holiday Sequence_WH
7 0.000681 ATM Placement_Facing Road
8 0.000349 ATM Placement_Little Inside
22 0.000238 Day Type_Strike
38 0.000161 ATM Attached to_Shed
21 0.000010 Day Type_National Holiday
# Sweep the forest's max_depth over 1..49 on the two withdrawal-rate features,
# printing the mean 10-fold accuracy and the held-out accuracy for each depth.
depth_sweep_features = ['Avg Withdrawls Per Hour', 'Avg No of Withdrawls Per Week']
sweep_train = X_train[depth_sweep_features].values
sweep_test = X_test[depth_sweep_features].values
sweep_labels = y_train.values

for max_depth in np.arange(1, 50):
    model = RandomForestClassifier(max_depth=max_depth)
    model.fit(sweep_train, sweep_labels)
    cv_results = cross_val_score(
        model,
        sweep_train,
        sweep_labels,
        cv=10,
        scoring=evaluate_accuracy,
    )
    preds_y = model.predict(sweep_test)
    cv_accuracy = cv_results.mean()
    test_accuracy = accuracy_score(preds_y, y_test)
    print(f'Maxdepth: {max_depth}, Insample Accuracy:{cv_accuracy}, Test Accuracy :{test_accuracy}')
Maxdepth: 1, Insample Accuracy:0.7224159535638427, Test Accuracy :0.7209060984903763
Maxdepth: 2, Insample Accuracy:0.7446346667171084, Test Accuracy :0.7403155453941075
Maxdepth: 3, Insample Accuracy:0.7458009801722265, Test Accuracy :0.7497040748487944
Maxdepth: 4, Insample Accuracy:0.7523499135498797, Test Accuracy :0.7492014074686644
Maxdepth: 5, Insample Accuracy:0.7678121085241824, Test Accuracy :0.7690648765221904
Maxdepth: 6, Insample Accuracy:0.779017245108138, Test Accuracy :0.7800262684243809
Maxdepth: 7, Insample Accuracy:0.7913646951946998, Test Accuracy :0.7920092101636101
Maxdepth: 8, Insample Accuracy:0.8083041126314837, Test Accuracy :0.8051920675844401
Maxdepth: 9, Insample Accuracy:0.8280713703549882, Test Accuracy :0.8292876716771254
Maxdepth: 10, Insample Accuracy:0.845786067440496, Test Accuracy :0.8426164647889608
Maxdepth: 11, Insample Accuracy:0.8614795990190192, Test Accuracy :0.8577775615767541
Maxdepth: 12, Insample Accuracy:0.8777962710041655, Test Accuracy :0.8750141881921811
Maxdepth: 13, Insample Accuracy:0.8902633724832689, Test Accuracy :0.8880511099220055
Maxdepth: 14, Insample Accuracy:0.9060534383659145, Test Accuracy :0.8989800716706393
Maxdepth: 15, Insample Accuracy:0.9143031655480852, Test Accuracy :0.9036986590131504
Maxdepth: 16, Insample Accuracy:0.9201736711788391, Test Accuracy :0.9291887597087772
Maxdepth: 17, Insample Accuracy:0.9282078308355667, Test Accuracy :0.9316858815326491
Maxdepth: 18, Insample Accuracy:0.9327604878738853, Test Accuracy :0.9240323652932496
Maxdepth: 19, Insample Accuracy:0.9354438654358512, Test Accuracy :0.9301940944690373
Maxdepth: 20, Insample Accuracy:0.9402759138242723, Test Accuracy :0.940863614989217
Maxdepth: 21, Insample Accuracy:0.9409306458414276, Test Accuracy :0.9413176371390118
Maxdepth: 22, Insample Accuracy:0.9424083148070034, Test Accuracy :0.9428580694329588
Maxdepth: 23, Insample Accuracy:0.9424961767864005, Test Accuracy :0.9429391448168507
Maxdepth: 24, Insample Accuracy:0.9427678098951573, Test Accuracy :0.9428418543561804
Maxdepth: 25, Insample Accuracy:0.942911491844718, Test Accuracy :0.9423716171296072
Maxdepth: 26, Insample Accuracy:0.9423763997344338, Test Accuracy :0.9429229297400723
Maxdepth: 27, Insample Accuracy:0.9429674349173849, Test Accuracy :0.9425661980509478
Maxdepth: 28, Insample Accuracy:0.942368378706601, Test Accuracy :0.9425661980509478
Maxdepth: 29, Insample Accuracy:0.9425361256466539, Test Accuracy :0.9417554442120284
Maxdepth: 30, Insample Accuracy:0.9428875180661175, Test Accuracy :0.9424202623599423
Maxdepth: 31, Insample Accuracy:0.9423763506169995, Test Accuracy :0.9425175528206126
Maxdepth: 32, Insample Accuracy:0.9425600075741322, Test Accuracy :0.9423554020528288
Maxdepth: 33, Insample Accuracy:0.9420169818259735, Test Accuracy :0.9425986282045046
Maxdepth: 34, Insample Accuracy:0.9422326036169251, Test Accuracy :0.9426634885116181
Maxdepth: 35, Insample Accuracy:0.9420648732534611, Test Accuracy :0.94224189651538
Maxdepth: 36, Insample Accuracy:0.9424722017314779, Test Accuracy :0.9423716171296072
Maxdepth: 37, Insample Accuracy:0.942288552427995, Test Accuracy :0.9423554020528288
Maxdepth: 38, Insample Accuracy:0.9421926770771651, Test Accuracy :0.942533767897391
Maxdepth: 39, Insample Accuracy:0.9420249856346243, Test Accuracy :0.9424364774367207
Maxdepth: 40, Insample Accuracy:0.94231248729637, Test Accuracy :0.9426472734348397
Maxdepth: 41, Insample Accuracy:0.9425600330886379, Test Accuracy :0.9424689075902775
Maxdepth: 42, Insample Accuracy:0.9427437487278212, Test Accuracy :0.94224189651538
Maxdepth: 43, Insample Accuracy:0.9422006694028978, Test Accuracy :0.9423878322063854
Maxdepth: 44, Insample Accuracy:0.9428954536174041, Test Accuracy :0.9425175528206126
Maxdepth: 45, Insample Accuracy:0.9428315966744071, Test Accuracy :0.942533767897391
Maxdepth: 46, Insample Accuracy:0.9420649223708961, Test Accuracy :0.9424689075902775
Maxdepth: 47, Insample Accuracy:0.9423603838361723, Test Accuracy :0.9424040472831638
Maxdepth: 48, Insample Accuracy:0.9421847160116771, Test Accuracy :0.9423391869760503
Maxdepth: 49, Insample Accuracy:0.9427996503409819, Test Accuracy :0.9423067568224935
# Final run: a default random forest on the two withdrawal-rate features,
# reporting the mean 10-fold accuracy and the accuracy on the held-out rows.
final_features = ['Avg Withdrawls Per Hour', 'Avg No of Withdrawls Per Week']
final_train = X_train[final_features].values
final_test = X_test[final_features].values

model = RandomForestClassifier()
model.fit(final_train, y_train.values)
cv_results = cross_val_score(
    model,
    final_train,
    y_train.values,
    cv=10,
    scoring=evaluate_accuracy,
)
preds_y = model.predict(final_test)
cv_accuracy = cv_results.mean()
test_accuracy = accuracy_score(preds_y, y_test)
print(f'Insample Accuracy:{cv_accuracy}, Test Accuracy :{test_accuracy}')
Insample Accuracy:0.9423444419242031, Test Accuracy :0.9423554020528288
# Cross-tabulate the true labels (rows) against the predicted labels (columns)
# for the held-out rows and show the table.
result_df = pd.DataFrame({'preds': preds_y, 'actuals': y_test.values})
confusion = pd.crosstab(result_df['actuals'], result_df['preds'])
display(confusion)
preds 2 3 4 5
actuals



2 1297 328 42 0
3 43 29766 1216 0
4 100 1402 23174 75
5 0 130 219 3879

Comments

Popular posts from this blog

Amount Withdrawn Model Part 2

Hybrid Recommendation Engine

Amount Withdrawn Model Part 1