Amount Withdrawn Model Part 1


 

%load_ext autoreload

%autoreload 2

import sys

# Put the project-local helper directory at the front of the import path
# so that the `stark` module can be found from this notebook's location.
sys.path.insert(0, '../../Models/ALS')

# NOTE(review): the star import presumably supplies the helpers called in
# later cells (e.g. explore, infer_schema) — confirm against stark itself.
from stark import *
hello()

/home/ec2-user/SageMaker/SUNDARESAN M S/Scripts/HandsOnML-CookBook
Data Science Environment Set!


from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score,KFold
from sklearn.pipeline import make_pipeline

from sklearn.metrics import mean_absolute_error,mean_squared_error
from sklearn.metrics import make_scorer


from sklearn.linear_model import ElasticNet, Lasso
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,  GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from matplotlib.pyplot import style
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

style.use('fivethirtyeight')

%matplotlib inline



# Location of the raw ATM withdrawal records, relative to this notebook.
DATA_PATH = '../BSE DATASETS/ATM_Withdrawl_Prediction_Data.csv'

# Read the CSV into a DataFrame; every later cell works from `dataset`.
dataset = pd.read_csv(DATA_PATH)

# Preview the first rows as a quick sanity check of the load.
dataset.head()
 



ID Number of Shops Around ATM ATM Zone No of Other ATMs in 1 KM radius Estimated Number of Houses in 1 KM Radius ATM Placement ATM TYPE ATM Location TYPE ATM looks ATM Near Avg Withdrawls Per Hour ATM Attached to Avg No of Withdrawls Per Week Average Wait Time Day Type ATM RATING ATM Prox ATM Since Holiday Sequence AmountWithDrawn
0 TS000000001 66 RL 65 8450 Facing Road Urban Only WIthdraw Normal College 7 Building 856 3 Working 4 Attached to Branch 2008 WW 209500
1 TS000000002 26 RL 80 9600 Facing Road Urban Only WIthdraw Normal Temple 6 Building 1262 3 Working 3 Attached to Branch 2007 WW 184300
2 TS000000003 65 RL 68 11250 Facing Road Town Only WIthdraw Normal College 7 Building 920 3 Working 4 Attached to Branch 2008 WW 231500
3 TS000000004 80 RL 60 9550 Facing Road Town Only WIthdraw Normal Shopping Complex 7 Building 961 3 Working 4 2 ATM Machines (Same Bank) 2006 HH 143600
4 TS000000005 66 RL 84 14260 Facing Road Town Only WIthdraw Normal Park 8 Building 1145 4 Working 4 Attached to Branch 2008 WW 255600
# Profile the dataset with the `explore` helper and keep its return value;
# later cells index this dictionary by feature-group name
# (e.g. 'possible_categorical_features', 'possible_continuos_features').
# NOTE(review): the ['ID'] argument presumably lists columns to leave out of
# the profiling — confirm against the helper's definition.
exploration_dict= explore(dataset,['ID'])
Shape of the Dataset: (186880, 20)
head()

ID Number of Shops Around ATM ATM Zone No of Other ATMs in 1 KM radius Estimated Number of Houses in 1 KM Radius ATM Placement ATM TYPE ATM Location TYPE ATM looks ATM Near Avg Withdrawls Per Hour ATM Attached to Avg No of Withdrawls Per Week Average Wait Time Day Type ATM RATING ATM Prox ATM Since Holiday Sequence AmountWithDrawn
0 TS000000001 66 RL 65 8450 Facing Road Urban Only WIthdraw Normal College 7 Building 856 3 Working 4 Attached to Branch 2008 WW 209500
1 TS000000002 26 RL 80 9600 Facing Road Urban Only WIthdraw Normal Temple 6 Building 1262 3 Working 3 Attached to Branch 2007 WW 184300
2 TS000000003 65 RL 68 11250 Facing Road Town Only WIthdraw Normal College 7 Building 920 3 Working 4 Attached to Branch 2008 WW 231500
3 TS000000004 80 RL 60 9550 Facing Road Town Only WIthdraw Normal Shopping Complex 7 Building 961 3 Working 4 2 ATM Machines (Same Bank) 2006 HH 143600
4 TS000000005 66 RL 84 14260 Facing Road Town Only WIthdraw Normal Park 8 Building 1145 4 Working 4 Attached to Branch 2008 WW 255600
tail()

ID Number of Shops Around ATM ATM Zone No of Other ATMs in 1 KM radius Estimated Number of Houses in 1 KM Radius ATM Placement ATM TYPE ATM Location TYPE ATM looks ATM Near Avg Withdrawls Per Hour ATM Attached to Avg No of Withdrawls Per Week Average Wait Time Day Type ATM RATING ATM Prox ATM Since Holiday Sequence AmountWithDrawn
186875 TS000186876 62 RL 62 7917 Facing Road Urban Only WIthdraw Normal Lodge and Hotel 6 Building 953 3 Working 3 Attached to Branch 2007 WW 179700
186876 TS000186877 26 RL 85 13175 Facing Road Urban Only WIthdraw Normal Market 6 Building 2073 3 Working 3 Attached to Branch 2010 WW 215600
186877 TS000186878 80 RL 66 9042 Facing Road Urban Only WIthdraw Normal Shopping Complex 7 Building 1188 4 Working 4 Attached to Branch 2010 WW 267800
186878 TS000186879 21 RL 68 9717 Facing Road Urban Only WIthdraw Normal Highway 5 Petrol Bunk 1078 2 Working 4 Attached to Branch 2010 WW 147525
186879 TS000186880 21 RL 75 9937 Facing Road Urban Only WIthdraw Normal Church 5 Building 1256 3 Working 3 Attached to Branch 2008 WW 152100
Total number of columns: 20
array(['ID', 'Number of Shops Around ATM', 'ATM Zone',
       'No of Other ATMs in 1 KM radius',
       'Estimated Number of Houses in 1 KM Radius', 'ATM Placement',
       'ATM TYPE', 'ATM Location TYPE', 'ATM looks', 'ATM Near',
       'Avg Withdrawls Per Hour', 'ATM Attached to',
       'Avg No of Withdrawls Per Week', 'Average Wait Time', 'Day Type',
       'ATM RATING', 'ATM Prox', 'ATM Since', 'Holiday Sequence',
       'AmountWithDrawn'], dtype=object)
Missing Value Report:

Missing_Value_Count Missing_Value_Percentage
ID 0 0.0
Number of Shops Around ATM 0 0.0
Holiday Sequence 0 0.0
ATM Since 0 0.0
ATM Prox 0 0.0
ATM RATING 0 0.0
Day Type 0 0.0
Average Wait Time 0 0.0
Avg No of Withdrawls Per Week 0 0.0
ATM Attached to 0 0.0
Avg Withdrawls Per Hour 0 0.0
ATM Near 0 0.0
ATM looks 0 0.0
ATM Location TYPE 0 0.0
ATM TYPE 0 0.0
ATM Placement 0 0.0
Estimated Number of Houses in 1 KM Radius 0 0.0
No of Other ATMs in 1 KM radius 0 0.0
ATM Zone 0 0.0
AmountWithDrawn 0 0.0
Infering Features:

Facing Road Little Inside
ATM Placement 0.99589 0.00411

Normal New Old and Dull
ATM looks 0.946575 0.044521 0.008904

Urban Town Semi Urban Bigger Towns
ATM TYPE 0.633562 0.331507 0.028082 0.006849

Only WIthdraw Deposit and Withdraw Passbook Printing and Withdraw Checkdrop and Withdraw
ATM Location TYPE 0.897945 0.043151 0.034247 0.024658

Working Festival Strike National Holiday
Day Type 0.953425 0.044521 0.00137 0.000685

RL RM FV RH C
ATM Zone 0.788356 0.149315 0.044521 0.010959 0.006849

WW Long Weekend HH HW WH
Holiday Sequence 0.828767 0.085616 0.069178 0.013699 0.00274

Building Petrol Bunk Flat Shop House Shed
ATM Attached to 0.781507 0.19589 0.008904 0.007534 0.004795 0.00137

2+ ATMS Very Close Attached to Branch 2 ATM Machines (Same Bank) Other Bank ATM Non Closed ATM 2 ATMS in same Building
ATM Prox 0.055479 0.59589 0.265068 0.060274 0.013014 0.010274

count mean std min 25% 50% max (-0.008, 4.0] (4.0, 8.0]
Average Wait Time 186880.0 2.866438 0.815501 0.0 2.0 3.0 8.0 0.980137 0.019863

count mean std min 25% 75% max (1.997, 3.5] (3.5, 5.0]
ATM RATING 186880.0 3.511644 0.663534 2.0 3.0 4.0 5.0 0.530137 0.469863

count mean std min 25% 50% 75% max (4.0, 7.0] (7.0, 10.0] (0.991, 4.0]
Avg Withdrawls Per Hour 186880.0 6.099315 1.382527 1.0 5.0 6.0 7.0 10.0 0.746575 0.156849 0.096575

count mean std min 25% 50% 75% max (1086.055, 22694.5] (22694.5, 44089.0] (44089.0, 65483.5] (151061.5, 172456.0] (193850.5, 215245.0] (172456.0, 193850.5]
Estimated Number of Houses in 1 KM Radius 186880.0 10516.828082 9977.872801 1300.0 7553.5 9478.5 11601.5 215245.0 0.974658 0.016438 0.005479 0.00137 0.000685 0.0

count mean std min 25% 50% 75% max (769.8, 1205.6] (1205.6, 1641.4] (329.642, 769.8] (1641.4, 2077.2] (2077.2, 2513.0] (2513.0, 2948.8] (2948.8, 3384.6] (4256.2, 4692.0] (3820.4, 4256.2]
Avg No of Withdrawls Per Week 186880.0 1162.626712 386.456356 334.0 882.0 1087.0 1391.25 4692.0 0.503425 0.263699 0.113014 0.100685 0.014384 0.00274 0.00137 0.000685 0.0

count mean std min 25% 50% 75% max (2008.8, 2009.2] (2006.8, 2007.2] (2005.996, 2006.4] (2007.6, 2008.0] (2009.6, 2010.0] (2009.2, 2009.6]
ATM Since 186880.0 2007.815753 1.327644 2006.0 2007.0 2008.0 2009.0 2010.0 0.231507 0.225342 0.215068 0.208219 0.119863 0.0

count mean std min 25% 50% 75% max (108350.0, 181300.0] (181300.0, 254250.0] (254250.0, 327200.0] (34670.5, 108350.0] (327200.0, 400150.0] (400150.0, 473100.0] (473100.0, 546050.0] (546050.0, 619000.0] (691950.0, 764900.0] (619000.0, 691950.0]
AmountWithDrawn 186880.0 186128.713549 79466.216407 35400.0 135200.0 168050.0 218900.0 764900.0 0.489063 0.268076 0.097175 0.089822 0.035124 0.012778 0.003039 0.002697 0.00137 0.000856

(20.821, 31.529] (63.118, 73.647] (52.588, 63.118] (73.647, 84.176] (84.176, 94.706] (31.529, 42.059] (115.765, 126.294] (157.882, 168.412] (42.059, 52.588] (126.294, 136.824] ... (178.941, 189.471] (136.824, 147.353] count mean std min 25% 50% 75% max
Number of Shops Around ATM 0.371816 0.15557 0.143044 0.054543 0.051375 0.043086 0.035766 0.034455 0.027702 0.023823 ... 0.006202 0.0 186880.0 62.394542 42.389501 21.0 27.0 58.0 76.0 200.0
1 rows × 23 columns

Highway College Residential Church Railway Station Lodge and Hotel Tourist Place Shops Market Hospital ... Upcoming Area Food Joints Resturant and Bar Traffic Signal IT park Local Transport Temple Govt Buildings 2+ ATMS Very Close Tasmac
ATM Near 0.15411 0.10274 0.077397 0.068493 0.058904 0.05411 0.05274 0.050685 0.05 0.040411 ... 0.025342 0.019178 0.017123 0.013014 0.011644 0.010959 0.007534 0.006164 0.00411 0.00137
1 rows × 25 columns

count mean std min 25% 50% 75% max (51.207, 61.276] (71.345, 81.414] ... (31.069, 41.138] (20.708, 31.069] (101.552, 111.621] (111.621, 121.69] (121.69, 131.759] (131.759, 141.828] (151.897, 161.966] (302.931, 313.0] (161.966, 172.034] (292.862, 302.931]
No of Other ATMs in 1 KM radius 186880.0 69.978082 22.60748 21.0 59.0 70.0 80.0 313.0 0.208219 0.202055 ... 0.038356 0.032877 0.024658 0.012329 0.006849 0.00411 0.002055 0.00137 0.000685 0.0
1 rows × 24 columns
The following are the possible_date_features
  None

The following are the possible_categorical_features
  ATM Placement
  ATM looks
  ATM TYPE
  ATM Location TYPE
  Day Type
  ATM Zone
  Holiday Sequence
  ATM Attached to
  ATM Prox

The following are the possible_continuos_features
  Average Wait Time
  ATM RATING
  Avg Withdrawls Per Hour
  Estimated Number of Houses in 1 KM Radius
  Avg No of Withdrawls Per Week
  ATM Since
  AmountWithDrawn
  Number of Shops Around ATM
  No of Other ATMs in 1 KM radius

The following are the possible_categorical_binary
  ATM Placement

The following are the possible_categorical_multilabel
  ATM looks
  ATM TYPE
  ATM Location TYPE
  Day Type
  ATM Zone
  Holiday Sequence
  ATM Attached to
  ATM Prox

The following are the possible_categorical_string
  ATM Near
 
 
# Walk every entry of the exploration dictionary except the first one and,
# for each, print the group name followed by a preview of the dataset
# restricted to that group's columns.
for key, cols in list(exploration_dict.items())[1:]:
    print(key)
    display(dataset[cols].head())
possible_categorical_features

ATM Placement ATM looks ATM TYPE ATM Location TYPE Day Type ATM Zone Holiday Sequence ATM Attached to ATM Prox
0 Facing Road Normal Urban Only WIthdraw Working RL WW Building Attached to Branch
1 Facing Road Normal Urban Only WIthdraw Working RL WW Building Attached to Branch
2 Facing Road Normal Town Only WIthdraw Working RL WW Building Attached to Branch
3 Facing Road Normal Town Only WIthdraw Working RL HH Building 2 ATM Machines (Same Bank)
4 Facing Road Normal Town Only WIthdraw Working RL WW Building Attached to Branch
possible_continuos_features

Average Wait Time ATM RATING Avg Withdrawls Per Hour Estimated Number of Houses in 1 KM Radius Avg No of Withdrawls Per Week ATM Since AmountWithDrawn Number of Shops Around ATM No of Other ATMs in 1 KM radius
0 3 4 7 8450 856 2008 209500 66 65
1 3 3 6 9600 1262 2007 184300 26 80
2 3 4 7 11250 920 2008 231500 65 68
3 3 4 7 9550 961 2006 143600 80 60
4 4 4 8 14260 1145 2008 255600 66 84
possible_categorical_binary

ATM Placement
0 Facing Road
1 Facing Road
2 Facing Road
3 Facing Road
4 Facing Road
possible_categorical_multilabel

ATM looks ATM TYPE ATM Location TYPE Day Type ATM Zone Holiday Sequence ATM Attached to ATM Prox
0 Normal Urban Only WIthdraw Working RL WW Building Attached to Branch
1 Normal Urban Only WIthdraw Working RL WW Building Attached to Branch
2 Normal Town Only WIthdraw Working RL WW Building Attached to Branch
3 Normal Town Only WIthdraw Working RL HH Building 2 ATM Machines (Same Bank)
4 Normal Town Only WIthdraw Working RL WW Building Attached to Branch
possible_categorical_string

ATM Near
0 College
1 Temple
2 College
3 Shopping Complex
4 Park
possible_date_features
 
 
# Names of the feature groups held in the exploration dictionary,
# leaving out its first entry (iterating a dict yields its keys).
list(exploration_dict)[1:]


    



['possible_categorical_features',
 'possible_continuos_features',
 'possible_categorical_binary',
 'possible_categorical_multilabel',
 'possible_categorical_string',
 'possible_date_features']
# Display a per-column schema summary of the dataset; the table shown below
# lists feature_name, data_type, number_of_classes and suggested_dtype.
# NOTE(review): infer_schema is not defined in this notebook — presumably it
# comes from the `stark` star import; confirm.
infer_schema(dataset)

feature_name data_type number_of_classes suggested_dtype
0 ID object 186880 object
1 Number of Shops Around ATM int64 120 int64
2 ATM Zone object 5 object
3 No of Other ATMs in 1 KM radius int64 110 int64
4 Estimated Number of Houses in 1 KM Radius int64 1073 int64
5 ATM Placement object 2 object
6 ATM TYPE object 4 object
7 ATM Location TYPE object 4 object
8 ATM looks object 3 object
9 ATM Near object 26 object
10 Avg Withdrawls Per Hour int64 10 int64
11 ATM Attached to object 6 object
12 Avg No of Withdrawls Per Week int64 753 int64
13 Average Wait Time int64 8 int64
14 Day Type object 4 object
15 ATM RATING int64 4 int64
16 ATM Prox object 6 object
17 ATM Since int64 5 int64
18 Holiday Sequence object 5 object
19 AmountWithDrawn int64 12929 int64
# Column the models will predict, kept as a one-element list so it can be
# used both for membership tests and (via target[0]) for column selection.
target = ['AmountWithDrawn']

# Numeric predictors: every column reported under
# 'possible_continuos_features' except the target itself.
continuos_columns = [
    col
    for col in exploration_dict['possible_continuos_features']
    if col not in target
]

# Box plot of the target's distribution. The series is taken straight from
# `dataset` so this cell runs in a fresh top-to-bottom execution and does not
# depend on a results frame that is never created in this notebook.
# Passing the series as `x=` keeps the plot horizontal across seaborn versions.
# NOTE(review): confirm the target column is the series intended for this plot.
sns.boxplot(x=dataset[target[0]])
<matplotlib.axes._subplots.AxesSubplot at 0x7f6cadb49748>
['possible_categorical_features',
 'possible_continuos_features',
 'possible_categorical_binary',
 'possible_categorical_multilabel',
 'possible_categorical_string',
 'possible_date_features']
# One-hot encode the columns listed under 'possible_categorical_features'.
categorical_columns = exploration_dict['possible_categorical_features']
dataset_cat = pd.get_dummies(dataset.loc[:, categorical_columns])

# Numeric predictors are taken as-is (the target was excluded upstream).
dataset_continuos = dataset.loc[:, continuos_columns]

# Final feature matrix: numeric columns first, then the dummy columns,
# aligned on the shared row index.
dataset_fin = dataset_continuos.join(dataset_cat)

# Remember the feature-column labels of the assembled matrix.
FSET = dataset_fin.columns

# Shapes of the feature matrix and of the target column, for a quick check
# that both have the same number of rows.
dataset_fin.shape, dataset[target[0]].shape
((186880, 47), (186880,))

Comments

Popular posts from this blog

Amount Withdrawn Model Part 2

Hybrid Recommendation Engine