Copper - Bootstrap and Bagging
This week in my Advanced Business Intelligence class we took a look at Boosting and Bagging, two concepts well known to everybody in the machine learning world. The example in class was to take a simple Decision Tree and compare it to a bagged Decision Tree; FYI, the example in class failed: SAS said that the simple DT was "better" than the bagged one, due to some error in SAS Enterprise Miner that we could not find. Time to see if Python is capable of doing it.
This first part is just a recap of Post #1. I am using the same donors.csv that I use for my class. We import the data and set some roles for the variables:
import copper
copper.project.path = '../'
ds = copper.Dataset()
ds.load('data.csv')
ds.role['TARGET_D'] = ds.REJECTED  # drop this target from the inputs
ds.role['TARGET_B'] = ds.TARGET    # the variable we want to predict
ds.type['ID'] = ds.CATEGORY        # treat the ID as a category, not a number
Since scikit-learn can't handle NaNs we need to fill the missing values, so I created a simple method on the Dataset class that fills the values of a numerical column with its mean.
ds.fillna('DemAge', 'mean')
ds.fillna('GiftAvgCard36', 'mean')
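For reference, a minimal sketch of what that mean fill amounts to in plain pandas (the DataFrame df and the read_csv call are illustrative, not copper internals):
import pandas as pd
df = pd.read_csv('data.csv')  # illustrative path; copper resolves it through project.path
# Fill missing values of a numeric column with that column's mean,
# which is what ds.fillna(column, 'mean') is meant to do
df['DemAge'] = df['DemAge'].fillna(df['DemAge'].mean())
df['GiftAvgCard36'] = df['GiftAvgCard36'].fillna(df['GiftAvgCard36'].mean())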
Let's see if we are good
ds.inputs
<class 'pandas.core.frame.DataFrame'>
Int64Index: 9686 entries, 0 to 9685
Data columns:
GiftCnt36            9686  non-null values
GiftCntAll           9686  non-null values
GiftCntCard36        9686  non-null values
GiftCntCardAll       9686  non-null values
GiftAvgLast          9686  non-null values
GiftAvg36            9686  non-null values
GiftAvgAll           9686  non-null values
GiftAvgCard36        9686  non-null values
GiftTimeLast         9686  non-null values
GiftTimeFirst        9686  non-null values
PromCnt12            9686  non-null values
PromCnt36            9686  non-null values
PromCntAll           9686  non-null values
PromCntCard12        9686  non-null values
PromCntCard36        9686  non-null values
PromCntCardAll       9686  non-null values
StatusCat96NK [A]    9686  non-null values
StatusCat96NK [E]    9686  non-null values
StatusCat96NK [F]    9686  non-null values
StatusCat96NK [L]    9686  non-null values
StatusCat96NK [N]    9686  non-null values
StatusCat96NK [S]    9686  non-null values
StatusCatStarAll     9686  non-null values
DemCluster           9686  non-null values
DemAge               9686  non-null values
DemGender [F]        9686  non-null values
DemGender [M]        9686  non-null values
DemGender [U]        9686  non-null values
DemHomeOwner [H]     9686  non-null values
DemHomeOwner [U]     9686  non-null values
DemMedHomeValue      9686  non-null values
DemPctVeterans       9686  non-null values
DemMedIncome         9686  non-null values
dtypes: float64(7), int64(26)
OK, no missing values so we are good to go.
Machine Learning
Time to see if bootstrapping and bagging are as good as they promise to be. We create a new machine learning instance, set the dataset, and tell it to sample half of the data for training and half for testing.
ml = copper.MachineLearning()
ml.dataset = ds
ml.sample(trainSize=0.5)
Create a new Decision Tree, add it to the models to compare, and fit the models:
from sklearn import tree
tree_clf = tree.DecisionTreeClassifier(max_depth=10)
ml.add_clf(tree_clf, 'Decision Tree')
ml.fit()
Since I started coding this library I have wanted to maintain full compatibility with pandas and scikit-learn and their respective APIs. For that reason it is possible to add already fitted classifiers to the ML class and then compare them with its utilities; this is useful in the case of bootstrapping.
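For example, any estimator that follows the scikit-learn fit/predict API can be trained outside of copper and then registered with add_clf. The snippet below is purely illustrative (GaussianNB is an arbitrary choice and is not part of the comparison that follows):
from sklearn.naive_bayes import GaussianNB
# Fit a classifier on the same training split, outside of copper...
nb_clf = GaussianNB()
nb_clf.fit(ml.X_train, ml.y_train)
# ...and add the already fitted estimator so it can be compared with the rest
ml.add_clf(nb_clf, 'Naive Bayes')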
In the next lines I create 20 different Decision Tree classifiers using 20 different samples (via bootstrapping), fit each classifier, and add them to the ML class.
I have to be careful here: the bootstrapping should use only the training part, so I re-sample from ml.X_train and ml.y_train instead of from all of ds.inputs (above I did ml.sample(trainSize=0.5) to split the inputs into half training and half testing). From that training half I draw 20 bootstrap samples and fit 20 classifiers. The first time I used all the inputs and the results were amazing, but obviously I was then using some records for both training and testing, which is wrong. Since each bootstrap sample in turn takes half of the training half, each new Decision Tree is trained on roughly a quarter of the inputs.
from sklearn import cross_validation
# 20 bootstrap samples drawn (with replacement) from the training split only
bs = cross_validation.Bootstrap(len(ml.X_train), n_iter=20)
i = 0
for train_index, test_index in bs:
    X_train = ml.X_train[train_index]
    y_train = ml.y_train[train_index]
    clf = tree.DecisionTreeClassifier(max_depth=10)
    clf.fit(X_train, y_train)
    ml.add_clf(clf, "DT" + str(i + 1))
    i += 1
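Note that cross_validation.Bootstrap was removed in later scikit-learn releases; on a newer version an approximately equivalent loop can be written with sklearn.utils.resample. It is not an exact replacement (Bootstrap also splits each sample into train and test portions) and it is an alternative to the loop above, not something to run in addition to it:
from sklearn.utils import resample
# Draw 20 bootstrap samples (with replacement) from the training split only
for i in range(20):
    X_bs, y_bs = resample(ml.X_train, ml.y_train, random_state=i)
    clf = tree.DecisionTreeClassifier(max_depth=10)
    clf.fit(X_bs, y_bs)
    ml.add_clf(clf, "DT" + str(i + 1))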
Let's see some results:
ml.accuracy().head()
Decision Tree    0.550279
DT17             0.539955
DT14             0.536031
DT11             0.535205
DT19             0.534999
Name: Accuracy
ml.roc(legend=False, retList=True)
Decision Tree    0.541008
DT15             0.521935
DT19             0.520583
DT11             0.516007
DT16             0.515312
DT14             0.514619
DT8              0.513731
DT17             0.509060
DT9              0.506149
DT3              0.504594
DT13             0.504411
DT5              0.501457
DT2              0.499923
DT6              0.497339
DT10             0.494504
DT12             0.494483
DT7              0.493177
DT18             0.492243
DT4              0.491786
DT1              0.488879
DT20             0.483900
As expected that didn't do much; most of the new models are even worse than the original, but now we have a few models ready to be bagged.
Bagging
Since scikit-learn does not have an implementation of bagging, I made a really simple (and inefficient) one: the bag predicts the class using the mode of the individual predictions and predicts the probabilities using their mean. I also created some methods on the ML class to make that possible and easy.
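A minimal sketch of that idea, assuming a list of already fitted scikit-learn classifiers (this is not the actual copper.core.ensemble.Bagging code, just the mode-of-classes / mean-of-probabilities logic described above):
import numpy as np
from scipy import stats

class SimpleBagging(object):
    def __init__(self, clfs):
        self.clfs = clfs  # already fitted scikit-learn classifiers

    def predict(self, X):
        # Stack each classifier's class predictions and take the column-wise mode
        preds = np.array([clf.predict(X) for clf in self.clfs])
        mode, _ = stats.mode(preds, axis=0)
        return np.ravel(mode)

    def predict_proba(self, X):
        # Average the predicted probabilities across the classifiers
        probas = np.array([clf.predict_proba(X) for clf in self.clfs])
        return probas.mean(axis=0)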
Creating a bag of all the models is as simple as calling the method and passing a name as a parameter. In future releases it will be possible to pass a list of target models to include in the bag, which will make it easy to create more bags.
ml.bagging("Bag 1")
ml.clfs # Checking the classifiers
DT14             DecisionTreeClassifier(compute_importances=Fal...
Decision Tree    DecisionTreeClassifier(compute_importances=Fal...
DT13             DecisionTreeClassifier(compute_importances=Fal...
DT15             DecisionTreeClassifier(compute_importances=Fal...
DT18             DecisionTreeClassifier(compute_importances=Fal...
DT12             DecisionTreeClassifier(compute_importances=Fal...
DT17             DecisionTreeClassifier(compute_importances=Fal...
DT9              DecisionTreeClassifier(compute_importances=Fal...
DT8              DecisionTreeClassifier(compute_importances=Fal...
DT20             DecisionTreeClassifier(compute_importances=Fal...
DT11             DecisionTreeClassifier(compute_importances=Fal...
DT19             DecisionTreeClassifier(compute_importances=Fal...
DT16             DecisionTreeClassifier(compute_importances=Fal...
DT3              DecisionTreeClassifier(compute_importances=Fal...
DT2              DecisionTreeClassifier(compute_importances=Fal...
DT1              DecisionTreeClassifier(compute_importances=Fal...
DT10             DecisionTreeClassifier(compute_importances=Fal...
DT7              DecisionTreeClassifier(compute_importances=Fal...
DT6              DecisionTreeClassifier(compute_importances=Fal...
DT5              DecisionTreeClassifier(compute_importances=Fal...
DT4              DecisionTreeClassifier(compute_importances=Fal...
Bag 1            <copper.core.ensemble.Bagging object at 0x7937...
Let's see the results:
ml.accuracy().head()
Bag 1            0.558951
Decision Tree    0.550279
DT17             0.539955
DT14             0.536031
DT11             0.535205
Name: Accuracy
ml.roc(legend=False, retList=True)
Bag 1            0.578210
Decision Tree    0.541008
DT15             0.521935
DT19             0.520583
DT11             0.516007
DT16             0.515312
DT14             0.514619
DT8              0.513731
DT17             0.509060
DT9              0.506149
DT3              0.504594
DT13             0.504411
DT5              0.501457
DT2              0.499923
DT6              0.497339
DT10             0.494504
DT12             0.494483
DT7              0.493177
DT18             0.492243
DT4              0.491786
DT1              0.488879
DT20             0.483900
Well, it is an improvement; not a huge one, but for something that just combines the individual models with a mode and a mean it is not bad at all. At least the bag scores a better accuracy than the other 20 classifiers and also a better Area Under the Curve.
Conclusion
Bagging is good: even a very simple implementation gave better results.
I did not tune any parameters for the Decision Trees, only max_depth=10; playing with more models and more parameters I am sure bagging is going to do even better. To address that, next I want to take a look at Grid Search and see how it can help me improve these results.
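As a preview, a grid search over the tree parameters could look something like this (GridSearchCV lives in sklearn.grid_search in the scikit-learn version used here, sklearn.model_selection in newer releases; the parameter grid is just an example):
from sklearn import tree
from sklearn.grid_search import GridSearchCV
# Try a few combinations of Decision Tree parameters instead of fixing max_depth=10
param_grid = {'max_depth': [5, 10, 15, 20], 'min_samples_leaf': [1, 5, 10]}
grid = GridSearchCV(tree.DecisionTreeClassifier(), param_grid, cv=5)
grid.fit(ml.X_train, ml.y_train)
print(grid.best_params_)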
I really believe that the potential of bagging is in doing some conditional scoring, for example only asking the models that are good with high incomes when the entry's income is higher than $10,000. It is also probably a good idea to use different classifiers: instead of 20 decision trees, 5 DTs, 5 SVMs, and so on. But that is just something that crossed my mind.
As usual the code is on github: copper