
TensorFlow Classification

10 Oct 2017

Model Classification

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn import metrics
import logging
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pandas as pd
import time
from hyperopt import fmin, tpe, hp, Trials

Suppress warnings and TensorFlow log noise

import warnings, os
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

Define a method to load the data, split it into train/validation/test sets, and display a pairs plot of the scaled features

def Get_Model_Data():
    model = pd.read_csv('./model.csv')
    print(model.head())
    
    # Split the data into Train/Validation/Test with Train having 60% and test and validation 20% each
    train, valid, test = np.split(model.sample(frac=1), [int(.6*len(model)), int(.8*len(model))])

    features = ['BoxRatio', 'Thrust', 'Velocity', 'OnBalRun', 'vwapGain']
    print("features: ", features)
    
    X = train[features].values
    X = X.astype(np.float64)
    standard_scaler = StandardScaler()
    X = standard_scaler.fit_transform(X)
    
    label_encoder = LabelEncoder()
    label_encoder.fit(train["Altitude"])
    y = label_encoder.transform(train["Altitude"])
    
    x_train = train[features].values
    y_train = train['Altitude']
    x_valid = valid[features].values
    y_valid = valid['Altitude']
    x_test  = test[features].values
    y_test  = test['Altitude']

    # Build a frame of the scaled training features and encoded labels for
    # plotting. (X and y only cover the training rows, so the validation and
    # test sets are not included here.)
    df = train.copy()
    df[features] = X
    df['Altitude'] = y
    
    mask = ['BoxRatio', 'Thrust', 'Velocity', 'OnBalRun', 'vwapGain', 'Altitude']
    df_new = df[mask]
    print(df_new.head())
    # plt.ioff()
    red_green = ["#ff0000", "#00ff00"]
    sns.set_palette(red_green)
    g = sns.pairplot(df_new, 
                     diag_kind='kde',
                     hue='Altitude', 
                     markers=["o", "D"],
                     size=1.5, 
                     aspect=1,
                     plot_kws={"s": 10})
    g.fig.subplots_adjust(right=0.9)
    # g.savefig('model_classify.png', bbox_inches='tight')
    
    return x_train, y_train, x_valid, y_valid, x_test, y_test
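
The np.split call above produces the 60/20/20 split by cutting the shuffled frame at the 60% and 80% marks. A minimal sketch on a toy frame (hypothetical data) illustrates the mechanics:

import numpy as np
import pandas as pd

toy = pd.DataFrame({'a': range(10)})              # 10 rows of dummy data
cuts = [int(.6 * len(toy)), int(.8 * len(toy))]   # cut indices 6 and 8
tr, va, te = np.split(toy.sample(frac=1, random_state=0), cuts)
print(len(tr), len(va), len(te))                  # 6 2 2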

Define the training method

def Train(x_train, y_train,
          x_valid, y_valid,
          hidden,
          activation,
          optimizer,
          dropout):

    feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(x_train)
    # config = tf.contrib.learn.RunConfig(tf_random_seed=7, save_checkpoints_secs=1)
    config = tf.contrib.learn.RunConfig(tf_random_seed=7)
    # print('hidden: ', hidden, ', activation: ', activation.__name__, ', dropout: ', dropout, ', optimizer: ', optimizer)

    validation_metrics = {
      "accuracy":
          tf.contrib.learn.MetricSpec(
              metric_fn=tf.contrib.metrics.streaming_accuracy,
              prediction_key="classes"),
      "precision":
          tf.contrib.learn.MetricSpec(
              metric_fn=tf.contrib.metrics.streaming_precision,
              prediction_key="classes"),
      "recall":
          tf.contrib.learn.MetricSpec(
              metric_fn=tf.contrib.metrics.streaming_recall,
              prediction_key="classes")
    }
    
    # Evaluate on the validation set every 50 steps and stop early if the
    # validation loss has not improved within the last 200 steps.
    validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
        x_valid,
        y_valid,
        every_n_steps = 50,
        early_stopping_metric = "loss",
        early_stopping_metric_minimize = True,
        early_stopping_rounds = 200,
        metrics = validation_metrics)
    
    nn = tf.contrib.learn.DNNClassifier(
        feature_columns = feature_columns,
        hidden_units = hidden,
        activation_fn = activation,
        n_classes = 2,
        optimizer = optimizer,
        config = config,
        dropout = dropout
    )
    
    logging.getLogger().setLevel(logging.INFO)
    
    nn.fit(
        x = x_train,
        y = y_train,
        max_steps=500,
        monitors=[validation_monitor])
    
    # Note: x_test and y_test are globals defined later in the post, not
    # parameters, so the hyperparameter search is scored on the test set.
    ev = nn.evaluate(x=x_test, y=y_test, steps=1)
    mse = ev["loss"]
    return nn, mse

Define an objective function for the Bayesian hyperparameter optimization. A small switch helper class (modeled on the classic C-style switch recipe) first dispatches on the sampled parameter names.

class switch(object):
    def __init__(self, value):
        self.value = value
        self.fall = False

    def __iter__(self):
        """Yield the match method once, then stop."""
        # Simply returning ends the generator; an explicit `raise StopIteration`
        # becomes a RuntimeError under PEP 479 (Python 3.7+).
        yield self.match
    
    def match(self, *args):
        """Indicate whether or not to enter a case suite"""
        if self.fall or not args:
            return True
        elif self.value in args:
            self.fall = True
            return True
        else:
            return False
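
The helper yields its match method exactly once, so a `for case in switch(value):` loop runs a single pass of C-style case tests. A minimal usage sketch (hypothetical values):

for case in switch('activation'):
    if case('hidden'):
        print('hidden branch')
        break
    if case('activation'):
        print('activation branch')   # this branch runs
        break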

def objective(params):
    # print(' ')
    for name, value in params.items():
        for case in switch(name):
            if case('hidden'):
                hid = value
                # print('hidden: ', hid)
                break
            if case('activation'):
                act = value
                # print('activation: ', act.__name__)
                break
            if case('optimizer'):
                opt = value
                # print('optimizer: ', opt)
                break

    nn, mse = Train(x_train, y_train, 
                    x_valid, y_valid,
                    hidden = hid,
                    activation = act,
                    optimizer = opt,
                    dropout = 0.2)
    
    global best_nn, best_mse, loop, max_loop
    loop += 1
    if mse < best_mse:
        best_mse = mse
        best_nn = nn
        print(loop,'hid:',hid,'act:',act.__name__,'opt:',opt,'best mse:',best_mse)
        
    return mse

Get the training, validation, and testing datasets, and display a pairs plot

x_train, y_train, x_valid, y_valid, x_test, y_test = Get_Model_Data()
        Date  Symbol  BuyIndex  SellIndex  BoxRatio  Thrust  Acceleration  \
0  4/20/2017  NUE          132        214     1.232   1.064        28.436   
1   5/9/2017  TDOC         132        294     0.496   0.780        69.170   
2  11/3/2016  DYN          155        250     0.505  -0.573        95.105   
3  7/27/2017  VZ           132        249     1.149   1.055        38.779   
4  1/24/2017  VRAY         132        190     3.101   4.414         8.504   

   Velocity  nPosVelo  OnBalRun  vwapGain    Gain  Altitude  
0     1.107        33     3.360     0.782  0.8069         0  
1     2.877        13     8.656     1.352  6.4000         1  
2     2.258        22     4.547     0.505  2.2078         1  
3     1.525        54     3.882     1.924  1.7749         1  
4     1.577        59     4.017     0.826  1.3645         0  
features:  ['BoxRatio', 'Thrust', 'Velocity', 'OnBalRun', 'vwapGain']
     BoxRatio    Thrust  Velocity  OnBalRun  vwapGain  Altitude
106 -0.328589 -0.261886 -0.558013 -1.015896 -0.583956         1
787 -0.223632  0.220892  0.579358  1.188217  0.478487         0
780 -0.060862 -0.006298 -0.652533 -0.648388 -0.536345         1
681 -0.423869 -0.120585 -0.445340 -0.483976 -0.362463         0
49  -0.178225  0.095522 -0.484775 -0.031923 -0.070071         1

[Pair plot of the five scaled features, colored by the Altitude class]

Define the hyperparameter search space

hidden_list = [[40], [80], [100], [200], [300], [400], [500],
               [80, 40], [100, 50], [200, 100], [300, 150], [400, 200], [500, 250],
               [80, 70, 60, 50],
               [100, 80, 60, 40],
               [200, 150, 100, 50],
               [300, 250, 200, 150],
               [80, 70, 60, 50, 40],
               [100, 80, 60, 40, 20],
               [200, 180, 160, 140, 120],
               [300, 250, 200, 150, 100],
               [400, 350, 300, 250, 200],
               [500, 400, 300, 200, 100],
               [80, 70, 60, 50, 40, 30],
               [100, 90, 80, 70, 60, 50],
               [200, 180, 160, 150, 140, 130],
               [300, 250, 200, 150, 100, 50],
               [400, 350, 300, 250, 200, 150],
               [500, 400, 300, 200, 100, 50]]

activation_list = [tf.sigmoid, tf.tanh, tf.nn.elu, tf.nn.softplus, tf.nn.softsign]
optimizer_list = ['SGD', 'Adam', 'Adagrad', 'RMSProp']
best_mse = 100000.0
loop = 0
max_loop = 500
start_time = int(time.time())

space = {
    'hidden': hp.choice('hidden', hidden_list),
    'activation': hp.choice('activation', activation_list),
    'optimizer': hp.choice('optimizer', optimizer_list)
}
trials = Trials()
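
Note that hp.choice reports the *index* of the winning option, not the option itself, which is why the lookups after fmin below index back into hidden_list, activation_list, and optimizer_list. A minimal sketch of that behavior (toy objective, assumes only that hyperopt is installed):

from hyperopt import fmin, tpe, hp
toy_best = fmin(lambda p: p['x'] ** 2,
                {'x': hp.choice('x', [-1.0, 0.0, 1.0])},
                algo=tpe.suggest, max_evals=10)
print(toy_best)   # e.g. {'x': 1} -- the index of the best option, 0.0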

Run the optimization, compute the metrics, and display the results

best = fmin(objective, space, algo=tpe.suggest, max_evals=max_loop, trials=trials)

best_hidden = hidden_list[best['hidden']]
best_activation = activation_list[best['activation']].__name__
best_optimizer = optimizer_list[best['optimizer']]
ev = best_nn.evaluate(x=x_test, y=y_test, steps=1)
mse = ev["loss"]
assert best_mse == mse
    
print('\nbest model:',
      '\n hidden........ ', best_hidden, 
      '\n activation.... ', best_activation,
      '\n optimizer..... ', best_optimizer,
      '\n best mse...... ', best_mse)

predictions = list(best_nn.predict(x_test, as_iterable=True))
print(metrics.classification_report(y_true=y_test, y_pred=predictions))

cm = metrics.confusion_matrix(y_true=y_test, y_pred=predictions)
print('confusion matrix\n',cm)
true_negative  = cm[0,0]
true_positive  = cm[1,1]
false_negative = cm[1,0]
false_positive = cm[0,1]

total = true_negative + true_positive + false_negative + false_positive
accuracy = (true_positive + true_negative)/total
precision = (true_positive)/(true_positive + false_positive)
recall = (true_positive)/(true_positive + false_negative)
misclassification_rate = (false_positive + false_negative)/total
F1 = (2*true_positive)/(2*true_positive + false_positive + false_negative)

print('accuracy.................', accuracy)
print('precision................', precision)
print('recall...................', recall)
print('misclassification rate...', misclassification_rate)
print('F1.......................', F1)
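
As a sanity check on the formulas above, plugging in the confusion matrix from the run shown below, [[57 59] [27 97]], reproduces the printed figures:

tn, fp, fn, tp = 57, 59, 27, 97
print((tp + tn) / (tn + fp + fn + tp))   # accuracy  = 154/240 ~ 0.6417
print(tp / (tp + fp))                    # precision = 97/156  ~ 0.6218
print(tp / (tp + fn))                    # recall    = 97/124  ~ 0.7823
print(2 * tp / (2 * tp + fp + fn))       # F1        = 194/280 ~ 0.6929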

print('Check the calculations')
y_true = np.asarray(y_test)
y_pred = np.asarray(predictions)
# if not accuracy == metrics.accuracy_score(y_true, y_pred): raise AssertionError
# if not precision == metrics.precision_score(y_true, y_pred): raise AssertionError
# if not recall == metrics.recall_score(y_true, y_pred): raise AssertionError
# if not F1 == metrics.f1_score(y_true, y_pred): raise AssertionError
print('accuracy.................', metrics.accuracy_score(y_true, y_pred))
print('precision................', metrics.precision_score(y_true, y_pred))
print('recall...................', metrics.recall_score(y_true, y_pred))
print('F1.......................', metrics.f1_score(y_true, y_pred))

stream_auc = tf.contrib.metrics.streaming_auc(y_pred, y_true)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Streaming metrics keep their accumulators in *local* variables, so these
# must be initialized as well or the run below fails.
sess.run(tf.local_variables_initializer())
train_auc = sess.run(stream_auc)
print('auc......................', train_auc[1])
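
Since the predictions fed to streaming_auc are hard 0/1 labels, the resulting AUC is effectively (recall + specificity)/2, and scikit-learn's roc_auc_score on the same arrays should agree closely. A minimal cross-check:

# (recall + specificity)/2 = (0.7823 + 57/116)/2 ~ 0.6368, matching the output below.
print('sklearn auc..............', metrics.roc_auc_score(y_true, y_pred))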

end_time = int(time.time())
d = divmod(end_time - start_time,86400)  # days
h = divmod(d[1],3600)  # hours
m = divmod(h[1],60)  # minutes
s = m[1]  # seconds
print('%d days, %d hours, %d minutes, %d seconds' % (d[0],h[0],m[0],s))
1 hid: (100,) act: sigmoid opt: SGD best mse: 0.666657
2 hid: (100, 80, 60, 40) act: tanh opt: Adam best mse: 0.656084
5 hid: (100, 90, 80, 70, 60, 50) act: elu opt: Adagrad best mse: 0.655478
7 hid: (100, 80, 60, 40) act: tanh opt: SGD best mse: 0.654447
23 hid: (100, 80, 60, 40, 20) act: tanh opt: Adagrad best mse: 0.652481
183 hid: (100, 90, 80, 70, 60, 50) act: softplus opt: Adam best mse: 0.638419

best model: 
 hidden........  [100, 90, 80, 70, 60, 50] 
 activation....  softplus 
 optimizer.....  Adam 
 best mse......  0.638419
             precision    recall  f1-score   support

          0       0.68      0.49      0.57       116
          1       0.62      0.78      0.69       124

avg / total       0.65      0.64      0.63       240

confusion matrix
 [[57 59]
 [27 97]]
accuracy................. 0.641666666667
precision................ 0.621794871795
recall................... 0.782258064516
misclassification rate... 0.358333333333
F1....................... 0.692857142857
Check the calculations
accuracy................. 0.641666666667
precision................ 0.621794871795
recall................... 0.782258064516
F1....................... 0.692857142857
auc...................... 0.636819
0 days, 3 hours, 30 minutes, 2 seconds