jithin pradeep Cognitive Research Scientist | AI and Mixed reality Enthusiast

Working with Keras Basics Part 1

Working with Keras Basics Part 1 contains a very basic, step-by-step process for creating a multilayer perceptron model in Keras.

What is Keras?

When Keras is not my first choice

Building a model with keras

Let's build something with Keras: a multilayer perceptron model. For the data, let's use the Wine Quality Data Set from the UCI Machine Learning Repository. Just to get the knack of it, let's do some EDA on the data before we use it to create a model.

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import pandas as pd
import os 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Directory holding the downloaded CSV files: a "data" folder inside the
# current working directory.
current_dir = os.getcwd()
# os.path.join picks the right separator for the host OS; the hand-written
# "\data" only worked on Windows and relied on an invalid escape sequence.
datapath = os.path.join(current_dir, "data")

Fetching the dataset

Note: I have downloaded and stored the file in a folder called data

# Load the red and white wine tables; the files use ';' as the field separator.
# The file names are joined with os.path.join instead of a literal backslash so
# the path is valid on every OS and no invalid "\w" escape sequence is used.
redWineDF = pd.read_csv(os.path.join(datapath, "winequality-red.csv"), sep=';')
whiteWineDF = pd.read_csv(os.path.join(datapath, "winequality-white.csv"), sep=';')
# Show the first five rows of the red wine table.
redWineDF.head(5)

|   | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| 1 | 7.8 | 0.88 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 |
| 2 | 7.8 | 0.76 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 |
| 3 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 |
| 4 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |

whiteWineDF.head(5)

|   | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 7.0 | 0.27 | 0.36 | 20.7 | 0.045 | 45.0 | 170.0 | 1.0010 | 3.00 | 0.45 | 8.8 | 6 |
| 1 | 6.3 | 0.30 | 0.34 | 1.6 | 0.049 | 14.0 | 132.0 | 0.9940 | 3.30 | 0.49 | 9.5 | 6 |
| 2 | 8.1 | 0.28 | 0.40 | 6.9 | 0.050 | 30.0 | 97.0 | 0.9951 | 3.26 | 0.44 | 10.1 | 6 |
| 3 | 7.2 | 0.23 | 0.32 | 8.5 | 0.058 | 47.0 | 186.0 | 0.9956 | 3.19 | 0.40 | 9.9 | 6 |
| 4 | 7.2 | 0.23 | 0.32 | 8.5 | 0.058 | 47.0 | 186.0 | 0.9956 | 3.19 | 0.40 | 9.9 | 6 |

Visualizing data

# Side-by-side histograms of the alcohol column: red wine on the left,
# white wine on the right, ten bins each.
fig, ax = plt.subplots(1, 2)
red_ax, white_ax = ax

red_ax.hist(redWineDF["alcohol"], bins=10,
            facecolor='red', alpha=0.5, label="Red wine")
white_ax.hist(whiteWineDF["alcohol"], bins=10,
              facecolor='white', ec="black", lw=0.5, alpha=0.5, label="White wine")

# Only the red-wine panel gets a fixed y-range.
red_ax.set_ylim([0, 1000])

# Both panels share the same axis captions.
for panel in ax:
    panel.set_xlabel("Alcohol in % Vol")
    panel.set_ylabel("Frequency")

#ax[0].legend(loc='best')
#ax[1].legend(loc='best')
fig.suptitle("Distribution of Alcohol in % Vol")

plt.show()

output_9_0

# Scatter of sulphates against quality, one panel per wine colour.
fig, ax = plt.subplots(1, 2, figsize=(8, 4))
red_ax, white_ax = ax

red_ax.scatter(redWineDF['quality'], redWineDF["sulphates"],
               color="red", edgecolors="black", lw=0.3)
white_ax.scatter(whiteWineDF['quality'], whiteWineDF['sulphates'],
                 color="white", edgecolors="black", lw=0.5)

red_ax.set_title("Red Wine")
white_ax.set_title("White Wine")

# Identical captions and axis ranges on both panels so they can be compared.
for panel in ax:
    panel.set_xlabel("Quality")
    panel.set_ylabel("Sulphates")
    panel.set_xlim([0,10])
    panel.set_ylim([0,2.5])

fig.subplots_adjust(wspace=0.5)
fig.suptitle("Wine Quality by Amount of Sulphates")

plt.show()

output_10_0

# Alcohol against volatile acidity, one panel per wine colour, with one
# randomly drawn colour per quality score.

# Fix the seed so the random palette is the same on every run.
np.random.seed(570)

redlabels = np.unique(redWineDF['quality'])
whitelabels = np.unique(whiteWineDF['quality'])

fig, ax = plt.subplots(1, 2, figsize=(8, 4))

# Draw one RGBA colour per quality score found in either table and key it by
# the score. Sizing the palette from the data (instead of hard-coding 6 and 7
# rows) avoids an IndexError when a table has a different number of quality
# levels, and keying by score gives a score the same colour in both panels.
all_labels = np.union1d(redlabels, whitelabels)
palette = {
    label: tuple(rgba)
    for label, rgba in zip(all_labels, np.random.rand(len(all_labels), 4))
}
redcolors = np.array([palette[label] for label in redlabels])
whitecolors = np.array([palette[label] for label in whitelabels])

for panel, frame, labels in ((ax[0], redWineDF, redlabels),
                             (ax[1], whiteWineDF, whitelabels)):
    for label in labels:
        rows = frame[frame['quality'] == label]
        # `color=` takes a single RGBA value unambiguously; a bare 4-element
        # array passed as `c=` can be mistaken for four values to colour-map.
        panel.scatter(rows['volatile acidity'], rows['alcohol'], color=palette[label])

ax[0].set_title("Red Wine")
ax[1].set_title("White Wine")
ax[0].set_xlim([0,1.7])
ax[1].set_xlim([0,1.7])
ax[0].set_ylim([5,15.5])
ax[1].set_ylim([5,15.5])
ax[0].set_xlabel("Volatile Acidity")
ax[0].set_ylabel("Alcohol")
ax[1].set_xlabel("Volatile Acidity")
ax[1].set_ylabel("Alcohol")
#ax[0].legend(redlabels, loc='best', bbox_to_anchor=(1.3, 1))
# Legend entries are the white-wine quality scores, placed outside the panel.
ax[1].legend(whitelabels, loc='best', bbox_to_anchor=(1.3, 1))
#fig.suptitle("Alcohol - Volatile Acidity")
fig.subplots_adjust(top=0.85, wspace=0.7)

plt.show()

output_11_0

# Tag each table with a class column (0 = red, 1 = white) and stack white
# under red to get one combined data set.
redWineDF['class'] = 0
whiteWineDF['class'] = 1
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement and
# keeps the original row indices just as append did.
datsetDF = pd.concat([redWineDF, whiteWineDF])

# Correlation matrix of the combined table, drawn as a heatmap.
corr = datsetDF.corr()
# Tick labels are taken from the matrix itself so they always match its axes.
sns.heatmap(corr,
            xticklabels=corr.columns.values,
            yticklabels=corr.columns.values)
# seaborn no longer exposes pyplot as `sns.plt`; use matplotlib's pyplot directly.
plt.show()

output_13_0

# Split the data set into train and test sets.
# Features: the first 11 columns of the combined table, selected by position.
# The `.ix` indexer was removed in pandas 1.0; `.iloc` is the positional
# replacement.
x = datsetDF.iloc[:, 0:11]
# Target: the class column flattened to a 1-D array.
y = np.ravel(datsetDF['class'])

# Hold out 33% of the rows for testing; the fixed random_state makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

# Standardise the features. The scaler is fitted on the training rows only and
# the same transformation is then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Building sequential keras model

# `Dense` is the fully connected layer type, `Sequential` the linear layer stack.
from keras.layers import Dense
from keras.models import Sequential

# Declare the whole stack in one go instead of adding layers one at a time.
model = Sequential([
    # Input layer: 12 ReLU units fed by the 11 feature columns.
    Dense(12, activation='relu', input_shape=(11,)),
    # One hidden layer with 8 ReLU units.
    Dense(8, activation='relu'),
    # Output layer: a single sigmoid unit.
    Dense(1, activation='sigmoid'),
])
Using TensorFlow backend. ​    
# The model configuration can be printed in detail with
#model.get_config()
# and a summary with
#model.summary()

# Compile the model, then fit it to the training data.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.fit(X_train, y_train, batch_size=1, epochs=2, verbose=1)

print('Predicting...')
# Predictions for the held-out rows.
y_pred = model.predict(X_test)
# evaluate() returns the loss followed by the compiled metrics.
score = model.evaluate(X_test, y_test, verbose=1)

print(score)
Epoch 1/2
4352/4352 [==============================] - 4s 950us/step - loss: 0.0770 - acc: 0.9756 
Epoch 2/2
4352/4352 [==============================] - 3s 773us/step - loss: 0.0746 - acc: 0.9775
Predicting..
2145/2145 [==============================] - 0s 27us/step
[0.12698517348459246, 0.9664335664335665]

In the next post I will use Keras to build a U-Net and show my solution to the Kaggle Data Science Bowl 2018 challenge.