# generateClassifier-nn.py
"""Train an MLP classifier on HOG features extracted from MNIST digits.

Fetches MNIST, computes a HOG descriptor per 28x28 image, standardizes
the descriptors, fits a small MLP, and pickles (classifier, scaler) to
``digits_nn.pkl`` so the same scaling can be reapplied at prediction time.
"""

# Import the modules
import joblib  # sklearn.externals.joblib was removed in sklearn 0.23
import numpy as np
from collections import Counter
from sklearn import datasets, preprocessing
from sklearn.neural_network import MLPClassifier
from skimage.feature import hog

# Load the dataset.  fetch_mldata was removed in sklearn 0.22 (mldata.org
# is offline); fetch_openml is the supported replacement.
dataset = datasets.fetch_openml("mnist_784", version=1, as_frame=False)

# Extract the features and labels
features = np.array(dataset.data, 'int16')
labels = np.array(dataset.target, 'int')

# Extract the HOG feature descriptor for each 28x28 digit image
list_hog_fd = []
for feature in features:
    # 'visualize' replaced the misspelled 'visualise' kwarg (skimage 0.15+)
    fd = hog(feature.reshape((28, 28)), orientations=9,
             pixels_per_cell=(14, 14), cells_per_block=(1, 1),
             visualize=False)
    list_hog_fd.append(fd)
hog_features = np.array(list_hog_fd, 'float64')

# Normalize the features (zero mean, unit variance); keep the fitted scaler
# so the identical transform can be applied to new samples later.
pp = preprocessing.StandardScaler().fit(hog_features)
hog_features = pp.transform(hog_features)

print("Count of digits in dataset", Counter(labels))

# Create an MLP Neural Network object
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(5, 2), random_state=1)

# Perform the training
clf.fit(hog_features, labels)

# Save the classifier together with its scaler
joblib.dump((clf, pp), "digits_nn.pkl", compress=3)
# knn.py
"""Tune the neighbor count (k) for a k-NN classifier on the scikit-learn
8x8 digits dataset, evaluate the best model on held-out test data, and
display a few sample predictions with OpenCV."""

# import the necessary packages
# NOTE: original had "from future import ..." — a typo for __future__.
# The import is a no-op on Python 3 but kept for Python 2 compatibility.
from __future__ import print_function
# sklearn.cross_validation was removed in 0.20; model_selection replaces it
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn import datasets
from skimage import exposure
import numpy as np
import imutils
import cv2

# load the MNIST-style 8x8 digits dataset bundled with scikit-learn
mnist = datasets.load_digits()

# take the digits data and construct the training and testing split,
# using 75% of the data for training and 25% for testing
(trainData, testData, trainLabels, testLabels) = train_test_split(
    np.array(mnist.data), mnist.target, test_size=0.25, random_state=42)

# now, take 10% of the training data and use that for validation
(trainData, valData, trainLabels, valLabels) = train_test_split(
    trainData, trainLabels, test_size=0.1, random_state=84)

# show the sizes of each data split
print("training data points: {}".format(len(trainLabels)))
print("validation data points: {}".format(len(valLabels)))
print("testing data points: {}".format(len(testLabels)))

# candidate odd values of k, and the validation accuracy for each
kVals = range(1, 30, 2)
accuracies = []

# loop over the candidate values of k
for k in kVals:
    # train the k-Nearest Neighbor classifier with the current value of k
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(trainData, trainLabels)

    # evaluate the model on the validation set and record the accuracy
    score = model.score(valData, valLabels)
    print("k=%d, accuracy=%.2f%%" % (k, score * 100))
    accuracies.append(score)

# find the value of k that has the largest validation accuracy
i = np.argmax(accuracies)
print("k=%d achieved highest accuracy of %.2f%% on validation data" % (
    kVals[i], accuracies[i] * 100))

# re-train the classifier on the full training split using the best k
# and predict the labels of the test data
model = KNeighborsClassifier(n_neighbors=kVals[i])
model.fit(trainData, trainLabels)
predictions = model.predict(testData)

# show a final classification report demonstrating the accuracy of the
# classifier for each of the digits
print("EVALUATION ON TESTING DATA")
print(classification_report(testLabels, predictions))

# loop over a few random digits
for i in np.random.randint(0, high=len(testLabels), size=(5,)):
    # grab the image and classify it; predict expects a 2-D array of
    # samples, so reshape the single 64-dim vector to shape (1, 64)
    image = testData[i]
    prediction = model.predict(image.reshape(1, -1))[0]

    # convert the image from a 64-dim array to an 8x8 image compatible
    # with OpenCV, then resize it to 32x32 pixels so we can see it better
    image = image.reshape((8, 8)).astype("uint8")
    image = exposure.rescale_intensity(image, out_range=(0, 255))
    image = imutils.resize(image, width=32, inter=cv2.INTER_CUBIC)

    # show the prediction
    print("I think that digit is: {}".format(prediction))
    cv2.imshow("Image", image)
    cv2.waitKey(0)