KNN实现手写数字识别.docx
KNN实现手写数字识别

博客上显示这个没有Jupyter的好看,想看Jupyter Notebook的请戳KNN实现手写数字识别.ipynb

1 - 导入模块

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from ld_mnist import load_digits
%matplotlib inline

2 - 导入数据及数据预处理

import tensorflow as tf
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data

def load_digits():
    mnist = input_data.read_data_sets("path/", one_hot=True)
    return mnist

mnist = load_digits()

Extracting C:/Users/marsggbo/Documents/Code/ML/TF Tutorial/data/MNIST_data\train-images-idx3-ubyte.gz
Extracting C:/Users/marsggbo/Documents/Code/ML/TF Tutorial/data/MNIST_data\train-labels-idx1-ubyte.gz
Extracting C:/Users/marsggbo/Documents/Code/ML/TF Tutorial/data/MNIST_data\t10k-images-idx3-ubyte.gz
Extracting C:/Users/marsggbo/Documents/Code/ML/TF Tutorial/data/MNIST_data\t10k-labels-idx1-ubyte.gz

数据维度

print("Train: " + str(mnist.train.images.shape))
print("Train: " + str(mnist.train.labels.shape))
print("Test: " + str(mnist.test.images.shape))
print("Test: " + str(mnist.test.labels.shape))

Train: (55000, 784)
Train: (55000, 10)
Test: (10000, 784)
Test: (10000, 10)

mnist数据采用的是TensorFlow的一个函数进行读取的,由上面的结果可以知道训练集数据X_train有55000个,每个X的数据长度是784(28*28)。

x_train, y_train, x_test, y_test = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels

展示手写数字

3 - 构建模型

class Knn():

    def __init__(self, k):
        self.k = k
        self.distance = {}

    def topKDistance(self, x_train, x_test):
        '''
        计算距离,这里采用欧氏距离
        '''
        print("计算距离.")
        distance = {}
        for i in range(x_test.shape[0]):
            dis1 = x_train - x_test[i]
            dis2 = np.sqrt(np.sum(dis1*dis1, axis=1))
            distance[str(i)] = np.argsort(dis2)[:self.k]
            if i%1000 == 0:
                print(distance[str(i)])
        return distance

    def predict(self, x_train, y_train, x_test):
        '''
        预测
        '''
        self.distance = self.topKDistance(x_train, x_test)
        y_hat = []
        print("选出每项最正确预测结果")
        for i in range(x_test.shape[0]):
            classes = {}
            for j in range(self.k):
                num = np.argmax(y_train[self.distance[str(i)][j]])
                classes[num] = classes.get(num, 0) + 1
            sortClasses = sorted(classes.items(), key=lambda x: x[1], reverse=True)
            y_hat.append(sortClasses[0][0])
        y_hat = np.array(y_hat).reshape(-1, 1)
        return y_hat

    def fit(self, x_train, y_train, x_test, y_test):
        '''
        计算准确率
        '''
        print("预测.")
        y_hat = self.predict(x_train, y_train, x_test)
        # index_hat = np.argmax(y_hat, axis=1)
        print("计算准确率.")
        index_test = np.argmax(y_test, axis=1).reshape(1, -1)
        accuracy = np.sum(y_hat.reshape(index_test.shape) == index_test)*1.0/y_test.shape[0]
        return accuracy, y_hat

clf = Knn(10)
accuracy, y_hat = clf.fit(x_train, y_train, x_test, y_test)
print(accuracy)

预测.
计算距离.
[48843 33620 11186 22059 42003  9563 39566 10260 35368 31395]
[54214  4002 11005 15264 49069  8791 38147 47304 51494 11053]
[46624 10708 22134 20108 48606 19774  7855 43740 51345  9308]
[ 8758 47844 50994 45610  1930  3312 30140 17618   910 51918]
[14953  1156 50024 26833 26006 38112 31080  9066 32112 41846]
[45824 14234 48282 28432 50966 22786 40902 52264 38552 44080]
[24878  4655 20258 36065 30755 15075 35584 12152  4683 43255]
[48891 20744 47822 53511 54545 27392 10240  3970 25721 30357]
[  673 17747 33803 20960 25463 35723   969 50577 36714 35719]
[ 8255 42067 53282 14383 14073 52083  7233  8199  8963 12617]
选出每项最正确预测结果
计算准确率.
0.9672

准确率略高。