本教程基于 Python 3.5 + OpenCV 3.3 + TensorFlow 1.3 + Win10 64 位
1.基于mnist用keras训练出手写识别模型
from keras.models import Sequential,load_model
from keras.layers import Dense, Activation
from keras.datasets import mnist
from keras.utils import np_utils
import os

# Load MNIST: images plus integer class labels for the train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every 28x28 image to a 784-vector and scale the 8-bit pixel values
# to the 0..1 range.  The recognition code in this tutorial feeds the model
# values computed as 1.0 - pixel / 255.0, so training must use the same scale
# or the saved model sees inputs unlike anything it was trained on.
x_train = x_train.reshape(-1, 28 * 28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28 * 28).astype('float32') / 255.0

# One-hot encode the labels into 10 classes for categorical cross-entropy.
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

# Fully connected network: 784 -> 100 -> 100 -> 10 with a softmax output.
model = Sequential()
model.add(Dense(input_dim=784, units=100))
model.add(Activation('sigmoid'))
model.add(Dense(units=100))
model.add(Activation('sigmoid'))
model.add(Dense(units=10))
model.add(Activation('softmax'))
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(x_train, y_train, batch_size=32, epochs=10)

# Evaluate on the held-out test split.
print("testing...")
loss, acc = model.evaluate(x_test, y_test)
print("\nloss: ", loss)
print(" acc: ", acc)

# Sanity check: predict a single test sample and print the most likely class.
predit = model.predict(x_test[1].reshape(-1, 784))
print("predit: ", predit)
print("x_test[1]: ", predit.argmax())

# Save the model; create the target directory first so the save cannot fail
# just because "mnist-model" does not exist yet.
os.makedirs("mnist-model", exist_ok=True)
model.save("mnist-model/minit_model.pb")
2.opencv打开摄像头
# XVID four-character codec code; the preview loop in this snippet does not
# use it yet.
fourcc = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')
# Capture handle for camera index 0, read by open_camera().
cap = cv2.VideoCapture(0)
def open_camera():
    """Show the live camera feed until 'q' is pressed.

    Keys:
        q -- print 'quit' and leave the loop.
        p -- shrink the current frame to 28x28 pixels.

    Reads frames from the module-level ``cap`` capture object.
    """
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered, so there is nothing to display.
            print('failed to read a frame from the camera')
            break

        cv2.imshow('frame', frame)

        # Poll the keyboard exactly once per frame.  Testing each key with
        # its own waitKey() call means a key press picked up by one call is
        # never seen by the next one, so presses get lost.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            print('quit')
            break
        if key == ord('p'):
            # 28x28 matches the 784-value input the model was built with.
            img = cv2.resize(frame, (28, 28))
3.加载模型
keras加载模型很简单
# Load the model from the same path the training step passed to model.save().
model = load_model('mnist-model/minit_model.pb')
4.以下是打开摄像头进行识别的代码,点击键盘P进行识别
import cv2
import numpy as np
from keras.models import Sequential, load_model
from PIL import Image, ImageFilter
# Module-level flag: True while a recording is in progress.
is_record = False
# XVID four-character codec code passed to cv2.VideoWriter when recording.
fourcc = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')
# Capture handle for camera index 0, shared by the functions below.
cap = cv2.VideoCapture(0)
def open_camera():
    """Run the camera preview loop with recognition and recording hot-keys.

    Keys:
        q -- print 'quit' and leave the loop.
        p -- save a 28x28 snapshot, run the model on it and print the digit.
        s -- start recording to 'test.avi' (ignored while already recording).
        e -- stop the current recording.

    Uses the module-level ``cap``, ``fourcc``, ``model`` and ``is_record``.
    """
    # Declared before any use of the name: Python 3.6+ rejects a ``global``
    # statement that comes after the variable is used in the same function.
    global is_record

    out = None  # video writer; only set while a recording is active
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered, so there is nothing to show or record.
            print('failed to read a frame from the camera')
            break

        cv2.imshow('frame', frame)

        # Poll the keyboard exactly once per frame.  Testing each key with
        # its own waitKey() call means a key press picked up by one call is
        # never seen by the next one, so presses get lost.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            print('quit')
            break
        if key == ord('p'):
            # Snapshot -> test.png -> (1, 784) array -> predicted digit.
            img = cv2.resize(frame, (28, 28))
            take_photo(img)
            array = imageprepare()
            predit = model.predict(array)
            print("num is ", predit.argmax())
        elif key == ord('s') and not is_record:
            # NOTE(review): the writer is opened for 640x480 frames;
            # presumably the camera must deliver that size -- confirm.
            out = cv2.VideoWriter('test.avi', fourcc, 20.0, (640, 480))
            print('start record')
            is_record = True
        elif key == ord('e') and is_record:
            end_record(out)

        if is_record:
            start_record(frame, out)

    # Do not leave the video file open when quitting in mid-recording.
    if is_record:
        end_record(out)
def take_photo(frame):
    """Write ``frame`` to 'test.png' and report whether the write succeeded.

    Args:
        frame: image to save, as passed to ``cv2.imwrite``.
    """
    # cv2.imwrite returns a success flag; only claim success when it is set.
    if cv2.imwrite("test.png", frame):
        print('snap successful')
    else:
        print('snap failed: could not write test.png')
def start_record(frame, out):
    """Append ``frame`` to the recording and stop it when 'e' is pressed.

    Args:
        frame: the frame to append.
        out: the open video writer receiving the frames.
    """
    out.write(frame)
    # waitKey() is polled unconditionally, as before; the recording is only
    # ended when the module-level is_record flag says one is in progress.
    if (cv2.waitKey(1) & 0xFF) == ord('e') and is_record:
        end_record(out)
def end_record(out):
    """Release the video writer and clear the module-level recording flag.

    Args:
        out: the video writer to release.
    """
    global is_record
    out.release()
    is_record = False
    print('stop record')
def imageprepare():
    """Load 'test.png' and turn it into a single-row model input.

    The image is converted to 8-bit grayscale, resized to 28x28 when it has
    any other size, and every pixel value p is mapped to 1.0 - p / 255.0.
    Values are laid out row by row.

    Returns:
        numpy.ndarray: float array of shape (1, 784).
    """
    # The context manager closes the file; convert() returns a separate,
    # fully loaded image that stays usable afterwards.
    with Image.open('test.png') as source:
        img = source.convert('L')

    if img.size != (28, 28):
        img = img.resize((28, 28))

    # Converting the image to an array gives rows-by-columns data, the same
    # order the former getpixel((x, y)) double loop produced.
    pixels = np.array(img, dtype=np.float64)
    return (1.0 - pixels / 255.0).reshape([-1, 784])
if __name__ == '__main__':
    # Load the trained model, then run the interactive camera loop.
    model = load_model('mnist-model/minit_model.pb')
    try:
        open_camera()
    finally:
        # Free the camera and close the preview window even when the loop
        # exits through an exception.
        cap.release()
        cv2.destroyAllWindows()
以上代码就能实现动态地识别手写数字了,感觉简书对代码太不友好了,排好版粘贴进来全都连在一起了,欢迎大家留言。