import struct
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
# struct.pack(fmt, v1, v2, ...) packs the values into a byte string according to
# the given format string (essentially a C-struct-like byte stream):
#   string = struct.pack(fmt, v1, v2, ...)
# struct.unpack(fmt, string) parses the byte string according to fmt and returns a tuple:
#   tuple = unpack(fmt, string)
# struct.calcsize(fmt) reports how many bytes the format fmt occupies:
#   offset = calcsize(fmt)
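
# A small illustrative sanity check (not called anywhere below): it round-trips an
# IDX3-style header through struct. '>iiii' describes four big-endian 32-bit
# integers, so calcsize reports 16 bytes; 2051 is the magic number of the MNIST
# image files, and the remaining values are made up for the example.
def _demo_struct_roundtrip():
    header = struct.pack('>iiii', 2051, 60000, 28, 28)
    assert struct.calcsize('>iiii') == 16
    assert struct.unpack_from('>iiii', header, 0) == (2051, 60000, 28, 28)
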
# Training set image file
train_images_idx3_ubyte_file = 'mnistdata/train-images-idx3-ubyte'
# Training set label file
train_labels_idx1_ubyte_file = 'mnistdata/train-labels-idx1-ubyte'
# Test set image file
test_images_idx3_ubyte_file = 'mnistdata/t10k-images-idx3-ubyte'
# Test set label file
test_labels_idx1_ubyte_file = 'mnistdata/t10k-labels-idx1-ubyte'
def decode_idx3_ubyte(idx3_ubyte_file):
    """Parse an IDX3 image file into a (num_images, rows, cols, 1) float32 array scaled to [0, 1]."""
    # Read the raw binary data
    with open(idx3_ubyte_file, 'rb') as f:
        bin_data = f.read()
    # Parse the header: magic number, image count, image height, image width (big-endian int32)
    offset = 0
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, offset)
    print('Magic number: %d, number of images: %d, image size: %d*%d' % (magic_number, num_images, num_rows, num_cols))
    # Parse the image data: each image is rows*cols unsigned bytes
    image_size = num_rows * num_cols
    offset += struct.calcsize(fmt_header)
    fmt_image = '>' + str(image_size) + 'B'
    images = np.empty((num_images, num_rows, num_cols, 1), dtype='float32')
    for i in range(num_images):
        if (i + 1) % 10000 == 0:
            print('Parsed %d images' % (i + 1))
        images[i] = np.array(struct.unpack_from(fmt_image, bin_data, offset)).reshape((num_rows, num_cols, 1))
        images[i] /= 255  # scale pixel values to [0, 1]
        offset += struct.calcsize(fmt_image)
    return images
def decode_idx1_ubyte(idx1_ubyte_file):
    """Parse an IDX1 label file into a 1-D array of label values."""
    # Read the raw binary data
    with open(idx1_ubyte_file, 'rb') as f:
        bin_data = f.read()
    # Parse the header: magic number and label count (big-endian int32)
    offset = 0
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, offset)
    print('Magic number: %d, number of labels: %d' % (magic_number, num_images))
    # Parse the label data: one unsigned byte per label
    offset += struct.calcsize(fmt_header)
    fmt_image = '>B'
    labels = np.empty(num_images)
    for i in range(num_images):
        if (i + 1) % 10000 == 0:
            print('Parsed %d labels' % (i + 1))
        labels[i] = struct.unpack_from(fmt_image, bin_data, offset)[0]
        offset += struct.calcsize(fmt_image)
    return labels
def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):
    return decode_idx3_ubyte(idx_ubyte_file)
def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):
    return decode_idx1_ubyte(idx_ubyte_file)
def load_test_images(idx_ubyte_file=test_images_idx3_ubyte_file):
    return decode_idx3_ubyte(idx_ubyte_file)
def load_test_labels(idx_ubyte_file=test_labels_idx1_ubyte_file):
    return decode_idx1_ubyte(idx_ubyte_file)
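
# Optional visual spot check (illustrative; this helper is not part of the original
# pipeline and run() does not call it). matplotlib is imported above but otherwise
# unused; a snippet like this confirms the decoded images look like digits.
def preview_first_train_image():
    images = load_train_images()
    labels = load_train_labels()
    plt.imshow(images[0].reshape(28, 28), cmap='gray')
    plt.title('label: %d' % int(labels[0]))
    plt.show()
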
def run():
    # Load and decode the four MNIST files
    train_images = load_train_images()
    train_labels = load_train_labels()
    test_images = load_test_images()
    test_labels = load_test_labels()
    batch_size = 128
    num_classes = 10
    epochs = 20
    # One-hot encode the labels for categorical cross-entropy
    train_labels = keras.utils.to_categorical(train_labels, num_classes)
    test_labels = keras.utils.to_categorical(test_labels, num_classes)
    print(train_images.shape)
    print("======*=======")
    # Alternative fully connected (MLP) baseline, kept for reference. Note it expects
    # flattened 784-dimensional inputs, so the (28, 28, 1) images would need to be
    # reshaped before uncommenting it.
    #model = Sequential()
    #model.add(Dense(512, activation='relu', input_shape=(784,)))
    #model.add(Dropout(0.2))
    #model.add(Dense(512, activation='relu'))
    #model.add(Dropout(0.2))
    #model.add(Dense(num_classes, activation='softmax'))
    #model.summary()
    #model.compile(loss='categorical_crossentropy',
    #              optimizer=RMSprop(), metrics=['accuracy'])
    #history = model.fit(train_images, train_labels, batch_size=batch_size,
    #                    epochs=epochs, verbose=1, validation_data=(test_images, test_labels))
    #score = model.evaluate(test_images, test_labels, verbose=0)
    #print('Test loss:', score[0])
    #print('Test accuracy:', score[1])
    # Build the CNN. Feature-map sizes: 28x28x1 -> conv 5x5 -> 24x24x32 -> pool -> 12x12x32
    # -> conv 5x5 -> 8x8x64 -> pool -> 4x4x64 -> flatten (1024) -> dense 1000 -> dense 10
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Conv2D(64, kernel_size=(5, 5), activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Flatten())
    model.add(Dense(1000, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # Train
    model.fit(train_images, train_labels, batch_size=batch_size, epochs=epochs)
    # Evaluate on the test set
    score = model.evaluate(test_images, test_labels)
    print('acc', score[1])
if __name__ == '__main__':
    run()