二进制文件按样本读取:每个样本占固定数量的字节(bytes),每次读取一个样本的字节数。
api
"""
tf.FixedLengthRecordReader(record_bytes)
用于读取每条记录字节数固定的二进制文件
record_bytes:整型,指定每次读取的字节数
return:读取器实例
"""
案例
注意:cifar数据集已经事先下载好
import tensorflow as tf
# Command-line flags for the script: "cifar_dir" is the directory that is
# listed for CIFAR data files (default "cifar-10-batches-py").
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string("cifar_dir", "cifar-10-batches-py", "文件的目录")
class CifarRead(object):
    """Read fixed-length CIFAR-style binary records into image/label batches.

    Every record is ``label_bytes + image_bytes`` bytes long: one label byte
    followed by the raw ``uint8`` pixel bytes of a single image.
    """

    def __init__(self, filelist):
        """Store the input files and compute the per-record byte layout.

        Args:
            filelist: list of paths of the binary files to read.
        """
        self.file_list = filelist

        # Image geometry.
        self.height = 32
        self.width = 32
        self.channel = 3

        # Record layout, in bytes.
        self.label_bytes = 1
        self.image_bytes = self.height * self.width * self.channel
        self.bytes = self.label_bytes + self.image_bytes

    def read_and_decode(self, batch_size=10, num_threads=1, capacity=10):
        """Build the graph ops that read, decode and batch the records.

        Args:
            batch_size: number of examples per batch (default 10).
            num_threads: number of threads enqueuing examples (default 1).
            capacity: maximum number of examples held in the batch queue
                (default 10).

        Returns:
            A ``(image_batch, label_batch)`` pair of tensors; images are
            ``[batch_size, height, width, channel]`` and labels are
            ``[batch_size, label_bytes]``, both ``uint8``.
        """
        # Queue of input file names.
        file_queue = tf.train.string_input_producer(self.file_list)

        # Reader that yields exactly one record (label + image) per read.
        reader = tf.FixedLengthRecordReader(self.bytes)
        _, value = reader.read(file_queue)
        print(value)

        # Decode the raw record string into a flat uint8 vector.
        label_image = tf.decode_raw(value, out_type=tf.uint8)
        print(label_image)

        # Split the record into target (label) and features (image).
        label = tf.slice(label_image, [0], [self.label_bytes])
        image = tf.slice(label_image, [self.label_bytes], [self.image_bytes])
        print("label:", label)
        print("image:", image)

        # The CIFAR-10 binary format stores pixels channel-major (all red
        # values, then all green, then all blue), so reshaping directly to
        # [height, width, channel] would scramble the image. Reshape to
        # [channel, height, width] first, then move the channel axis last.
        depth_major = tf.reshape(
            image, [self.channel, self.height, self.width]
        )
        image_reshape = tf.transpose(depth_major, [1, 2, 0])
        print("image_reshape:", image_reshape)

        # Group single examples into batches.
        image_batch, label_batch = tf.train.batch(
            [image_reshape, label],
            batch_size=batch_size,
            num_threads=num_threads,
            capacity=capacity,
        )
        print(image_batch, label_batch)
        return image_batch, label_batch
import os
if __name__ == "__main__":
    # Collect the data files: every entry of the flag directory whose name
    # ends in a digit, joined with the directory to form a full path.
    # NOTE(review): the official binary CIFAR-10 release names its files
    # "data_batch_N.bin", which this filter would skip -- confirm that the
    # files in FLAGS.cifar_dir really are fixed-length binary records.
    file_names = os.listdir(FLAGS.cifar_dir)
    file_list = [
        os.path.join(FLAGS.cifar_dir, name)
        for name in file_names
        if "0" <= name[-1] <= "9"
    ]
    print(file_list)

    cf = CifarRead(file_list)
    image_batch, label_batch = cf.read_and_decode()

    with tf.Session() as sess:
        # Start the queue-runner threads that feed the input pipeline.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        try:
            print(sess.run([image_batch, label_batch]))
        finally:
            # Always stop and reap the reader threads, even if the run above
            # raised, so the process does not hang on live threads.
            coord.request_stop()
            coord.join(threads)
网友评论