1

I have images of [64,512,5] stored in *.npy files which I convert into *.tfrecords files.

I have verified that the reading of said records corresponds correctly with what is present in the *.npy files. However, when I perform some operation on the parser, like adding 1 to each pixel of the image, the result is not the expected one. The result should be 65*512*5 = 163840 but it is 163839.99980013957 (not always the same)
I have tried to perform different operations like tf.subtract, but the results are the same.

Could someone tell me what is wrong?

import re
import ast
import sys, select
import random as rn
from glob import glob
from tqdm import tqdm
from datetime import datetime
from configparser import SafeConfigParser

import numpy as np
import numpy.ma as ma
import scipy.misc

import os.path
from os import mkdir, stat
from os.path import exists, dirname, abspath
from os.path import join as dir_join
import tensorflow as tf

''' File hierarchy
'''
_code_dir             = dirname(abspath(__file__))
_python_dir           = dirname(_code_dir)
_model_dir            = dirname(_python_dir)
_project_dir          = dirname(_model_dir)
_ml_dir               = dirname(_project_dir)
_srv_dir              = dirname(_ml_dir)
_root_datasets_dir    = dir_join(_srv_dir,'machine_learning','data_sets/ssd_prepared')
_config_dir           = dir_join(_python_dir, 'config')

'''Data sets directories
'''
THIS_DATA_SET_DIR     = 'Sph_50m' #WARNING: Global variable also used in helper.py
_data_dir             = dir_join(_root_datasets_dir, THIS_DATA_SET_DIR)
_data_set_dir         = dir_join(_data_dir,'ImageSet')
_data_npy_dir         = dir_join(_data_dir,'data')
_data_tfRecord_dir    = dir_join(_data_dir,'tfRecord')

''' Configuration parser
'''
cfg_parser = SafeConfigParser()
cfg_parser.read(dir_join(_config_dir,'cfg_model.ini'))

''' Private variables
'''
_batch_size        = cfg_parser.getint(section='train', option='batch_size')
_max_epoch         = cfg_parser.getint(section='train', option='max_epoch')
_standarize        = cfg_parser.getboolean(section='train', option='standarize_input')

_input_shape       = ast.literal_eval(cfg_parser.get(section='data_shape', option='input_shape'))
_label_channel     = cfg_parser.getint(section='data_shape', option='label_channel')
_track_channel     = cfg_parser.getint(section='data_shape', option='track_channel')
_mask_channel      = cfg_parser.getint(section='data_shape', option='mask_channel')

_data_train        = cfg_parser.get(section='data_set', option='data_train')
_data_val          = cfg_parser.get(section='data_set', option='data_val')
_data_test         = cfg_parser.get(section='data_set', option='data_test')

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value.reshape(-1)))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _floats_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=value.reshape(-1)))

def numpy_to_TFRecord():
    if not exists(_data_tfRecord_dir): mkdir(_data_tfRecord_dir)

    for dataset in [_data_train, _data_val, _data_test]:
        tfRecord_folder = dir_join(_data_tfRecord_dir, dataset)
        if not exists(tfRecord_folder): mkdir(tfRecord_folder)

        #Retrieve list of files
        projections_dir=[]
        file_ = open(dir_join(_data_set_dir, dataset+'.txt'), 'r')
        for x in file_.readlines():
            file_nat = x.strip()+'.npy'
            filename = dir_join(_data_npy_dir, file_nat)
            assert exists(filename), "{} doesn't exist".format(filename)
            projections_dir.append(filename)
        file_.close()

        totaltfRecordSize = 0
        numFile = 0

        for projection_dir in tqdm(projections_dir, ncols= 100, desc = 'TFRecord {}'.format(dataset)):
            scanName = projection_dir.split('/')[-1].split('.')[0]
            if totaltfRecordSize > 100*(10**6) or totaltfRecordSize == 0:
                # address to save the TFRecords file
                train_filename = dir_join(tfRecord_folder, \
                                        str(numFile) + '_' + dataset +'.tfrecords')
                # open the TFRecords file                
                writer = tf.python_io.TFRecordWriter(train_filename)

                numFile += 1
                totaltfRecordSize = 0

            # Load the image
            projection = np.load(projection_dir)
            image = projection[:,:,:_label_channel]
            label = projection[:,:,_label_channel].astype(int)
            mask  = projection[:,:,_mask_channel].astype(int)
            track = projection[:,:,_track_channel].astype(int)

            # Create a feature
            feature = {'image': _floats_feature(image),
                       'label': _int64_feature(label),
                       'mask' : _int64_feature(mask),
                       'track': _int64_feature(track),
                       'scanName': _bytes_feature(tf.compat.as_bytes(scanName))}

            # Create an example protocol buffer
            example = tf.train.Example(features=tf.train.Features(feature=feature))

            # Serialize to string and write on the file
            writer.write(example.SerializeToString())

            fileSize = stat(train_filename).st_size
            totaltfRecordSize += fileSize

        writer.close()
        sys.stdout.flush()

def readTFRecord():
    # Transforms a scalar string `example_proto` into a pair of a scalar string and
    # a scalar integer, representing an image and its label, respectively.
    image_dim = _input_shape[0] * _input_shape[1] * _label_channel
    label_dim = _input_shape[0] * _input_shape[1]
    mean = np.load(dir_join(_data_dir,'mean.npy'))
    std  = np.load(dir_join(_data_dir,'std.npy'))
    mean_tf = tf.convert_to_tensor(mean, dtype=tf.float32, name='mean')
    std_tf = tf.convert_to_tensor(std,  dtype=tf.float32, name='std')

    with tf.variable_scope('TFRecord'):
        def _parse_function(example_proto):
            with tf.variable_scope('parser'):
                features = {'image': tf.FixedLenFeature([image_dim], tf.float32),
                            'label': tf.FixedLenFeature([label_dim], tf.int64),
                            'mask' : tf.FixedLenFeature([label_dim], tf.int64),
                            'track': tf.FixedLenFeature([label_dim], tf.int64),
                            'scanName': tf.FixedLenFeature([], tf.string)}

                parsed_features = tf.parse_single_example(example_proto, features)

                # Reshape image data into the original shape
                image = tf.reshape(parsed_features['image'], [_input_shape[0], _input_shape[1], _label_channel], name='image')
                label = tf.reshape(parsed_features['label'], _input_shape, name='lable_reshape')
                mask  = tf.reshape(parsed_features['mask'],  _input_shape, name='mask_reshape')
                track = tf.reshape(parsed_features['track'], _input_shape, name='track_reshape')
                scanName = parsed_features['scanName']

                image = image + tf.constant(1., dtype=tf.float32)

                return image, label, mask, track, scanName

        training_filenames = glob(dir_join(_data_tfRecord_dir, _data_train, '*.tfrecords'))
        validation_filenames = glob(dir_join(_data_tfRecord_dir, _data_val, '*.tfrecords'))

        filenames = tf.placeholder(tf.string, shape=[None], name='filenames')
        dataset = tf.data.TFRecordDataset(filenames)

        dataset = dataset.map(_parse_function, num_parallel_calls=20)  # Parse the record into tensors.
        dataset = dataset.shuffle(buffer_size=10000)
        dataset = dataset.batch(_batch_size, drop_remainder=True)
        dataset = dataset.prefetch(buffer_size=10)
        iterator = dataset.make_initializable_iterator()

        next = iterator.get_next()
        sess = tf.Session()

        while True:
            sess.run(iterator.initializer, feed_dict={filenames: training_filenames})
            try:
                img, _, _, _, scanX = sess.run(next)
                for i, scan in enumerate(scanX):
                    print(scan.decode("utf-8"))
                    projection = np.load(dir_join(_data_npy_dir, scan.decode("utf-8") + '.npy'))
                    imagenp = projection[:,:,:_label_channel]

                    if np.abs(np.sum(img[i,...] - imagenp)) > 0.:
                        print(np.sum(img[i,...] - imagenp))

            except tf.errors.OutOfRangeError:
                break

    return training_filenames, validation_filenames, filenames, iterator

if __name__ == '__main__':
    numpy_to_TFRecord()
    readTFRecord()

The test I'm doing in the previous code is to convert the *.npy files to *.tfrecords. Then, I compare the *.trecords with the *.npy. The value should be 0 if both images were identical.

img, _, _, _, scanX = sess.run(next)
    for i, scan in enumerate(scanX):
         print(scan.decode("utf-8"))
         projection = np.load(dir_join(_data_npy_dir, scan.decode("utf-8") + '.npy'))
         imagenp = projection[:,:,:_label_channel]

         print(np.sum(img[i,...] - imagenp))

If the data is not preprocessed, these images are the same, however, if we perform some kind of transformation, the results do not match what was expected. In this case we are adding 1 to each pixel of the image, so the total difference should be 64 * 512 * 5.

image = image + tf.constant(1., dtype=tf.float32)

I would like to solve this error, since so far I have not been able to obtain the results obtained by my neural network using feed_dict instead of Tensorflow Dataset API, and this is the only point where I can observe a difference in the input data.

7
  • Welcome to Stack Overflow. Since there is quite a bit of code, can you point to the parts that are being problematic for you? At what point are you computing this result that you were expecting to be equal to 65*512*5? It seems to be a precision error, which is to be expected with tf.float32 and numbers about that size. Can you explain how this is a problem for you?
    – jdehesa
    Oct 8, 2018 at 10:33
  • Thanks you for the comment @jdehesa. I have edited the question. I'm having some troubles when I'm trying to use the Tensorflow Dataset API instead of feed_dict because I'm having more overfitting in my results. I also think that this is a precision problem but maybe I'm doing something wrong with the tensorflow API. Oct 8, 2018 at 11:06
  • Thanks, I see what you mean now. I think however, that your input pipeline is fine, and that small error should not have any significant impact on the model (note that it matches what you might expect; 32-bit floats are precise to about 7 decimal positions, and your error starts at the 9th decimal position, in exponential notation). I do not know what might be causing the difference between your feed_dict version and tf.data one, though...
    – jdehesa
    Oct 8, 2018 at 11:33
  • Is this a correct statement of your problem: You have 65•512•5 floating-point numbers, say in some array x[64,512,5]. You add 1 to each x[i,j,k]. Let’s call the results y[i,j,k]. Let X be the sum of adding all the x[i,j,k], and let Y be the sum of adding all the y[i,j,k]. You expect Y−X to equal 163840 (65•512•6), but it is a slightly different number. Oct 8, 2018 at 13:08
  • If that is a correct statement of your problem, then the differences are expected. Floating-point arithmetic rounds the mathematical results to the nearest representable value. For example, when adding 1 and .3333333432674407958984375, the computed result is 1.33333337306976318359375, not 1.3333333432674407958984375. Furthermore, when adding all the x[i,j,k] or the y[i,j,k], there will be rounding errors while computing the sum. Due to the rounding errors, it is not expected the computed sums will differ by exactly 163840. Oct 8, 2018 at 13:12

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service, privacy policy and cookie policy

Browse other questions tagged or ask your own question.