0

I have the following simple image data generator in Keras:

 import tensorflow as tf
 import numpy as np
 import cv2

 class My_Custom_Generator(tf.keras.utils.Sequence):
     """Keras ``Sequence`` that reads images from disk one batch at a time.

     Each item is a tuple ``(images, labels)``: the images of the batch read
     with ``cv2.imread`` and divided by 255.0, and the matching slice of
     ``labels`` converted to a NumPy array.

     Args:
         image_filenames: Sliceable sequence of image file paths.
         labels: Sliceable sequence of labels, aligned with ``image_filenames``.
         batch_size: Maximum number of images returned per batch.
     """

     def __init__(self, image_filenames, labels, batch_size):
         super().__init__()
         self.image_filenames = image_filenames
         self.labels = labels
         self.batch_size = batch_size

     def __len__(self):
         """Return the number of batches, counting a final partial batch."""
         # Convert to a plain Python int: np.ceil returns a NumPy float scalar.
         return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

     def __getitem__(self, idx):
         """Return batch number ``idx`` as ``(images / 255.0, labels)``."""
         start = idx * self.batch_size
         stop = (idx + 1) * self.batch_size
         batch_x = self.image_filenames[start:stop]
         batch_y = self.labels[start:stop]

         # NOTE: every image in the batch must have the same shape. Otherwise
         # np.array cannot build one numeric multi-dimensional array, and the
         # batch cannot be converted to a tensor.
         # The builtin ``int`` replaces the removed ``np.int`` alias.
         images = np.array(
             [cv2.imread(file_name).astype(int) for file_name in batch_x]
         )
         return images / 255.0, np.array(batch_y)

It is simple: I give it a list of image file names and their labels, and all it does is return the images (read with cv2) together with their labels, in batches of the size I defined, so that they can be sent to the GPU.

My problem is how I call it, which is giving me a headache.

import pickle
from statistics import mode

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.applications import Xception
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import model_from_json
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.utils import to_categorical

from mycustomgenerator import My_Custom_Generator

#This function returns me a vector of labels according to a list of image names
def return_classes_by_name(image_path):
    """Return one integer class label per image name.

    A name containing ``"RAW_"`` is labelled 0 and a name containing
    ``"GAN_"`` is labelled 1.

    Args:
        image_path: Image names, as a NumPy array or any sequence of strings.
            A single name (for example a 0-d array) is treated as a
            one-element input.

    Returns:
        A 1-D integer NumPy array holding exactly one label per input name,
        in input order.

    Raises:
        ValueError: If a name contains neither marker or both of them. Either
            case would otherwise leave the labels out of step with the names.
    """
    class_image_vector = []

    # atleast_1d lets a lone name or a plain list be iterated like an array.
    for image_name in np.atleast_1d(image_path):
        is_raw = "RAW_" in image_name
        is_gan = "GAN_" in image_name

        if is_raw == is_gan:
            problem = "both" if is_raw else "neither"
            raise ValueError(
                f"Cannot label {image_name!r}: it contains {problem} "
                "of the markers 'RAW_' and 'GAN_'"
            )

        class_image_vector.append(0 if is_raw else 1)

    return np.array(class_image_vector, dtype=int)

#So, the rest of the code is where my nightmare is

# Load the image names listed in the CSV file as strings
print("Loading dataset names")
filenames = np.genfromtxt("dataset_printscan.csv", dtype=str)

# Derive the class label of every image from its name
print("getting classes (labels) from names")
y = return_classes_by_name(filenames)

print("Organizing intro training, validation and testing")
# Shuffle names and labels together before splitting. The seed is fixed so
# that this step is as reproducible as the seeded splits below.
filenames_shuffled, y_shuffled = shuffle(filenames, y, random_state=42)

# Hold out half of the data for testing ...
x_train, x_test, y_train, y_test = train_test_split(
    filenames_shuffled, y_shuffled, test_size=0.5, random_state=42
)

# ... then take 30% of the remaining half as the validation set
x_train, x_validation, y_train, y_validation = train_test_split(
    x_train, y_train, test_size=0.3, random_state=42
)

num_classes = 2
# Only training is run for now (no test), so only the training and
# validation labels are one-hot encoded
y_train = to_categorical(y_train, num_classes)
y_validation = to_categorical(y_validation, num_classes)
            
print("Setting up the network")
# Parameters for network training
batch_size = 16
epochs = 10

# Create the batch generators.
# Please be aware that x_train and x_validation are image paths: the generator
# reads the images in batches so that each batch can be sent to the GPU.
my_training_batch_generator = My_Custom_Generator(x_train, y_train, batch_size)
my_validation_batch_generator = My_Custom_Generator(x_validation, y_validation, batch_size)

# `learning_rate` is the supported keyword; `lr` is a deprecated alias
adamax = Adamax(learning_rate=0.01)

weights_file = "weights/mymodel.h5"

# Reduce the learning rate during training when val_loss stops improving
lr_reducer = ReduceLROnPlateau(
    monitor='val_loss', factor=np.sqrt(0.1), cooldown=0, patience=2, min_lr=0.5e-6
)

# Stop training before all epochs are processed when val_accuracy stalls
early_stopper = EarlyStopping(
    monitor='val_accuracy', min_delta=0.01, patience=3, restore_best_weights=True, verbose=1
)

# Policy to save weights: keep only the best weights according to val_accuracy
model_checkpoint = ModelCheckpoint(
    weights_file, monitor="val_accuracy", save_best_only=True, save_weights_only=True, mode='auto'
)

# Callbacks passed to model.fit
callbacks = [lr_reducer, early_stopper, model_checkpoint]

print("Compiling the network")
# Xception backbone without its classification top. weights=None means it
# starts from random initialisation rather than pretrained weights.
baseModel = Xception(weights=None, include_top=False, input_tensor=Input(shape=(299, 299, 3)))

# Classification head: flatten -> 512-unit dense -> dropout -> softmax
headModel = baseModel.output
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(512, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(num_classes, activation="softmax")(headModel)

# place the head FC model on top of the base model (this will become
# the actual model we will train)
model = Model(inputs=baseModel.input, outputs=headModel)

# The optimizer created above is named `adamax`; `adam` was never defined
model.compile(loss="binary_crossentropy", optimizer=adamax, metrics=["accuracy"])
print("[INFO] training...")

# Here is my problem
H = model.fit(
    my_training_batch_generator,
    steps_per_epoch=int(x_train.shape[0] // batch_size),
    validation_data=my_validation_batch_generator,
    validation_steps=int(x_validation.shape[0] // batch_size),
    callbacks=callbacks,
    epochs=epochs,
)

The biggest problem is that the generator is supposed to read one batch of images (16 of them) at a time and pass it on to the GPU, but here is what I got instead:

File "xception.py", line 150, in <module>
    H=model.fit(my_training_batch_generator, steps_per_epoch = int(x_train.shape[0] // batch_size), validation_data = my_validation_batch_generator, validation_steps = int(x_validation.shape[0] // batch_size), callbacks=callbacks, epochs=epochs)
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
    return method(self, *args, **kwargs)
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 1049, in fit
    data_handler = data_adapter.DataHandler(
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1105, in __init__
    self._adapter = adapter_cls(
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 909, in __init__
    super(KerasSequenceAdapter, self).__init__(
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 788, in __init__
    peek = _process_tensorlike(peek)
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1021, in _process_tensorlike
    inputs = nest.map_structure(_convert_numpy_and_scipy, inputs)
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/util/nest.py", line 635, in map_structure
    structure[0], [func(*x) for x in entries],
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/util/nest.py", line 635, in <listcomp>
    structure[0], [func(*x) for x in entries],
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1016, in _convert_numpy_and_scipy
    return ops.convert_to_tensor(x, dtype=dtype)
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 1499, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/framework/tensor_conversion_registry.py", line 52, in _default_conversion_function
    return constant_op.constant(value, dtype, name=name)
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 263, in constant
    return _constant_impl(value, dtype, shape, name, verify_shape=False,
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 275, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 300, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)
  File "/home/anselmo/.local/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 98, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

What was my mistake here?

2
  • What's in this file My_Custom_Generator? Probably you should convert your data to float32 in your generator like this: x_train=np.array(x_train).astype(np.float32). Commented Jul 9, 2021 at 18:07
  • Make sure that the array (or list) that you are trying to turn into a Tensor (or pass to tensor function) is numeric dtype. If the source was multiple arrays that differ in shape, the array will be object dtype (and likely 1d). Commented Jul 9, 2021 at 18:41

1 Answer 1

2

I have succeeded in reproducing the error.

The error appears when the line np.array([cv2.imread(file_name).astype(np.int) for file_name in batch_x]) in My_Custom_Generator.__getitem__ does not create a multi-dimensional array, but instead it creates an array with dtype=np.object that is just a list of arrays.

That happens when the shapes of the arrays in the list (i.e. the shapes of the images) are not identical, as you can see from the example below.

np.array([np.zeros((4, 4, 3)), np.zeros((5, 5, 3))])
>>> array([array([[[0., 0., 0.], ..., [0., 0., 0.]]]),
           array([[[0., 0., 0.], ..., [0., 0., 0.]]])], dtype=object)

Running the code in the example also displays the following deprecation warning: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. np.array([np.zeros((4, 4, 3)), np.zeros((5, 5, 3))]).

In general, it is not good practice to stack a list of arrays with np.array; it is recommended to use np.stack instead, which lets you catch this type of error early.

Replacing np.array([cv2.imread(file_name).astype(np.int) for file_name in batch_x]) with np.stack([cv2.imread(file_name).astype(np.int) for file_name in batch_x], axis=0), you should see a much more meaningful error stack trace.

To conclude, to solve the problem you should check the shapes of your images to find the image that breaks your code.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.