I am trying to implement a convolutional VAE for MNIST using TensorFlow 2.6 and Python 3.9. The code I have is:

# Imports needed by the snippet (omitted in the original; assumes TF 2.6)-
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Input, Conv2D, Conv2DTranspose, Dense,
                                     Flatten, Reshape, Lambda, LeakyReLU, Activation)
from tensorflow.keras.models import Model

# Specify latent space dimensions-
latent_space_dim = 3

# Define encoder-
encoder_input = Input(shape = (28, 28, 1))

x = Conv2D(
    filters = 32, kernel_size = 3,
    strides = 2, padding = 'same')(encoder_input)
x = LeakyReLU()(x)

x = Conv2D(
    filters = 64, kernel_size = 3,
    strides = 2, padding = 'same')(x)
x = LeakyReLU()(x)

x = Conv2D(
    filters = 64, kernel_size = 3,
    strides = 1, padding = 'same')(x)
x = LeakyReLU()(x)

x = Conv2D(
    filters = 64, kernel_size = 3,
    strides = 1, padding = 'same')(x)
x = LeakyReLU()(x)

shape_before_flattening = K.int_shape(x)[1:]
x = Flatten()(x)

# Instead of connecting the flattened layer directly to the 3-D latent space, we connect
# it to layers 'mu' and 'log_var'-
mu = Dense(units = latent_space_dim)(x)
log_var = Dense(units = latent_space_dim)(x)
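
# 'mu' and 'log_var' parameterize a diagonal Gaussian over the latent space:
# each input image is encoded as a distribution N(mu, exp(log_var)) rather
# than as a single point.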

# The Keras model that outputs the values of 'mu' & 'log_var' for a given input image-
encoder_mu_log = Model(encoder_input, (mu, log_var))

print(f"shape_before_flattening: {shape_before_flattening}")
# shape_before_flattening: (7, 7, 64)
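# Why (7, 7, 64): with padding = 'same', a stride-s convolution maps spatial
# size n to ceil(n / s), so the two stride-2 layers give 28 -> 14 -> 7, and
# the last convolution has 64 filters.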

def sampling(args):
    mu, log_var = args
    epsilon = K.random_normal(shape = K.shape(mu), mean = 0.0, stddev = 1.0)
    return mu + K.exp(log_var / 2) * epsilon
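
# This is the reparameterization trick: sampling z ~ N(mu, sigma^2) directly is
# not differentiable, so we instead sample epsilon ~ N(0, 1) and compute
# z = mu + sigma * epsilon with sigma = exp(log_var / 2), which lets gradients
# flow back through 'mu' and 'log_var'.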

# This Lambda layer samples a point 'z' in the latent space from the normal distribution
# defined by the parameters 'mu' and 'log_var'-
encoder_output = Lambda(sampling)([mu, log_var])

# The Keras model that defines the encoder: a model that takes an input image
# and encodes it into the 3-D latent space by sampling a point from the
# multivariate normal distribution defined by 'mu' and 'log_var'-
encoder = Model(encoder_input, encoder_output)
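
# Quick shape check (a sketch; uses a random batch of 4 dummy images):
z = encoder.predict(np.random.rand(4, 28, 28, 1).astype('float32'))
print(z.shape)    # (4, 3): one 3-D latent point per image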

decoder_input = Input(shape = (latent_space_dim,))

x = Dense(np.prod(shape_before_flattening))(decoder_input)
x = Reshape(shape_before_flattening)(x)

x = Conv2DTranspose(
    filters = 64, kernel_size = (3, 3),
    strides = (1, 1), padding = 'same')(x)
x = LeakyReLU()(x)

x = Conv2DTranspose(
    filters = 64, kernel_size = (3, 3),
    strides = (2, 2), padding = 'same')(x)
x = LeakyReLU()(x)

x = Conv2DTranspose(
    filters = 32, kernel_size = (3, 3),
    strides = (2, 2), padding = 'same')(x)
x = LeakyReLU()(x)

x = Conv2DTranspose(
    filters = 1, kernel_size = (3, 3),
    strides = (1, 1), padding = 'same')(x)
x = Activation('sigmoid')(x)

decoder_output = x

decoder = Model(decoder_input, decoder_output)

# The complete autoencoder-

# The input to the autoencoder is the same as the input to the encoder.
model_input = encoder_input

# The output from the autoencoder is the output from the encoder passed through
# the decoder.
model_output = decoder(encoder_output)

# The Keras model that defines the full autoencoder—a model that takes an image,
# and passes it through the encoder and back out through the decoder to generate
# a reconstruction of the original image.
model = Model(model_input, model_output)

The loss function is defined as follows:

# Weight the reconstruction loss by 'r_loss_factor' to keep it well balanced with the KL-divergence loss-
r_loss_factor = 1000

def vae_r_loss(y_true, y_pred):
    # Reconstruction loss-
    r_loss = K.mean(K.square(y_true - y_pred), axis = [1,2,3])
    return r_loss_factor * r_loss

def vae_kl_loss(y_true, y_pred):
    # KL-Divergence loss-
    kl_loss = -0.5 * K.sum(1 + log_var - K.square(mu) - K.exp(log_var), axis = 1)
    return kl_loss
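
# The KL term above is the closed form of KL(N(mu, sigma^2) || N(0, 1)) summed
# over the latent dimensions, with sigma^2 = exp(log_var):
#   KL = -0.5 * sum_i (1 + log_var_i - mu_i^2 - exp(log_var_i))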

def vae_loss(y_true, y_pred):
    # VAE loss = Reconstruction loss + KL-Divergence loss
    r_loss = vae_r_loss(y_true, y_pred)
    kl_loss = vae_kl_loss(y_true, y_pred)
    return r_loss + kl_loss

# Compile model-
model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate = 0.003),
    loss = vae_loss,
    metrics = [vae_r_loss, vae_kl_loss]
)

# Train autoencoder-
training_hist = model.fit(
    x = X_train, y = X_train,
    batch_size = batch_size, shuffle = True,
    validation_data = (X_test, X_test),
    epochs = num_epochs
    )

which gives the error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_11960/995477119.py in <module>
      1 # Train autoencoder-
----> 2 training_hist = model.fit(
      3     x = X_train, y = X_train,
      4     batch_size = batch_size, shuffle = True,
      5     validation_data = (X_test, X_test),

[... framework frames elided (training.py, def_function.py, function.py, func_graph.py) ...]

TypeError: in user code:

    C:\Users\Arjun\anaconda3\envs\tf-cpu\lib\site-packages\tensorflow\python\keras\engine\training.py:803 train_step
        loss = self.compiled_loss(
    C:\Users\Arjun\anaconda3\envs\tf-cpu\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:242 __call__
        self._loss_metric.update_state(
    C:\Users\Arjun\anaconda3\envs\tf-cpu\lib\site-packages\tensorflow\python\keras\metrics.py:403 update_state
        sample_weight = weights_broadcast_ops.broadcast_weights(
    C:\Users\Arjun\anaconda3\envs\tf-cpu\lib\site-packages\tensorflow\python\framework\tensor_util.py:435 make_tensor_proto
        values = np.asarray(values)
    C:\Users\Arjun\anaconda3\envs\tf-cpu\lib\site-packages\tensorflow\python\keras\engine\keras_tensor.py:254 __array__
        raise TypeError(

    TypeError: Cannot convert a symbolic Keras input/output to a numpy array. This error
    may indicate that you're trying to pass a symbolic value to a NumPy call, which is not
    supported. Or, you may be trying to pass Keras symbolic inputs/outputs to a TF API that
    does not register dispatching, preventing Keras from automatically converting the API
    call to a lambda layer in the Functional Model.

1 Answer

You get this TypeError because vae_kl_loss closes over the symbolic Keras tensors 'mu' and 'log_var': a compiled loss function only receives y_true and y_pred, so referencing those symbolic tensors inside it forces TensorFlow to try converting them to NumPy arrays. Instead, attach the losses and metrics directly to the model graph with add_loss and add_metric, as below.

# Add reconstruction loss
# (note: instead of weighting the reconstruction term by r_loss_factor, you can
# drop it and weight the KL term with a kl_beta factor instead)
r_loss = r_loss_factor * K.mean(K.square(model_input - model_output), axis = [1, 2, 3])
model.add_loss(r_loss)
model.add_metric(r_loss, name = 'mse_loss', aggregation = 'mean')

# Add KL loss
kl_loss = -0.5 * K.sum(1 + log_var - K.square(mu) - K.exp(log_var), axis = 1)
# The variant below, averaged over the batch and scaled by kl_beta, tends to
# give better results; read up on the KL-annealing technique to see how
# kl_beta is scheduled over the course of training:
# kl_loss = kl_beta * K.mean(-0.5 * K.sum(1 + log_var - K.square(mu) - K.exp(log_var), axis = 1), axis = 0)
model.add_loss(kl_loss)
model.add_metric(kl_loss, name = 'kl_loss', aggregation = 'mean')

# Compile model-
model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate = 0.003),
    loss = None
)
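
Since both losses are now attached to the graph and loss = None, you can train with the same fit call as in the question; the y targets are simply ignored (a sketch, reusing the question's X_train, X_test, batch_size and num_epochs):

training_hist = model.fit(
    x = X_train, y = X_train,
    batch_size = batch_size, shuffle = True,
    validation_data = (X_test, X_test),
    epochs = num_epochs
)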