I'm attempting to train a simple dataset using 8 feature columns, 3 numeric, and 5 categorical. My basic training script currently looks like so:
import tensorflow as tf
feature_names = [
'col1', 'col2', 'col3', 'col4', 'col5', 'col6',
'col7', 'col8'
]
def input_fn(file_path, perform_shuffle=False, repeat_count=1):
"""
"""
def decode_csv(line):
"""
"""
parsed_line = tf.decode_csv(line, [[0], [0], [0], [''],[''],[''],[''],[''], [0]], na_value='?')
label = parsed_line[-1:] # Last element is the label
del parsed_line[-1] # Delete last element
features = parsed_line
d = dict(zip(feature_names, features)), label
return d
# Read in our file, skip the header row, and transform each element
dataset = (tf.data.TextLineDataset(file_path)
.skip(1)
.map(decode_csv))
# Randomizes input using a window of 256 elements (read into memory)
if perform_shuffle:
dataset = dataset.shuffle(buffer_size=256)
dataset = dataset.repeat(repeat_count) # Repeats dataset this # times
dataset = dataset.batch(32) # Batch size to use
iterator = dataset.make_one_shot_iterator()
batch_features, batch_labels = iterator.get_next()
return batch_features, batch_labels
I then build my feature columns:
col4 = tf.feature_column.categorical_column_with_identity(key='col4', num_buckets=3),
col5 = tf.feature_column.categorical_column_with_identity(key='col5', num_buckets=3),
col6 = tf.feature_column.categorical_column_with_identity(key='col6', num_buckets=2),
col7 = tf.feature_column.categorical_column_with_identity(key='col7', num_buckets=2),
col8 = tf.feature_column.categorical_column_with_identity(key='col8', num_buckets=2)
feature_columns = [
tf.feature_column.numeric_column(key='col1'),
tf.feature_column.numeric_column(key='col2'),
tf.feature_column.numeric_column(key='col3'),
tf.feature_column.indicator_column(col4),
tf.feature_column.indicator_column(col5),
tf.feature_column.indicator_column(col6),
tf.feature_column.indicator_column(col7),
tf.feature_column.indicator_column(col8)
]
classifier = tf.estimator.DNNClassifier(
feature_columns=feature_columns,
hidden_units=[10,10],
n_classes=14,
model_dir="data"
)
classifier.train(input_fn=lambda: input_fn("data/development.csv", True, 8))
However, I'm getting a very bland error message of:
AttributeError: 'tuple' object has no attribute 'name'
The stack trace indicates the indicator columns that the categorical columns wrap need a name. But I'm not sure why this would be the case, and all of the documentation and tutorials on using feature columns make no reference to any name argument being needed, as far as I can see.