Tensorflow seq2seq tutorial: NoneType object has no 'update' attribute
I am trying to follow tensorflow tutorial from https://www.tensorflow.org/tutorials/seq2seq .
The data seems to be loading fine, but when I initialize the model I get the following error:
Traceback (most recent call last):
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/execute.py", line 334, in <module>
train()
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/execute.py", line 151, in train
model = create_model(sess, False)
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/execute.py", line 113, in create_model
forward_only=forward_only)
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/seq2seq_model_tf.py", line 181, in __init__
softmax_loss_function=softmax_loss_function)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1206, in model_with_buckets
decoder_inputs[:bucket[1]])
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/seq2seq_model_tf.py", line 180, in <lambda>
lambda x, y: seq2seq_f(x, y, False),
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/seq2seq_model_tf.py", line 144, in seq2seq_f
dtype=dtype)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 848, in embedding_attention_seq2seq
encoder_cell = copy.deepcopy(cell)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 174, in deepcopy
y = copier(memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 476, in __deepcopy__
setattr(result, k, copy.deepcopy(v, memo))
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 230, in _deepcopy_list
y.append(deepcopy(a, memo))
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 230, in _deepcopy_list
y.append(deepcopy(a, memo))
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 230, in _deepcopy_list
y.append(deepcopy(a, memo))
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 237, in _deepcopy_tuple
y.append(deepcopy(a, memo))
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 343, in _reconstruct
y.__dict__.update(state)
AttributeError: 'NoneType' object has no attribute 'update'
If I change the variable _buckets
in translate.py
to only have 1 tuple (it doesn't matter which one), no problem, but the training doesn't work very well. This error causes at most 1 tuple. No changes were made to the files found on github at https://github.com/tensorflow/models/tree/master/tutorials/rnn/translate other than changing the filenames for my local project.
The beginning of the function __init__
leading to the area looks like this:
def __init__(self,
source_vocab_size,
target_vocab_size,
buckets,
size,
num_layers,
max_gradient_norm,
batch_size,
learning_rate,
learning_rate_decay_factor,
use_lstm=False,
num_samples=512,
forward_only=False,
dtype=tf.float32):
"""Create the model.
Args:
source_vocab_size: size of the source vocabulary.
target_vocab_size: size of the target vocabulary.
buckets: a list of pairs (I, O), where I specifies maximum input length
that will be processed in that bucket, and O specifies maximum output
length. Training instances that have inputs longer than I or outputs
longer than O will be pushed to the next bucket and padded accordingly.
We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
size: number of units in each layer of the model.
num_layers: number of layers in the model.
max_gradient_norm: gradients will be clipped to maximally this norm.
batch_size: the size of the batches used during training;
the model construction is independent of batch_size, so it can be
changed after initialization if this is convenient, e.g., for decoding.
learning_rate: learning rate to start with.
learning_rate_decay_factor: decay learning rate by this much when needed.
use_lstm: if true, we use LSTM cells instead of GRU cells.
num_samples: number of samples for sampled softmax.
forward_only: if set, we do not construct the backward pass in the model.
dtype: the data type to use to store internal variables.
"""
self.source_vocab_size = source_vocab_size
self.target_vocab_size = target_vocab_size
self.buckets = buckets
self.batch_size = batch_size
self.learning_rate = tf.Variable(
float(learning_rate), trainable=False, dtype=dtype)
self.learning_rate_decay_op = self.learning_rate.assign(
self.learning_rate * learning_rate_decay_factor)
self.global_step = tf.Variable(0, trainable=False)
# If we use sampled softmax, we need an output projection.
output_projection = None
softmax_loss_function = None
# Sampled softmax only makes sense if we sample less than vocabulary size.
if 0 < num_samples < self.target_vocab_size:
w_t = tf.get_variable("proj_w", [self.target_vocab_size, size], dtype=dtype)
w = tf.transpose(w_t)
b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype)
output_projection = (w, b)
def sampled_loss(labels, logits):
labels = tf.reshape(labels, [-1, 1])
# We need to compute the sampled_softmax_loss using 32bit floats to
# avoid numerical instabilities.
local_w_t = tf.cast(w_t, tf.float32)
local_b = tf.cast(b, tf.float32)
local_inputs = tf.cast(logits, tf.float32)
return tf.cast(
tf.nn.sampled_softmax_loss(
weights=local_w_t,
biases=local_b,
labels=labels,
inputs=local_inputs,
num_sampled=num_samples,
num_classes=self.target_vocab_size),
dtype)
softmax_loss_function = sampled_loss
# Create the internal multi-layer cell for our RNN.
def single_cell():
return tf.contrib.rnn.GRUCell(size)
if use_lstm:
def single_cell():
return tf.contrib.rnn.BasicLSTMCell(size)
cell = single_cell()
if num_layers > 1:
cell = tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(num_layers)])
# The seq2seq function: we use embedding for the input and attention.
def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
encoder_inputs,
decoder_inputs,
cell,
num_encoder_symbols=source_vocab_size,
num_decoder_symbols=target_vocab_size,
embedding_size=size,
output_projection=output_projection,
feed_previous=do_decode,
dtype=dtype)
# Feeds for inputs.
self.encoder_inputs = []
self.decoder_inputs = []
self.target_weights = []
for i in xrange(buckets[-1][0]): # Last bucket is the biggest one.
self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
name="encoder{0}".format(i)))
for i in xrange(buckets[-1][1] + 1):
self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
name="decoder{0}".format(i)))
self.target_weights.append(tf.placeholder(dtype, shape=[None],
name="weight{0}".format(i)))
# Our targets are decoder inputs shifted by one.
targets = [self.decoder_inputs[i + 1]
for i in xrange(len(self.decoder_inputs) - 1)]
# Training outputs and losses.
if forward_only:
self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
softmax_loss_function=softmax_loss_function)
# If we use output projection, we need to project outputs for decoding.
if output_projection is not None:
for b in xrange(len(buckets)):
self.outputs[b] = [
tf.matmul(output, output_projection[0]) + output_projection[1]
for output in self.outputs[b]
]
else:
self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets,
lambda x, y: seq2seq_f(x, y, False),
softmax_loss_function=softmax_loss_function) # this is where the error occurrs
What do I need to change to get this to work? Using tensorflow version 1.2 function
UPDATE: This has been tested using both tensorflow built from source and built from Pip package on Mac OSX Sierra and this same issue occurs
source to share
as I already commented here the model you are trying to implement is deprecated. If you want it to work, check the code I pasted into the problem. As of tensorflow 1.1 and 1.2, you have functions to decode the type dynamically tf.nn.bidirectional_dynamic_rnn
. This allows you to take dynamic dimensional sequences into account for free.
I am creating some examples and I will send you a working example with the new api.
source to share