Multidimensional RNN in TensorFlow

I am trying to implement 2D RNN in the context of classifying human actions (joints on one RNN axis and time on the other) and have searched high and low for something in Tensorflow that could do the job.

I've heard about GridLSTMCell

(internally and externally) but couldn't get it to work with dynamic_rnn

(it accepts a 3-D tensor, but I would have to provide a 4-D tensor [batchsize, max_time, num_joints, n_features]).

There is also ndlstm

, which is also a (somewhat unknown) part of the TF library; it basically uses a regular 1-D LSTM and transposes the output to feed it into a second 1-D LSTM. This approach has also been advocated here, but I'm not too sure if it's correct / if it's the same idea as mine.

Any help would be appreciated.

+3


source to share


1 answer


I have successfully tried to use GridLSTM

and ndlstm

in TensorFlow.

I'm not sure how to convert a 4-D tensor to 3-D so that it is accepted by dynamic_rnn

, but I think this might give you an idea of how to use GridLSTM

:

def reshape_to_rnn_dims(tensor, num_time_steps):
    """Split ``tensor`` along axis 1 into ``num_time_steps`` pieces.

    Thin wrapper around ``tf.unstack``. The tests in this file pass the
    result to the static RNN builders.

    Args:
        tensor: Tensor to split; axis 1 is the axis that gets unstacked.
        num_time_steps: Number of slices expected along axis 1.

    Returns:
        Whatever ``tf.unstack`` returns for these arguments.
    """
    return tf.unstack(tensor, num=num_time_steps, axis=1)


class GridLSTMCellTest(tf.test.TestCase):
    """Graph-construction smoke tests for ``grid_rnn.Grid1LSTMCell``.

    The tests only build the RNN ops; nothing is run in a session and no
    outputs are asserted on.
    """

    def setUp(self):
        """Create a fresh graph, a float32 placeholder and the cell under test."""
        # Start every test from an empty default graph.
        tf.reset_default_graph()

        self.batch_size = 1
        self.time_steps = 1
        self.num_features = 1

        placeholder_shape = [self.batch_size, self.time_steps, self.num_features]
        self.input_layer = tf.placeholder(tf.float32, placeholder_shape)
        self.cell = grid_rnn.Grid1LSTMCell(num_units=8)

    def test_simple_grid_rnn(self):
        """Build a static RNN over the placeholder unstacked along axis 1."""
        per_step_inputs = reshape_to_rnn_dims(self.input_layer, self.time_steps)
        self.input_layer = per_step_inputs
        tf.nn.static_rnn(self.cell, per_step_inputs, dtype=tf.float32)

    def test_dynamic_grid_rnn(self):
        """Build a dynamic RNN directly on the 3-D placeholder."""
        tf.nn.dynamic_rnn(cell=self.cell, inputs=self.input_layer, dtype=tf.float32)


class BidirectionalGridRNNCellTest(tf.test.TestCase):
    """Graph-construction smoke tests for bidirectional RNNs built from
    ``grid_rnn.Grid1LSTMCell``.

    The tests only build the RNN ops; nothing is run in a session and no
    outputs are asserted on.
    """

    def setUp(self):
        """Create a fresh graph, a float32 placeholder and one cell per direction."""
        self.num_features = 1
        self.time_steps = 1
        self.batch_size = 1
        # Start every test from an empty default graph.
        tf.reset_default_graph()
        self.input_layer = tf.placeholder(tf.float32, [self.batch_size, self.time_steps, self.num_features])
        self.cell_fw = grid_rnn.Grid1LSTMCell(num_units=8)
        self.cell_bw = grid_rnn.Grid1LSTMCell(num_units=8)

    def test_simple_bidirectional_grid_rnn(self):
        """Build a static bidirectional RNN over the unstacked placeholder."""
        self.input_layer = reshape_to_rnn_dims(self.input_layer, self.time_steps)
        # The backward direction must get its own cell; the forward cell was
        # previously passed twice, leaving self.cell_bw unused.
        tf.nn.static_bidirectional_rnn(self.cell_fw, self.cell_bw, self.input_layer, dtype=tf.float32)

    def test_bidirectional_dynamic_grid_rnn(self):
        """Build a dynamic bidirectional RNN directly on the 3-D placeholder."""
        tf.nn.bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, self.input_layer, dtype=tf.float32)

# Hand control to TensorFlow's test runner when executed as a script.
if __name__ == '__main__':
    tf.test.main()

      



Apparently ndlstm

accepts 4-D tensors with shape (batch_size, height, width, depth)

. I have these tests (one of which involves using TensorFlow's ctc_loss

; I also found an example of using it with conv2d):

class MultidimensionalRNNTest(tf.test.TestCase):
    """Graph-construction smoke tests for the ``lstm2d`` helpers.

    The tests only build ops on a 4-D placeholder; nothing is run in a
    session and no outputs are asserted on.
    """

    def setUp(self):
        """Create a fresh graph, the 4-D input placeholder and a sparse label placeholder."""
        self.num_classes = 26
        self.num_features = 32
        self.time_steps = 64
        self.batch_size = 1  # Can't be dynamic, apparently.
        self.num_channels = 1
        self.num_filters = 16
        # Start every test from an empty default graph, like the other test
        # cases in this file do.
        tf.reset_default_graph()
        self.input_layer = tf.placeholder(
            tf.float32,
            [self.batch_size, self.time_steps, self.num_features, self.num_channels])
        self.labels = tf.sparse_placeholder(tf.int32)

    def test_simple_mdrnn(self):
        """Build ``separable_lstm`` directly on the 4-D placeholder."""
        lstm2d.separable_lstm(self.input_layer, self.num_filters)

    def test_image_to_sequence(self):
        """Feed the ``separable_lstm`` output through ``images_to_sequence``."""
        net = lstm2d.separable_lstm(self.input_layer, self.num_filters)
        lstm2d.images_to_sequence(net)

    def test_convert_to_ctc_dims(self):
        """Project the sequence output to ``num_classes`` and build a CTC loss op."""
        net = lstm2d.separable_lstm(self.input_layer, self.num_filters)
        net = lstm2d.images_to_sequence(net)

        # Flatten to rows of width num_filters so one matmul can project every
        # row. (This previously reshaped the undefined name `inputs`.)
        net = tf.reshape(net, [-1, self.num_filters])

        weights = tf.Variable(
            tf.truncated_normal([self.num_filters, self.num_classes],
                                stddev=0.1, dtype=tf.float32),
            name='W')
        # name='b' belongs to the Variable (as with 'W'), not to the constant
        # used as its initial value.
        bias = tf.Variable(
            tf.constant(0., dtype=tf.float32, shape=[self.num_classes]),
            name='b')

        net = tf.matmul(net, weights) + bias
        net = tf.reshape(net, [self.batch_size, -1, self.num_classes])

        # Swap the first two axes before handing the tensor to ctc_loss.
        # NOTE(review): presumably to give ctc_loss time-major input; confirm
        # against the ctc_loss docs, along with the hard-coded sequence_length.
        net = tf.transpose(net, (1, 0, 2))

        tf.nn.ctc_loss(inputs=net, labels=self.labels, sequence_length=[2])

        # Moved inside the test: at class scope `net` does not exist.
        print(net)


# Hand control to TensorFlow's test runner when executed as a script.
if __name__ == '__main__':
    tf.test.main()

      

0


source







All Articles