diff --git a/.vs/human-motion-prediction/v14/.suo b/.vs/human-motion-prediction/v14/.suo new file mode 100644 index 0000000..340462a Binary files /dev/null and b/.vs/human-motion-prediction/v14/.suo differ diff --git a/.vs/~$GitHubVSTemp$~/v14/.suo b/.vs/~$GitHubVSTemp$~/v14/.suo new file mode 100644 index 0000000..34acf8d Binary files /dev/null and b/.vs/~$GitHubVSTemp$~/v14/.suo differ diff --git a/human-motion-prediction.pyproj b/human-motion-prediction.pyproj new file mode 100644 index 0000000..050ccf2 --- /dev/null +++ b/human-motion-prediction.pyproj @@ -0,0 +1,41 @@ + + + + Debug + 2.0 + {3c86f862-fdab-4861-8f2d-fb0d1f114a76} + + src\translate.py + + . + . + {888888a0-9f3d-457c-b088-3a5042f75d52} + Standard Python launcher + + + + + + + 10.0 + $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/human-motion-prediction.sln b/human-motion-prediction.sln new file mode 100644 index 0000000..313e05c --- /dev/null +++ b/human-motion-prediction.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.25420.1 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "human-motion-prediction", "human-motion-prediction.pyproj", "{3C86F862-FDAB-4861-8F2D-FB0D1F114A76}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {3C86F862-FDAB-4861-8F2D-FB0D1F114A76}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3C86F862-FDAB-4861-8F2D-FB0D1F114A76}.Release|Any CPU.ActiveCfg = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/src/seq2seq_model.py 
b/src/seq2seq_model.py index fea6352..97d513a 100644 --- a/src/seq2seq_model.py +++ b/src/seq2seq_model.py @@ -85,9 +85,14 @@ def __init__(self, if num_layers > 1: cell = tf.contrib.rnn.MultiRNNCell( [tf.contrib.rnn.GRUCell(self.rnn_size) for _ in range(num_layers)] ) + architecture = architecture.split(',') + architectureSub = architecture[1] + architecture = architecture[0] + + print("========working mode: %s======working submode: %s========"%(architecture, architectureSub)); + # === Transform the inputs === with tf.name_scope("inputs"): - enc_in = tf.placeholder(dtype, shape=[None, source_seq_len-1, self.input_size], name="enc_in") dec_in = tf.placeholder(dtype, shape=[None, target_seq_len, self.input_size], name="dec_in") dec_out = tf.placeholder(dtype, shape=[None, target_seq_len, self.input_size], name="dec_out") @@ -96,15 +101,15 @@ def __init__(self, self.decoder_inputs = dec_in self.decoder_outputs = dec_out - enc_in = tf.transpose(enc_in, [1, 0, 2]) + if architecture != 'bid': + enc_in = tf.transpose(enc_in, [1, 0, 2]) + enc_in = tf.reshape(enc_in, [-1, self.input_size]) + enc_in = tf.split(enc_in, source_seq_len-1, axis=0) + dec_in = tf.transpose(dec_in, [1, 0, 2]) dec_out = tf.transpose(dec_out, [1, 0, 2]) - - enc_in = tf.reshape(enc_in, [-1, self.input_size]) dec_in = tf.reshape(dec_in, [-1, self.input_size]) dec_out = tf.reshape(dec_out, [-1, self.input_size]) - - enc_in = tf.split(enc_in, source_seq_len-1, axis=0) dec_in = tf.split(dec_in, target_seq_len, axis=0) dec_out = tf.split(dec_out, target_seq_len, axis=0) @@ -128,6 +133,9 @@ def lf(prev, i): # function for sampling_based loss else: raise(ValueError, "unknown loss: %s" % loss_to_use) + #cellFw = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse) + #cellBw = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse) + # Build the RNN if architecture == "basic": # Basic RNN does not have a loop function in its API, so copying 
here. @@ -136,9 +144,39 @@ def lf(prev, i): # function for sampling_based loss outputs, self.states = tf.contrib.legacy_seq2seq.rnn_decoder( dec_in, enc_state, cell, loop_function=lf ) # Decoder elif architecture == "tied": outputs, self.states = tf.contrib.legacy_seq2seq.tied_rnn_seq2seq( enc_in, dec_in, cell, loop_function=lf ) + elif architecture == 'bid': + #enc_in = tf.transpose(enc_in, [1, 0, 2]) + #enc_in = tf.split(enc_in, [source_seq_len-1], axis=0) + if architectureSub == 'lstm': + cellBid = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse) + elif architectureSub == 'gru': + cellBid = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse) + else: + print("ERROR===>unknow architectureSub mod: %s"%(architectureSub)) + #encoder + _, enc_state = tf.nn.bidirectional_dynamic_rnn( + cell_fw = cellBid, + cell_bw = cellBid, + inputs = enc_in + #,sequence_length = [target_seq_len,target_seq_len,target_seq_len,target_seq_len,target_seq_len,target_seq_len,target_seq_len,target_seq_len] + ,dtype=tf.float32 + ,time_major=False + #,initial_state_fw=init_state_fw + #,initial_state_bw=init_state_bw + ) + #decoder + if architectureSub == 'lstm': + cellDec = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse) + elif architectureSub == 'gru': + cellDec = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse) + else: + print("ERROR===>unknow architectureSub mod: %s"%(architectureSub)) + outputs, self.states = tf.contrib.legacy_seq2seq.rnn_decoder( dec_in, enc_state[0], cellDec, loop_function=lf ) else: raise(ValueError, "Uknown architecture: %s" % architecture ) + #if architecture == 'bid': + # outputs = outputs[0] self.outputs = outputs with tf.name_scope("loss_angles"): @@ -409,7 +447,6 @@ def step(self, session, encoder_inputs, decoder_inputs, decoder_outputs, self.loss, self.loss_summary, self.learning_rate_summary] - outputs = 
session.run( output_feed, input_feed ) return outputs[1], outputs[2], outputs[3], outputs[4] # Gradient norm, loss, summaries diff --git a/src/translate.py b/src/translate.py index 3e3e9ad..a290347 100644 --- a/src/translate.py +++ b/src/translate.py @@ -27,7 +27,13 @@ tf.app.flags.DEFINE_integer("batch_size", 16, "Batch size to use during training.") tf.app.flags.DEFINE_integer("iterations", int(1e5), "Iterations to train for.") # Architecture -tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].") +#basic: basic mode, use tf.contrib.rnn.static_rnn as encoder, and use tf.contrib.legacy_seq2seq.rnn_decoder as decoder +#tied: tied mode, use tf.contrib.legacy_seq2seq.tied_rnn_seq2seq for both encoder and decoder +#bid: bid mode, use tf.nn.bidirectional_dynamic_rnn as encoder, and use tf.contrib.legacy_seq2seq.rnn_decoder as decoder +# bid mode has 2 submodes: +# 1: lstm submode: both encoder and decoder use tf.nn.rnn_cell.LSTMCell as the cell +# 2: gru submode: both encoder and decoder use tf.nn.rnn_cell.GRUCell as the cell +tf.app.flags.DEFINE_string("architecture", "bid,lstm", "Seq2seq architecture to use: [basic, tied, bid(lstm,gru)].") tf.app.flags.DEFINE_integer("size", 1024, "Size of each model layer.") tf.app.flags.DEFINE_integer("num_layers", 1, "Number of layers in the model.") tf.app.flags.DEFINE_integer("seq_length_in", 50, "Number of frames to feed into the encoder. 
25 fps") @@ -638,6 +644,7 @@ def define_actions( action ): "greeting", "phoning", "posing", "purchases", "sitting", "sittingdown", "takingphoto", "waiting", "walkingdog", "walkingtogether"] + #actions = ["walking"] #uncomment this line for fast debugging if action in actions: return [action] diff --git a/~$GitHubVSTemp$~.sln b/~$GitHubVSTemp$~.sln new file mode 100644 index 0000000..433e7e3 --- /dev/null +++ b/~$GitHubVSTemp$~.sln @@ -0,0 +1,10 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.25420.1 +MinimumVisualStudioVersion = 10.0.40219.1 +Global + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal