una-dinosauria · x3titan · Mar 11, 2019 · Mar 11, 2019 · Mar 11, 2019 · Mar 11, 2019
diff --git a/.vs/human-motion-prediction/v14/.suo b/.vs/human-motion-prediction/v14/.suo
diff --git a/.vs/~$GitHubVSTemp$~/v14/.suo b/.vs/~$GitHubVSTemp$~/v14/.suo
diff --git a/human-motion-prediction.pyproj b/human-motion-prediction.pyproj
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" DefaultTargets="Build">
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{3c86f862-fdab-4861-8f2d-fb0d1f114a76}</ProjectGuid>
+    <ProjectHome />
+    <StartupFile>src\translate.py</StartupFile>
+    <SearchPath />
+    <WorkingDirectory>.</WorkingDirectory>
+    <OutputPath>.</OutputPath>
+    <ProjectTypeGuids>{888888a0-9f3d-457c-b088-3a5042f75d52}</ProjectTypeGuids>
+    <LaunchProvider>Standard Python launcher</LaunchProvider>
+    <InterpreterId />
+    <InterpreterVersion />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)' == 'Debug'" />
+  <PropertyGroup Condition="'$(Configuration)' == 'Release'" />
+  <PropertyGroup>
+    <VisualStudioVersion Condition=" '$(VisualStudioVersion)' == '' ">10.0</VisualStudioVersion>
+    <PtvsTargetsFile>$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets</PtvsTargetsFile>
+  </PropertyGroup>
+  <ItemGroup>
+    <Content Include="imgs\walking.gif" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="src\baselines.py" />
+    <Compile Include="src\data_utils.py" />
+    <Compile Include="src\forward_kinematics.py" />
+    <Compile Include="src\rnn_cell_extensions.py" />
+    <Compile Include="src\seq2seq_model.py" />
+    <Compile Include="src\translate.py" />
+    <Compile Include="src\viz.py" />
+  </ItemGroup>
+  <ItemGroup>
+    <Folder Include="imgs" />
+    <Folder Include="src" />
+  </ItemGroup>
+  <Import Project="$(PtvsTargetsFile)" Condition="Exists($(PtvsTargetsFile))" />
+  <Import Project="$(MSBuildToolsPath)\Microsoft.Common.targets" Condition="!Exists($(PtvsTargetsFile))" />
+</Project>
diff --git a/human-motion-prediction.sln b/human-motion-prediction.sln
@@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.25420.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "human-motion-prediction", "human-motion-prediction.pyproj", "{3C86F862-FDAB-4861-8F2D-FB0D1F114A76}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{3C86F862-FDAB-4861-8F2D-FB0D1F114A76}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{3C86F862-FDAB-4861-8F2D-FB0D1F114A76}.Release|Any CPU.ActiveCfg = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/src/seq2seq_model.py b/src/seq2seq_model.py
@@ -85,9 +85,14 @@ def __init__(self,
     if num_layers > 1:
       cell = tf.contrib.rnn.MultiRNNCell( [tf.contrib.rnn.GRUCell(self.rnn_size) for _ in range(num_layers)] )
 
+    architecture = architecture.split(',')
+    architectureSub = architecture[1]
+    architecture = architecture[0]
+
+    print("========working mode: %s======working submode: %s========"%(architecture, architectureSub));
+
     # === Transform the inputs ===
     with tf.name_scope("inputs"):
-
       enc_in = tf.placeholder(dtype, shape=[None, source_seq_len-1, self.input_size], name="enc_in")
       dec_in = tf.placeholder(dtype, shape=[None, target_seq_len, self.input_size], name="dec_in")
       dec_out = tf.placeholder(dtype, shape=[None, target_seq_len, self.input_size], name="dec_out")
@@ -96,15 +101,15 @@ def __init__(self,
       self.decoder_inputs = dec_in
       self.decoder_outputs = dec_out
 
-      enc_in = tf.transpose(enc_in, [1, 0, 2])
+      if architecture != 'bid':
+        enc_in = tf.transpose(enc_in, [1, 0, 2])
+        enc_in = tf.reshape(enc_in, [-1, self.input_size])
+        enc_in = tf.split(enc_in, source_seq_len-1, axis=0)
+
       dec_in = tf.transpose(dec_in, [1, 0, 2])
       dec_out = tf.transpose(dec_out, [1, 0, 2])
-
-      enc_in = tf.reshape(enc_in, [-1, self.input_size])
       dec_in = tf.reshape(dec_in, [-1, self.input_size])
       dec_out = tf.reshape(dec_out, [-1, self.input_size])
-
-      enc_in = tf.split(enc_in, source_seq_len-1, axis=0)
       dec_in = tf.split(dec_in, target_seq_len, axis=0)
       dec_out = tf.split(dec_out, target_seq_len, axis=0)
 
@@ -128,6 +133,9 @@ def lf(prev, i): # function for sampling_based loss
     else:
       raise(ValueError, "unknown loss: %s" % loss_to_use)
 
+    #cellFw = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+    #cellBw = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+
     # Build the RNN
     if architecture == "basic":
       # Basic RNN does not have a loop function in its API, so copying here.
@@ -136,9 +144,39 @@ def lf(prev, i): # function for sampling_based loss
         outputs, self.states = tf.contrib.legacy_seq2seq.rnn_decoder( dec_in, enc_state, cell, loop_function=lf ) # Decoder
     elif architecture == "tied":
       outputs, self.states = tf.contrib.legacy_seq2seq.tied_rnn_seq2seq( enc_in, dec_in, cell, loop_function=lf )
+    elif architecture == 'bid':
+      #enc_in = tf.transpose(enc_in, [1, 0, 2])    
+      #enc_in = tf.split(enc_in, [source_seq_len-1], axis=0)
+      if architectureSub == 'lstm':
+        cellBid = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+      elif architectureSub == 'gru':
+        cellBid = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+      else:
+        print("ERROR===>unknow architectureSub mod: %s"%(architectureSub))
+      #encoder
+      _, enc_state = tf.nn.bidirectional_dynamic_rnn(   
+        cell_fw = cellBid,
+        cell_bw = cellBid,
+        inputs = enc_in
+        #,sequence_length = [target_seq_len,target_seq_len,target_seq_len,target_seq_len,target_seq_len,target_seq_len,target_seq_len,target_seq_len]
+        ,dtype=tf.float32
+        ,time_major=False
+        #,initial_state_fw=init_state_fw
+        #,initial_state_bw=init_state_bw
+      )
+      #decoder
+      if architectureSub == 'lstm':
+        cellDec = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+      elif architectureSub == 'gru':
+        cellDec = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+      else:
+        print("ERROR===>unknow architectureSub mod: %s"%(architectureSub))
+      outputs, self.states = tf.contrib.legacy_seq2seq.rnn_decoder( dec_in, enc_state[0], cellDec, loop_function=lf )
     else:
       raise(ValueError, "Uknown architecture: %s" % architecture )
 
+    #if architecture == 'bid':
+    #  outputs = outputs[0]
     self.outputs = outputs
 
     with tf.name_scope("loss_angles"):
@@ -409,7 +447,6 @@ def step(self, session, encoder_inputs, decoder_inputs, decoder_outputs,
                        self.loss,
                        self.loss_summary,
                        self.learning_rate_summary]
-
         outputs = session.run( output_feed, input_feed )
         return outputs[1], outputs[2], outputs[3], outputs[4]  # Gradient norm, loss, summaries
 

diff --git a/src/translate.py b/src/translate.py
@@ -27,7 +27,13 @@
 tf.app.flags.DEFINE_integer("batch_size", 16, "Batch size to use during training.")
 tf.app.flags.DEFINE_integer("iterations", int(1e5), "Iterations to train for.")
 # Architecture
-tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].")
+#basic: basic mode, use tf.contrib.rnn.static_rnn as encoder, and use tf.contrib.legacy_seq2seq.rnn_decoder as decoder
+#tied: tied mode, use tf.contrib.legacy_seq2seq.tied_rnn_seq2seq for both the encoder and the decoder
+#bid: bid mode, use tf.nn.bidirectional_dynamic_rnn as encoder, and use tf.contrib.legacy_seq2seq.rnn_decoder as decoder
+#     bid mode has two submodes, given after a comma in the flag value (e.g. "bid,lstm"):
+#         1: lstm submode: both encoder and decoder use tf.nn.rnn_cell.LSTMCell as the cell
+#         2: gru submode: both encoder and decoder use tf.nn.rnn_cell.GRUCell as the cell
+tf.app.flags.DEFINE_string("architecture", "bid,lstm", "Seq2seq architecture to use: [basic, tied, bid(lstm,gru)].")
 tf.app.flags.DEFINE_integer("size", 1024, "Size of each model layer.")
 tf.app.flags.DEFINE_integer("num_layers", 1, "Number of layers in the model.")
 tf.app.flags.DEFINE_integer("seq_length_in", 50, "Number of frames to feed into the encoder. 25 fps")
@@ -638,6 +644,7 @@ def define_actions( action ):
               "greeting", "phoning", "posing", "purchases", "sitting",
               "sittingdown", "takingphoto", "waiting", "walkingdog",
               "walkingtogether"]
+  #actions = ["walking"]    # uncomment this line to debug quickly on a single action
 
   if action in actions:
     return [action]

diff --git a/~$GitHubVSTemp$~.sln b/~$GitHubVSTemp$~.sln
@@ -0,0 +1,10 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.25420.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Global
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal