From 2ac52fa4ace9a6990ee8104ea591b3b9da1034a2 Mon Sep 17 00:00:00 2001
From: Jesse Tam
Date: Mon, 11 Mar 2019 18:11:06 +0800
Subject: [PATCH 1/5] add bid architecture (apply bidirectional_dynamic_rnn)

---
 .vs/human-motion-prediction/v14/.suo | Bin 0 -> 26112 bytes
 .vs/~$GitHubVSTemp$~/v14/.suo        | Bin 0 -> 14848 bytes
 human-motion-prediction.pyproj       | 41 ++++++++++++++++++++++++++
 src/seq2seq_model.py                 | 42 ++++++++++++++++++++-------
 src/translate.py                     |  4 ++-
 ~$GitHubVSTemp$~.sln                 | 10 +++++++
 6 files changed, 85 insertions(+), 12 deletions(-)
 create mode 100644 .vs/human-motion-prediction/v14/.suo
 create mode 100644 .vs/~$GitHubVSTemp$~/v14/.suo
 create mode 100644 human-motion-prediction.pyproj
 create mode 100644 ~$GitHubVSTemp$~.sln

diff --git a/.vs/human-motion-prediction/v14/.suo b/.vs/human-motion-prediction/v14/.suo
new file mode 100644
index 0000000000000000000000000000000000000000..35de07ca12af5bb2abfc61c50857da03367de5ca
GIT binary patch
literal 26112
[26112 bytes of base85-encoded binary data omitted: Visual Studio .suo user-options file]

literal 0
HcmV?d00001

diff --git a/.vs/~$GitHubVSTemp$~/v14/.suo b/.vs/~$GitHubVSTemp$~/v14/.suo
new file mode 100644
index 0000000000000000000000000000000000000000..34acf8d96ea81d14c4a8033ad4b0e85383a5d9df
GIT binary patch
literal 14848
[14848 bytes of base85-encoded binary data omitted: Visual Studio .suo user-options file]

literal 0
HcmV?d00001

diff --git a/human-motion-prediction.pyproj b/human-motion-prediction.pyproj
new file mode 100644
--- /dev/null
+++ b/human-motion-prediction.pyproj
@@ -0,0 +1,41 @@
[41-line Python Tools for Visual Studio project file; the XML markup was lost
in extraction. Recoverable values: Debug configuration, SchemaVersion 2.0,
ProjectGuid {3c86f862-fdab-4861-8f2d-fb0d1f114a76}, StartupFile
src\translate.py, launcher {888888a0-9f3d-457c-b088-3a5042f75d52}
"Standard Python launcher", ToolsVersion 10.0, and an import of
$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets.
No newline at end of file.]

diff --git a/src/seq2seq_model.py b/src/seq2seq_model.py
index fea6352..b9a0350 100644
--- a/src/seq2seq_model.py
+++ b/src/seq2seq_model.py
@@ -81,13 +81,13 @@ def __init__(self,
     # === Create the RNN that will keep the state ===
     print('rnn_size = {0}'.format( rnn_size ))
     cell = tf.contrib.rnn.GRUCell( self.rnn_size )
+    cellBk = tf.contrib.rnn.GRUCell( self.rnn_size )
 
     if num_layers > 1:
       cell = tf.contrib.rnn.MultiRNNCell( [tf.contrib.rnn.GRUCell(self.rnn_size) for _ in range(num_layers)] )
 
     # === Transform the inputs ===
     with tf.name_scope("inputs"):
-
       enc_in = tf.placeholder(dtype, shape=[None, source_seq_len-1, self.input_size], name="enc_in")
       dec_in = tf.placeholder(dtype, shape=[None, target_seq_len, self.input_size], name="dec_in")
       dec_out = tf.placeholder(dtype, shape=[None, target_seq_len, self.input_size], name="dec_out")
@@ -96,20 +96,23 @@ def __init__(self,
       self.decoder_inputs = dec_in
       self.decoder_outputs = dec_out
 
-      enc_in = tf.transpose(enc_in, [1, 0, 2])
-      dec_in = tf.transpose(dec_in, [1, 0, 2])
-      dec_out = tf.transpose(dec_out, [1, 0, 2])
+      if architecture != 'bid':
+        enc_in = tf.transpose(enc_in, [1, 0, 2])
+        dec_in = tf.transpose(dec_in, [1, 0, 2])
+        dec_out = tf.transpose(dec_out, [1, 0, 2])
 
-      enc_in = tf.reshape(enc_in, [-1, self.input_size])
-      dec_in = tf.reshape(dec_in, [-1, self.input_size])
-      dec_out = tf.reshape(dec_out, [-1, self.input_size])
+        enc_in = tf.reshape(enc_in, [-1, self.input_size])
+        enc_in = tf.reshape(enc_in, [-1, -1, self.input_size])
+        dec_in = tf.reshape(dec_in, [-1, self.input_size])
+        dec_out = tf.reshape(dec_out, [-1, self.input_size])
 
-      enc_in = tf.split(enc_in, source_seq_len-1, axis=0)
-      dec_in = tf.split(dec_in, target_seq_len, axis=0)
-      dec_out = tf.split(dec_out, target_seq_len, axis=0)
+        enc_in = tf.split(enc_in, source_seq_len-1, axis=0)
+        dec_in = tf.split(dec_in, target_seq_len, axis=0)
+        dec_out = tf.split(dec_out, target_seq_len, axis=0)
 
     # === Add space decoder ===
     cell = rnn_cell_extensions.LinearSpaceDecoderWrapper( cell, self.input_size )
+    #cellBk = rnn_cell_extensionsBk.LinearSpaceDecoderWrapper( cellBk, self.input_size )
 
     # Finally, wrap everything in a residual layer if we want to model velocities
     if residual_velocities:
@@ -128,6 +131,9 @@ def lf(prev, i):  # function for sampling_based loss
     else:
       raise(ValueError, "unknown loss: %s" % loss_to_use)
 
+    cellFw = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+    #cellBw = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+
     # Build the RNN
     if architecture == "basic":
       # Basic RNN does not have a loop function in its API, so copying here.
@@ -136,9 +142,24 @@ def lf(prev, i):  # function for sampling_based loss
       outputs, self.states = tf.contrib.legacy_seq2seq.rnn_decoder( dec_in, enc_state, cell, loop_function=lf )  # Decoder
     elif architecture == "tied":
       outputs, self.states = tf.contrib.legacy_seq2seq.tied_rnn_seq2seq( enc_in, dec_in, cell, loop_function=lf )
+    elif architecture == 'bid':
+      cell = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+      #cell = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+      outputs, self.states = tf.nn.bidirectional_dynamic_rnn(
+          cell_fw = cell,
+          cell_bw = cell,
+          inputs = dec_in
+          #,sequence_length = [-1, target_seq_len]
+          ,dtype=tf.float32
+          ,time_major=False
+          # initial_state_fw=init_state_fw,
+          # initial_state_bw=init_state_bw
+          )
     else:
       raise(ValueError, "Unknown architecture: %s" % architecture )
 
+    if architecture == 'bid':
+      outputs = outputs[0]
     self.outputs = outputs
 
     with tf.name_scope("loss_angles"):
@@ -409,7 +430,6 @@ def step(self, session, encoder_inputs, decoder_inputs, decoder_outputs,
                      self.loss,
                      self.loss_summary,
                      self.learning_rate_summary]
-
       outputs = session.run( output_feed, input_feed )
 
       return outputs[1], outputs[2], outputs[3], outputs[4]  # Gradient norm, loss, summaries

diff --git a/src/translate.py b/src/translate.py
index 3e3e9ad..7336c3a 100644
--- a/src/translate.py
+++ b/src/translate.py
@@ -27,7 +27,8 @@
 tf.app.flags.DEFINE_integer("batch_size", 16, "Batch size to use during training.")
 tf.app.flags.DEFINE_integer("iterations", int(1e5), "Iterations to train for.")
 # Architecture
-tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].")
+#tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].")
+tf.app.flags.DEFINE_string("architecture", "bid", "Seq2seq architecture to use: [basic, tied, bid].")
 tf.app.flags.DEFINE_integer("size", 1024, "Size of each model layer.")
 tf.app.flags.DEFINE_integer("num_layers", 1, "Number of layers in the model.")
 tf.app.flags.DEFINE_integer("seq_length_in", 50, "Number of frames to feed into the encoder. 25 fps")
@@ -638,6 +639,7 @@ def define_actions( action ):
             "greeting", "phoning", "posing", "purchases", "sitting",
             "sittingdown", "takingphoto", "waiting", "walkingdog",
             "walkingtogether"]
+  #actions = ["walking"]
 
   if action in actions:
     return [action]

diff --git a/~$GitHubVSTemp$~.sln b/~$GitHubVSTemp$~.sln
new file mode 100644
index 0000000..433e7e3
--- /dev/null
+++ b/~$GitHubVSTemp$~.sln
@@ -0,0 +1,10 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.25420.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Global
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
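Note on the API this patch applies: tf.nn.bidirectional_dynamic_rnn returns a
pair of per-direction output tensors plus a pair of final states, which is why
the code above unpacks outputs[0] before assigning self.outputs. A minimal
TensorFlow 1.x sketch of the call, with illustrative shapes and names that are
not taken from the repository:

    import tensorflow as tf

    seq_len, feat_dim = 25, 54   # illustrative sizes, not the repo's settings
    inputs = tf.placeholder(tf.float32, [None, seq_len, feat_dim])

    # Two distinct cell objects avoid the variable-sharing pitfalls that can
    # arise in TF 1.x when one cell instance serves both directions.
    cell_fw = tf.nn.rnn_cell.LSTMCell(num_units=feat_dim)
    cell_bw = tf.nn.rnn_cell.LSTMCell(num_units=feat_dim)

    # outputs == (fw_out, bw_out), each shaped [batch, time, num_units];
    # states  == (fw_state, bw_state), the final state of each direction.
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw, cell_bw=cell_bw, inputs=inputs,
        dtype=tf.float32, time_major=False)

    fw_out, bw_out = outputs                       # outputs[0] keeps only the forward pass
    merged = tf.concat([fw_out, bw_out], axis=-1)  # common alternative: use both directions

Observe that at this stage the bidirectional RNN runs over the decoder inputs
(inputs=dec_in), so in bid mode no encoder state is consumed; later patches in
the series move the bidirectional pass to the encoder side.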
From 64e8720306716b141037eb1e5b2324c5ae7b7ac0 Mon Sep 17 00:00:00 2001
From: Jesse Tam
Date: Mon, 11 Mar 2019 18:11:28 +0800
Subject: [PATCH 2/5] add visual studio project

---
 human-motion-prediction.sln | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 human-motion-prediction.sln

diff --git a/human-motion-prediction.sln b/human-motion-prediction.sln
new file mode 100644
index 0000000..313e05c
--- /dev/null
+++ b/human-motion-prediction.sln
@@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.25420.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "human-motion-prediction", "human-motion-prediction.pyproj", "{3C86F862-FDAB-4861-8F2D-FB0D1F114A76}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{3C86F862-FDAB-4861-8F2D-FB0D1F114A76}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{3C86F862-FDAB-4861-8F2D-FB0D1F114A76}.Release|Any CPU.ActiveCfg = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal

From bb630713884151af2b1466df49c2241a2b7a71b7 Mon Sep 17 00:00:00 2001
From: Jesse Tam
Date: Tue, 12 Mar 2019 01:14:08 +0800
Subject: [PATCH 3/5] fix bug: error after training 1000 iterations in "bid" mode

---
 src/seq2seq_model.py | 13 ++++++-------
 src/translate.py     | 10 +++++-----
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/seq2seq_model.py b/src/seq2seq_model.py
index b9a0350..e711ccf 100644
--- a/src/seq2seq_model.py
+++ b/src/seq2seq_model.py
@@ -81,7 +81,6 @@ def __init__(self,
     # === Create the RNN that will keep the state ===
     print('rnn_size = {0}'.format( rnn_size ))
     cell = tf.contrib.rnn.GRUCell( self.rnn_size )
-    cellBk = tf.contrib.rnn.GRUCell( self.rnn_size )
 
     if num_layers > 1:
       cell = tf.contrib.rnn.MultiRNNCell( [tf.contrib.rnn.GRUCell(self.rnn_size) for _ in range(num_layers)] )
@@ -102,7 +101,6 @@ def __init__(self,
         dec_out = tf.transpose(dec_out, [1, 0, 2])
 
         enc_in = tf.reshape(enc_in, [-1, self.input_size])
-        enc_in = tf.reshape(enc_in, [-1, -1, self.input_size])
         dec_in = tf.reshape(dec_in, [-1, self.input_size])
         dec_out = tf.reshape(dec_out, [-1, self.input_size])
 
@@ -112,7 +110,6 @@ def __init__(self,
 
     # === Add space decoder ===
     cell = rnn_cell_extensions.LinearSpaceDecoderWrapper( cell, self.input_size )
-    #cellBk = rnn_cell_extensionsBk.LinearSpaceDecoderWrapper( cellBk, self.input_size )
 
     # Finally, wrap everything in a residual layer if we want to model velocities
     if residual_velocities:
@@ -143,23 +140,25 @@ def lf(prev, i):  # function for sampling_based loss
     elif architecture == "tied":
       outputs, self.states = tf.contrib.legacy_seq2seq.tied_rnn_seq2seq( enc_in, dec_in, cell, loop_function=lf )
     elif architecture == 'bid':
+      dec_in = tf.transpose(dec_in, [1, 0, 2])
+      dec_out = tf.transpose(dec_out, [1, 0, 2])
       cell = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
       #cell = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
       outputs, self.states = tf.nn.bidirectional_dynamic_rnn(
           cell_fw = cell,
           cell_bw = cell,
           inputs = dec_in
-          #,sequence_length = [-1, target_seq_len]
+          #,sequence_length = [target_seq_len] * batch_size
           ,dtype=tf.float32
           ,time_major=False
-          # initial_state_fw=init_state_fw,
-          # initial_state_bw=init_state_bw
+          #,initial_state_fw=init_state_fw
+          #,initial_state_bw=init_state_bw
           )
     else:
       raise(ValueError, "Unknown architecture: %s" % architecture )
 
     if architecture == 'bid':
-      outputs = outputs[0]
+        outputs = outputs[0]
     self.outputs = outputs
 
     with tf.name_scope("loss_angles"):

diff --git a/src/translate.py b/src/translate.py
index 7336c3a..e600c85 100644
--- a/src/translate.py
+++ b/src/translate.py
@@ -635,11 +635,11 @@ def define_actions( action ):
               ValueError if the action is not included in H3.6M
   """
 
-  actions = ["walking", "eating", "smoking", "discussion", "directions",
-             "greeting", "phoning", "posing", "purchases", "sitting",
-             "sittingdown", "takingphoto", "waiting", "walkingdog",
-             "walkingtogether"]
-  #actions = ["walking"]
+  #actions = ["walking", "eating", "smoking", "discussion", "directions",
+  #           "greeting", "phoning", "posing", "purchases", "sitting",
+  #           "sittingdown", "takingphoto", "waiting", "walkingdog",
+  #           "walkingtogether"]
+  actions = ["walking"]
 
   if action in actions:
     return [action]
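PATCH 3/5 is mostly about tensor layout in bid mode: the legacy seq2seq
helpers consume a Python list of [batch, input_size] tensors, one per time
step, whereas tf.nn.bidirectional_dynamic_rnn consumes a single 3-D tensor,
[batch, time, input_size] when time_major=False, hence the reshuffled
transpose/reshape/split calls. A small sketch of the conversion between the
two layouts, with illustrative sizes and names:

    import tensorflow as tf

    batch, time, size = 16, 25, 54                          # illustrative sizes
    seq = tf.placeholder(tf.float32, [batch, time, size])   # batch-major input

    # Batch-major tensor -> list of per-step tensors, as legacy_seq2seq expects:
    time_major = tf.transpose(seq, [1, 0, 2])    # [time, batch, size]
    flat = tf.reshape(time_major, [-1, size])    # [time*batch, size]
    step_list = tf.split(flat, time, axis=0)     # `time` tensors of [batch, size]

    # dynamic_rnn-style APIs take `seq` directly when time_major=False;
    # no transpose/reshape/split chain is needed on that path.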
From a386656179159c7886c899af62acb17c429def54 Mon Sep 17 00:00:00 2001
From: Jesse Tam
Date: Tue, 12 Mar 2019 01:25:19 +0800
Subject: [PATCH 4/5] re-enable all actions

---
 src/translate.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/translate.py b/src/translate.py
index e600c85..7336c3a 100644
--- a/src/translate.py
+++ b/src/translate.py
@@ -635,11 +635,11 @@ def define_actions( action ):
               ValueError if the action is not included in H3.6M
   """
 
-  #actions = ["walking", "eating", "smoking", "discussion", "directions",
-  #           "greeting", "phoning", "posing", "purchases", "sitting",
-  #           "sittingdown", "takingphoto", "waiting", "walkingdog",
-  #           "walkingtogether"]
-  actions = ["walking"]
+  actions = ["walking", "eating", "smoking", "discussion", "directions",
+             "greeting", "phoning", "posing", "purchases", "sitting",
+             "sittingdown", "takingphoto", "waiting", "walkingdog",
+             "walkingtogether"]
+  #actions = ["walking"]
 
   if action in actions:
     return [action]

From 26691c19f18077b7d04bc0e2cf105e177e663c0f Mon Sep 17 00:00:00 2001
From: Jesse Tam
Date: Wed, 13 Mar 2019 13:59:37 +0800
Subject: [PATCH 5/5] rework bid mode: bidirectional encoder feeding
 rnn_decoder, with lstm/gru submodes

#basic: basic mode, use tf.contrib.rnn.static_rnn as encoder, and use
 tf.contrib.legacy_seq2seq.rnn_decoder as decoder
#tied: tied mode, use tf.contrib.legacy_seq2seq.tied_rnn_seq2seq for both
 encoder and decoder
#bid: bid mode, use tf.nn.bidirectional_dynamic_rnn as encoder, and use
 tf.contrib.legacy_seq2seq.rnn_decoder as decoder
# bid mode has 2 submodes:
# 1: lstm submode: both encoder and decoder use tf.nn.rnn_cell.LSTMCell as the cell
# 2: gru submode: both encoder and decoder use tf.nn.rnn_cell.GRUCell as the cell
---
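The net design after this patch: a bidirectional pass over the encoder inputs
whose forward final state, enc_state[0], seeds a unidirectional decoder; the
backward final state is discarded. A condensed TF 1.x sketch of that wiring
(sizes and names are illustrative; the repository additionally wraps the
decoder cell in LinearSpaceDecoderWrapper and an optional residual wrapper,
and drives it with a sampling loop function):

    import tensorflow as tf

    size, src_len, tgt_len = 54, 49, 25   # illustrative sizes
    enc_in = tf.placeholder(tf.float32, [None, src_len, size])
    dec_in = [tf.placeholder(tf.float32, [None, size]) for _ in range(tgt_len)]

    # Encoder: enc_state is a (fw_state, bw_state) pair; only the forward
    # half is passed on, so the backward summary is effectively dropped.
    cell_fw = tf.nn.rnn_cell.GRUCell(num_units=size)
    cell_bw = tf.nn.rnn_cell.GRUCell(num_units=size)
    _, enc_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw, cell_bw=cell_bw, inputs=enc_in,
        dtype=tf.float32, time_major=False)

    # Decoder: a plain unidirectional RNN seeded with the forward state.
    with tf.variable_scope("decoder"):
        cell_dec = tf.nn.rnn_cell.GRUCell(num_units=size)
        outputs, state = tf.contrib.legacy_seq2seq.rnn_decoder(
            dec_in, enc_state[0], cell_dec)

The patch itself passes one cell object for both cell_fw and cell_bw and
leans on reuse=tf.get_variable_scope().reuse; the sketch uses two cells,
which is the more common arrangement.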
 .vs/human-motion-prediction/v14/.suo | Bin 26112 -> 45568 bytes
 src/seq2seq_model.py                 | 56 ++++++++++++++++++---------
 src/translate.py                     | 11 ++++--
 3 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/.vs/human-motion-prediction/v14/.suo b/.vs/human-motion-prediction/v14/.suo
index 35de07ca12af5bb2abfc61c50857da03367de5ca..340462a0083a723c30e0304625e1a79e86e175df 100644
GIT binary patch
delta 2245
[base85-encoded binary delta omitted: Visual Studio .suo user-options file]

diff --git a/src/seq2seq_model.py b/src/seq2seq_model.py
index e711ccf..97d513a 100644
--- a/src/seq2seq_model.py
+++ b/src/seq2seq_model.py
@@ -85,6 +85,12 @@ def __init__(self,
     if num_layers > 1:
       cell = tf.contrib.rnn.MultiRNNCell( [tf.contrib.rnn.GRUCell(self.rnn_size) for _ in range(num_layers)] )
 
+    architecture = architecture.split(',')
+    architectureSub = architecture[1]
+    architecture = architecture[0]
+
+    print("======== working mode: %s ====== working submode: %s ========" % (architecture, architectureSub))
+
     # === Transform the inputs ===
     with tf.name_scope("inputs"):
       enc_in = tf.placeholder(dtype, shape=[None, source_seq_len-1, self.input_size], name="enc_in")
@@ -97,16 +103,15 @@ def __init__(self,
 
       if architecture != 'bid':
         enc_in = tf.transpose(enc_in, [1, 0, 2])
-        dec_in = tf.transpose(dec_in, [1, 0, 2])
-        dec_out = tf.transpose(dec_out, [1, 0, 2])
         enc_in = tf.reshape(enc_in, [-1, self.input_size])
-        dec_in = tf.reshape(dec_in, [-1, self.input_size])
-        dec_out = tf.reshape(dec_out, [-1, self.input_size])
         enc_in = tf.split(enc_in, source_seq_len-1, axis=0)
-        dec_in = tf.split(dec_in, target_seq_len, axis=0)
-        dec_out = tf.split(dec_out, target_seq_len, axis=0)
+
+      dec_in = tf.transpose(dec_in, [1, 0, 2])
+      dec_out = tf.transpose(dec_out, [1, 0, 2])
+      dec_in = tf.reshape(dec_in, [-1, self.input_size])
+      dec_out = tf.reshape(dec_out, [-1, self.input_size])
+      dec_in = tf.split(dec_in, target_seq_len, axis=0)
+      dec_out = tf.split(dec_out, target_seq_len, axis=0)
 
     # === Add space decoder ===
     cell = rnn_cell_extensions.LinearSpaceDecoderWrapper( cell, self.input_size )
@@ -128,7 +133,7 @@ def lf(prev, i):  # function for sampling_based loss
     else:
       raise(ValueError, "unknown loss: %s" % loss_to_use)
 
-    cellFw = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+    #cellFw = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
     #cellBw = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
 
     # Build the RNN
@@ -140,25 +145,38 @@ def lf(prev, i):  # function for sampling_based loss
     elif architecture == "tied":
       outputs, self.states = tf.contrib.legacy_seq2seq.tied_rnn_seq2seq( enc_in, dec_in, cell, loop_function=lf )
     elif architecture == 'bid':
-      dec_in = tf.transpose(dec_in, [1, 0, 2])
-      dec_out = tf.transpose(dec_out, [1, 0, 2])
-      cell = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
-      #cell = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
-      outputs, self.states = tf.nn.bidirectional_dynamic_rnn(
-          cell_fw = cell,
-          cell_bw = cell,
-          inputs = dec_in
-          #,sequence_length = [target_seq_len] * batch_size
-          ,dtype=tf.float32
-          ,time_major=False
-          #,initial_state_fw=init_state_fw
-          #,initial_state_bw=init_state_bw
-          )
+      #enc_in = tf.transpose(enc_in, [1, 0, 2])
+      #enc_in = tf.split(enc_in, [source_seq_len-1], axis=0)
+      if architectureSub == 'lstm':
+        cellBid = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+      elif architectureSub == 'gru':
+        cellBid = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+      else:
+        print("ERROR===> unknown architectureSub mode: %s" % (architectureSub))
+      #encoder
+      _, enc_state = tf.nn.bidirectional_dynamic_rnn(
+          cell_fw = cellBid,
+          cell_bw = cellBid,
+          inputs = enc_in
+          #,sequence_length = [target_seq_len] * batch_size
+          ,dtype=tf.float32
+          ,time_major=False
+          #,initial_state_fw=init_state_fw
+          #,initial_state_bw=init_state_bw
+          )
+      #decoder
+      if architectureSub == 'lstm':
+        cellDec = tf.nn.rnn_cell.LSTMCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+      elif architectureSub == 'gru':
+        cellDec = tf.nn.rnn_cell.GRUCell(num_units=self.input_size, reuse=tf.get_variable_scope().reuse)
+      else:
+        print("ERROR===> unknown architectureSub mode: %s" % (architectureSub))
+      outputs, self.states = tf.contrib.legacy_seq2seq.rnn_decoder( dec_in, enc_state[0], cellDec, loop_function=lf )
     else:
       raise(ValueError, "Unknown architecture: %s" % architecture )
 
-    if architecture == 'bid':
-      outputs = outputs[0]
+    #if architecture == 'bid':
+    #  outputs = outputs[0]
     self.outputs = outputs
 
     with tf.name_scope("loss_angles"):

diff --git a/src/translate.py b/src/translate.py
index 7336c3a..a290347 100644
--- a/src/translate.py
+++ b/src/translate.py
@@ -27,8 +27,13 @@
 tf.app.flags.DEFINE_integer("batch_size", 16, "Batch size to use during training.")
 tf.app.flags.DEFINE_integer("iterations", int(1e5), "Iterations to train for.")
 # Architecture
-#tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].")
-tf.app.flags.DEFINE_string("architecture", "bid", "Seq2seq architecture to use: [basic, tied, bid].")
+#basic: basic mode, use tf.contrib.rnn.static_rnn as encoder, and use tf.contrib.legacy_seq2seq.rnn_decoder as decoder
+#tied: tied mode, use tf.contrib.legacy_seq2seq.tied_rnn_seq2seq for both encoder and decoder
+#bid: bid mode, use tf.nn.bidirectional_dynamic_rnn as encoder, and use tf.contrib.legacy_seq2seq.rnn_decoder as decoder
+# bid mode has 2 submodes:
+# 1: lstm submode: both encoder and decoder use tf.nn.rnn_cell.LSTMCell as the cell
+# 2: gru submode: both encoder and decoder use tf.nn.rnn_cell.GRUCell as the cell
+tf.app.flags.DEFINE_string("architecture", "bid,lstm", "Seq2seq architecture to use: [basic, tied, bid(lstm,gru)].")
 tf.app.flags.DEFINE_integer("size", 1024, "Size of each model layer.")
 tf.app.flags.DEFINE_integer("num_layers", 1, "Number of layers in the model.")
 tf.app.flags.DEFINE_integer("seq_length_in", 50, "Number of frames to feed into the encoder. 25 fps")
@@ -639,7 +644,7 @@ def define_actions( action ):
             "greeting", "phoning", "posing", "purchases", "sitting",
             "sittingdown", "takingphoto", "waiting", "walkingdog",
             "walkingtogether"]
-  #actions = ["walking"]
+  #actions = ["walking"]  # uncomment this line for fast debugging
 
   if action in actions:
     return [action]
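With the series applied, the --architecture flag carries the mode and the bid
submode as one comma-separated value, parsed in seq2seq_model.py as sketched
below. One caveat: a value with no comma (e.g. a plain "basic") would make
architecture[1] raise IndexError under this parsing, so a submode token always
has to be present, even for the modes that ignore it.

    # Parsing as introduced in PATCH 5/5 (simplified):
    flag_value = "bid,lstm"            # e.g. the value of FLAGS.architecture
    parts = flag_value.split(',')
    architecture = parts[0]            # 'basic', 'tied', or 'bid'
    architectureSub = parts[1]         # 'lstm' or 'gru'; IndexError if absent

A hypothetical invocation (the architecture flag is defined above; the action
flag is assumed from define_actions() in the surrounding code, not verified
here):

    python src/translate.py --architecture bid,gru --action walking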