From ffccac01f3e09ccd8fec73cda647d38f816dccab Mon Sep 17 00:00:00 2001
From: Du Tran <trandu@gmail.com>
Date: Wed, 12 Aug 2015 00:47:59 -0700
Subject: [PATCH] adding an example of training UCF101 from scratch

---
 .../conv3d_ucf101_solver.prototxt             |  20 +
 .../conv3d_ucf101_test.prototxt               | 355 ++++++++++++++++++
 .../conv3d_ucf101_train.prototxt              | 348 +++++++++++++++++
 .../c3d_train_ucf101/create_volume_mean.sh    |   1 +
 examples/c3d_train_ucf101/test_ucf101.sh      |   1 +
 examples/c3d_train_ucf101/train_ucf101.sh     |   1 +
 6 files changed, 726 insertions(+)
 create mode 100644 examples/c3d_train_ucf101/conv3d_ucf101_solver.prototxt
 create mode 100644 examples/c3d_train_ucf101/conv3d_ucf101_test.prototxt
 create mode 100644 examples/c3d_train_ucf101/conv3d_ucf101_train.prototxt
 create mode 100644 examples/c3d_train_ucf101/create_volume_mean.sh
 create mode 100644 examples/c3d_train_ucf101/test_ucf101.sh
 create mode 100644 examples/c3d_train_ucf101/train_ucf101.sh

diff --git a/examples/c3d_train_ucf101/conv3d_ucf101_solver.prototxt b/examples/c3d_train_ucf101/conv3d_ucf101_solver.prototxt
new file mode 100644
index 0000000000..72b311d6fb
--- /dev/null
+++ b/examples/c3d_train_ucf101/conv3d_ucf101_solver.prototxt
@@ -0,0 +1,20 @@
+train_net: "conv3d_ucf101_train.prototxt"  # relative path: run from examples/c3d_train_ucf101
+test_net: "conv3d_ucf101_test.prototxt"
+test_iter: 100  # test batches per evaluation; the test net uses batch_size 30
+test_interval: 1000  # evaluate the test net every 1000 training iterations
+base_lr: 0.003
+momentum: 0.9
+weight_decay: 0.005
+lr_policy: "step"  # multiply the learning rate by gamma every stepsize iterations
+gamma: 0.1
+stepsize: 20000
+# Display every 20 iterations
+display: 20
+# The maximum number of iterations (three stepsize periods of 20000)
+max_iter: 60000
+# Snapshot intermediate results every 1000 iterations
+snapshot: 1000
+snapshot_prefix: "conv3d_ucf101"  # test_ucf101.sh loads conv3d_ucf101_iter_60000
+# Solver mode: CPU or GPU; device_id picks the GPU
+solver_mode: GPU
+device_id: 0
diff --git a/examples/c3d_train_ucf101/conv3d_ucf101_test.prototxt b/examples/c3d_train_ucf101/conv3d_ucf101_test.prototxt
new file mode 100644
index 0000000000..713a94e0ff
--- /dev/null
+++ b/examples/c3d_train_ucf101/conv3d_ucf101_test.prototxt
@@ -0,0 +1,355 @@
+name: "deep_c3d_ucf101"
+layers {  # input layer of the test net: produces the "data" and "label" blobs
+  name: "data"
+  type: VIDEO_DATA
+  top: "data"
+  top: "label"
+  image_data_param {
+    source: "../c3d_finetuning/test_01.lst"  # list file taken from the sibling c3d_finetuning example
+    use_image: true  # presumably reads frames as image files rather than video; confirm against the VIDEO_DATA layer
+    mean_file: "ucf101_train_mean.binaryproto"  # the file create_volume_mean.sh writes
+    batch_size: 30
+    crop_size: 112  # smaller than new_height x new_width (128 x 171)
+    mirror: false  # the train prototxt sets mirror: true
+    show_data: 0
+    new_height: 128  # 16 / 128 / 171 are the same values passed to create_volume_mean.sh
+    new_width: 171
+    new_length: 16
+    shuffle: true  # NOTE(review): shuffling is enabled for the test list too; confirm this is intended
+  }
+}
+# ----------- 1st layer group: conv1a -> relu1a -> pool1 ---------------
+layers {
+  name: "conv1a"
+  type: CONVOLUTION3D
+  bottom: "data"
+  top: "conv1a"
+  blobs_lr: 1  # per-blob learning-rate multipliers; presumably weights first, bias second
+  blobs_lr: 2
+  weight_decay: 1  # per-blob weight-decay multipliers; the second blob is not decayed
+  weight_decay: 0
+  convolution_param {
+    num_output: 64
+    kernel_size: 3
+    kernel_depth: 3
+    pad: 1
+    temporal_pad: 1
+    stride: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 0  # conv1a and fc8 start with zero bias; the other layers use 1
+    }
+  }
+}
+layers {
+  name: "relu1a"
+  type: RELU
+  bottom: "conv1a"
+  top: "conv1a"  # in place: same blob as bottom
+}
+layers {
+  name: "pool1"
+  type: POOLING3D
+  bottom: "conv1a"
+  top: "pool1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    kernel_depth: 1  # depth kernel and temporal stride are 1 here; pool2-pool5 use 2
+    stride: 2
+    temporal_stride: 1
+  }
+}
+# ------------- 2nd layer group: conv2a -> relu2a -> pool2 --------------
+layers {
+  name: "conv2a"
+  type: CONVOLUTION3D
+  bottom: "pool1"
+  top: "conv2a"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  convolution_param {
+    num_output: 128  # twice conv1a's 64 outputs
+    kernel_size: 3
+    kernel_depth: 3
+    pad: 1  # no stride field in this layer, unlike conv1a (stride: 1); the layer default applies
+    temporal_pad: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu2a"
+  type: RELU
+  bottom: "conv2a"
+  top: "conv2a"  # in place: same blob as bottom
+}
+layers {
+  name: "pool2"
+  type: POOLING3D
+  bottom: "conv2a"
+  top: "pool2"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    kernel_depth: 2  # from pool2 onward the depth kernel and temporal stride are 2 (pool1 uses 1)
+    stride: 2
+    temporal_stride: 2
+  }
+}
+# ----------------- 3rd layer group: conv3a -> relu3a -> pool3 --------------
+layers {
+  name: "conv3a"
+  type: CONVOLUTION3D
+  bottom: "pool2"
+  top: "conv3a"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  convolution_param {
+    num_output: 256  # conv3a, conv4a and conv5a all use 256 outputs
+    kernel_size: 3
+    kernel_depth: 3
+    pad: 1
+    temporal_pad: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu3a"
+  type: RELU
+  bottom: "conv3a"
+  top: "conv3a"  # in place: same blob as bottom
+}
+layers {
+  name: "pool3"
+  type: POOLING3D
+  bottom: "conv3a"
+  top: "pool3"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    kernel_depth: 2
+    stride: 2
+    temporal_stride: 2
+  }
+}
+
+# --------- 4th layer group: conv4a -> relu4a -> pool4 ---------
+layers {
+  name: "conv4a"
+  type: CONVOLUTION3D
+  bottom: "pool3"
+  top: "conv4a"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  convolution_param {
+    num_output: 256  # same output count as conv3a
+    kernel_size: 3
+    kernel_depth: 3
+    pad: 1
+    temporal_pad: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu4a"
+  type: RELU
+  bottom: "conv4a"
+  top: "conv4a"  # in place: same blob as bottom
+}
+layers {
+  name: "pool4"
+  type: POOLING3D
+  bottom: "conv4a"
+  top: "pool4"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    kernel_depth: 2
+    stride: 2
+    temporal_stride: 2
+  }
+}
+
+# --------------- 5th layer group: conv5a -> relu5a -> pool5 --------
+layers {
+  name: "conv5a"
+  type: CONVOLUTION3D
+  bottom: "pool4"
+  top: "conv5a"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  convolution_param {
+    num_output: 256  # same output count as conv3a and conv4a
+    kernel_size: 3
+    kernel_depth: 3
+    pad: 1
+    temporal_pad: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu5a"
+  type: RELU
+  bottom: "conv5a"
+  top: "conv5a"  # in place: same blob as bottom
+}
+layers {
+  name: "pool5"
+  type: POOLING3D
+  bottom: "conv5a"
+  top: "pool5"  # last pooling output; consumed by fc6
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    kernel_depth: 2
+    stride: 2
+    temporal_stride: 2
+  }
+}
+# ---------------- fc layers: fc6 -> fc7 -> fc8 -------------
+layers {
+  name: "fc6"
+  type: INNER_PRODUCT
+  bottom: "pool5"
+  top: "fc6"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  inner_product_param {
+    num_output: 2048
+    weight_filler {
+      type: "gaussian"
+      std: 0.005  # fc6 and fc7 use 0.005; the conv layers and fc8 use 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu6"
+  type: RELU
+  bottom: "fc6"
+  top: "fc6"  # in place: same blob as bottom
+}
+layers {
+  name: "drop6"
+  type: DROPOUT
+  bottom: "fc6"
+  top: "fc6"  # in place: same blob as bottom
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {  # second fully connected layer; same settings as fc6
+  name: "fc7"
+  type: INNER_PRODUCT
+  bottom: "fc6"
+  top: "fc7"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  inner_product_param {
+    num_output: 2048
+    weight_filler {
+      type: "gaussian"
+      std: 0.005
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu7"
+  type: RELU
+  bottom: "fc7"
+  top: "fc7"  # in place: same blob as bottom
+}
+layers {
+  name: "drop7"
+  type: DROPOUT
+  bottom: "fc7"
+  top: "fc7"  # in place: same blob as bottom
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {  # final fully connected layer; its output feeds the softmax
+  name: "fc8"
+  type: INNER_PRODUCT
+  bottom: "fc7"
+  top: "fc8"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  inner_product_param {
+    num_output: 101  # presumably one output per UCF101 class
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 0  # zero bias, as in conv1a; the layers in between use 1
+    }
+  }
+}
+layers {  # test-only head: the train prototxt ends in SOFTMAX_LOSS instead
+  name: "prob"
+  type: SOFTMAX
+  bottom: "fc8"
+  top: "prob"
+}
+layers {
+  top: "accuracy"
+  name: "accuracy"
+  type: ACCURACY
+  bottom: "prob"  # softmax output from the layer above
+  bottom: "label"  # second top of the data layer
+}
diff --git a/examples/c3d_train_ucf101/conv3d_ucf101_train.prototxt b/examples/c3d_train_ucf101/conv3d_ucf101_train.prototxt
new file mode 100644
index 0000000000..202a6c4443
--- /dev/null
+++ b/examples/c3d_train_ucf101/conv3d_ucf101_train.prototxt
@@ -0,0 +1,348 @@
+name: "deep_c3d_ucf101"
+layers {  # input layer of the train net: produces the "data" and "label" blobs
+  name: "data"
+  type: VIDEO_DATA
+  top: "data"
+  top: "label"
+  image_data_param {
+    source: "../c3d_finetuning/train_01.lst"  # same list create_volume_mean.sh computes the mean from
+    use_image: true  # presumably reads frames as image files rather than video; confirm against the VIDEO_DATA layer
+    mean_file: "ucf101_train_mean.binaryproto"  # the file create_volume_mean.sh writes
+    batch_size: 30
+    crop_size: 112  # smaller than new_height x new_width (128 x 171)
+    mirror: true  # enabled for training only; the test prototxt sets mirror: false
+    show_data: 0
+    new_height: 128  # 16 / 128 / 171 are the same values passed to create_volume_mean.sh
+    new_width: 171
+    new_length: 16
+    shuffle: true
+  }
+}
+# ----------- 1st layer group: conv1a -> relu1a -> pool1 ---------------
+layers {
+  name: "conv1a"
+  type: CONVOLUTION3D
+  bottom: "data"
+  top: "conv1a"
+  blobs_lr: 1  # per-blob learning-rate multipliers; presumably weights first, bias second
+  blobs_lr: 2
+  weight_decay: 1  # per-blob weight-decay multipliers; the second blob is not decayed
+  weight_decay: 0
+  convolution_param {
+    num_output: 64
+    kernel_size: 3
+    kernel_depth: 3
+    pad: 1
+    temporal_pad: 1
+    stride: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 0  # conv1a and fc8 start with zero bias; the other layers use 1
+    }
+  }
+}
+layers {
+  name: "relu1a"
+  type: RELU
+  bottom: "conv1a"
+  top: "conv1a"  # in place: same blob as bottom
+}
+layers {
+  name: "pool1"
+  type: POOLING3D
+  bottom: "conv1a"
+  top: "pool1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    kernel_depth: 1  # depth kernel and temporal stride are 1 here; pool2-pool5 use 2
+    stride: 2
+    temporal_stride: 1
+  }
+}
+# ------------- 2nd layer group: conv2a -> relu2a -> pool2 --------------
+layers {
+  name: "conv2a"
+  type: CONVOLUTION3D
+  bottom: "pool1"
+  top: "conv2a"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  convolution_param {
+    num_output: 128  # twice conv1a's 64 outputs
+    kernel_size: 3
+    kernel_depth: 3
+    pad: 1  # no stride field in this layer, unlike conv1a (stride: 1); the layer default applies
+    temporal_pad: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu2a"
+  type: RELU
+  bottom: "conv2a"
+  top: "conv2a"  # in place: same blob as bottom
+}
+layers {
+  name: "pool2"
+  type: POOLING3D
+  bottom: "conv2a"
+  top: "pool2"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    kernel_depth: 2  # from pool2 onward the depth kernel and temporal stride are 2 (pool1 uses 1)
+    stride: 2
+    temporal_stride: 2
+  }
+}
+# ----------------- 3rd layer group: conv3a -> relu3a -> pool3 --------------
+layers {
+  name: "conv3a"
+  type: CONVOLUTION3D
+  bottom: "pool2"
+  top: "conv3a"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  convolution_param {
+    num_output: 256  # conv3a, conv4a and conv5a all use 256 outputs
+    kernel_size: 3
+    kernel_depth: 3
+    pad: 1
+    temporal_pad: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu3a"
+  type: RELU
+  bottom: "conv3a"
+  top: "conv3a"  # in place: same blob as bottom
+}
+layers {
+  name: "pool3"
+  type: POOLING3D
+  bottom: "conv3a"
+  top: "pool3"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    kernel_depth: 2
+    stride: 2
+    temporal_stride: 2
+  }
+}
+
+# --------- 4th layer group: conv4a -> relu4a -> pool4 ---------
+layers {
+  name: "conv4a"
+  type: CONVOLUTION3D
+  bottom: "pool3"
+  top: "conv4a"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  convolution_param {
+    num_output: 256  # same output count as conv3a
+    kernel_size: 3
+    kernel_depth: 3
+    pad: 1
+    temporal_pad: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu4a"
+  type: RELU
+  bottom: "conv4a"
+  top: "conv4a"  # in place: same blob as bottom
+}
+layers {
+  name: "pool4"
+  type: POOLING3D
+  bottom: "conv4a"
+  top: "pool4"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    kernel_depth: 2
+    stride: 2
+    temporal_stride: 2
+  }
+}
+
+# --------------- 5th layer group: conv5a -> relu5a -> pool5 --------
+layers {
+  name: "conv5a"
+  type: CONVOLUTION3D
+  bottom: "pool4"
+  top: "conv5a"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  convolution_param {
+    num_output: 256  # same output count as conv3a and conv4a
+    kernel_size: 3
+    kernel_depth: 3
+    pad: 1
+    temporal_pad: 1
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu5a"
+  type: RELU
+  bottom: "conv5a"
+  top: "conv5a"  # in place: same blob as bottom
+}
+layers {
+  name: "pool5"
+  type: POOLING3D
+  bottom: "conv5a"
+  top: "pool5"  # last pooling output; consumed by fc6
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    kernel_depth: 2
+    stride: 2
+    temporal_stride: 2
+  }
+}
+# ---------------- fc layers: fc6 -> fc7 -> fc8 -------------
+layers {
+  name: "fc6"
+  type: INNER_PRODUCT
+  bottom: "pool5"
+  top: "fc6"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  inner_product_param {
+    num_output: 2048
+    weight_filler {
+      type: "gaussian"
+      std: 0.005  # fc6 and fc7 use 0.005; the conv layers and fc8 use 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu6"
+  type: RELU
+  bottom: "fc6"
+  top: "fc6"  # in place: same blob as bottom
+}
+layers {
+  name: "drop6"
+  type: DROPOUT
+  bottom: "fc6"
+  top: "fc6"  # in place: same blob as bottom
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {  # second fully connected layer; same settings as fc6
+  name: "fc7"
+  type: INNER_PRODUCT
+  bottom: "fc6"
+  top: "fc7"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  inner_product_param {
+    num_output: 2048
+    weight_filler {
+      type: "gaussian"
+      std: 0.005
+    }
+    bias_filler {
+      type: "constant"
+      value: 1
+    }
+  }
+}
+layers {
+  name: "relu7"
+  type: RELU
+  bottom: "fc7"
+  top: "fc7"  # in place: same blob as bottom
+}
+layers {
+  name: "drop7"
+  type: DROPOUT
+  bottom: "fc7"
+  top: "fc7"  # in place: same blob as bottom
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layers {  # final fully connected layer; its output feeds the loss
+  name: "fc8"
+  type: INNER_PRODUCT
+  bottom: "fc7"
+  top: "fc8"
+  blobs_lr: 1
+  blobs_lr: 2
+  weight_decay: 1
+  weight_decay: 0
+  inner_product_param {
+    num_output: 101  # presumably one output per UCF101 class
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 0  # zero bias, as in conv1a; the layers in between use 1
+    }
+  }
+}
+layers {  # training head: the test prototxt uses SOFTMAX + ACCURACY here instead
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "fc8"  # raw fc8 outputs; no separate SOFTMAX layer in the train net
+  bottom: "label"  # second top of the data layer
+}
diff --git a/examples/c3d_train_ucf101/create_volume_mean.sh b/examples/c3d_train_ucf101/create_volume_mean.sh
new file mode 100644
index 0000000000..5ec0ff51c5
--- /dev/null
+++ b/examples/c3d_train_ucf101/create_volume_mean.sh
@@ -0,0 +1 @@
+GLOG_logtostderr=1 ../../build/tools/compute_volume_mean_from_list.bin ../c3d_finetuning/train_01.lst 16 128 171 1 ucf101_train_mean.binaryproto 10 # 16 128 171 match new_length/new_height/new_width in the net prototxts; the output file is their mean_file
diff --git a/examples/c3d_train_ucf101/test_ucf101.sh b/examples/c3d_train_ucf101/test_ucf101.sh
new file mode 100644
index 0000000000..94e4bc0f8e
--- /dev/null
+++ b/examples/c3d_train_ucf101/test_ucf101.sh
@@ -0,0 +1 @@
+GLOG_logtostderr=1 ../../build/tools/test_net.bin conv3d_ucf101_test.prototxt conv3d_ucf101_iter_60000 1396 GPU 0 # weights file name matches the solver's snapshot_prefix and max_iter; 1396 is presumably the number of test batches (TODO confirm)
diff --git a/examples/c3d_train_ucf101/train_ucf101.sh b/examples/c3d_train_ucf101/train_ucf101.sh
new file mode 100644
index 0000000000..45fadef1f8
--- /dev/null
+++ b/examples/c3d_train_ucf101/train_ucf101.sh
@@ -0,0 +1 @@
+GLOG_logtostderr=1 ../../build/tools/train_net.bin conv3d_ucf101_solver.prototxt # all paths are relative: run from examples/c3d_train_ucf101