From ffccac01f3e09ccd8fec73cda647d38f816dccab Mon Sep 17 00:00:00 2001 From: Du Tran Date: Wed, 12 Aug 2015 00:47:59 -0700 Subject: [PATCH] adding an example of training UCF101 from scratch --- .../conv3d_ucf101_solver.prototxt | 20 + .../conv3d_ucf101_test.prototxt | 355 ++++++++++++++++++ .../conv3d_ucf101_train.prototxt | 348 +++++++++++++++++ .../c3d_train_ucf101/create_volume_mean.sh | 1 + examples/c3d_train_ucf101/test_ucf101.sh | 1 + examples/c3d_train_ucf101/train_ucf101.sh | 1 + 6 files changed, 726 insertions(+) create mode 100644 examples/c3d_train_ucf101/conv3d_ucf101_solver.prototxt create mode 100644 examples/c3d_train_ucf101/conv3d_ucf101_test.prototxt create mode 100644 examples/c3d_train_ucf101/conv3d_ucf101_train.prototxt create mode 100644 examples/c3d_train_ucf101/create_volume_mean.sh create mode 100644 examples/c3d_train_ucf101/test_ucf101.sh create mode 100644 examples/c3d_train_ucf101/train_ucf101.sh diff --git a/examples/c3d_train_ucf101/conv3d_ucf101_solver.prototxt b/examples/c3d_train_ucf101/conv3d_ucf101_solver.prototxt new file mode 100644 index 0000000000..72b311d6fb --- /dev/null +++ b/examples/c3d_train_ucf101/conv3d_ucf101_solver.prototxt @@ -0,0 +1,20 @@ +train_net: "conv3d_ucf101_train.prototxt" +test_net: "conv3d_ucf101_test.prototxt" +test_iter: 100 +test_interval: 1000 +base_lr: 0.003 +momentum: 0.9 +weight_decay: 0.005 +lr_policy: "step" +gamma: 0.1 +stepsize: 20000 +# Display every 20 iterations +display: 20 +# The maximum number of iterations +max_iter: 60000 +# snapshot intermediate results +snapshot: 1000 +snapshot_prefix: "conv3d_ucf101" +# solver mode: CPU or GPU +solver_mode: GPU +device_id: 0 diff --git a/examples/c3d_train_ucf101/conv3d_ucf101_test.prototxt b/examples/c3d_train_ucf101/conv3d_ucf101_test.prototxt new file mode 100644 index 0000000000..713a94e0ff --- /dev/null +++ b/examples/c3d_train_ucf101/conv3d_ucf101_test.prototxt @@ -0,0 +1,355 @@ +name: "deep_c3d_ucf101" +layers { + name: "data" + type: VIDEO_DATA + top: "data" + top: "label" + image_data_param { + source: "../c3d_finetuning/test_01.lst" + use_image: true + mean_file: "ucf101_train_mean.binaryproto" + batch_size: 30 + crop_size: 112 + mirror: false + show_data: 0 + new_height: 128 + new_width: 171 + new_length: 16 + shuffle: true + } +} +# ----------- 1st layer group --------------- +layers { + name: "conv1a" + type: CONVOLUTION3D + bottom: "data" + top: "conv1a" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + kernel_size: 3 + kernel_depth: 3 + pad: 1 + temporal_pad: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu1a" + type: RELU + bottom: "conv1a" + top: "conv1a" +} +layers { + name: "pool1" + type: POOLING3D + bottom: "conv1a" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + kernel_depth: 1 + stride: 2 + temporal_stride: 1 + } +} +# ------------- 2nd layer group -------------- +layers { + name: "conv2a" + type: CONVOLUTION3D + bottom: "pool1" + top: "conv2a" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + kernel_size: 3 + kernel_depth: 3 + pad: 1 + temporal_pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu2a" + type: RELU + bottom: "conv2a" + top: "conv2a" +} +layers { + name: "pool2" + type: POOLING3D + bottom: "conv2a" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + kernel_depth: 2 + stride: 2 + temporal_stride: 2 + } +} +# ----------------- 3rd layer group -------------- +layers { + name: "conv3a" + type: CONVOLUTION3D + bottom: "pool2" + top: "conv3a" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + kernel_size: 3 + kernel_depth: 3 + pad: 1 + temporal_pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu3a" + type: RELU + bottom: "conv3a" + top: "conv3a" +} +layers { + name: "pool3" + type: POOLING3D + bottom: "conv3a" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + kernel_depth: 2 + stride: 2 + temporal_stride: 2 + } +} + +# --------- 4th layer group +layers { + name: "conv4a" + type: CONVOLUTION3D + bottom: "pool3" + top: "conv4a" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + kernel_size: 3 + kernel_depth: 3 + pad: 1 + temporal_pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu4a" + type: RELU + bottom: "conv4a" + top: "conv4a" +} +layers { + name: "pool4" + type: POOLING3D + bottom: "conv4a" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + kernel_depth: 2 + stride: 2 + temporal_stride: 2 + } +} + +# --------------- 5th layer group -------- +layers { + name: "conv5a" + type: CONVOLUTION3D + bottom: "pool4" + top: "conv5a" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + kernel_size: 3 + kernel_depth: 3 + pad: 1 + temporal_pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu5a" + type: RELU + bottom: "conv5a" + top: "conv5a" +} +layers { + name: "pool5" + type: POOLING3D + bottom: "conv5a" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 2 + kernel_depth: 2 + stride: 2 + temporal_stride: 2 + } +} +# ---------------- fc layers ------------- +layers { + name: "fc6" + type: INNER_PRODUCT + bottom: "pool5" + top: "fc6" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 2048 + weight_filler { + type: "gaussian" + std: 0.005 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu6" + type: RELU + bottom: "fc6" + top: "fc6" +} +layers { + name: "drop6" + type: DROPOUT + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc7" + type: INNER_PRODUCT + bottom: "fc6" + top: "fc7" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 2048 + weight_filler { + type: "gaussian" + std: 0.005 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu7" + type: RELU + bottom: "fc7" + top: "fc7" +} +layers { + name: "drop7" + type: DROPOUT + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc8" + type: INNER_PRODUCT + bottom: "fc7" + top: "fc8" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 101 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "prob" + type: SOFTMAX + bottom: "fc8" + top: "prob" +} +layers { + top: "accuracy" + name: "accuracy" + type: ACCURACY + bottom: "prob" + bottom: "label" +} diff --git a/examples/c3d_train_ucf101/conv3d_ucf101_train.prototxt b/examples/c3d_train_ucf101/conv3d_ucf101_train.prototxt new file mode 100644 index 0000000000..202a6c4443 --- /dev/null +++ b/examples/c3d_train_ucf101/conv3d_ucf101_train.prototxt @@ -0,0 +1,348 @@ +name: "deep_c3d_ucf101" +layers { + name: "data" + type: VIDEO_DATA + top: "data" + top: "label" + image_data_param { + source: "../c3d_finetuning/train_01.lst" + use_image: true + mean_file: "ucf101_train_mean.binaryproto" + batch_size: 30 + crop_size: 112 + mirror: true + show_data: 0 + new_height: 128 + new_width: 171 + new_length: 16 + shuffle: true + } +} +# ----------- 1st layer group --------------- +layers { + name: "conv1a" + type: CONVOLUTION3D + bottom: "data" + top: "conv1a" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + kernel_size: 3 + kernel_depth: 3 + pad: 1 + temporal_pad: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu1a" + type: RELU + bottom: "conv1a" + top: "conv1a" +} +layers { + name: "pool1" + type: POOLING3D + bottom: "conv1a" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + kernel_depth: 1 + stride: 2 + temporal_stride: 1 + } +} +# ------------- 2nd layer group -------------- +layers { + name: "conv2a" + type: CONVOLUTION3D + bottom: "pool1" + top: "conv2a" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + kernel_size: 3 + kernel_depth: 3 + pad: 1 + temporal_pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu2a" + type: RELU + bottom: "conv2a" + top: "conv2a" +} +layers { + name: "pool2" + type: POOLING3D + bottom: "conv2a" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + kernel_depth: 2 + stride: 2 + temporal_stride: 2 + } +} +# ----------------- 3rd layer group -------------- +layers { + name: "conv3a" + type: CONVOLUTION3D + bottom: "pool2" + top: "conv3a" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + kernel_size: 3 + kernel_depth: 3 + pad: 1 + temporal_pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu3a" + type: RELU + bottom: "conv3a" + top: "conv3a" +} +layers { + name: "pool3" + type: POOLING3D + bottom: "conv3a" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + kernel_depth: 2 + stride: 2 + temporal_stride: 2 + } +} + +# --------- 4th layer group +layers { + name: "conv4a" + type: CONVOLUTION3D + bottom: "pool3" + top: "conv4a" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + kernel_size: 3 + kernel_depth: 3 + pad: 1 + temporal_pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu4a" + type: RELU + bottom: "conv4a" + top: "conv4a" +} +layers { + name: "pool4" + type: POOLING3D + bottom: "conv4a" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + kernel_depth: 2 + stride: 2 + temporal_stride: 2 + } +} + +# --------------- 5th layer group -------- +layers { + name: "conv5a" + type: CONVOLUTION3D + bottom: "pool4" + top: "conv5a" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + kernel_size: 3 + kernel_depth: 3 + pad: 1 + temporal_pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu5a" + type: RELU + bottom: "conv5a" + top: "conv5a" +} +layers { + name: "pool5" + type: POOLING3D + bottom: "conv5a" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 2 + kernel_depth: 2 + stride: 2 + temporal_stride: 2 + } +} +# ---------------- fc layers ------------- +layers { + name: "fc6" + type: INNER_PRODUCT + bottom: "pool5" + top: "fc6" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 2048 + weight_filler { + type: "gaussian" + std: 0.005 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu6" + type: RELU + bottom: "fc6" + top: "fc6" +} +layers { + name: "drop6" + type: DROPOUT + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc7" + type: INNER_PRODUCT + bottom: "fc6" + top: "fc7" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 2048 + weight_filler { + type: "gaussian" + std: 0.005 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu7" + type: RELU + bottom: "fc7" + top: "fc7" +} +layers { + name: "drop7" + type: DROPOUT + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc8" + type: INNER_PRODUCT + bottom: "fc7" + top: "fc8" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 101 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "loss" + type: SOFTMAX_LOSS + bottom: "fc8" + bottom: "label" +} diff --git a/examples/c3d_train_ucf101/create_volume_mean.sh b/examples/c3d_train_ucf101/create_volume_mean.sh new file mode 100644 index 0000000000..5ec0ff51c5 --- /dev/null +++ b/examples/c3d_train_ucf101/create_volume_mean.sh @@ -0,0 +1 @@ +GLOG_logtostderr=1 ../../build/tools/compute_volume_mean_from_list.bin ../c3d_finetuning/train_01.lst 16 128 171 1 ucf101_train_mean.binaryproto 10 diff --git a/examples/c3d_train_ucf101/test_ucf101.sh b/examples/c3d_train_ucf101/test_ucf101.sh new file mode 100644 index 0000000000..94e4bc0f8e --- /dev/null +++ b/examples/c3d_train_ucf101/test_ucf101.sh @@ -0,0 +1 @@ +GLOG_logtostderr=1 ../../build/tools/test_net.bin conv3d_ucf101_test.prototxt conv3d_ucf101_iter_60000 1396 GPU 0 diff --git a/examples/c3d_train_ucf101/train_ucf101.sh b/examples/c3d_train_ucf101/train_ucf101.sh new file mode 100644 index 0000000000..45fadef1f8 --- /dev/null +++ b/examples/c3d_train_ucf101/train_ucf101.sh @@ -0,0 +1 @@ +GLOG_logtostderr=1 ../../build/tools/train_net.bin conv3d_ucf101_solver.prototxt