Merge pull request #144 from oarriaga/refactor_boxes

Refactor boxes
oarriaga · May 10, 2021 · 4f9e842 · 4f9e842
2 parents 4a90c7c + 30fc2fa
commit 4f9e842
Show file tree

Hide file tree

Showing 12 changed files with 395 additions and 139 deletions.
diff --git a/docs/structure.py b/docs/structure.py
@@ -41,7 +41,7 @@
             boxes.to_center_form,
             boxes.to_one_hot,
             boxes.to_normalized_coordinates,
-            boxes.to_point_form
+            boxes.to_corner_form
         ],
     },
 

diff --git a/examples/object_detection/boxes.py b/examples/object_detection/boxes.py
@@ -0,0 +1,40 @@
+import numpy as np
+from paz.backend.boxes import compute_ious, to_corner_form
+
+
+def match(boxes, prior_boxes, iou_threshold=0.5):
+    """Matches each prior box with a ground truth box (box from `boxes`).
+    It then selects which matched box will be considered positive e.g. iou > .5
+    and returns for each prior box a ground truth box that is either positive
+    (with a class argument different than 0) or negative.
+
+    # Arguments
+        boxes: Numpy array of shape `(num_ground_truth_boxes, 4 + 1)`,
+            where the first four coordinates correspond to
+            box coordinates and the last coordinate is the class
+            argument. These boxes should be the ground truth boxes.
+        prior_boxes: Numpy array of shape `(num_prior_boxes, 4)`,
+            where the four coordinates are in center form coordinates.
+        iou_threshold: Float between [0, 1]. Intersection over union
+            used to determine which box is considered a positive box.
+
+    # Returns
+        numpy array of shape `(num_prior_boxes, 4 + 1)`,
+            where the first four coordinates correspond to corner
+            form box coordinates and the last coordinate is the class
+            argument.
+    """
+    ious = compute_ious(boxes, to_corner_form(np.float32(prior_boxes)))
+    per_prior_which_box_iou = np.max(ious, axis=0)
+    per_prior_which_box_arg = np.argmax(ious, 0)
+
+    #  overwriting per_prior_which_box_arg if they are the best prior box
+    per_box_which_prior_arg = np.argmax(ious, 1)
+    per_prior_which_box_iou[per_box_which_prior_arg] = 2
+    for box_arg in range(len(per_box_which_prior_arg)):
+        best_prior_box_arg = per_box_which_prior_arg[box_arg]
+        per_prior_which_box_arg[best_prior_box_arg] = box_arg
+
+    matches = boxes[per_prior_which_box_arg]
+    matches[per_prior_which_box_iou < iou_threshold, 4] = 0
+    return matches
diff --git a/examples/object_detection/debugger.py b/examples/object_detection/debugger.py
@@ -1,12 +1,20 @@
+import tensorflow as tf
+gpus = tf.config.experimental.list_physical_devices('GPU')
+tf.config.experimental.set_memory_growth(gpus[0], True)
+
+
 import numpy as np
 from paz.models import SSD300
 from paz.datasets import VOC
 from paz.abstract import Processor, SequentialProcessor
 from paz import processors as pr
-from paz.pipelines import AugmentDetection
+from detection import AugmentDetection
+# from paz.pipelines import AugmentDetection
+
 
 class ShowBoxes(Processor):
-    def __init__(self, class_names, prior_boxes, variances=[.1, .2]):
+    def __init__(self, class_names, prior_boxes,
+                 variances=[0.1, 0.1, 0.2, 0.2]):
         super(ShowBoxes, self).__init__()
         self.deprocess_boxes = SequentialProcessor([
             pr.DecodeBoxes(prior_boxes, variances),
@@ -15,8 +23,10 @@ def __init__(self, class_names, prior_boxes, variances=[.1, .2]):
         self.denormalize_boxes2D = pr.DenormalizeBoxes2D()
         self.draw_boxes2D = pr.DrawBoxes2D(class_names)
         self.show_image = pr.ShowImage()
+        self.resize_image = pr.ResizeImage((600, 600))
 
     def call(self, image, boxes):
+        image = self.resize_image(image)
         boxes2D = self.deprocess_boxes(boxes)
         boxes2D = self.denormalize_boxes2D(image, boxes2D)
         image = self.draw_boxes2D(image, boxes2D)
@@ -35,7 +45,8 @@ def call(self, image, boxes):
 data = data_manager.load_data()
 
 class_names = data_manager.class_names
-model = SSD300(base_weights='VGG', head_weights=None)
+# model = SSD300(base_weights='VGG', head_weights=None)
+model = SSD300()
 prior_boxes = model.prior_boxes
 
 testor_encoder = AugmentDetection(prior_boxes)

diff --git a/examples/object_detection/demo.py b/examples/object_detection/demo.py
@@ -2,6 +2,11 @@
 
 from paz.pipelines import SSD300FAT, SSD300VOC, SSD512COCO, SSD512YCBVideo
 from paz.backend.camera import VideoPlayer, Camera
+import tensorflow as tf
+gpus = tf.config.experimental.list_physical_devices('GPU')
+tf.config.experimental.set_memory_growth(gpus[0], True)
+
+
 
 
 parser = argparse.ArgumentParser(description='SSD object detection demo')

diff --git a/examples/object_detection/detection.py b/examples/object_detection/detection.py
@@ -0,0 +1,113 @@
+
+from paz import processors as pr
+from paz.abstract import SequentialProcessor
+from processors import MatchBoxes
+
+
+class AugmentImage(SequentialProcessor):
+    """Augments an RGB image by randomly changing contrast, brightness,
+        saturation and hue.
+    """
+    def __init__(self):
+        super(AugmentImage, self).__init__()
+        self.add(pr.RandomContrast())
+        self.add(pr.RandomBrightness())
+        self.add(pr.RandomSaturation(0.7))
+        self.add(pr.RandomHue())
+
+
+class PreprocessImage(SequentialProcessor):
+    """Preprocess RGB image by resizing it to the given ``shape``. If a
+    ``mean`` is given it is subtracted from the image; if not, the image
+    gets normalized.
+
+    # Arguments
+        shape: List of two Ints.
+        mean: List of three Ints indicating the per-channel mean to be
+            subtracted.
+    """
+    def __init__(self, shape, mean=pr.BGR_IMAGENET_MEAN):
+        super(PreprocessImage, self).__init__()
+        self.add(pr.ResizeImage(shape))
+        self.add(pr.CastImage(float))
+        if mean is None:
+            self.add(pr.NormalizeImage())
+        else:
+            self.add(pr.SubtractMeanImage(mean))
+
+
+class AugmentBoxes(SequentialProcessor):
+    """Perform data augmentation with bounding boxes.
+
+    # Arguments
+        mean: List of three elements used to fill empty image spaces.
+    """
+    def __init__(self, mean=pr.BGR_IMAGENET_MEAN):
+        super(AugmentBoxes, self).__init__()
+        self.add(pr.ToImageBoxCoordinates())
+        self.add(pr.Expand(mean=mean))
+        # RandomSampleCrop was commented out
+        self.add(pr.RandomSampleCrop())
+        self.add(pr.RandomFlipBoxesLeftRight())
+        self.add(pr.ToNormalizedBoxCoordinates())
+
+
+class PreprocessBoxes(SequentialProcessor):
+    """Preprocess bounding boxes
+
+    # Arguments
+        num_classes: Int.
+        prior_boxes: Numpy array of shape ``[num_boxes, 4]`` containing
+            prior/default bounding boxes.
+        IOU: Float. Intersection over union used to match boxes.
+        variances: List of floats indicating the variances used
+            for encoding bounding boxes.
+    """
+    def __init__(self, num_classes, prior_boxes, IOU, variances):
+        super(PreprocessBoxes, self).__init__()
+        self.add(MatchBoxes(prior_boxes, IOU),)
+        self.add(pr.EncodeBoxes(prior_boxes, variances))
+        self.add(pr.BoxClassToOneHotVector(num_classes))
+
+
+class AugmentDetection(SequentialProcessor):
+    """Augment boxes and images for object detection.
+
+    # Arguments
+        prior_boxes: Numpy array of shape ``[num_boxes, 4]`` containing
+            prior/default bounding boxes.
+        split: Flag from ``paz.processors.TRAIN``, ``paz.processors.VAL``
+            or ``paz.processors.TEST``. Certain transformations would take
+            place depending on the flag.
+        num_classes: Int.
+        size: Int. Image size.
+        mean: List of three elements indicating the per channel mean.
+        IOU: Float. Intersection over union used to match boxes.
+        variances: List of four floats indicating the variances used
+            for encoding bounding boxes.
+    """
+    def __init__(self, prior_boxes, split=pr.TRAIN, num_classes=21, size=300,
+                 mean=pr.BGR_IMAGENET_MEAN, IOU=.5,
+                 variances=[0.1, 0.1, 0.2, 0.2]):
+        super(AugmentDetection, self).__init__()
+        # image processors
+        self.augment_image = AugmentImage()
+        self.augment_image.add(pr.ConvertColorSpace(pr.RGB2BGR))
+        self.preprocess_image = PreprocessImage((size, size), mean)
+
+        # box processors
+        self.augment_boxes = AugmentBoxes()
+        args = (num_classes, prior_boxes, IOU, variances)
+        self.preprocess_boxes = PreprocessBoxes(*args)
+
+        # pipeline
+        self.add(pr.UnpackDictionary(['image', 'boxes']))
+        self.add(pr.ControlMap(pr.LoadImage(), [0], [0]))
+        if split == pr.TRAIN:
+            self.add(pr.ControlMap(self.augment_image, [0], [0]))
+            self.add(pr.ControlMap(self.augment_boxes, [0, 1], [0, 1]))
+        self.add(pr.ControlMap(self.preprocess_image, [0], [0]))
+        self.add(pr.ControlMap(self.preprocess_boxes, [1], [1]))
+        self.add(pr.SequenceWrapper(
+            {0: {'image': [size, size, 3]}},
+            {1: {'boxes': [len(prior_boxes), 4 + num_classes]}}))
diff --git a/examples/object_detection/processors.py b/examples/object_detection/processors.py
@@ -0,0 +1,22 @@
+from paz.abstract import Processor
+from boxes import match
+
+
+class MatchBoxes(Processor):
+    """Match prior boxes with ground truth boxes.
+
+    # Arguments
+        prior_boxes: Numpy array of shape (num_boxes, 4).
+        iou: Float in [0, 1]. Intersection over union threshold at or above
+            which prior boxes are considered positive. A positive box is a
+            box with a class different than `background`.
+        variance: List of two floats.
+    """
+    def __init__(self, prior_boxes, iou=.5):
+        self.prior_boxes = prior_boxes
+        self.iou = iou
+        super(MatchBoxes, self).__init__()
+
+    def call(self, boxes):
+        boxes = match(boxes, self.prior_boxes, self.iou)
+        return boxes
diff --git a/examples/object_detection/train.py b/examples/object_detection/train.py
@@ -1,10 +1,17 @@
 import os
 import argparse
+import tensorflow as tf
+gpus = tf.config.experimental.list_physical_devices('GPU')
+tf.config.experimental.set_memory_growth(gpus[0], True)
 
+# from tensorflow.python.framework.ops import disable_eager_execution
+# disable_eager_execution()
+# import tensorflow as tf
+# tf.compat.v1.experimental.output_all_intermediates(True)
 from tensorflow.keras.optimizers import SGD
 from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint
 from paz.optimization.callbacks import LearningRateScheduler
-from paz.pipelines import AugmentDetection
+from detection import AugmentDetection
 from paz.models import SSD300
 from paz.datasets import VOC
 from paz.optimization import MultiBoxLoss
@@ -17,17 +24,15 @@
 parser = argparse.ArgumentParser(description=description)
 parser.add_argument('-bs', '--batch_size', default=32, type=int,
                     help='Batch size for training')
-parser.add_argument('-st', '--steps_per_epoch', default=1000, type=int,
-                    help='Batch size for training')
-parser.add_argument('-et', '--evaluation_period', default=1, type=int,
+parser.add_argument('-et', '--evaluation_period', default=10, type=int,
                     help='evaluation frequency')
 parser.add_argument('-lr', '--learning_rate', default=0.001, type=float,
                     help='Initial learning rate for SGD')
 parser.add_argument('-m', '--momentum', default=0.9, type=float,
                     help='Momentum for SGD')
 parser.add_argument('-g', '--gamma_decay', default=0.1, type=float,
                     help='Gamma decay for learning rate scheduler')
-parser.add_argument('-e', '--num_epochs', default=120, type=int,
+parser.add_argument('-e', '--num_epochs', default=240, type=int,
                     help='Maximum number of epochs before finishing')
 parser.add_argument('-iou', '--AP_IOU', default=0.5, type=float,
                     help='Average precision IOU used for evaluation')
@@ -36,7 +41,7 @@
 parser.add_argument('-dp', '--data_path', default='VOCdevkit/',
                     type=str, help='Path for writing model weights and logs')
 parser.add_argument('-se', '--scheduled_epochs', nargs='+', type=int,
-                    default=[55, 76], help='Epochs for reducing learning rate')
+                    default=[110, 152], help='Epoch learning rate reduction')
 parser.add_argument('-mp', '--multiprocessing', default=False, type=bool,
                     help='Select True for multiprocessing')
 parser.add_argument('-w', '--workers', default=1, type=int,
@@ -100,9 +105,8 @@
     args.AP_IOU)
 
 # training
-model.fit_generator(
+model.fit(
     sequencers[0],
-    steps_per_epoch=args.steps_per_epoch,
     epochs=args.num_epochs,
     verbose=1,
     callbacks=[checkpoint, log, schedule, evaluate],

diff --git a/examples/tutorials/object_detection_pipeline.py b/examples/tutorials/object_detection_pipeline.py
@@ -102,7 +102,8 @@ def __init__(self, num_classes, prior_boxes, IOU, variances):
 # Putting everything together in a single processor:
 class AugmentDetection(SequentialProcessor):
     def __init__(self, prior_boxes, split=pr.TRAIN, num_classes=21, size=300,
-                 mean=pr.BGR_IMAGENET_MEAN, IOU=.5, variances=[.1, .2]):
+                 mean=pr.BGR_IMAGENET_MEAN, IOU=.5,
+                 variances=[0.1, 0.1, 0.2, 0.2]):
         super(AugmentDetection, self).__init__()
 
         # image processors