Skip to content

Commit

Permalink
Merge pull request #144 from oarriaga/refactor_boxes
Browse files Browse the repository at this point in the history
Refactor boxes
  • Loading branch information
oarriaga authored May 10, 2021
2 parents 4a90c7c + 30fc2fa commit 4f9e842
Show file tree
Hide file tree
Showing 12 changed files with 395 additions and 139 deletions.
2 changes: 1 addition & 1 deletion docs/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
boxes.to_center_form,
boxes.to_one_hot,
boxes.to_normalized_coordinates,
boxes.to_point_form
boxes.to_corner_form
],
},

Expand Down
40 changes: 40 additions & 0 deletions examples/object_detection/boxes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import numpy as np
from paz.backend.boxes import compute_ious, to_corner_form


def match(boxes, prior_boxes, iou_threshold=0.5):
    """Assigns one ground truth box to every prior box.

    Each prior box receives the ground truth box it overlaps the most.
    Additionally, every ground truth box claims the prior box it overlaps
    the most, so that no ground truth box is left without a prior box.
    Prior boxes whose overlap is below `iou_threshold` are labeled as
    negative by setting their class argument to 0.

    # Arguments
        boxes: Numpy array of shape `(num_ground_truth_boxes, 4 + 1)`.
            The first four values are the box coordinates and the last
            value is the class argument. These are the ground truth boxes.
        prior_boxes: Numpy array of shape `(num_prior_boxes, 4)` with
            the prior boxes in center form. They are converted to corner
            form before computing the overlaps.
        iou_threshold: Float in [0, 1]. Intersection over union below
            which a matched prior box is considered negative.

    # Returns
        Numpy array of shape `(num_prior_boxes, 4 + 1)`. Every row is a
        copy of the ground truth box assigned to that prior box, with
        the class argument set to 0 for negative matches.
    """
    prior_corners = to_corner_form(np.float32(prior_boxes))
    # rows are indexed by ground truth box, columns by prior box
    ious = compute_ious(boxes, prior_corners)
    best_iou_per_prior = np.max(ious, axis=0)
    best_box_per_prior = np.argmax(ious, axis=0)

    # Every ground truth box keeps its best prior box. An IoU of 2 is
    # larger than any threshold in [0, 1], thus these forced matches are
    # never discarded as negatives.
    best_prior_per_box = np.argmax(ious, axis=1)
    best_iou_per_prior[best_prior_per_box] = 2
    for box_arg, prior_arg in enumerate(best_prior_per_box):
        best_box_per_prior[prior_arg] = box_arg

    is_negative = best_iou_per_prior < iou_threshold
    # indexing with an integer array copies the rows; `boxes` is untouched
    matches = boxes[best_box_per_prior]
    matches[is_negative, 4] = 0
    return matches
17 changes: 14 additions & 3 deletions examples/object_detection/debugger.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
# The device list is empty on CPU-only machines; indexing it blindly would
# raise an IndexError as soon as this script is started.
if gpus:
    # Allocate memory on the first GPU on demand rather than all at once.
    tf.config.experimental.set_memory_growth(gpus[0], True)


import numpy as np
from paz.models import SSD300
from paz.datasets import VOC
from paz.abstract import Processor, SequentialProcessor
from paz import processors as pr
from paz.pipelines import AugmentDetection
from detection import AugmentDetection
# from paz.pipelines import AugmentDetection


class ShowBoxes(Processor):
def __init__(self, class_names, prior_boxes, variances=[.1, .2]):
def __init__(self, class_names, prior_boxes,
variances=[0.1, 0.1, 0.2, 0.2]):
super(ShowBoxes, self).__init__()
self.deprocess_boxes = SequentialProcessor([
pr.DecodeBoxes(prior_boxes, variances),
Expand All @@ -15,8 +23,10 @@ def __init__(self, class_names, prior_boxes, variances=[.1, .2]):
self.denormalize_boxes2D = pr.DenormalizeBoxes2D()
self.draw_boxes2D = pr.DrawBoxes2D(class_names)
self.show_image = pr.ShowImage()
self.resize_image = pr.ResizeImage((600, 600))

def call(self, image, boxes):
image = self.resize_image(image)
boxes2D = self.deprocess_boxes(boxes)
boxes2D = self.denormalize_boxes2D(image, boxes2D)
image = self.draw_boxes2D(image, boxes2D)
Expand All @@ -35,7 +45,8 @@ def call(self, image, boxes):
data = data_manager.load_data()

class_names = data_manager.class_names
model = SSD300(base_weights='VGG', head_weights=None)
# model = SSD300(base_weights='VGG', head_weights=None)
model = SSD300()
prior_boxes = model.prior_boxes

testor_encoder = AugmentDetection(prior_boxes)
Expand Down
5 changes: 5 additions & 0 deletions examples/object_detection/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

from paz.pipelines import SSD300FAT, SSD300VOC, SSD512COCO, SSD512YCBVideo
from paz.backend.camera import VideoPlayer, Camera
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
# The device list is empty on CPU-only machines; indexing it blindly would
# raise an IndexError as soon as this script is started.
if gpus:
    # Allocate memory on the first GPU on demand rather than all at once.
    tf.config.experimental.set_memory_growth(gpus[0], True)




parser = argparse.ArgumentParser(description='SSD object detection demo')
Expand Down
113 changes: 113 additions & 0 deletions examples/object_detection/detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@

from paz import processors as pr
from paz.abstract import SequentialProcessor
from processors import MatchBoxes


class AugmentImage(SequentialProcessor):
    """Photometric image augmentation.

    Chains, in this order, random contrast, random brightness, random
    saturation and random hue processors.
    """
    def __init__(self):
        super().__init__()
        photometric_processors = (
            pr.RandomContrast(),
            pr.RandomBrightness(),
            pr.RandomSaturation(0.7),
            pr.RandomHue(),
        )
        for processor in photometric_processors:
            self.add(processor)


class PreprocessImage(SequentialProcessor):
    """Resizes an image to ``shape``, casts it to float and rescales it.

    If a ``mean`` is given it is subtracted from the image; if ``mean`` is
    ``None`` the image is normalized instead.

    # Arguments
        shape: List of two Ints.
        mean: List of three Ints indicating the per-channel mean to be
            subtracted, or ``None`` to normalize the image.
    """
    def __init__(self, shape, mean=pr.BGR_IMAGENET_MEAN):
        super().__init__()
        self.add(pr.ResizeImage(shape))
        self.add(pr.CastImage(float))
        if mean is not None:
            rescale_image = pr.SubtractMeanImage(mean)
        else:
            rescale_image = pr.NormalizeImage()
        self.add(rescale_image)


class AugmentBoxes(SequentialProcessor):
    """Geometric data augmentation for images with bounding boxes.

    Boxes are moved to image coordinates, the sample is randomly expanded,
    cropped and flipped left-right, and the boxes are finally moved back to
    normalized coordinates.

    # Arguments
        mean: List of three elements used to fill empty image spaces.
    """
    def __init__(self, mean=pr.BGR_IMAGENET_MEAN):
        super().__init__()
        geometric_processors = [
            pr.ToImageBoxCoordinates(),
            pr.Expand(mean=mean),
            # NOTE: the random sample crop used to be commented out;
            # it is active in this pipeline.
            pr.RandomSampleCrop(),
            pr.RandomFlipBoxesLeftRight(),
            pr.ToNormalizedBoxCoordinates(),
        ]
        for processor in geometric_processors:
            self.add(processor)


class PreprocessBoxes(SequentialProcessor):
    """Turns ground truth boxes into training targets.

    The boxes are matched against the prior boxes, encoded with respect to
    them, and their class argument is converted to a one-hot vector.

    # Arguments
        num_classes: Int.
        prior_boxes: Numpy array of shape ``[num_boxes, 4]`` containing
            prior/default bounding boxes.
        IOU: Float. Intersection over union used to match boxes.
        variances: List of floats indicating the variances used for
            encoding bounding boxes.
    """
    def __init__(self, num_classes, prior_boxes, IOU, variances):
        super().__init__()
        match_boxes = MatchBoxes(prior_boxes, IOU)
        encode_boxes = pr.EncodeBoxes(prior_boxes, variances)
        to_one_hot = pr.BoxClassToOneHotVector(num_classes)
        for processor in (match_boxes, encode_boxes, to_one_hot):
            self.add(processor)


class AugmentDetection(SequentialProcessor):
    """Augment boxes and images for object detection.

    Builds the full pipeline that takes a sample dictionary with the keys
    ``'image'`` and ``'boxes'``, loads the image, optionally augments
    image and boxes, and preprocesses both for training a detector.

    # Arguments
        prior_boxes: Numpy array of shape ``[num_boxes, 4]`` containing
            prior/default bounding boxes.
        split: Flag from ``paz.processors.TRAIN``, ``paz.processors.VAL``
            or ``paz.processors.TEST``. Image and box augmentations are
            only added when the flag is ``paz.processors.TRAIN``.
        num_classes: Int.
        size: Int. Image size.
        mean: List of three elements indicating the per channel mean.
        IOU: Float. Intersection over union used to match boxes.
        variances: List of four floats indicating the variances used for
            encoding bounding boxes. If ``None`` the default
            ``[0.1, 0.1, 0.2, 0.2]`` is used.
    """
    def __init__(self, prior_boxes, split=pr.TRAIN, num_classes=21, size=300,
                 mean=pr.BGR_IMAGENET_MEAN, IOU=.5, variances=None):
        super(AugmentDetection, self).__init__()
        # A fresh list is built on every call so that instances never
        # share (and cannot accidentally mutate) one default list object.
        if variances is None:
            variances = [0.1, 0.1, 0.2, 0.2]

        # image processors
        self.augment_image = AugmentImage()
        self.augment_image.add(pr.ConvertColorSpace(pr.RGB2BGR))
        self.preprocess_image = PreprocessImage((size, size), mean)

        # box processors
        # NOTE(review): ``mean`` is not forwarded here, so the box
        # augmentation always uses the default mean of ``AugmentBoxes``.
        # Confirm whether this is intended.
        self.augment_boxes = AugmentBoxes()
        args = (num_classes, prior_boxes, IOU, variances)
        self.preprocess_boxes = PreprocessBoxes(*args)

        # pipeline
        # The index lists passed to ``ControlMap`` presumably select which
        # pipeline arguments go in and where the outputs are placed
        # (0: image, 1: boxes) -- TODO confirm against ``pr.ControlMap``.
        self.add(pr.UnpackDictionary(['image', 'boxes']))
        self.add(pr.ControlMap(pr.LoadImage(), [0], [0]))
        if split == pr.TRAIN:
            self.add(pr.ControlMap(self.augment_image, [0], [0]))
            self.add(pr.ControlMap(self.augment_boxes, [0, 1], [0, 1]))
        self.add(pr.ControlMap(self.preprocess_image, [0], [0]))
        self.add(pr.ControlMap(self.preprocess_boxes, [1], [1]))
        # names and shapes of the resulting input and label tensors
        self.add(pr.SequenceWrapper(
            {0: {'image': [size, size, 3]}},
            {1: {'boxes': [len(prior_boxes), 4 + num_classes]}}))
22 changes: 22 additions & 0 deletions examples/object_detection/processors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from paz.abstract import Processor
from boxes import match


class MatchBoxes(Processor):
    """Processor wrapper around `match`.

    Assigns a ground truth box to every prior box.

    # Arguments
        prior_boxes: Numpy array of shape (num_boxes, 4).
        iou: Float in [0, 1]. Intersection over union from which a prior
            box is considered positive. A positive box is a box with a
            class different than `background`.
    """
    def __init__(self, prior_boxes, iou=.5):
        self.prior_boxes, self.iou = prior_boxes, iou
        super().__init__()

    def call(self, boxes):
        # `boxes` are the ground truth boxes; the result has one row per
        # prior box.
        return match(boxes, self.prior_boxes, self.iou)
20 changes: 12 additions & 8 deletions examples/object_detection/train.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
import os
import argparse
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
# The device list is empty on CPU-only machines; indexing it blindly would
# raise an IndexError as soon as this script is started.
if gpus:
    # Allocate memory on the first GPU on demand rather than all at once.
    tf.config.experimental.set_memory_growth(gpus[0], True)

# from tensorflow.python.framework.ops import disable_eager_execution
# disable_eager_execution()
# import tensorflow as tf
# tf.compat.v1.experimental.output_all_intermediates(True)
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint
from paz.optimization.callbacks import LearningRateScheduler
from paz.pipelines import AugmentDetection
from detection import AugmentDetection
from paz.models import SSD300
from paz.datasets import VOC
from paz.optimization import MultiBoxLoss
Expand All @@ -17,17 +24,15 @@
parser = argparse.ArgumentParser(description=description)
parser.add_argument('-bs', '--batch_size', default=32, type=int,
help='Batch size for training')
parser.add_argument('-st', '--steps_per_epoch', default=1000, type=int,
help='Batch size for training')
parser.add_argument('-et', '--evaluation_period', default=1, type=int,
parser.add_argument('-et', '--evaluation_period', default=10, type=int,
help='evaluation frequency')
parser.add_argument('-lr', '--learning_rate', default=0.001, type=float,
help='Initial learning rate for SGD')
parser.add_argument('-m', '--momentum', default=0.9, type=float,
help='Momentum for SGD')
parser.add_argument('-g', '--gamma_decay', default=0.1, type=float,
help='Gamma decay for learning rate scheduler')
parser.add_argument('-e', '--num_epochs', default=120, type=int,
parser.add_argument('-e', '--num_epochs', default=240, type=int,
help='Maximum number of epochs before finishing')
parser.add_argument('-iou', '--AP_IOU', default=0.5, type=float,
help='Average precision IOU used for evaluation')
Expand All @@ -36,7 +41,7 @@
parser.add_argument('-dp', '--data_path', default='VOCdevkit/',
type=str, help='Path for writing model weights and logs')
parser.add_argument('-se', '--scheduled_epochs', nargs='+', type=int,
default=[55, 76], help='Epochs for reducing learning rate')
default=[110, 152], help='Epoch learning rate reduction')
# `type=bool` would turn every non-empty string (including "False") into
# True, so the value is parsed explicitly: only affirmative spellings
# enable multiprocessing.
parser.add_argument('-mp', '--multiprocessing', default=False,
                    type=lambda value: str(value).strip().lower() in (
                        'true', 't', 'yes', 'y', 'on', '1'),
                    help='Select True for multiprocessing')
parser.add_argument('-w', '--workers', default=1, type=int,
Expand Down Expand Up @@ -100,9 +105,8 @@
args.AP_IOU)

# training
model.fit_generator(
model.fit(
sequencers[0],
steps_per_epoch=args.steps_per_epoch,
epochs=args.num_epochs,
verbose=1,
callbacks=[checkpoint, log, schedule, evaluate],
Expand Down
3 changes: 2 additions & 1 deletion examples/tutorials/object_detection_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ def __init__(self, num_classes, prior_boxes, IOU, variances):
# Putting everything together in a single processor:
class AugmentDetection(SequentialProcessor):
def __init__(self, prior_boxes, split=pr.TRAIN, num_classes=21, size=300,
mean=pr.BGR_IMAGENET_MEAN, IOU=.5, variances=[.1, .2]):
mean=pr.BGR_IMAGENET_MEAN, IOU=.5,
variances=[0.1, 0.1, 0.2, 0.2]):
super(AugmentDetection, self).__init__()

# image processors
Expand Down
Loading

0 comments on commit 4f9e842

Please sign in to comment.