Demo generic #345

Open
wants to merge 28 commits into main
Changes from all commits

28 commits
ed3d8b9
Update project for demo
Dec 14, 2023
a624c2b
Dsiac demo (#275)
mgkwill Dec 15, 2023
77bcf11
Merge branch 'demo' of github.com:lava-nc/lava-dl into demo
Dec 15, 2023
9fe3638
working version of prophesee dataset
elrond91 Nov 9, 2023
394a2da
added inference prophesee tutorial
elrond91 Dec 15, 2023
fb5ee70
updated necessary files
elrond91 Dec 15, 2023
5c97ca8
fixed merge issues
elrond91 Dec 15, 2023
21ba888
Remove paths of lecampos home dir
Dec 15, 2023
247722d
Dsiac demo (#275)
mgkwill Dec 15, 2023
b1d699d
Adding orig pyproject.toml to run CI, will revert after CI passes
Dec 15, 2023
3ee59e8
Add separate train_sdnn.py
Dec 15, 2023
2803c67
Merge branch 'demo' into feature-events
mgkwill Dec 15, 2023
3e992c3
Merge pull request #276 from lava-nc/feature-events
elrond91 Dec 15, 2023
37a2cae
Add back Trained_yolo_kp + Trained_tiny_yolov3_str
Dec 18, 2023
226a660
Update poetry.lock
Dec 18, 2023
25de85c
clarify dsiac vs bdd100k
timcheck Dec 18, 2023
0336957
Update to use demo pyproject.toml
Dec 18, 2023
ab2b36c
Update utils.py
PhilippPlank Dec 19, 2023
d5d3b64
path through model_args
timcheck Dec 19, 2023
8516518
clean output
timcheck Dec 19, 2023
f1909f0
Remove poetry.lock
Dec 21, 2023
fb434d1
Ethernet IO for YOLO-KP (#280)
bamsumit Jan 17, 2024
604ca00
deterministic test sample load
timcheck Jan 24, 2024
0102e21
Fixed weird error message for prophesee dataset
bamsumit Jan 24, 2024
7a31d14
Stable yolo execution with ethernet
bamsumit Mar 4, 2024
d7b7c3b
Merge branch 'demo' of github.com:lava-nc/lava-dl into demo
bamsumit Mar 4, 2024
2aae899
Update poetry.lock
Mar 25, 2024
1d22179
Remove DSIA
Jun 27, 2024
6 changes: 6 additions & 0 deletions .gitignore
@@ -129,3 +129,9 @@ dmypy.json

# Pyre type checker
.pyre/

#Trained log folders
#Trained_*
#Logs_*
#runs

2,263 changes: 1,123 additions & 1,140 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -46,9 +46,9 @@ classifiers = [
"Discussions" = "https://github.com/lava-nc/lava-dl/discussions"

[tool.poetry.dependencies]
-python = ">=3.8, <3.11"
+python = ">=3.9,<3.11"

-lava-nc = { git = "https://github.com/lava-nc/lava.git", branch = "main", develop = true }
+lava-nc = { path = "../lava", develop = true }

torchvision = "^0.15.1"
h5py = "^3.7.0"
125 changes: 125 additions & 0 deletions src/lava/lib/dl/slayer/object_detection/boundingbox/utils.py
@@ -635,6 +635,78 @@ def create_frames(inputs: torch.tensor,
box_color_map=box_color_map,
thickness=5)
draw = ImageDraw.Draw(marked_gt)
draw.text([5, 5], 'Ground Truth',
fill=(255, 255, 255), anchor='lt')
marked_images = Img.new('RGB',
(marked_img.width + marked_gt.width,
marked_img.height))
marked_images.paste(marked_img, (0, 0))
marked_images.paste(marked_gt, (marked_img.width, 0))
frames.append(marked_images)
return frames


def create_frames_events(inputs: torch.tensor,
targets: torch.tensor,
predictions: torch.tensor,
classes: List[str],
batch: Optional[int] = 0,
box_color_map: Optional[
List[Tuple[RGB, RGB, RGB]]] = None) -> List[Image]:
"""Create video frames of object detection prediction.
Note: the prediction is on the left side and the ground truth is on the
right side.

Parameters
----------
inputs : torch.tensor
Input image frame tensor in NCHWT format.
targets : torch.tensor
Target bounding box tensor of shape (num_bbox, 6). The column values
represent x_center, y_center, width, height, confidence, label.
predictions : torch.tensor
Prediction bounding box tensor of shape (num_bbox, 6). The column values
represent x_center, y_center, width, height, confidence, label.
    classes : List[str]
        List of class name strings.
    batch : Optional[int], optional
        Index of the batch to be converted to video frames. By default 0.
    box_color_map : Optional[List[Tuple[RGB, RGB, RGB]]], optional
        Color map associated with the classes. If None, it will be randomly
        generated. By default None.
"""
if box_color_map is None:
box_color_map = [(np.random.randint(256),
np.random.randint(256),
np.random.randint(256)) for _ in range(len(classes))]

frames = []
b = batch
for t in range(inputs.shape[-1]):
image = render_events_img(inputs[b, :, :, :, t].cpu().data.numpy())
annotation = annotation_from_tensor(predictions[t][b],
{'height': image.height,
'width': image.width},
classes,
confidence_th=0)
marked_img = mark_bounding_boxes(image,
annotation['annotation']['object'],
box_color_map=box_color_map,
thickness=5)
draw = ImageDraw.Draw(marked_img)
draw.text([5, 5], 'Prediction',
fill=(255, 255, 255), anchor='lt')
        image = render_events_img(inputs[b, :, :, :, t].cpu().data.numpy())
annotation = annotation_from_tensor(targets[t][b],
{'height': image.height,
'width': image.width},
classes,
confidence_th=0)
marked_gt = mark_bounding_boxes(image,
annotation['annotation']['object'],
box_color_map=box_color_map,
thickness=5)
draw = ImageDraw.Draw(marked_gt)
draw.text([5, 5], 'Ground Truth',
                  fill=(255, 255, 255), anchor='lt')
        marked_images = Img.new('RGB',
@@ -691,4 +763,57 @@ def create_video(inputs: torch.tensor,
video.release()


def create_video_events(inputs: torch.tensor,
targets: torch.tensor,
predictions: torch.tensor,
output_path: str,
classes: List[str],
batch: Optional[int] = 0,
box_color_map: Optional[List[Tuple[RGB,
RGB,
RGB]]]
= None) -> None:
"""Create video of object detection prediction.
Note: the prediction is on the left side and the ground truth is on the
right side.

Parameters
----------
inputs : torch.tensor
Input image frame tensor in NCHWT format.
targets : torch.tensor
Target bounding box tensor of shape (num_bbox, 6). The column values
represent x_center, y_center, width, height, confidence, label.
predictions : torch.tensor
Prediction bounding box tensor of shape (num_bbox, 6). The column values
represent x_center, y_center, width, height, confidence, label.
    output_path : str
        Path to save the video file.
    classes : List[str]
        List of class name strings.
    batch : Optional[int], optional
        Index of the batch to be converted to video. By default 0.
    box_color_map : Optional[List[Tuple[RGB, RGB, RGB]]], optional
        Color map associated with the classes. If None, it will be randomly
        generated. By default None.
"""
frames = create_frames_events(inputs, targets, predictions, classes,
batch, box_color_map)
_, _, H, W, _ = inputs.shape
video_dims = (2 * W, H)
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
video = cv2.VideoWriter(output_path + '.mp4', fourcc, 10, video_dims)

for frame in frames:
video.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
video.release()


def render_events_img(inputs: np.ndarray) -> Image:
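    # Map event polarities to colors: channel 0 (ON events) renders red,
    # channel 1 (OFF events) renders blue; green stays empty.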
out = np.zeros((3, inputs.shape[1], inputs.shape[2]))
out[0, :, :] = 255 * inputs[0, :, :]
out[2, :, :] = 255 * inputs[1, :, :]
return Img.fromarray(np.uint8(out).transpose([1, 2, 0]))


nms = non_maximum_suppression
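
A minimal usage sketch of how these new event helpers compose (all shapes, values, and file names below are illustrative assumptions, not part of this diff):

```python
import torch

from lava.lib.dl.slayer.object_detection.boundingbox.utils import (
    create_video_events, render_events_img)

# Hypothetical event tensor: batch=1, 2 polarity channels, 448x448, 32 steps
# (NCHWT, binary values like those produced by the Prophesee dataset below).
inputs = (torch.rand(1, 2, 448, 448, 32) > 0.95).float()

# One (num_bbox, 6) tensor per time step and batch element; columns are
# x_center, y_center, width, height, confidence, label.
boxes = [[torch.tensor([[0.5, 0.5, 0.2, 0.2, 1.0, 0.0]])] for _ in range(32)]

# Writes side-by-side prediction (left) vs ground truth (right) to demo.mp4.
create_video_events(inputs, targets=boxes, predictions=boxes,
                    output_path='demo', classes=['car'])

# render_events_img alone turns one (2, H, W) event slice into a PIL image.
frame0 = render_events_img(inputs[0, :, :, :, 0].numpy())
```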
3 changes: 2 additions & 1 deletion src/lava/lib/dl/slayer/object_detection/dataset/__init__.py
@@ -3,6 +3,7 @@


from .bdd100k import BDD
+from .prophesee_automotive import PropheseeAutomotive


-__all__ = ['BDD']
+__all__ = ['BDD', 'PropheseeAutomotive']
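
With the new export in place, the dataset can be imported directly from the package, e.g.:

```python
from lava.lib.dl.slayer.object_detection.dataset import PropheseeAutomotive
```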
220 changes: 220 additions & 0 deletions src/lava/lib/dl/slayer/object_detection/dataset/prophesee_automotive.py
@@ -0,0 +1,220 @@
import os
import json
import random
import numpy as np

import torch
from torch.utils.data import Dataset
from torchvision import transforms

from .utils import resize_events_frame, fliplr_events
from ..boundingbox import utils as bbutils
from ..boundingbox.utils import Height, Width

from typing import Any, Dict, Tuple

try:
from src.io.psee_loader import PSEELoader
from src.io.box_filtering import filter_boxes
except ModuleNotFoundError:
src = 'https://github.com/prophesee-ai/prophesee-automotive-dataset-toolbox'
print("WARNING: Prophesee Dataset Toolbox could not be found!")
print(" Only Prophesee DVS demo will not run properly.")
print(f" Please install it from {src}")


class _PropheseeAutomotive(Dataset):
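    # Internal loader: streams raw Prophesee .dat event files and the paired
    # _bbox.npy tracks via PSEELoader, binning events into delta_t-ms frames.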
def __init__(self,
root: str = '.',
delta_t: int = 1,
seq_len: int = 32,
events_ratio: float = 0.07,
randomize_seq: bool = False,
train: bool = False) -> None:
super().__init__()
self.cat_name = []
self.delta_t = delta_t * 1000
self.seq_len = seq_len
self.randomize_seq = randomize_seq
self.events_ratio_threshold = events_ratio

with open(root + os.sep + 'label_map_dictionary.json') as file:
data = json.load(file)
self.idx_map = {int(key) : value for key, value in data.items()}
            self.cat_name = list(data.values())

dataset = 'train' if train else 'val'
self.dataset_path = root + os.sep + dataset

td_files = [td_file
for td_file in os.listdir(self.dataset_path)
if td_file.endswith('.dat')]
self.videos = [PSEELoader(self.dataset_path + os.sep + td_file)
for td_file in td_files]
self.bbox_videos = [
PSEELoader(self.dataset_path + os.sep
+ td_file.split('_td.dat')[0]
+ '_bbox.npy') for td_file in td_files]

def validate_bbox(self, events, bbox):
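        # Keep a box only if the density of event pixels inside it exceeds
        # the configured events_ratio threshold.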
events_bbox = events[bbox['ymin']:bbox['ymax'],
bbox['xmin']:bbox['xmax']]
pixels_area = (bbox['xmax'] - bbox['xmin']) * \
(bbox['ymax'] - bbox['ymin'])
events_ratio = np.count_nonzero(events_bbox) / pixels_area
return events_ratio > self.events_ratio_threshold

def get_seq(self, video, bbox_video):
images = []
annotations = []
height, width = video.get_size()

while not video.done:
try:
events = video.load_delta_t(self.delta_t)
boxes = bbox_video.load_delta_t(self.delta_t)
            except (AssertionError, IndexError):
                # Stop on a failed read; `pass` would reuse stale events and
                # boxes, or raise NameError on the first iteration.
                break

min_box_diag = 60
min_box_side = 20
boxes = filter_boxes(boxes, int(1e5), min_box_diag, min_box_side)
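            # Rasterize the event slice into a binary two-channel frame:
            # channel 0 marks ON (p == 1) events, channel 1 OFF (p == 0).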

frame = np.zeros((height, width, 2), dtype=np.uint8)
valid = (events['x'] >= 0) & (events['x'] < width) & \
(events['y'] >= 0) & (events['y'] < height)
events = events[valid]
frame[events['y'][events['p'] == 1],
events['x'][events['p'] == 1], 0] = 1
frame[events['y'][events['p'] == 0],
events['x'][events['p'] == 0], 1] = 1

objects = []
size = {'height': height, 'width': width}
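            # Keep only boxes fully inside the frame; the first frame of a
            # sequence is additionally screened by event density
            # (validate_bbox) so sequences do not start on empty boxes.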

for idx in range(boxes.shape[0]):
if (int(boxes['w'][idx]) > 0) and (int(boxes['h'][idx]) > 0):
bndbox = {
'xmin': int(boxes['x'][idx]),
'ymin': int(boxes['y'][idx]),
'xmax': int(boxes['x'][idx])
+ int(boxes['w'][idx]),
'ymax': int(boxes['y'][idx])
+ int(boxes['h'][idx])}
name = self.idx_map[boxes['class_id'][idx]]
if (bndbox['xmax'] < width) and \
(bndbox['ymax'] < height) and \
(bndbox['xmin'] > 0) and (bndbox['ymin'] > 0):
if len(images) == 0:
if self.validate_bbox(frame, bndbox):
objects.append({'id': boxes['class_id'][idx],
'name': name,
'bndbox': bndbox})
else:
objects.append({'id': boxes['class_id'][idx],
'name': name,
'bndbox': bndbox})

if len(objects) == 0:
if len(annotations) == 0:
continue
annotations.append(annotations[-1])
else:
annotation = {'size': size, 'object': objects}
annotations.append({'annotation': annotation})

images.append(frame)

if len(images) >= self.seq_len:
break
return images, annotations

def get_name(self, index):
video = self.videos[index]
        return os.path.basename(video._file.name).split('_td.dat')[0]

def __getitem__(self, index: int) -> Tuple[torch.tensor, Dict[Any, Any]]:
video = self.videos[index]
bbox_video = self.bbox_videos[index]

if self.randomize_seq:
skip_time = (video.duration_s - 0.1) - \
((self.seq_len * self.delta_t) / 1000000)
            while True:
                try:
                    # Use a single random offset for both streams so events
                    # and bounding boxes stay time-aligned.
                    seek_us = skip_time * np.random.random() * 1000000
                    video.seek_time(seek_us)
                    bbox_video.seek_time(seek_us)
                    break
                except IndexError:
                    continue

images, annotations = self.get_seq(video, bbox_video)

if len(images) != self.seq_len or len(annotations) != self.seq_len:
video.reset()
bbox_video.reset()
images, annotations = self.get_seq(video, bbox_video)
return images, annotations

def __len__(self) -> int:
return len(self.videos)


class PropheseeAutomotive(Dataset):
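    # Public dataset: wraps _PropheseeAutomotive and adds resizing, tensor
    # conversion, and optional left-right flip augmentation.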
def __init__(self,
root: str = './',
delta_t: int = 1,
size: Tuple[Height, Width] = (448, 448),
train: bool = False,
seq_len: int = 32,
events_ratio: float = 0.07,
randomize_seq: bool = False,
augment_prob: float = 0.0) -> None:
super().__init__()
self.img_transform = transforms.Compose([
lambda x: resize_events_frame(x, size),
lambda x: torch.FloatTensor(x).permute([2, 0, 1])])
self.bb_transform = transforms.Compose([
lambda x: bbutils.resize_bounding_boxes(x, size),
])

self.datasets = [_PropheseeAutomotive(root=root,
delta_t=delta_t,
train=train,
events_ratio=events_ratio,
seq_len=seq_len,
randomize_seq=randomize_seq)]

self.classes = self.datasets[0].cat_name
self.idx_map = self.datasets[0].idx_map
self.augment_prob = augment_prob
self.seq_len = seq_len

def __getitem__(self, index) -> Tuple[torch.tensor, Dict[Any, Any]]:

dataset_idx = index // len(self.datasets[0])
index = index % len(self.datasets[0])
images, annotations = [], []
while (len(images) != self.seq_len) and \
(len(annotations) != self.seq_len):
images, annotations = self.datasets[dataset_idx][index]
            index = np.random.randint(0, len(self.datasets[0]))

# flip left right
if np.random.random() < self.augment_prob:
for idx in range(len(images)):
images[idx] = fliplr_events(images[idx])
annotations[idx] = bbutils.fliplr_bounding_boxes(
annotations[idx])

image = torch.cat([torch.unsqueeze(self.img_transform(img), -1)
for img in images], dim=-1)
annotations = [self.bb_transform(ann) for ann in annotations]

# [C, H, W, T], [bbox] * T
# list in time
return image, annotations

def __len__(self) -> int:
return sum([len(dataset) for dataset in self.datasets])
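
A minimal sketch of consuming the new dataset (paths and hyper-parameters are illustrative; `root` is assumed to hold `label_map_dictionary.json` plus `train`/`val` folders of `*_td.dat`/`*_bbox.npy` pairs, which is what the loader above expects):

```python
from torch.utils.data import DataLoader

from lava.lib.dl.slayer.object_detection.dataset import PropheseeAutomotive

# Illustrative arguments; adjust root/delta_t/seq_len for your setup.
dataset = PropheseeAutomotive(root='data/prophesee',  # hypothetical path
                              delta_t=1,              # ms of events per frame
                              size=(448, 448),
                              train=True,
                              seq_len=32,
                              randomize_seq=True,
                              augment_prob=0.5)

image, annotations = dataset[0]
print(image.shape)       # torch.Size([2, 448, 448, 32]) -> [C, H, W, T]
print(len(annotations))  # 32 annotation dicts, one per time step

# Annotations are nested dicts, so a custom collate_fn is needed to batch.
loader = DataLoader(dataset, batch_size=1, shuffle=True,
                    collate_fn=lambda batch: batch)
```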