Commit 44869b4: update

felixdittrich92 committed May 8, 2024
1 parent 4d9e0d9, commit 44869b4
Showing 8 changed files with 53 additions and 93 deletions.
2 changes: 1 addition & 1 deletion onnxtr/contrib/base.py
@@ -101,5 +101,5 @@ def __call__(self, inputs: List[np.ndarray]) -> Any:
             np.array([self.preprocess(img) for img in batch], dtype=np.float32) for batch in batched_inputs
         ]

-        outputs = [self.run(None, {model_inputs[0].name: batch}) for batch in processed_batches]
+        outputs = [self.session.run(None, {model_inputs[0].name: batch}) for batch in processed_batches]
         return self.postprocess(outputs, batched_inputs)
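The fix above routes inference through the underlying ONNX Runtime session instead of the wrapper's own `run`. For reference, a minimal standalone sketch of that call pattern, assuming `self.session` is an `onnxruntime.InferenceSession` (the model path and batch shape below are made up for illustration):

    import numpy as np
    import onnxruntime as ort

    # Create a session and inspect the graph's declared inputs (hypothetical path)
    session = ort.InferenceSession("model.onnx")
    model_inputs = session.get_inputs()

    # Run one preprocessed float32 batch; passing None fetches all outputs
    batch = np.random.rand(2, 3, 256, 256).astype(np.float32)
    outputs = session.run(None, {model_inputs[0].name: batch})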
2 changes: 1 addition & 1 deletion onnxtr/models/classification/models/mobilenet.py
@@ -23,7 +23,7 @@
         "std": (0.299, 0.296, 0.301),
         "input_shape": (3, 256, 256),
         "classes": [0, -90, 180, 90],
-        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/mobilenet_v3_small_crop_orientation-f0847a18.pt&src=0",
+        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/mobilenet_v3_small_crop_orientation-5620cf7e.onnx",
     },
     "mobilenet_v3_small_page_orientation": {
         "mean": (0.694, 0.695, 0.693),
19 changes: 1 addition & 18 deletions onnxtr/models/classification/zoo.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

-from typing import Any, Dict, List
+from typing import Any, List

 from .. import classification
 from ..preprocessor import PreProcessor
@@ -13,23 +13,6 @@

 ORIENTATION_ARCHS: List[str] = ["mobilenet_v3_small_crop_orientation", "mobilenet_v3_small_page_orientation"]

-default_cfgs: Dict[str, Dict[str, Any]] = {
-    "mobilenet_v3_small_crop_orientation": {
-        "mean": (0.694, 0.695, 0.693),
-        "std": (0.299, 0.296, 0.301),
-        "input_shape": (3, 256, 256),
-        "classes": [0, -90, 180, 90],
-        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/mobilenet_v3_small_crop_orientation-f0847a18.pt&src=0",  # TODO
-    },
-    "mobilenet_v3_small_page_orientation": {
-        "mean": (0.694, 0.695, 0.693),
-        "std": (0.299, 0.296, 0.301),
-        "input_shape": (3, 512, 512),
-        "classes": [0, -90, 180, 90],
-        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/mobilenet_v3_small_page_orientation-8e60325c.pt&src=0",  # TODO
-    },
-}


 def _orientation_predictor(arch: str, **kwargs: Any) -> OrientationPredictor:
     if arch not in ORIENTATION_ARCHS:
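This drops the `default_cfgs` dictionary that duplicated the per-model configuration and still pointed at the old doctr-static .pt weights (both marked TODO); the zoo presumably relies on the configs defined alongside each model, such as the `mobilenet_v3_small_crop_orientation` entry updated above.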
@@ -51,15 +51,14 @@ class DBNet(Engine):
     def __init__(
         self,
         model_path,
-        bin_thresh: float = 0.1,
+        bin_thresh: float = 0.3,
         box_thresh: float = 0.1,
         assume_straight_pages: bool = True,
         cfg: Optional[Dict[str, Any]] = None,
     ) -> None:
         super().__init__(url=model_path)
         self.cfg = cfg
         self.assume_straight_pages = assume_straight_pages
-        print(f"ASSUME STRAIGHT PAGES: {assume_straight_pages}")
         self.postprocessor = GeneralDetectionPostProcessor(
             assume_straight_pages=self.assume_straight_pages, bin_thresh=bin_thresh, box_thresh=box_thresh
         )
@@ -78,12 +77,6 @@ def __call__(
         if return_model_output:
             out["out_map"] = prob_map

-        print(prob_map.shape)
-        import cv2
-
-        cv2.imwrite("prob_map.jpg", prob_map[0, 0, :, :] * 255)
-        print(prob_map)
-
         out["preds"] = [
             dict(zip(["words"], preds)) for preds in self.postprocessor(np.transpose(prob_map, (0, 2, 3, 1)))
         ]
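The two hunks above raise DBNet's default `bin_thresh` from 0.1 to 0.3 and strip the debug prints and the prob-map dump to disk. As a toy illustration of what `bin_thresh` controls (not OnnxTR's actual postprocessing code): the probability map is binarized before text contours are extracted, so a higher threshold keeps only pixels the model is more confident about:

    import numpy as np

    prob_map = np.array([[0.05, 0.20],
                         [0.35, 0.90]])
    bin_map = (prob_map >= 0.3).astype(np.uint8)  # bin_thresh = 0.3
    print(bin_map)  # [[0 0]
                    #  [1 1]]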
4 changes: 2 additions & 2 deletions onnxtr/models/detection/postprocessor/base.py
@@ -29,12 +29,12 @@ class GeneralDetectionPostProcessor(DetectionPostProcessor):

     def __init__(
         self,
-        bin_thresh: float = 0.3,
+        bin_thresh: float = 0.1,
         box_thresh: float = 0.1,
         assume_straight_pages: bool = True,
     ) -> None:
         super().__init__(box_thresh, bin_thresh, assume_straight_pages)
-        self.unclip_ratio = 1.0
+        self.unclip_ratio = 1.5

     def polygon_to_box(
         self,
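Alongside the swapped `bin_thresh` default, `self.unclip_ratio` grows from 1.0 to 1.5. In DBNet-style postprocessing, the binarized regions are expanded ("unclipped") before the final boxes are fit. A hedged sketch of the expansion distance as defined in the original DBNet paper, which may differ from OnnxTR's exact implementation:

    import numpy as np
    import cv2

    def unclip_distance(poly: np.ndarray, unclip_ratio: float) -> float:
        # D = A * ratio / L, with A the polygon area and L its perimeter
        area = cv2.contourArea(poly)
        perimeter = cv2.arcLength(poly, closed=True)
        return area * unclip_ratio / perimeter

    box = np.array([[0, 0], [100, 0], [100, 20], [0, 20]], dtype=np.float32).reshape(-1, 1, 2)
    print(unclip_distance(box, 1.5))  # 12.5: larger ratios grow boxes more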
3 changes: 1 addition & 2 deletions onnxtr/models/engine.py
@@ -27,7 +27,6 @@ def __init__(self, url: str, **kwargs: Any) -> None:
         )

     def run(self, inputs: np.ndarray) -> List[np.ndarray]:
-        # inputs = np.transpose(inputs, (0, 3, 1, 2)).astype(np.float32)  # TODO: Can we remove this maybe ?
+        inputs = np.transpose(inputs, (0, 3, 1, 2)).astype(np.float32)
         logits = self.runtime.run(["logits"], {"input": inputs})[0]
-        print(f"logits: {logits.shape}")
         return logits
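With the commented-out TODO variant and the logits print removed, `Engine.run` now always converts batches from NHWC to NCHW float32 before invoking the runtime. A quick illustrative check of that layout conversion:

    import numpy as np

    batch_nhwc = np.zeros((4, 512, 512, 3), dtype=np.uint8)  # stacked HWC images
    batch_nchw = np.transpose(batch_nhwc, (0, 3, 1, 2)).astype(np.float32)
    print(batch_nchw.shape)  # (4, 3, 512, 512)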
1 change: 0 additions & 1 deletion onnxtr/models/preprocessor/base.py
@@ -6,7 +6,6 @@
 import math
 from typing import Any, List, Tuple, Union

-import cv2
 import numpy as np

 from onnxtr.transforms import Normalize, Resize
106 changes: 46 additions & 60 deletions onnxtr/transforms/base.py
@@ -3,83 +3,77 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

-import math
-from typing import Tuple, Union, Optional
+from typing import Tuple, Union

+import cv2
 import numpy as np

 __all__ = ["Resize", "Normalize"]


-from torch.nn.functional import pad
-from torchvision.transforms import functional as F
-from torchvision.transforms import transforms as T
-
-import torch
-
-
-class Resize(T.Resize):  # TODO: Translate me correct !!!
+class Resize:
     """Resize the input image to the given size"""

     def __init__(
         self,
         size: Union[int, Tuple[int, int]],
-        interpolation=F.InterpolationMode.BILINEAR,
+        interpolation=cv2.INTER_LINEAR,
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
     ) -> None:
-        super().__init__(size, interpolation, antialias=True)
+        super().__init__()
+        self.size = size
+        self.interpolation = interpolation
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad

         if not isinstance(self.size, (int, tuple, list)):
             raise AssertionError("size should be either a tuple, a list or an int")

-    def forward(
+    def __call__(
         self,
-        img: torch.Tensor,
-        target: Optional[np.ndarray] = None,
-    ) -> Union[torch.Tensor, Tuple[torch.Tensor, np.ndarray]]:
-        img = np.transpose(img, (2, 0, 1))
-        print(img.shape)
-        img = torch.from_numpy(img)
-        if isinstance(self.size, int):
-            target_ratio = img.shape[-2] / img.shape[-1]
-        else:
-            target_ratio = self.size[0] / self.size[1]
-        actual_ratio = img.shape[-2] / img.shape[-1]
+        img: np.ndarray,
+    ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+        h, w = img.shape[:2]
+        sh, sw = self.size

-        if not self.preserve_aspect_ratio or (target_ratio == actual_ratio and (isinstance(self.size, (tuple, list)))):
-            return super().forward(img).numpy()
+        # Calculate aspect ratio of the image
+        aspect = w / h
+
+        # Compute scaling and padding sizes
+        if self.preserve_aspect_ratio:
+            if aspect > 1:  # Horizontal image
+                new_w = sw
+                new_h = int(sw / aspect)
+            elif aspect < 1:  # Vertical image
+                new_h = sh
+                new_w = int(sh * aspect)
+            else:  # Square image
+                new_h, new_w = sh, sw
+
+            img_resized = cv2.resize(img, (new_w, new_h), interpolation=self.interpolation)
+
+            # Calculate padding
+            pad_top = max((sh - new_h) // 2, 0)
+            pad_bottom = max(sh - new_h - pad_top, 0)
+            pad_left = max((sw - new_w) // 2, 0)
+            pad_right = max(sw - new_w - pad_left, 0)
+
+            # Pad the image
+            img_resized = cv2.copyMakeBorder(
+                img_resized, pad_top, pad_bottom, pad_left, pad_right, borderType=cv2.BORDER_CONSTANT, value=0
+            )
+
+            # Ensure the image matches the target size by resizing it again if needed
+            img_resized = cv2.resize(img_resized, (sw, sh), interpolation=self.interpolation)
         else:
-            # Resize
-            if isinstance(self.size, (tuple, list)):
-                if actual_ratio > target_ratio:
-                    tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
-                else:
-                    tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
-            elif isinstance(self.size, int):  # self.size is the longest side, infer the other
-                if img.shape[-2] <= img.shape[-1]:
-                    tmp_size = (max(int(self.size * actual_ratio), 1), self.size)
-                else:
-                    tmp_size = (self.size, max(int(self.size / actual_ratio), 1))
-
-            # Scale image
-            img = F.resize(img, tmp_size, self.interpolation, antialias=True)
-            raw_shape = img.shape[-2:]
-            if isinstance(self.size, (tuple, list)):
-                # Pad (inverted in pytorch)
-                _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
-                if self.symmetric_pad:
-                    half_pad = (math.ceil(_pad[1] / 2), math.ceil(_pad[3] / 2))
-                    _pad = (half_pad[0], _pad[1] - half_pad[0], half_pad[1], _pad[3] - half_pad[1])
-                img = pad(img, _pad)
-
-        return img.numpy()
+            # Resize the image without preserving aspect ratio
+            img_resized = cv2.resize(img, (sw, sh), interpolation=self.interpolation)
+
+        return img_resized

     def __repr__(self) -> str:
-        interpolate_str = self.interpolation.value
+        interpolate_str = self.interpolation
         _repr = f"output_size={self.size}, interpolation='{interpolate_str}'"
         if self.preserve_aspect_ratio:
             _repr += f", preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}"
@@ -107,15 +101,7 @@ def __call__(
         img: np.ndarray,
     ) -> np.ndarray:
         # Normalize image
-        print(self.mean, self.std)
-        print(img.shape)
-        img = np.transpose(img, (0, 3, 2, 1))
-        mean = np.array(self.mean).astype(img.dtype)
-        std = np.array(self.std).astype(img.dtype)
-        img = (img - mean) / std
-        img = np.transpose(img, (0, 3, 2, 1))
-        print(img.shape)
-        return img
+        return (img - np.array(self.mean).astype(img.dtype)) / np.array(self.std).astype(img.dtype)

     def __repr__(self) -> str:
         _repr = f"mean={self.mean}, std={self.std}"
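The rewritten transforms drop the torch/torchvision dependency entirely in favor of OpenCV and NumPy. A usage sketch under assumptions: an HWC float image, a tuple size, `Normalize` taking mean and std constructor arguments as its `__repr__` suggests, and illustrative mean/std values rather than the library's defaults:

    import numpy as np
    from onnxtr.transforms import Normalize, Resize

    resize = Resize((1024, 1024), preserve_aspect_ratio=True, symmetric_pad=True)
    normalize = Normalize(mean=(0.798, 0.785, 0.772), std=(0.264, 0.275, 0.287))

    img = np.random.rand(720, 1280, 3).astype(np.float32)  # HWC image
    resized = resize(img)            # (1024, 1024, 3), padded to keep the aspect ratio
    normalized = normalize(resized)  # mean/std broadcast over the channel axis
    print(resized.shape, normalized.shape)

Note that the new `__call__` unpacks `self.size` as a pair (`sh, sw = self.size`), so a plain int size, although still accepted by `__init__`, would not work here.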
