-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpreprocessing.py
686 lines (589 loc) · 25.8 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Image preprocessing helpers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
from scipy import ndimage
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
def apply_with_random_selector(x, func, num_cases):
  """Applies `func(x, case)` for a single, randomly sampled `case`.

  TODO(coreylynch): add as a dependency, when slim or tensorflow/models are
  pip-installable.

  Source:
  https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py

  Args:
    x: input Tensor.
    func: Python function called as `func(tensor, case)`; `case` is a Python
      int.
    num_cases: Python int, number of cases; the selector is sampled from
      [0, num_cases).

  Returns:
    The merged result of the `func` branches. `func` is traced once per case,
    but the selector itself is sampled dynamically.
  """
  selector = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
  branch_outputs = []
  for case in range(num_cases):
    # Route the real `x` only to the branch whose case matches the selector
    # (element [1] of switch() is the output taken when the predicate holds).
    routed_x = control_flow_ops.switch(x, tf.equal(selector, case))[1]
    branch_outputs.append(func(routed_x, case))
  # merge() also returns the index of the live input; keep only the tensor.
  return control_flow_ops.merge(branch_outputs)[0]
def distorted_bounding_box_crop(image,
                                bbox,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.75, 1.33),
                                area_range=(0.05, 1.0),
                                max_attempts=100,
                                scope=None):
  """Crops `image` to a randomly distorted version of one of the bboxes.

  TODO(coreylynch): add as a dependency, when slim or tensorflow/models are
  pip-installable.

  Source:
  https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py

  See `tf.image.sample_distorted_bounding_box` for more documentation.

  Args:
    image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax]. If num_boxes is 0 the whole image is used.
    min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
      area of the image must contain at least this fraction of any bounding
      box supplied.
    aspect_ratio_range: An optional list of `floats`. The cropped area of the
      image must have an aspect ratio = width / height within this range.
    area_range: An optional list of `floats`. The cropped area of the image
      must contain a fraction of the supplied image within this range.
    max_attempts: An optional `int`. Number of attempts at generating a
      cropped region of the image of the specified constraints. After
      `max_attempts` failures, return the entire image.
    scope: Optional scope for name_scope.

  Returns:
    A tuple `(cropped_image, distorted_bbox)`: the 3-D cropped image Tensor
    and the sampled bounding box.
  """
  with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
    # Sample a new box that is a random distortion of the supplied box while
    # respecting the aspect-ratio, area and coverage constraints. With no box
    # supplied, the entire image acts as the box.
    crop_begin, crop_size, distorted_bbox = (
        tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=bbox,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True))

    # Cut the sampled window out of the image.
    cropped = tf.slice(image, crop_begin, crop_size)
    return cropped, distorted_bbox
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
  """Randomly perturbs the colors of an image Tensor.

  TODO(coreylynch): add as a dependency, when slim or tensorflow/models are
  pip-installable.

  Source:
  https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py

  The individual color ops do not commute, so the order in which they are
  applied matters. Rather than randomly permuting the ops, a fixed ordering is
  selected through `color_ordering`.

  Args:
    image: 3-D Tensor containing single image in [0, 1].
    color_ordering: Python int, a type of distortion (valid values: 0-3). In
      fast mode any value other than 0 selects saturation-then-brightness.
    fast_mode: Avoids slower ops (random_hue and random_contrast).
    scope: Optional scope for name_scope.

  Returns:
    3-D Tensor color-distorted image on range [0, 1].

  Raises:
    ValueError: if `fast_mode` is False and `color_ordering` is not 0, 1, 2
      or 3.
  """
  def _brightness(im):
    return tf.image.random_brightness(im, max_delta=32. / 255.)

  def _saturation(im):
    return tf.image.random_saturation(im, lower=0.5, upper=1.5)

  def _hue(im):
    return tf.image.random_hue(im, max_delta=0.2)

  def _contrast(im):
    return tf.image.random_contrast(im, lower=0.5, upper=1.5)

  with tf.name_scope(scope, 'distort_color', [image]):
    # Pick the sequence of ops to apply, in order.
    if fast_mode:
      if color_ordering == 0:
        pipeline = (_brightness, _saturation)
      else:
        pipeline = (_saturation, _brightness)
    elif color_ordering == 0:
      pipeline = (_brightness, _saturation, _hue, _contrast)
    elif color_ordering == 1:
      pipeline = (_saturation, _brightness, _contrast, _hue)
    elif color_ordering == 2:
      pipeline = (_contrast, _hue, _brightness, _saturation)
    elif color_ordering == 3:
      pipeline = (_hue, _saturation, _contrast, _brightness)
    else:
      raise ValueError('color_ordering must be in [0, 3]')

    for color_op in pipeline:
      image = color_op(image)

    # The random_* ops do not necessarily clamp.
    return tf.clip_by_value(image, 0.0, 1.0)
def crop_center(image):
  """Crops the centered square whose side is the image's shorter edge."""
  dims = tf.shape(image)
  rows, cols = dims[0], dims[1]
  side = tf.minimum(rows, cols)
  # Only the longer dimension gets a non-zero offset.
  top = tf.maximum(rows - cols, 0) // 2
  left = tf.maximum(cols - rows, 0) // 2
  return tf.image.crop_to_bounding_box(image, top, left, side, side)
def pad(image):
  """Pads the shorter dimension so the image becomes a centered square."""
  dims = tf.shape(image)
  rows, cols = dims[0], dims[1]
  side = tf.maximum(rows, cols)
  # Only the shorter dimension gets a non-zero offset.
  left = tf.maximum(rows - cols, 0) // 2
  top = tf.maximum(cols - rows, 0) // 2
  return tf.image.pad_to_bounding_box(image, top, left, side, side)
def pad_200(image):
  """Pads 200 pixels on the left and right, then crops the central square."""
  # Widen the image by 400 pixels, placing the original 200 pixels in.
  dims = tf.shape(image)
  widened = tf.image.pad_to_bounding_box(
      image, 0, 200, dims[0], dims[1] + 400)

  # Crop the centered square of the widened image.
  wide_dims = tf.shape(widened)
  rows, cols = wide_dims[0], wide_dims[1]
  side = tf.minimum(rows, cols)
  top = tf.maximum(rows - cols, 0) // 2
  left = tf.maximum(cols - rows, 0) // 2
  return tf.image.crop_to_bounding_box(widened, top, left, side, side)
def pad_crop_central(image, central_fraction=0.875):
  """Pads the image to a square, then keeps its central fraction.

  Args:
    image: 3-D image Tensor.
    central_fraction: Float, fraction passed to `tf.image.central_crop`
      (0.875 by default).

  Returns:
    The padded and centrally cropped image.
  """
  squared = pad(image)
  return tf.image.central_crop(squared, central_fraction=central_fraction)
def crop_image_by_strategy(image, cropping):
  """Crops an image according to a strategy defined in config.

  Args:
    image: 3-d image tensor.
    cropping: str, name of cropping strategy. One of 'crop_center', 'pad',
      'pad200' (also accepted as 'pad_200') or 'pad_crop_central'.

  Returns:
    image: cropped image.

  Raises:
    ValueError: When unknown cropping strategy is specified.
  """
  strategy_to_method = {
      'crop_center': crop_center,
      'pad': pad,
      'pad200': pad_200,
      # Alias spelled like the `pad_200` function, so either name works.
      'pad_200': pad_200,
      'pad_crop_central': pad_crop_central
  }
  tf.logging.info('Cropping strategy: %s.' % cropping)
  if cropping not in strategy_to_method:
    raise ValueError('Unknown cropping strategy: %s' % cropping)
  return strategy_to_method[cropping](image)
def scale_augment_crop(image, central_bbox, area_range, min_object_covered):
  """Takes a random square crop around a region of interest.

  Args:
    image: 3-d float tensor.
    central_bbox: Bounding box defining the central region of interest.
    area_range: Range of allowed areas for the augmented bounding box.
    min_object_covered: Constraint for the fraction of original image in
      augmented bounding box.

  Returns:
    distorted_image: The scaled, cropped image.
  """
  # An aspect ratio range of (1.0, 1.0) forces square crops.
  crop, _ = distorted_bounding_box_crop(
      image,
      central_bbox,
      min_object_covered=min_object_covered,
      aspect_ratio_range=(1.0, 1.0),
      area_range=area_range)
  # The dynamic slice loses the static channel dimension; restore it.
  crop.set_shape([None, None, 3])
  return crop
def scale_to_inception_range(image):
  """Maps an image from [0, 1] to the [-1, 1] range expected by inception."""
  # Guard the rescaling with checks that the input really lies in [0, 1].
  range_checks = [
      tf.assert_less_equal(tf.reduce_max(image), 1.),
      tf.assert_greater_equal(tf.reduce_min(image), 0.),
  ]
  with tf.control_dependencies(range_checks):
    centered = tf.subtract(image, 0.5)
    return tf.multiply(centered, 2.0)
def resize_image(image, height, width):
  """Bilinearly resizes a single image to `height` x `width`."""
  # resize_bilinear works on batches, so add and then drop a batch axis.
  batched = tf.expand_dims(image, 0)
  resized = tf.image.resize_bilinear(
      batched, [height, width], align_corners=False)
  return tf.squeeze(resized, [0])
def crop_or_pad(image, curr_height, curr_width, new, height=True, crop=True):
  """Centrally crops or pads one dimension of an image.

  Height is expected to be processed before width: the target height is
  always `new`, while the target width is `new` only when `height` is False.

  Args:
    image: 3-D float32 `Tensor` image.
    curr_height: Int, current height.
    curr_width: Int, current width.
    new: Int, new width or height.
    height: Boolean, True to adjust the height, False to adjust the width.
    crop: Boolean, True if we're cropping, False if we're padding.

  Returns:
    image: 3-D float32 `Tensor` image.
  """
  if height:
    half_diff = tf.abs(new - curr_height) // 2
    offset_y, offset_x = half_diff, 0
    # Width has not been processed yet, so leave it alone.
    target_width = curr_width
  else:
    half_diff = tf.abs(new - curr_width) // 2
    offset_y, offset_x = 0, half_diff
    target_width = new
  # Height is processed first, so it is always brought to the new size.
  target_height = new

  if crop:
    bounding_box_op = tf.image.crop_to_bounding_box
  else:
    bounding_box_op = tf.image.pad_to_bounding_box
  return bounding_box_op(
      image, offset_y, offset_x, target_height, target_width)
def get_central_bbox(min_side, new_size):
  """Gets the normalized bounding box of a centered square.

  The box describes a `min_side` x `min_side` square centered inside a
  `new_size` x `new_size` image. When both sizes are equal the box is
  [0, 0, 1, 1].

  Args:
    min_side: Int, size of smallest side in pixels.
    new_size: Int, side of the enclosing square image in pixels.

  Returns:
    bbox: A float32 `Tensor` of shape [1, 1, 4] holding the
      [ymin, xmin, ymax, xmax] coordinates of the central bounding box.
  """
  outer = tf.cast(new_size, tf.float32)
  inner = tf.cast(min_side, tf.float32)
  # Margin between the enclosing square and the centered square, per side.
  margin = (outer - inner) / 2
  low = margin / outer
  high = (inner + margin) / outer
  # The square is centered, so y and x share the same bounds.
  bbox = tf.stack([[[low, low, high, high]]])
  bbox.set_shape([1, 1, 4])
  return bbox
def pad_to_max(image, max_scale):
  """Pads/crops an image to a square of max_scale times the center crop area.

  E.g.: For an image with dimensions 1920x1080 and a max_scale of 1.5,
  returns a square image whose area is 1.5 * (1080x1080).

  Args:
    image: 3-D float32 `Tensor` image.
    max_scale: Float, maximum scale of the image, as a multiplier on the
      area of the central bounding box.

  Returns:
    A tuple `(image, bbox)`: the resized 3-D float32 `Tensor` image and the
    bounding box of the original central square within it.
  """
  dims = tf.shape(image)
  orig_height, orig_width = dims[0], dims[1]

  # Side of the target square: sqrt(max_scale * min_side^2).
  min_side = tf.cast(tf.minimum(orig_height, orig_width), tf.float32)
  new_side = tf.cast(tf.sqrt(max_scale * min_side * min_side), tf.int32)

  def _fit(current_image, current_size, along_height):
    """Crops when the dimension is too large, pads otherwise."""
    # pylint: disable=g-long-lambda
    return tf.cond(
        current_size >= new_side,
        lambda: crop_or_pad(
            current_image, orig_height, orig_width, new_side,
            height=along_height, crop=True),
        lambda: crop_or_pad(
            current_image, orig_height, orig_width, new_side,
            height=along_height, crop=False))

  # Height must be handled before width (see crop_or_pad).
  image = _fit(image, orig_height, True)
  image = _fit(image, orig_width, False)

  # Locate the original centered square inside the resized image.
  original_bounding_box = get_central_bbox(min_side, new_side)
  return image, original_bounding_box
def scale_up_augmentation(image, max_scale):
  """Randomly crops an area between 100% and max_scale of the center crop."""
  padded, central_bbox = pad_to_max(image, max_scale)
  # Allowed crop area as a fraction of the padded image: at most all of it,
  # at least original_area / (max_scale * original_area) == 1 / max_scale.
  area_range = (1.0 / max_scale, 1.0)
  # Scaling >100%: the full original crop must always stay in frame.
  min_object_covered = 1.0
  return scale_augment_crop(
      padded, central_bbox, area_range, min_object_covered)
def scale_down_augmentation(image, min_scale):
  """Randomly crops an area between min_scale and 100% of the center crop."""
  center = crop_center(image)
  # The whole center crop is the region of interest.
  whole_bbox = tf.constant(
      [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
  # Allowed crop area as a fraction of the center crop.
  area_range = (min_scale, 1.0)
  # min_scale also serves as the minimum covered fraction of the box.
  return scale_augment_crop(center, whole_bbox, area_range, min_scale)
def augment_image_scale(image, min_scale, max_scale, p_scale_up):
  """Training time scale augmentation.

  Args:
    image: 3-d float tensor representing image.
    min_scale: minimum scale augmentation allowed, as a fraction of the
      central min_side * min_side area of the original image. Must be <= 1.0.
    max_scale: maximum scale augmentation allowed, as a fraction of the
      central min_side * min_side area of the original image. Must be >= 1.0.
    p_scale_up: Fraction of images scaled up. Only used when both
      min_scale < 1.0 and max_scale > 1.0.

  Returns:
    image: The scale-augmented image.
  """
  assert max_scale >= 1.0
  assert min_scale <= 1.0
  if min_scale == max_scale == 1.0:
    tf.logging.info('Min and max scale are 1.0, don`t augment.')
    # Do no augmentation, just crop the center.
    return crop_center(image)
  elif (max_scale == 1.0) and (min_scale < 1.0):
    tf.logging.info('Max scale is 1.0, only scale down augment.')
    # Always do <100% augmentation.
    return scale_down_augmentation(image, min_scale)
  elif (min_scale == 1.0) and (max_scale > 1.0):
    tf.logging.info('Min scale is 1.0, only scale up augment.')
    # Always do >100% augmentation.
    return scale_up_augmentation(image, max_scale)
  else:
    tf.logging.info('Sample both augmentations.')
    # Choose to scale image up or down. `rn` is uniform in [0, 1), so
    # `rn < p_scale_up` holds with probability p_scale_up, making p_scale_up
    # the fraction of images that are scaled up.
    rn = tf.random_uniform([], minval=0., maxval=1., dtype=tf.float32)
    image = tf.cond(rn < p_scale_up,
                    lambda: scale_up_augmentation(image, max_scale),
                    lambda: scale_down_augmentation(image, min_scale))
    return image
def decode_image(image_str):
  """Decodes a jpeg-encoded string into a 3-channel float image in [0,1]."""
  decoded = tf.image.decode_jpeg(image_str, channels=3)
  if decoded.dtype == tf.float32:
    return decoded
  # convert_image_dtype rescales integer pixel values into [0,1].
  return tf.image.convert_image_dtype(decoded, dtype=tf.float32)
def decode_images(image_strs):
  """Maps `decode_image` over a tensor of jpeg strings (float32 output)."""
  decoded_images = tf.map_fn(decode_image, image_strs, dtype=tf.float32)
  return decoded_images
def preprocess_training_images(images, height, width, min_scale, max_scale,
                               p_scale_up, aug_color=True, fast_mode=True):
  """Preprocesses a batch of images for training.

  Every image gets training-time scale and (optionally) color augmentation,
  is resized, and is scaled to the [-1,1] range expected by pre-trained
  Inception nets.

  Args:
    images: A 4-D float32 `Tensor` holding raw images to be preprocessed.
    height: Int, height in pixels to resize image to.
    width: Int, width in pixels to resize image to.
    min_scale: Float, minimum scale augmentation allowed, as a fraction of the
      central min_side * min_side area of the original image.
    max_scale: Float, maximum scale augmentation allowed, as a fraction of the
      central min_side * min_side area of the original image.
    p_scale_up: Float, fraction of images scaled up.
    aug_color: Whether or not to do color augmentation.
    fast_mode: Boolean, avoids slower ops (random_hue and random_contrast).

  Returns:
    preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images.
  """
  # pylint: disable=g-long-lambda
  return tf.map_fn(
      lambda im: preprocess_training_image(
          im, height, width, min_scale, max_scale, p_scale_up,
          aug_color=aug_color, fast_mode=fast_mode),
      images)
def preprocess_training_image(
    image, height, width, min_scale, max_scale, p_scale_up,
    aug_color=True, fast_mode=True):
  """Preprocesses a single image for training.

  Args:
    image: A 3-d float tensor representing the image.
    height: Target image height.
    width: Target image width.
    min_scale: Minimum scale of bounding box (as a percentage of full
      bounding box) used to crop image during scale augmentation.
    max_scale: Maximum scale of bounding box (as a percentage of full
      bounding box) used to crop image during scale augmentation.
    p_scale_up: Fraction of images to scale >100%.
    aug_color: Whether or not to do color augmentation.
    fast_mode: Avoids slower ops (random_hue and random_contrast).

  Returns:
    scaled_image: A scaled image tensor in the range [-1,1].
  """
  # Random scale augmentation, then bilinear resize to the target size.
  augmented = augment_image_scale(image, min_scale, max_scale, p_scale_up)
  resized = resize_image(augmented, height, width)

  if aug_color:
    def _distort(x, ordering):
      return distort_color(x, ordering, fast_mode=fast_mode)

    # Sample one of four color op orderings per image.
    resized = apply_with_random_selector(resized, _distort, num_cases=4)

  # Scale to [-1,1] range as expected by inception.
  return scale_to_inception_range(resized)
def preprocess_test_image(image, height, width, crop_strategy):
  """Preprocesses a single image for test/inference.

  Args:
    image: A 3-d float tensor representing the image.
    height: Target image height.
    width: Target image width.
    crop_strategy: String, name of the strategy used to crop test-time images,
      as understood by `crop_image_by_strategy` (e.g. 'crop_center', 'pad',
      'pad200', 'pad_crop_central').

  Returns:
    scaled_image: A scaled image tensor in the range [-1,1].
  """
  cropped = crop_image_by_strategy(image, crop_strategy)
  resized = resize_image(cropped, height, width)
  # Inception expects inputs in [-1,1].
  return scale_to_inception_range(resized)
def preprocess_test_images(images, height, width, crop_strategy):
  """Applies test-time preprocessing to a batch of images or a single image.

  Each image is cropped (given a named strategy for doing so), resized, and
  scaled to the [-1,1] range expected by pre-trained Inception nets.

  Args:
    images: A 4-D float32 `Tensor` holding raw images to be preprocessed; a
      3-D `Tensor` is treated as one image.
    height: Int, height in pixels to resize image to.
    width: Int, width in pixels to resize image to.
    crop_strategy: String, name of the strategy used to crop test-time images,
      as understood by `crop_image_by_strategy` (e.g. 'crop_center', 'pad',
      'pad200', 'pad_crop_central').

  Returns:
    preprocessed_images: A float32 `Tensor` holding the preprocessed image(s).
  """
  def _one_image(im):
    return preprocess_test_image(im, height, width, crop_strategy)

  if len(images.shape) != 3:
    # Batched input: preprocess every image independently.
    return tf.map_fn(_one_image, images)
  return _one_image(images)
def preprocess_images(
    images, is_training, height, width,
    min_scale=1.0, max_scale=1.0, p_scale_up=0.0,
    aug_color=True, fast_mode=True,
    crop_strategy='pad_crop_central'):
  """Preprocesses a batch of images for either training or test.

  Args:
    images: A 4-D float32 `Tensor` holding raw images to be preprocessed.
    is_training: Boolean, whether to preprocess them for training or test.
    height: Int, height in pixels to resize image to.
    width: Int, width in pixels to resize image to.
    min_scale: Float, minimum scale augmentation allowed, as a fraction of the
      central min_side * min_side area of the original image (training only).
    max_scale: Float, maximum scale augmentation allowed, as a fraction of the
      central min_side * min_side area of the original image (training only).
    p_scale_up: Float, fraction of images scaled up (training only).
    aug_color: Whether or not to do color augmentation (training only).
    fast_mode: Boolean, avoids slower ops (random_hue and random_contrast).
    crop_strategy: String, name of the strategy used to crop test-time images,
      as understood by `crop_image_by_strategy` (e.g. 'crop_center', 'pad',
      'pad200', 'pad_crop_central').

  Returns:
    preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images.
  """
  if not is_training:
    return preprocess_test_images(images, height, width, crop_strategy)
  return preprocess_training_images(
      images, height, width, min_scale, max_scale, p_scale_up,
      aug_color=aug_color, fast_mode=fast_mode)
def cv2rotateimage(image, angle):
  """Efficient rotation if 90 degrees rotations, slow otherwise.

  Not a tensorflow function, using cv2 and scipy on numpy arrays.

  Positive angles rotate clockwise, following the transpose/flip fast paths.

  Args:
    image: a numpy array with shape [height, width, channels].
    angle: the rotation angle in degrees in the range [-180, 180].

  Returns:
    The rotated image. For angles that are not a multiple of 90 degrees the
    output is enlarged to hold the whole rotated input.
  """
  # Limit angle to [-180, 180] degrees.
  assert -180 <= angle <= 180
  if angle == 0:
    return image
  # Efficient rotations.
  if angle == -90:
    # Transpose + vertical flip == 90 degrees counter-clockwise.
    image = cv2.transpose(image)
    image = cv2.flip(image, 0)
  elif angle == 90:
    # Transpose + horizontal flip == 90 degrees clockwise.
    image = cv2.transpose(image)
    image = cv2.flip(image, 1)
  elif angle == 180 or angle == -180:
    image = cv2.flip(image, 0)
    image = cv2.flip(image, 1)
  else:  # Slow rotation.
    # scipy rotates counter-clockwise for positive angles, so negate to stay
    # consistent with the clockwise-positive fast paths above.
    image = ndimage.rotate(image, -angle)
  return image
def cv2resizeminedge(image, min_edge_size):
  """Resizes so the smallest edge equals min_edge_size, keeping proportions.

  Args:
    image: array exposing `.shape` as (height, width, ...), passed to
      `cv2.resize`.
    min_edge_size: non-negative target length of the smallest edge.

  Returns:
    The image resized with `cv2.INTER_AREA` interpolation.
  """
  assert min_edge_size >= 0
  rows = image.shape[0]
  cols = image.shape[1]
  if rows > cols:
    # Width is the smallest edge; scale the height by the same factor.
    target_cols = min_edge_size
    target_rows = int(rows * target_cols / float(cols))
  else:
    # Height is the smallest edge (or the image is square).
    target_rows = min_edge_size
    target_cols = int(cols * target_rows / float(rows))
  # cv2.resize takes the output size as (width, height).
  return cv2.resize(image, (target_cols, target_rows),
                    interpolation=cv2.INTER_AREA)
def shapestring(array):
  """Returns a compact string describing shape of an array.

  Args:
    array: any object with an iterable `.shape` attribute.

  Returns:
    The dimensions joined by 'x', e.g. '2x3x4'; an empty string when the
    shape has no dimensions.
  """
  return 'x'.join(str(dim) for dim in array.shape)
def unscale_jpeg_encode(ims):
  """Unscales pixel values and jpeg encodes preprocessed images.

  Args:
    ims: A 4-D float32 `Tensor` holding preprocessed images.

  Returns:
    im_strings: A 1-D string `Tensor` holding images that have been unscaled
      (reversing the inception [-1,1] scaling), and jpeg encoded.
  """
  # Undo the inception scaling: [-1,1] -> [0,1] -> [0,255]. A fresh value is
  # built instead of using augmented assignment so that an array-like input
  # owned by the caller is never modified in place.
  unscaled = (ims / 2.0 + 0.5) * 255.0
  pixels = tf.cast(tf.clip_by_value(unscaled, 0, 255), tf.uint8)
  im_strings = tf.map_fn(
      lambda x: tf.image.encode_jpeg(x, format='rgb', quality=100),
      pixels, dtype=tf.string)
  return im_strings