Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add command-line option --imgdir to process all images in a directory. #361

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions convert_voc_annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import argparse
import xml.etree.ElementTree as ET

# Object categories to export. A box's class id in train.txt is the index of
# its category name in this list (see convert_annotation), so order matters.
# To export a subset only, shorten the list, e.g. classes = ["bus"].
classes = [
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]

def scan_xml_files(dir):
    """Return the paths of the XML annotation files directly inside ``dir``.

    The extension check is case-insensitive (``.xml``, ``.XML``, ``.Xml``, ...)
    and the result is sorted by file name, so repeated runs over the same
    directory list the files in the same order. Sub-directories are not
    searched.

    Args:
        dir: Directory to scan.

    Returns:
        A list of ``os.path.join(dir, name)`` strings, one per matching entry.
    """
    return [
        os.path.join(dir, name)
        for name in sorted(os.listdir(dir))
        if name.lower().endswith(".xml")
    ]

def convert_annotation(file, class_names=None):
    """Extract the usable bounding boxes from one VOC-style XML annotation.

    Every ``<object>`` element is inspected. Objects whose ``<name>`` is not
    in ``class_names``, or whose ``<difficult>`` flag is 1, are skipped.

    Args:
        file: Path of the XML annotation file.
        class_names: Sequence of category names; a box's class id is the
            index of its name in this sequence. Defaults to the module-level
            ``classes`` list.

    Returns:
        A list of strings, one per kept object, each formatted as
        ``" xmin,ymin,xmax,ymax,class_id"``. The leading space lets the
        strings be concatenated directly after an image path. The list is
        empty when no object qualifies.
    """
    if class_names is None:
        class_names = classes

    # Passing the path lets ElementTree open *and close* the file itself,
    # instead of leaking a handle from a bare open().
    root = ET.parse(file).getroot()
    write_lines = []

    for obj in root.iter('object'):
        # findtext returns None for a missing <name>; None is never in
        # class_names, so such objects are skipped instead of crashing.
        cls = obj.findtext('name')
        # A missing or empty <difficult> tag is treated as 0 (not difficult).
        difficult = (obj.findtext('difficult') or '0').strip()
        if cls not in class_names or int(difficult) == 1:
            continue

        cls_id = class_names.index(cls)
        xmlbox = obj.find('bndbox')
        # Going through float() accepts coordinates written as "12.0";
        # any fractional part is truncated toward zero.
        coords = [
            int(float(xmlbox.findtext(tag)))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ]
        fields = [str(value) for value in coords] + [str(cls_id)]
        write_lines.append(" " + ",".join(fields))

    return write_lines


def create_train_txt(input, imgdir, output_path='train.txt'):
    """Write a training list built from a directory of XML annotations.

    Each annotation file that yields at least one box produces one line:
    ``<image path> xmin,ymin,xmax,ymax,class_id ...``. The image path is
    ``imgdir`` joined with the annotation's file name, its extension replaced
    by ``.jpg``. Annotation files without any usable box are left out.

    Args:
        input: Directory containing the XML annotation files.
        imgdir: Directory used to build the image paths. It is not read;
            the images are not checked for existence.
        output_path: File to write. Defaults to ``train.txt`` in the current
            working directory.

    NOTE: fields are space-separated, so an image path containing spaces
    will not survive a plain ``line.split()`` by whoever reads this file.
    """
    lines_to_write = []

    for xml_path in scan_xml_files(input):
        box_fields = convert_annotation(xml_path)
        if not box_fields:
            continue

        # Swap only the extension. A plain str.replace('xml', 'jpg') would
        # also rewrite "xml" inside the name and would miss ".XML" files.
        stem = os.path.splitext(os.path.basename(xml_path))[0]
        img_path = os.path.join(imgdir, stem + '.jpg')

        # Each box field already starts with a space, so simple
        # concatenation gives "path box box ...".
        lines_to_write.append(img_path + ''.join(box_fields))

    with open(output_path, 'w') as the_file:
        the_file.write('\n'.join(lines_to_write))


if __name__ == '__main__':
    # Only parse the command line when run as a script, so that importing
    # this module to reuse its functions neither reads sys.argv nor exits.
    parser = argparse.ArgumentParser()
    # -t: directory holding the XML annotation files (optional).
    parser.add_argument("-t", dest="input_dir", action="store", type=str,
                        required=False, help="directory of annotations",
                        default='./image/')
    # -i: directory of the matching images (required); it is only used to
    # build the image paths written to train.txt.
    parser.add_argument("-i", dest="image_dir", action="store", type=str,
                        required=True, help="directory of image files")
    args = parser.parse_args()
    create_train_txt(args.input_dir, args.image_dir)
14 changes: 9 additions & 5 deletions yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def generate(self):
score_threshold=self.score, iou_threshold=self.iou)
return boxes, scores, classes

def detect_image(self, image):
def detect_image(self, image, single_image=True, output=list()):
start = timer()

if self.model_image_size != (None, None):
Expand All @@ -112,7 +112,7 @@ def detect_image(self, image):
boxed_image = letterbox_image(image, new_image_size)
image_data = np.array(boxed_image, dtype='float32')

print(image_data.shape)
if single_image:print(image_data.shape)
image_data /= 255.
image_data = np.expand_dims(image_data, 0) # Add batch dimension.

Expand All @@ -124,7 +124,7 @@ def detect_image(self, image):
K.learning_phase(): 0
})

print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
if single_image:print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
Expand All @@ -144,7 +144,11 @@ def detect_image(self, image):
left = max(0, np.floor(left + 0.5).astype('int32'))
bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
print(label, (left, top), (right, bottom))
if single_image:
print(label, (left, top), (right, bottom))
else:
output_str='{} {} {} {} {}'.format(label, left, top, right, bottom)
output.append(output_str)

if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
Expand All @@ -163,7 +167,7 @@ def detect_image(self, image):
del draw

end = timer()
print(end - start)
if single_image:print(end - start)
return image

def close_session(self):
Expand Down
6 changes: 3 additions & 3 deletions yolo3/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def yolo_eval(yolo_outputs,
iou_threshold=.5):
"""Evaluate YOLO model on given input and return filtered boxes."""
num_layers = len(yolo_outputs)
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]] # default setting
input_shape = K.shape(yolo_outputs[0])[1:3] * 32
boxes = []
box_scores = []
Expand Down Expand Up @@ -247,7 +247,7 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
'''
assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
num_layers = len(anchors)//3 # default setting
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]

true_boxes = np.array(true_boxes, dtype='float32')
input_shape = np.array(input_shape, dtype='int32')
Expand Down Expand Up @@ -361,7 +361,7 @@ def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
num_layers = len(anchors)//3 # default setting
yolo_outputs = args[:num_layers]
y_true = args[num_layers:]
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]
input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
loss = 0
Expand Down
96 changes: 73 additions & 23 deletions yolo3/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@

from PIL import Image
import numpy as np
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
import cv2

def compose(*funcs):
"""Compose arbitrarily many functions, evaluated left to right.

Reference: https://mathieularose.com/function-composition-in-python/
"""
# return lambda x: reduce(lambda v, f: f(v), funcs, x)
Expand Down Expand Up @@ -36,8 +35,11 @@ def rand(a=0, b=1):
def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5, proc_img=True):
'''random preprocessing for real-time data augmentation'''
line = annotation_line.split()
image = Image.open(line[0])
iw, ih = image.size

# numpy array: BGR, 0-255
image = cv2.imread(line[0])
# height, width, channel
ih, iw, _ = image.shape
h, w = input_shape
box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])

Expand All @@ -50,9 +52,13 @@ def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jit
dy = (h-nh)//2
image_data=0
if proc_img:
image = image.resize((nw,nh), Image.BICUBIC)
# resize
image = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_AREA)
# convert into PIL Image object
image = Image.fromarray(image[:, :, ::-1])
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
# convert into numpy array: RGB, 0-1
image_data = np.array(new_image)/255.

# correct boxes
Expand All @@ -75,40 +81,84 @@ def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jit
else:
nw = int(scale*w)
nh = int(nw/new_ar)
image = image.resize((nw,nh), Image.BICUBIC)

# resize
image = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_AREA)
# convert into PIL Image object
image = Image.fromarray(image[:, :, ::-1])

# place image
dx = int(rand(0, w-nw))
dy = int(rand(0, h-nh))
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image = new_image

# flip image or not
flip = rand()<.5
if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
# convert into numpy array: BGR, 0-255
image = np.asarray(new_image)[:, :, ::-1]

# horizontal flip (faster than cv2.flip())
h_flip = rand() < 0.5
if h_flip:
image = image[:, ::-1]

# vertical flip
v_flip = False#rand() < 0.5
if v_flip:
image = image[::-1]

# rotation augment
is_rot = False
if is_rot:
right = rand() < 0.5
if right:
image = image.transpose(1, 0, 2)[:, ::-1]
else:
image = image.transpose(1, 0, 2)[::-1]

# distort image
hue = rand(-hue, hue)
img_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
H = img_hsv[:, :, 0].astype(np.float32)
S = img_hsv[:, :, 1].astype(np.float32)
V = img_hsv[:, :, 2].astype(np.float32)

hue = rand(-hue, hue) * 179
H += hue
np.clip(H, a_min=0, a_max=179, out=H)

sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
S *= sat
np.clip(S, a_min=0, a_max=255, out=S)

val = rand(1, val) if rand()<.5 else 1/rand(1, val)
x = rgb_to_hsv(np.array(image)/255.)
x[..., 0] += hue
x[..., 0][x[..., 0]>1] -= 1
x[..., 0][x[..., 0]<0] += 1
x[..., 1] *= sat
x[..., 2] *= val
x[x>1] = 1
x[x<0] = 0
image_data = hsv_to_rgb(x) # numpy array, 0 to 1
V *= val
np.clip(V, a_min=0, a_max=255, out=V)

img_hsv[:, :, 0] = H.astype(np.uint8)
img_hsv[:, :, 1] = S.astype(np.uint8)
img_hsv[:, :, 2] = V.astype(np.uint8)

# convert into numpy array: RGB, 0-1
image_data = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB) / 255.0

# correct boxes
box_data = np.zeros((max_boxes,5))
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
if flip: box[:, [0,2]] = w - box[:, [2,0]]
if h_flip:
box[:, [0,2]] = w - box[:, [2,0]]
if v_flip:
box[:, [1,3]] = h - box[:, [3,1]]
if is_rot:
if right:
tmp = box[:, [0, 2]]
box[:, [0,2]] = h - box[:, [3,1]]
box[:, [1,3]] = tmp
else:
tmp = box[:, [2, 0]]
box[:, [0,2]] = box[:, [1,3]]
box[:, [1,3]] = w - tmp

box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
Expand All @@ -118,4 +168,4 @@ def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jit
if len(box)>max_boxes: box = box[:max_boxes]
box_data[:len(box)] = box

return image_data, box_data
return image_data, box_data
59 changes: 56 additions & 3 deletions yolo_video.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
import argparse
import os
from yolo import YOLO, detect_video
from PIL import Image

Expand All @@ -16,6 +17,42 @@ def detect_img(yolo):
r_image.show()
yolo.close_session()

def get_image_files(dir):
    """Return the paths of the PNG/JPEG images directly inside ``dir``.

    Entries are selected by extension only (``.png``, ``.jpg``, ``.jpeg``,
    case-insensitive); file contents are not inspected and sub-directories
    are not searched. The result is sorted by file name so images are
    processed in a reproducible order.

    Args:
        dir: Directory to scan.

    Returns:
        A list of ``os.path.join(dir, name)`` strings, one per matching entry.
    """
    image_exts = (".png", ".jpg", ".jpeg")
    return [
        os.path.join(dir, name)
        for name in sorted(os.listdir(dir))
        if name.lower().endswith(image_exts)
    ]

def detect_imgdir(yolo, dir, output_txt=False):
    """Run detection on every image in ``dir`` and store results in ``dir/out``.

    Images that cannot be opened are reported and skipped. For each remaining
    image, either the annotated image or a text file of detections is written
    under the same base name.

    Args:
        yolo: Detector exposing ``detect_image(image, single_image=...,
            output=...)`` and ``close_session()``.
        dir: Directory whose images (as listed by ``get_image_files``) are
            processed. The ``out`` sub-directory is created if missing.
        output_txt: When False, save the image returned by ``detect_image``.
            When True, write ``<name>.txt`` instead, holding one line per
            string the detector appended to ``output``.
    """
    img_files = get_image_files(dir)
    save_dir = os.path.join(dir, 'out')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    try:
        for img in img_files:
            try:
                image = Image.open(img)
            except Exception:
                # Best effort: skip unreadable files. Unlike a bare
                # ``except:``, this still lets Ctrl-C stop the batch.
                print('Open Error! {}'.format(img))
                continue

            basename = os.path.basename(img)  # e.g. 123.jpg
            # A fresh list per image: detect_image appends its detection
            # strings to whatever list it is given.
            detections = list()
            r_image = yolo.detect_image(image, single_image=False,
                                        output=detections)

            if not output_txt:
                fullpath = os.path.join(save_dir, basename)
                # Let PIL choose the format from the file extension, so a
                # .png input is written as a real PNG rather than as JPEG
                # data under a .png name.
                r_image.save(fullpath)
                print('save {}'.format(fullpath))
            else:
                txt_file = os.path.splitext(basename)[0] + '.txt'  # e.g. 123.txt
                txt_fullpath = os.path.join(save_dir, txt_file)
                with open(txt_fullpath, 'w') as the_file:
                    the_file.write('\n'.join(detections))
    finally:
        # Release the session even if detection or saving raised part-way.
        yolo.close_session()

FLAGS = None

if __name__ == '__main__':
Expand All @@ -25,17 +62,17 @@ def detect_img(yolo):
Command line options
'''
parser.add_argument(
'--model', type=str,
'--model_path', type=str,
help='path to model weight file, default ' + YOLO.get_defaults("model_path")
)

parser.add_argument(
'--anchors', type=str,
'--anchors_path', type=str,
help='path to anchor definitions, default ' + YOLO.get_defaults("anchors_path")
)

parser.add_argument(
'--classes', type=str,
'--classes_path', type=str,
help='path to class definitions, default ' + YOLO.get_defaults("classes_path")
)

Expand All @@ -48,6 +85,16 @@ def detect_img(yolo):
'--image', default=False, action="store_true",
help='Image detection mode, will ignore all positional arguments'
)

parser.add_argument(
'--imgdir', type=str, default='',
help='Image dir detection mode, will ignore all positional arguments'
)

parser.add_argument(
'--txt', default=False, action="store_true",
help='Image dir detection will output txt files'
)
'''
Command line positional arguments -- for video detection mode
'''
Expand All @@ -71,6 +118,12 @@ def detect_img(yolo):
if "input" in FLAGS:
print(" Ignoring remaining command line arguments: " + FLAGS.input + "," + FLAGS.output)
detect_img(YOLO(**vars(FLAGS)))

elif os.path.isdir(FLAGS.imgdir):
print("Image directory mode")
if "input" in FLAGS:
print(" Ignoring remaining command line arguments: " + FLAGS.input + "," + FLAGS.output)
detect_imgdir(YOLO(**vars(FLAGS)), FLAGS.imgdir, FLAGS.txt)
elif "input" in FLAGS:
detect_video(YOLO(**vars(FLAGS)), FLAGS.input, FLAGS.output)
else:
Expand Down