-
Notifications
You must be signed in to change notification settings - Fork 1
/
coco_to_manifest.py
126 lines (92 loc) · 3.8 KB
/
coco_to_manifest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import json
import datetime
from datetime import datetime
import boto3
import PIL.Image as Image
from pathlib import Path
# S3 location for images
s3_bucket = 'clothing-recognition'  # use the s3 bucket name
s3_key_path_manifest_file = 'manifest/'
s3_key_path_images = 'images/'
s3_path = 's3://' + s3_bucket + '/' + s3_key_path_images
s3 = boto3.resource('s3')

# Local file information
# local_path is kept as a *string ending in '/'* because the rest of the script
# builds file paths with `local_path + <file name>`. A bare Path object cannot
# be concatenated with a str (TypeError), and without the trailing separator
# the file name would be glued onto the directory name.
local_path = Path.cwd().as_posix().rstrip('/') + '/'
coco_manifest = 'deepfashion2.json'
coco_json_file = local_path + coco_manifest
job_name = 'deepfashion2'
cl_manifest_file = 'custom_labels.manifest'
label_attribute = 'bounding-box'

# Create (or truncate) the output manifest so that later appends start from an
# empty file.
with open(local_path + cl_manifest_file, 'w'):
    pass
# class representing a Custom Label JSON line for an image
class cl_json_line:
    """One manifest JSON line describing a single image.

    The keys are written straight into the instance ``__dict__`` because they
    contain hyphens and therefore cannot be ordinary attribute names; the
    ``__dict__`` is what the script later serializes with ``json.dump``.
    """

    def __init__(self, job, img):
        """Fill in the image-level fields; annotations are dealt with separately.

        job: key under which the bounding-box data is stored; the metadata is
            stored under the same key with a ``-metadata`` suffix.
        img: mapping providing ``width``, ``height`` and ``file_name``.
        """
        # Image info. Depth is fixed at 3 by this script.
        image_size = {
            "width": img["width"],
            "depth": 3,
            "height": img["height"],
        }

        fields = self.__dict__
        fields["source-ref"] = s3_path + img['file_name']
        fields[job] = {
            "annotations": [],  # filled in later, one entry per bounding box
            "image_size": [image_size],
        }

        # Metadata; 'class-map' and 'objects' are filled in later as well.
        fields[job + '-metadata'] = {
            'job-name': job_name,
            'class-map': {},
            'human-annotated': 'yes',
            'objects': [],
            'creation-date': datetime.now().strftime('%Y-%m-%dT%H:%M:%S'),
            'type': 'groundtruth/object-detection',
        }
# Load the COCO file and pull out the three sections the conversion needs.
print("Getting image, annotations, and categories from COCO file...")
with open(coco_json_file) as coco_file:
    # Get custom label compatible info
    js = json.load(coco_file)
    images, categories, annotations = (
        js['images'],
        js['categories'],
        js['annotations'],
    )

    # Report how many entries of each kind were read.
    print(f'Images: {len(images)}')
    print(f'annotations: {len(annotations)}')
    print(f'categories: {len(categories)}')
print("Creating CL JSON lines...")
# Map each COCO image id to its manifest line (no annotations attached yet).
images_dict = {image['id']: cl_json_line(label_attribute, image) for image in images}

print('Parsing annotations...')
# Build the category id -> name lookup once, instead of scanning the whole
# `categories` list for every annotation.
category_names = {category['id']: category['name'] for category in categories}
metadata_attribute = label_attribute + '-metadata'

for annotation in annotations:
    # Raises KeyError if the annotation refers to an image id that is not
    # present in the COCO 'images' section.
    image = images_dict[annotation['image_id']]
    category_id = annotation['category_id']

    # get bounding box information (bbox is read as [left, top, width, height])
    bbox = annotation['bbox']
    getattr(image, label_attribute)['annotations'].append({
        'left': bbox[0],
        'top': bbox[1],
        'width': bbox[2],
        'height': bbox[3],
        'class_id': category_id,
    })

    # Record the class name and a matching 'objects' entry only when the
    # category is known; annotations with an unknown category id get neither.
    # NOTE(review): if `categories` lists the same id more than once, exactly
    # one 'objects' entry is added per annotation (the last name wins).
    if category_id in category_names:
        metadata = getattr(image, metadata_attribute)
        metadata['class-map'][category_id] = category_names[category_id]
        metadata['objects'].append({'confidence': 1})  # not currently used by Custom Labels

print('Done parsing annotations')
# Create manifest file.
print('Writing Custom Labels manifest...')
# Open the manifest once and append one JSON line per image, rather than
# reopening the file for every image. The `with` block closes the file, so no
# explicit close() is needed. Path(...) / name is used so the path is built
# correctly whether local_path is a str or a Path.
with open(Path(local_path) / cl_manifest_file, 'a') as outfile:
    for im in images_dict.values():
        json.dump(im.__dict__, outfile)
        outfile.write('\n')
# Upload manifest file to S3 bucket.
# Build the local path and the S3 key once. The local path is converted to a
# plain string so it can be concatenated into the log message and passed to
# upload_file regardless of whether local_path is a str or a Path.
manifest_local_path = str(Path(local_path) / cl_manifest_file)
manifest_s3_key = s3_key_path_manifest_file + cl_manifest_file

print('Uploading Custom Labels manifest file to S3 bucket')
print('Uploading ' + manifest_local_path + ' to ' + s3_key_path_manifest_file)
print(s3_bucket)
s3 = boto3.resource('s3')
s3.Bucket(s3_bucket).upload_file(manifest_local_path, manifest_s3_key)

# Print S3 URL to manifest file (wrapped in ANSI bold escape codes).
print('S3 URL Path to manifest file. ')
print('\033[1m s3://' + s3_bucket + '/' + manifest_s3_key + '\033[0m')