-
Notifications
You must be signed in to change notification settings - Fork 0
/
voc2012.py
79 lines (73 loc) · 3.97 KB
/
voc2012.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import cv2
import numpy as np
from yolov1test import *
class VOC2012(Dataset):
    """VOC2012 detection dataset for YOLOv1.

    Each item is a (image, labels) pair: the image letterbox-padded to a
    square, resized to 448x448, and the ground-truth boxes encoded into a
    7x7 YOLOv1 label grid by convert_bbox2labels.
    """

    def __init__(self, is_train=True, is_aug=True):
        """
        :param is_train: use the training split (True) or the validation split (False)
        :param is_aug: whether to apply data augmentation (currently only ToTensor)
        """
        self.filenames = []  # image ids of the chosen split
        split = "train.txt" if is_train else "val.txt"
        # BUGFIX: the original train branch called x.stirp() (typo) and crashed.
        with open(r'H:/pythonProject/yolov1test/VOCdevkit/VOC2012/' + "ImageSets/Main/" + split, 'r') as f:
            self.filenames = [x.strip() for x in f]
        self.imgpath = 'H:/pythonProject/yolov1test/VOCdevkit/VOC2012/' + "JPEGImages/"
        self.labelpath = './labels/'  # per-image txt files: one "cls xc yc w h" line per object
        self.is_aug = is_aug

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, item):
        img = cv2.imread(self.imgpath + self.filenames[item] + ".jpg")  # read raw image (BGR, HxWxC)
        h, w = img.shape[0:2]
        input_size = 448  # YOLOv1 network input is 448x448
        # Dataset images have arbitrary sizes, so first zero-pad the shorter
        # side to make the image square, then resize the square to 448x448.
        # padw/padh are recorded because the bbox coordinates (normalized to
        # the original width/height) must be re-normalized to the padded side.
        padw, padh = 0, 0
        if h > w:
            padw = (h - w) // 2
            img = np.pad(img, ((0, 0), (padw, padw), (0, 0)), 'constant', constant_values=0)
        elif w > h:
            padh = (w - h) // 2
            img = np.pad(img, ((padh, padh), (0, 0), (0, 0)), 'constant', constant_values=0)
        img = cv2.resize(img, (input_size, input_size))
        # Augmentation: only ToTensor for now, since geometric augmentations
        # would also require transforming the bbox annotations.
        if self.is_aug:
            aug = transforms.Compose([
                transforms.ToTensor()
            ])
            img = aug(img)
        # Labels are flat lists of (cls, xc, yc, w, h) per object, coordinates
        # normalized by the ORIGINAL image width/height.
        with open(self.labelpath + self.filenames[item] + ".txt") as f:
            bbox = f.read().split('\n')
        bbox = [x.split() for x in bbox]
        bbox = [float(x) for y in bbox for x in y]
        if len(bbox) % 5 != 0:
            raise ValueError("File:" + self.labelpath + self.filenames[item] + ".txt" + "——bbox Extraction Error!")
        # Re-normalize coordinates to the padded square side (h if padw else w).
        for i in range(len(bbox) // 5):
            if padw != 0:
                bbox[i * 5 + 1] = (bbox[i * 5 + 1] * w + padw) / h
                bbox[i * 5 + 3] = (bbox[i * 5 + 3] * w) / h
            elif padh != 0:
                bbox[i * 5 + 2] = (bbox[i * 5 + 2] * h + padh) / w
                bbox[i * 5 + 4] = (bbox[i * 5 + 4] * h) / w
        # BUGFIX: the original __getitem__ fell off the end and returned None;
        # encode the boxes into the 7x7 YOLOv1 grid and return both tensors.
        labels = convert_bbox2labels(bbox)          # (7, 7, 5*2+num_classes) numpy array
        labels = transforms.ToTensor()(labels)      # -> (5*2+num_classes, 7, 7) tensor
        return img, labels
def convert_bbox2labels(bbox):
    """Convert flat (cls, xc, yc, w, h) bbox data into a YOLOv1 label grid.

    :param bbox: flat list whose length is a multiple of 5; coordinates are
        normalized (xc, yc, w, h) in [0, 1].
    :return: numpy array of shape (7, 7, 5*2+len(CLASSES)). For the cell
        containing a box center, both predictor slots hold
        (px, py, w, h, 1) — (px, py) being the center offset within the
        cell — and the class channel is set to 1.
    """
    gridsize = 1.0 / 7
    # NOTE: the channel count must match the dataset's number of classes.
    labels = np.zeros((7, 7, 5 * 2 + len(CLASSES)))
    for i in range(len(bbox) // 5):
        # Cell (row=gridy, col=gridx) containing the box center.
        # BUGFIX: clamp to 6 — a center coordinate of exactly 1.0 (object
        # touching the image border) would otherwise index cell 7 and raise
        # an IndexError on the 7x7 grid.
        gridx = min(int(bbox[i * 5 + 1] // gridsize), 6)
        gridy = min(int(bbox[i * 5 + 2] // gridsize), 6)
        # Center offset relative to the cell's top-left corner, in cell units.
        gridpx = bbox[i * 5 + 1] / gridsize - gridx
        gridpy = bbox[i * 5 + 2] / gridsize - gridy
        # Make this cell responsible for the ground truth: both bbox slots get
        # the target box with confidence 1, and its class probability is 1.
        labels[gridy, gridx, 0:5] = np.array([gridpx, gridpy, bbox[i * 5 + 3], bbox[i * 5 + 4], 1])
        labels[gridy, gridx, 5:10] = np.array([gridpx, gridpy, bbox[i * 5 + 3], bbox[i * 5 + 4], 1])
        labels[gridy, gridx, 10 + int(bbox[i * 5])] = 1
    return labels