Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Computing Occlusion masks for DAVIS real-world performance evaluation #1

Open
starsong98 opened this issue Aug 21, 2024 · 2 comments
Open

Comments

@starsong98
Copy link

Hello. Thank you for releasing the code for your amazing paper!

I have a question regarding your evaluation of real-world generalization on the DAVIS 2017 dataset by backward warping.

In the paper it says that you used the DAVIS annotation masks to compute occlusion masks, and excluded the occluded regions when computing the photometric/SSIM losses.
How exactly did you compute these occlusion masks? Was there some baseline occlusion estimation model you used? I cannot seem to find the code for that in the released code.

Best regards,

@HanLingsgjk
Copy link
Owner

def Davis_vis(model, iters=6):
    """Evaluate real-world generalization on DAVIS 2017 by backward warping.

    For every consecutive frame pair of every sequence under ``imroot``:
      * run ``model`` to predict optical flow (and, if provided, a
        depth-change map ``dchange``),
      * backward-warp frame2 (and its DAVIS annotation mask) to frame1,
      * build a rough occlusion mask (pixels whose flow leaves the image,
        plus pixels where the warped mask disagrees with frame1's mask,
        i.e. background moving onto foreground),
      * compute foreground/background SSIM and photometric loss on the
        non-occluded pixels,
      * write flow / reprojection / depth-change visualizations to disk.

    Per-sequence and overall averages are appended to ``records.txt``.

    Args:
        model: flow network, called as
            ``model(image1, image2, iters=iters, test_mode=True)`` and
            expected to return ``(flow_low, flow_pr, _, dchange)``.
        iters: number of refinement iterations passed to the model.

    Side effects only; returns None.
    """
    imroot = '/home/lh/CSCV_occ/Davis/JPEGImages/480p'
    dirictor = os.listdir(imroot)
    dataroot = '/home/lh/CSCV_occ/ScaleFlowplusplus'
    output_filenameroot = os.path.join(dataroot)
    os.makedirs(output_filenameroot, exist_ok=True)
    dchange = None
    txtpath = dataroot + '/records.txt'
    ssim_list_fgall = []
    ssim_list_bgall = []
    ploss_list_fgall = []
    ploss_list_bgall = []
    with open(txtpath, 'w') as record:
        for dir in dirictor:
            images1 = sorted(glob(osp.join(imroot, dir, '*.jpg')))
            images2 = images1[1:]
            images1.pop()
            print(dir)
            pathsplit = dir.split('/')
            nameid = pathsplit[-1]
            # Output folder for optical-flow visualizations.
            output_filenameflow = os.path.join(dataroot, 'flow/', nameid)
            os.makedirs(output_filenameflow, exist_ok=True)
            # Output folder for reprojected (backward-warped) images.
            output_filenameim = os.path.join(dataroot, 'imout/', nameid)
            os.makedirs(output_filenameim, exist_ok=True)
            # Output folder for depth-change heatmaps.
            output_filenamedc = os.path.join(dataroot, 'dc/', nameid)
            os.makedirs(output_filenamedc, exist_ok=True)

            # Per-sequence accumulators; averaged after the frame loop.
            ssim_list_fg = []
            ssim_list_bg = []
            ploss_list_fg = []
            ploss_list_bg = []
            for id in range(len(images1)):
                print(id)
                img1 = frame_utils.read_gen(images1[id])
                img2 = frame_utils.read_gen(images2[id])
                # DAVIS annotation masks mirror the JPEG layout as PNGs.
                maskpath1 = images1[id].replace('JPEGImages', 'Annotations').replace('.jpg', '.png')
                maskpath2 = images2[id].replace('JPEGImages', 'Annotations').replace('.jpg', '.png')

                pathsplit = maskpath1.split('/')
                idout = pathsplit[-1].split('.')[0]

                foremask1 = frame_utils.read_gen(maskpath1)
                foremask2 = frame_utils.read_gen(maskpath2)

                img1 = np.array(img1).astype(np.uint8)[..., :3]
                img2 = np.array(img2).astype(np.uint8)[..., :3]
                foremask2 = np.array(foremask2).astype(np.uint8)
                foremask1 = np.array(foremask1).astype(np.uint8)

                # Some annotations load with a channel axis; keep one channel.
                if len(foremask2.shape) > 2:
                    foremask2 = foremask2[:, :, 0]
                if len(foremask1.shape) > 2:
                    foremask1 = foremask1[:, :, 0]
                img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
                img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
                image1 = img1[None].cuda()
                image2 = img2[None].cuda()
                padder = InputPadder(image1.shape, mode='kitti', sp=16)
                image1, image2 = padder.pad(image1, image2)

                # Predict optical flow (and depth-change map, if the model emits one).
                flow_low, flow_pr, _, dchange = model(image1, image2, iters=iters, test_mode=True)

                flow = padder.unpad(flow_pr[0]).detach().cpu()
                image2 = padder.unpad(image2).detach().cpu()

                N, C, H, W = image2.shape
                foremask2 = torch.from_numpy(foremask2).float().view(1, 1, H, W)
                coords0 = coords_grid(N, H, W)
                coordw = coords0 + flow

                # Backward-warp frame2 and its mask into frame1's coordinates.
                maskout = bilinear_sampler(foremask2, coordw)
                imgout = bilinear_sampler(image2, coordw)
                imgout = imgout.permute(2, 3, 1, 0).squeeze(3).numpy()

                ssim, sall, ploss = get_ssim(imgout, img1.permute(1, 2, 0).numpy())

                if dchange is not None:
                    frame_id = idout + '.png'
                    # Normalize the depth-change map around its mid-value into
                    # [0, 255] and colorize it for visualization.
                    dchange = padder.unpad(dchange[0, 0]).detach().cpu().numpy()
                    datamin = np.min(dchange)
                    datamax = np.max(dchange)
                    mid_data = (datamin + datamax) * 0.5
                    lenthmid = 1 / (mid_data - datamin)
                    dchange = ((dchange - mid_data) * lenthmid).clip(-1, 1) * 128 + 128
                    colormap = plt.get_cmap('plasma')  # alternatives: viridis
                    heatmap = (colormap((dchange).astype(np.uint8)) * 2 ** 8).astype(np.uint16)[:, :, :3]
                    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_RGB2BGR)
                    # NOTE(review): heatmap is already scaled to ~[0, 256] uint16;
                    # the extra *255 yields a 16-bit PNG — confirm this is intended.
                    cv2.imwrite('%s/%s' % (output_filenamedc, frame_id), heatmap * 255)

                # Occlusion part 1: pixels whose flow target falls outside the image.
                coordw = coordw.numpy()
                flowocc = (coordw[0, 0] > 0) * (coordw[0, 0] < W) * (coordw[0, 1] > 0) * (coordw[0, 1] < H)
                flowocc = flowocc.astype(np.float64)

                # Occlusion part 2: warped frame2 mask disagrees with frame1's mask
                # (background moving onto foreground, or vice versa).
                maskshow = maskout[0, 0].numpy()
                maskshow2 = foremask1.astype(np.float64)
                occ = np.abs(maskshow2 - maskshow) < 0.0001
                imgout = imgout * occ[:, :, np.newaxis]

                # Split into foreground (DAVIS mask) and background, score separately.
                maskfg = flowocc * (foremask1.astype(np.float64) / 255.)  # foreground mask
                maskbg = flowocc * ((255 - maskshow) / 255.)
                # Empty selections would yield a NaN mean with a RuntimeWarning;
                # keep the NaN value but skip the warning.
                ssimfg = sall[maskfg > 0].mean() if (maskfg > 0).any() else float('nan')
                ssimbg = sall[maskbg > 0].mean() if (maskbg > 0).any() else float('nan')

                print("ssimfg:", ssimfg, "ssimbg:", ssimbg)
                ploss = ploss.astype(np.uint8)
                plossfg = ploss[maskfg > 0].mean() if (maskfg > 0).any() else float('nan')
                plossbg = ploss[maskbg > 0].mean() if (maskbg > 0).any() else float('nan')

                filenname = idout + '_sfg_' + str(ssimfg)[0:6] + '_sbg_' + str(ssimbg)[0:6] + '.png'
                filennamep = idout + '_pfg_' + str(plossfg)[0:6] + '_pbg_' + str(plossbg)[0:6] + '.png'

                # Frames with no visible foreground contribute only background stats.
                if maskfg.max() > 0.5:
                    ssim_list_fg.append(ssimfg)
                    ssim_list_bg.append(ssimbg)
                    ploss_list_fg.append(plossfg)
                    ploss_list_bg.append(plossbg)
                else:
                    ssim_list_bg.append(ssimbg)
                    ploss_list_bg.append(plossbg)
                # Save: flow visualization (SSIM in filename) and the
                # occlusion-masked reprojection (photometric loss in filename).
                flowviz = (flow2rgb(flow.permute(1, 2, 0).numpy()) * 255).astype(np.uint8)
                flowviz = cv2.cvtColor(flowviz, cv2.COLOR_RGB2BGR)
                cv2.imwrite(os.path.join(output_filenameflow, filenname), flowviz)
                imgout = imgout.astype(np.uint8)
                imgout = cv2.cvtColor(imgout, cv2.COLOR_RGB2BGR)
                cv2.imwrite(os.path.join(output_filenameim, filennamep), imgout.astype(np.uint8))

            # Per-sequence means, appended to the record file.
            ssim_list_fg = np.array(ssim_list_fg)
            ssim_fg_mean = ssim_list_fg.mean()
            ssim_list_bg = np.array(ssim_list_bg)
            ssim_bg_mean = ssim_list_bg.mean()
            ploss_list_fg = np.array(ploss_list_fg)
            ploss_fg_mean = ploss_list_fg.mean()
            ploss_list_bg = np.array(ploss_list_bg)
            ploss_bg_mean = ploss_list_bg.mean()

            filercord = dir + '_' + str(ssim_fg_mean)[0:6] + '_' + str(ssim_bg_mean)[0:6] + '_' + str(ploss_fg_mean)[0:6] + '_' + str(ploss_bg_mean)[0:6]
            record.write(filercord + "\n")

            ssim_list_fgall.append(ssim_fg_mean)
            ssim_list_bgall.append(ssim_bg_mean)
            ploss_list_fgall.append(ploss_fg_mean)
            ploss_list_bgall.append(ploss_bg_mean)

        # Overall means across all sequences.
        ssim_list_fgall = np.array(ssim_list_fgall)
        ssim_fg_meanall = ssim_list_fgall.mean()
        ssim_list_bgall = np.array(ssim_list_bgall)
        ssim_bg_meanall = ssim_list_bgall.mean()
        ploss_list_fgall = np.array(ploss_list_fgall)
        ploss_fg_meanall = ploss_list_fgall.mean()
        ploss_list_bgall = np.array(ploss_list_bgall)
        ploss_bg_meanall = ploss_list_bgall.mean()
        filercord = 'ALL' + dir + '_' + str(ssim_fg_meanall)[0:6] + '_' + str(ssim_bg_meanall)[0:6] + '_' + str(ploss_fg_meanall)[0:6] + '_' + str(ploss_bg_meanall)[0:6]
        record.write(filercord + "\n")

@HanLingsgjk
Copy link
Owner

You can refer to this — it is the code I used for evaluation. My occlusion calculation is relatively rough: it is based mainly on the optical-flow results, removing pixels that move outside the image, as well as the parts that move from the background into the foreground.

Then the foreground is the foreground mask provided by DAVIS, and the background is the remaining part

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants