Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
mask_targets = []
if num_pos > 0:
proposals_np = pos_proposals.cpu().numpy()
_, maxh, maxw = gt_masks.shape
proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw - 1)
proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh - 1)
pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
for i in range(num_pos):
gt_mask = gt_masks[pos_assigned_gt_inds[i]]
bbox = proposals_np[i, :].astype(np.int32)
x1, y1, x2, y2 = bbox
w = np.maximum(x2 - x1 + 1, 1)
h = np.maximum(y2 - y1 + 1, 1)
# mask is uint8 both before and after resizing
# mask_size (h, w) to (w, h)
target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
mask_size[::-1])
mask_targets.append(target)
mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
pos_proposals.device)
else:
mask_targets = pos_proposals.new_zeros((0, ) + mask_size)
return mask_targets
def _resize_masks(self, results):
for key in results.get('mask_fields', []):
if results[key] is None:
continue
if self.keep_ratio:
masks = [
mmcv.imrescale(
mask, results['scale_factor'], interpolation='nearest')
for mask in results[key]
]
else:
mask_size = (results['img_shape'][1], results['img_shape'][0])
masks = [
mmcv.imresize(mask, mask_size, interpolation='nearest')
for mask in results[key]
]
if masks:
results[key] = np.stack(masks)
else:
results[key] = np.empty(
(0, ) + results['img_shape'], dtype=np.uint8)
scale_factor = 1.0
for i in range(bboxes.shape[0]):
if not isinstance(scale_factor, (float, np.ndarray)):
scale_factor = scale_factor.cpu().numpy()
bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
label = labels[i]
w = max(bbox[2] - bbox[0] + 1, 1)
h = max(bbox[3] - bbox[1] + 1, 1)
if not self.class_agnostic:
mask_pred_ = mask_pred[i, label, :, :]
else:
mask_pred_ = mask_pred[i, 0, :, :]
bbox_mask = mmcv.imresize(mask_pred_, (w, h))
bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
np.uint8)
if rcnn_test_cfg.get('crop_mask', False):
im_mask = bbox_mask
else:
im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
if rcnn_test_cfg.get('rle_mask_encode', True):
rle = mask_util.encode(
np.array(im_mask[:, :, np.newaxis], order='F'))[0]
cls_segms[label - 1].append(rle)
else:
cls_segms[label - 1].append(im_mask)
def __call__(self, img, scale, flip=False, keep_ratio=True):
if keep_ratio:
img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
else:
img, w_scale, h_scale = mmcv.imresize(
img, scale, return_scale=True)
scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
dtype=np.float32)
img_shape = img.shape
img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
if flip:
img = mmcv.imflip(img)
if self.size_divisor is not None:
img = mmcv.impad_to_multiple(img, self.size_divisor)
pad_shape = img.shape
else:
pad_shape = img_shape
img = img.transpose(2, 0, 1)
return img, img_shape, pad_shape, scale_factor
def __call__(self, img, scale, flip=False, keep_ratio=True):
if keep_ratio:
img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
else:
img, w_scale, h_scale = mmcv.imresize(
img, scale, return_scale=True)
scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
dtype=np.float32)
img_shape = img.shape
img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
if flip:
img = mmcv.imflip(img)
if self.size_divisor is not None:
img = mmcv.impad_to_multiple(img, self.size_divisor)
pad_shape = img.shape
else:
pad_shape = img_shape
img = img.transpose(2, 0, 1)
return img, img_shape, pad_shape, scale_factor
"""
mask_size = cfg.mask_size
num_pos = pos_proposals.size(0)
mask_targets = []
if num_pos > 0:
proposals_np = pos_proposals.cpu().numpy()
pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
for i in range(num_pos):
gt_mask = gt_masks[pos_assigned_gt_inds[i]] # gt_mask原本为整张图大小的数组(1216,800), 包含1和0,其中1代表有物体,0代表没物体
# 这里提取出第i个gt_mask作为target
bbox = proposals_np[i, :].astype(np.int32)
x1, y1, x2, y2 = bbox
w = np.maximum(x2 - x1 + 1, 1)
h = np.maximum(y2 - y1 + 1, 1)
# mask is uint8 both before and after resizing
target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], # 取出gt_mask中proposal大小的一块,然后缩放到目标特征大小(28,28)
(mask_size, mask_size))
mask_targets.append(target)
mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
pos_proposals.device)
else:
mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
return mask_targets
# aspect ratio unchanged
if isinstance(scale_factor, float):
masks = [
mmcv.imrescale(mask, scale_factor, interpolation='nearest')
for mask in masks
]
# aspect ratio changed
else:
w_ratio, h_ratio = scale_factor[:2]
if masks:
h, w = masks[0].shape[:2]
new_h = int(np.round(h * h_ratio))
new_w = int(np.round(w * w_ratio))
new_size = (new_w, new_h)
masks = [
mmcv.imresize(mask, new_size, interpolation='nearest')
for mask in masks
]
if flip:
masks = [mask[:, ::-1] for mask in masks]
padded_masks = [
mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks
]
padded_masks = np.stack(padded_masks, axis=0)
return padded_masks
def _resize_seg(self, results):
for key in results.get('seg_fields', []):
if self.keep_ratio:
gt_seg = mmcv.imrescale(
results[key], results['scale'], interpolation='nearest')
else:
gt_seg = mmcv.imresize(
results[key], results['scale'], interpolation='nearest')
results['gt_semantic_seg'] = gt_seg
for i in range(bboxes.shape[0]):
if not isinstance(scale_factor, (float, np.ndarray)):
scale_factor = scale_factor.cpu().numpy()
bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
label = labels[i]
w = max(bbox[2] - bbox[0] + 1, 1)
h = max(bbox[3] - bbox[1] + 1, 1)
if not self.class_agnostic:
mask_pred_ = mask_pred[i, label, :, :]
else:
mask_pred_ = mask_pred[i, 0, :, :]
im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
bbox_mask = mmcv.imresize(mask_pred_, (w, h))
bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
np.uint8)
im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
rle = mask_util.encode(
np.array(im_mask[:, :, np.newaxis], order='F'))[0]
cls_segms[label - 1].append(rle)
return cls_segms
img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
scale_factor = 1.0
for i in range(bboxes.shape[0]):
bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
label = labels[i]
w = max(bbox[2] - bbox[0] + 1, 1)
h = max(bbox[3] - bbox[1] + 1, 1)
if not self.class_agnostic:
mask_pred_ = mask_pred[i, label, :, :]
else:
mask_pred_ = mask_pred[i, 0, :, :]
im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
bbox_mask = mmcv.imresize(mask_pred_, (w, h))
bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
np.uint8)
im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
rle = mask_util.encode(
np.array(im_mask[:, :, np.newaxis], order='F'))[0]
cls_segms[label - 1].append(rle)
return cls_segms