Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WSI reader #1548

Merged
merged 53 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
753deca
Implement CuImageReader and OpenSlideReader
bhashemian Feb 4, 2021
ddbd6ab
Add unittests for CuImageReader
bhashemian Feb 4, 2021
7e77449
Add unittests for OpenSlideReader
bhashemian Feb 4, 2021
d3dbf7d
Merge branch 'master' into pathology_dataset
bhashemian Feb 4, 2021
c40b019
Sort imports
bhashemian Feb 4, 2021
f8b0962
Add correct boundaries
bhashemian Feb 5, 2021
e4dd37d
Merge branch 'pathology_dataset' of github.com:behxyz/MONAI into path…
bhashemian Feb 5, 2021
9a3e672
Add test cases for reading patches on a grid for CuImage
bhashemian Feb 5, 2021
b463310
Add patch whole slide imaging dataset for pathology
bhashemian Feb 5, 2021
4c735cb
Add test case for read patches for OpenSlide
bhashemian Feb 5, 2021
378893c
flake8 and few minor changes
bhashemian Feb 5, 2021
ec5261b
black
bhashemian Feb 5, 2021
ce01a9b
flake8
bhashemian Feb 5, 2021
51c1578
Add kwargs to CuImageReader and OpenSlideReader's read method
bhashemian Feb 8, 2021
714561a
Change the type hint from np.dtype to DTypeLike
bhashemian Feb 8, 2021
f6f5cf6
Merge branch 'master' into pathology_dataset
bhashemian Feb 8, 2021
642ee9b
Merge branch 'master' into pathology_dataset
bhashemian Feb 8, 2021
e83573d
Fix a bug
bhashemian Feb 8, 2021
1adf4ee
Merge branch 'master' into pathology_dataset
bhashemian Feb 22, 2021
097eb19
Implement WSIReader and unittests
bhashemian Feb 22, 2021
356e0d4
Minor updates
bhashemian Feb 22, 2021
27a04f6
Fix few typing issues
bhashemian Feb 23, 2021
9f09e49
Revert datasets
bhashemian Feb 23, 2021
4b9734f
Add shape property to openslide image object
bhashemian Feb 23, 2021
563314f
Add unittest for loading the whole image
bhashemian Feb 23, 2021
eb9655d
Update the whole image size
bhashemian Feb 23, 2021
71f9af4
Remove optional size
bhashemian Feb 23, 2021
3b98096
Remove optional dtype
bhashemian Feb 23, 2021
0076988
Remove _get_spatial_shape return type
bhashemian Feb 23, 2021
291846f
Reverse the orders of dimensions of `location`
bhashemian Feb 24, 2021
3ac7647
Change test cases to use smaller image and reverse location's dimensions
bhashemian Feb 24, 2021
40a6f23
Merge branch 'master' into pathology_dataset
bhashemian Feb 24, 2021
00b7a55
Merge branch 'master' into pathology_dataset
bhashemian Feb 26, 2021
b851859
Replace the test TIFF and some upgrades
bhashemian Feb 26, 2021
0a99658
Update dependencies for OpenSlide
bhashemian Feb 26, 2021
dede661
Merge branch 'master' into pathology_dataset
bhashemian Mar 1, 2021
563a4fa
Update unittests for OpenSlide and CuImage
bhashemian Mar 1, 2021
9ee2200
Merge branch 'pathology_dataset' of pathology_dataset
bhashemian Mar 1, 2021
3ac12c3
Fix openslide dependency
bhashemian Mar 1, 2021
15c147d
Fix doc dependencies
bhashemian Mar 1, 2021
d9059ec
Merge branch 'master' into pathology_dataset
Nic-Ma Mar 3, 2021
c394ebe
Merge branch 'master' into pathology_dataset
bhashemian Mar 3, 2021
8a279c3
Minor changes
bhashemian Mar 3, 2021
c6171d1
Merge branch 'pathology_dataset' into pathology_dataset
bhashemian Mar 3, 2021
0082ac6
Merge branch 'master' into pathology_dataset
bhashemian Mar 3, 2021
22846f8
Merge branch 'master' into pathology_dataset
bhashemian Mar 4, 2021
c8750f0
Few variable name changes
bhashemian Mar 4, 2021
a440caf
Add EnsureChannelFirst
bhashemian Mar 4, 2021
d4ff431
Merge branch 'pathology_dataset' of github.com:behxyz/MONAI into path…
bhashemian Mar 4, 2021
652f046
Add metadata to WSIReader
bhashemian Mar 4, 2021
2ffdf58
Merge branch 'master' into pathology_dataset
bhashemian Mar 4, 2021
1f32f71
Merge branch 'master' into pathology_dataset
bhashemian Mar 5, 2021
2202f57
Merge branch 'master' into pathology_dataset
bhashemian Mar 5, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions monai/apps/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@
from monai.apps.utils import download_and_extract
from monai.data import (
CacheDataset,
Dataset,
SmartCacheDataset,
load_decathlon_datalist,
load_decathlon_properties,
partition_dataset,
select_cross_validation_folds,
)
from monai.data.image_reader import CuImageReader, OpenSlideReader
from monai.transforms import LoadImaged, Randomizable
from monai.utils import ensure_tuple

Expand Down Expand Up @@ -388,3 +391,107 @@ def _split_datalist(self, datalist: List[Dict]) -> List[Dict]:
return select_cross_validation_folds(partitions=data, folds=folds)

return _NsplitsDataset(**self.dataset_params)


class PatchWSIDataset(Dataset):
    """
    Load whole slide images and associated class labels and create patches.

    Every distinct image named in the label file is opened once, at
    construction time, through the selected image reader and kept in
    ``self.cu_image_dict``. ``__getitem__`` then asks the reader for the
    patches of one labelled region and pairs them with the region's labels.

    Args:
        data: sequence whose first element is a dict with the keys
            ``"image_base_path"`` (directory that image names are joined to)
            and ``"labels"`` (path of the label text file). Only ``data[0]``
            is read.
        region_size: size of the region read around each location; a single
            int is expanded to a pair.
        grid_size: grid shape passed to the reader; a single int is expanded
            to a pair.
        patch_size: forwarded unchanged to the reader's ``get_data``.
        image_reader_name: ``"CuImage"`` or ``"OpenSlide"`` (case-insensitive).
        transform: optional callable applied to the list of samples produced
            by ``__getitem__``.

    Raises:
        ValueError: if ``image_reader_name`` is neither ``"CuImage"`` nor
            ``"OpenSlide"``.
    """

    def __init__(self, data, region_size, grid_size, patch_size, image_reader_name="CuImage", transform=None):
        self.image_reader_name = image_reader_name.lower()
        self.region_size = self._as_pair(region_size)
        self.grid_size = self._as_pair(grid_size)
        # True division: the entries are floats even when the sizes divide evenly.
        self.sub_region_size = (
            self.region_size[0] / self.grid_size[0],
            self.region_size[1] / self.grid_size[1],
        )
        self.patch_size = patch_size

        self.transform = transform
        self.image_base_path = data[0]["image_base_path"]
        self.samples = self.load_samples(data[0]["labels"])
        # Despite the name this is a set, so each image is opened only once.
        self.image_path_list = {sample[0] for sample in self.samples}
        self.num_samples = len(self.samples)

        self.cu_image_dict = {}

        if self.image_reader_name == "cuimage":
            self.image_reader = CuImageReader()
        elif self.image_reader_name == "openslide":
            self.image_reader = OpenSlideReader()
        else:
            raise ValueError('image_reader_name should be either "CuImage" or "OpenSlide"')
        self._fetch_cu_images()

    @staticmethod
    def _as_pair(value):
        """Expand a single int into a ``(value, value)`` pair; return anything else unchanged."""
        return (value, value) if isinstance(value, int) else value

    def _fetch_cu_images(self):
        """Open every distinct image with the reader and cache the resulting object by path."""
        for image_path in self.image_path_list:
            self.cu_image_dict[image_path] = self.image_reader.read(image_path)

    def process_label_row(self, row):
        """
        Parse one comma-separated row of the label file.

        The row layout is ``<image name>,<center 0>,<center 1>,<label>[,<label>...]``
        where the image name has no extension (``.tif`` is appended).
        NOTE(review): which image axis each center coordinate refers to is not
        visible here -- confirm against the reader's ``get_data``.

        Returns:
            ``(image_path, location, labels)`` where ``location`` is the
            region's upper-left corner and ``labels`` is a float32 array of
            shape ``(num_labels, 1, 1)``.
        """
        fields = row.strip().split(",")
        # create full image path
        image_name = fields[0] + ".tif"
        image_path = os.path.join(self.image_base_path, image_name)
        # change center locations to upper left location
        location = (
            int(fields[1]) - self.region_size[0] // 2,
            int(fields[2]) - self.region_size[1] // 2,
        )
        # convert labels to float32 and add empty HxW channel to label
        labels = np.array([int(lbl) for lbl in fields[3:]], dtype=np.float32)
        labels = labels[:, np.newaxis, np.newaxis]
        return image_path, location, labels

    def load_samples(self, loc_path):
        """
        Read the label file at ``loc_path`` and parse every non-blank row.

        Blank rows (for example a trailing empty line) are skipped instead of
        failing inside ``process_label_row``.
        """
        with open(loc_path) as label_file:
            return [self.process_label_row(row) for row in label_file if row.strip()]

    def __len__(self):
        """Return the number of labelled regions loaded from the label file."""
        return self.num_samples

    def __getitem__(self, index):
        """
        Return the samples of the ``index``-th labelled region.

        The result is a list with one ``{"image": ..., "label": ...}`` dict per
        label of the region (the i-th label is paired with the i-th image
        returned by the reader), passed through ``self.transform`` when one is
        set.
        """
        image_path, location, labels = self.samples[index]
        images = self.image_reader.get_data(
            img_obj=self.cu_image_dict[image_path],
            location=location,
            size=self.region_size,
            grid_shape=self.grid_size,
            patch_size=self.patch_size,
        )
        samples = [{"image": images[i], "label": label} for i, label in enumerate(labels)]
        if self.transform:
            samples = self.transform(samples)
        return samples


class SmartCachePatchWSIDataset(SmartCacheDataset):
    """
    Add SmartCache functionality to PatchWSIDataset.

    The patch-extraction arguments are used to build a ``PatchWSIDataset``
    (created without a transform), which is then handed to
    ``SmartCacheDataset`` as its ``data``. ``transform`` and the caching
    arguments are forwarded to ``SmartCacheDataset`` as given.
    """

    def __init__(
        self,
        data,
        region_size,
        grid_size,
        patch_size,
        transform,
        replace_rate,
        cache_num,
        cache_rate=1.0,
        num_init_workers=None,
        num_replace_workers=0,
        image_reader_name="CuImage",
    ):
        # The wrapped dataset only extracts patches; the transform is applied
        # by the caching layer, not here.
        patch_dataset = PatchWSIDataset(
            data=data,
            region_size=region_size,
            grid_size=grid_size,
            patch_size=patch_size,
            image_reader_name=image_reader_name,
        )
        cache_options = {
            "replace_rate": replace_rate,
            "cache_num": cache_num,
            "cache_rate": cache_rate,
            "num_init_workers": num_init_workers,
            "num_replace_workers": num_replace_workers,
        }
        super().__init__(data=patch_dataset, transform=transform, **cache_options)
Loading