# Source code for olympus.datasets.pennfudan

import os
import shutil

import numpy as np
import torch
from PIL import Image

from filelock import FileLock
from torchvision import datasets

from olympus.datasets.dataset import AllDataset
from olympus.utils.dtypes import VariableShape, Bound1D, DictionaryShape
from olympus.utils import option


class CocoDetection(datasets.CocoDetection):
    """COCO detection dataset whose targets also carry the image id.

    Each sample is ``(img, target)`` where ``target`` is a dictionary with the
    keys ``image_id`` and ``annotations``.

    Parameters
    ----------
    img_folder:
        Forwarded as first argument to ``torchvision.datasets.CocoDetection``

    ann_file:
        Forwarded as second argument to ``torchvision.datasets.CocoDetection``

    transforms: callable or None
        Joint transform called as ``transforms(img, target)``;
        it must return the ``(img, target)`` pair
    """

    def __init__(self, img_folder, ann_file, transforms):
        # The joint transform is kept on this class instead of being forwarded
        # to the parent, so that it receives the wrapped target dictionary
        # built in __getitem__.
        super().__init__(img_folder, ann_file)
        self._transforms = transforms

    def __getitem__(self, idx):
        """Return the ``(img, target)`` pair stored at position ``idx``."""
        img, annotations = super().__getitem__(idx)
        target = dict(image_id=self.ids[idx], annotations=annotations)

        if self._transforms is None:
            return img, target

        img, target = self._transforms(img, target)
        return img, target


class _PennFudanDataset:
    """Penn-Fudan pedestrian images paired with per-instance detection targets.

    Adapted from https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html

    Parameters
    ----------
    data_path:
        Root directory; the data lives in ``<data_path>/PennFudanDataset``

    transforms: callable, optional
        Applied to the RGB PIL image only (the target is not passed to it)

    target_transforms: callable, optional
        Applied to the target dictionary only

    download: bool
        When true, download and extract the archive if the dataset folder
        does not exist yet

    Notes
    -----
    BSD 3-Clause License

    Copyright (c) Soumith Chintala 2016,
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this
      list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

    * Neither the name of the copyright holder nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    """

    URL = 'https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip'

    def __init__(self, data_path, transforms=None, target_transforms=None, download=True):
        self.root = data_path

        if download:
            # Only one process at a time may download/extract the archive.
            # The lock file name is kept unchanged.
            timeout = option('download.lock.timeout', 4 * 60, type=int)
            with FileLock('penndufan.lock', timeout=timeout):
                self.download()

        self.transforms = transforms
        self.target_transforms = target_transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(self.images_path)))
        self.masks = list(sorted(os.listdir(self.masks_path)))

    @property
    def images_path(self):
        """Directory holding the PNG images."""
        return os.path.join(self.folder, 'PennFudanPed', 'PNGImages')

    @property
    def masks_path(self):
        """Directory holding the instance masks."""
        return os.path.join(self.folder, 'PennFudanPed', 'PedMasks')

    @staticmethod
    def _target_from_mask(mask, idx):
        """Build the detection target of sample ``idx`` from its instance mask.

        ``mask`` is a 2D integer array where 0 is the background and every
        other value identifies one instance.
        """
        # instances are encoded as different colors, 0 being the background;
        # filter on the value so a mask without background keeps all instances
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[obj_ids != 0]
        num_objs = len(obj_ids)

        # split the color-encoded mask into a set of binary masks
        masks = mask == obj_ids[:, None, None]

        # get bounding box coordinates for each mask
        boxes = []
        for instance in masks:
            rows, cols = np.where(instance)
            boxes.append([np.min(cols), np.min(rows), np.max(cols), np.max(rows)])

        # reshape so that a mask without any instance yields a (0, 4) tensor
        # instead of a 1D empty tensor that cannot be indexed by column
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        return {
            "boxes": boxes,
            # there is only one class
            "labels": torch.ones((num_objs,), dtype=torch.int64),
            "masks": torch.as_tensor(masks, dtype=torch.uint8),
            "image_id": torch.tensor([idx]),
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            # suppose all instances are not crowd
            "iscrowd": torch.zeros((num_objs,), dtype=torch.int64),
        }

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the sample at position ``idx``."""
        img_path = os.path.join(self.images_path, self.imgs[idx])
        mask_path = os.path.join(self.masks_path, self.masks[idx])

        # convert() returns a new, fully loaded image,
        # so the file can be closed right away
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")

        # note that the mask is not converted to RGB,
        # because each color corresponds to a different instance
        with Image.open(mask_path) as raw:
            mask = np.array(raw)

        target = self._target_from_mask(mask, idx)

        if self.transforms is not None:
            img = self.transforms(img)

        if self.target_transforms is not None:
            target = self.target_transforms(target)

        return img, target

    def __len__(self):
        return len(self.imgs)

    @property
    def folder(self):
        """Dataset directory: ``root`` joined with the class name without underscores."""
        return os.path.join(self.root, self.__class__.__name__.replace('_', ''))

    def _check_exists(self):
        return os.path.exists(self.folder)

    def download(self):
        """Download and extract the archive unless the dataset folder exists.

        On any failure the dataset folder is removed before the error is
        re-raised, because its mere existence is what `_check_exists` tests.
        """
        if self._check_exists():
            return

        try:
            from torchvision.datasets.utils import download_and_extract_archive

            os.makedirs(self.folder, exist_ok=True)
            url = _PennFudanDataset.URL
            filename = url.rpartition('/')[2]
            download_and_extract_archive(url, download_root=self.folder, filename=filename)

        except BaseException:
            # BaseException so an interrupted download is cleaned up as well.
            # ignore_errors: the folder may not exist yet (failure before
            # makedirs) and the cleanup must not mask the original error.
            shutil.rmtree(self.folder, ignore_errors=True)
            raise


def penn_collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    A batch ``[(img0, target0), (img1, target1)]`` becomes
    ``((img0, img1), (target0, target1))``; nothing is stacked.
    An empty batch gives an empty tuple.
    """
    return tuple(zip(*batch))


class PennFudanDataset(AllDataset):
    """This is an image database containing images that are used for pedestrian detection in the experiments reported in [1].
    The images are taken from scenes around campus and urban street.
    The objects we are interested in these images are pedestrians. Each image will have at least one pedestrian in it.

    All labeled pedestrians are straight up.
    More on `official website <https://www.cis.upenn.edu/~jshi/ped_html/>`_.

    Images are only converted to tensors; no random augmentation is applied.

    Attributes
    ----------
    input_shape: (3, H, W) with H ∈ [311, 581], W ∈ [253, 1017] and H * W ∈ [81719, 451548]
        The heights of labeled pedestrians in this database fall into [180,390] pixels.
        They all have unique shapes

    target_shape: DictionaryShape('boxes', 'labels', 'masks', 'image_id', 'area', 'iscrowd')
        boxes: Tensor[P, 4] where P equals the number of pedestrian and 4 is the bounding box
        labels: Tensor[P], always 1
        masks: Tensor[P, H, W], 1 when the pixel belongs to a pedestrian, 0 if not
        image_id: Tensor[1], image id inside the dataset
        area: Tensor[P], (ymax - ymin) * (xmax - xmin) of each bounding box
        iscrowd: Tensor[P], if the image has a crowd of people (always false)

    train_size: 136
        Size of the train dataset
        96 images are taken from around University of Pennsylvania
        74 are taken from around Fudan University.

    valid_size: 16
        Size of the validation dataset

    test_size: 16
        Size of the test dataset

    References
    ----------
    .. [1] Liming Wang, Jianbo Shi, Gang Song, I-fan Shen.
        "Object Detection Combining Recognition and Segmentation". ACCV 2007
    """
    def __init__(self, data_path):
        from torchvision.transforms import Compose, ToTensor

        # Image-only pipeline: the wrapped dataset calls it on the image alone
        # and never on the target. A random horizontal flip placed here would
        # mirror the picture while leaving boxes and masks untouched, and it
        # would also hit the validation and test splits, so none is applied.
        transforms = Compose([
            ToTensor()
        ])

        super().__init__(
            _PennFudanDataset(data_path, transforms),
            input_shape=VariableShape(C=3, H=Bound1D(311, 581), W=Bound1D(253, 1017)),
            target_shape=DictionaryShape('boxes', 'labels', 'masks', 'image_id', 'area', 'iscrowd'),
            train_size=136,
            test_size=16,
            valid_size=16
        )

    @property
    def num_classes(self):
        # labels are always 1; presumably the second class is the background
        return 2

    @staticmethod
    def categories():
        """Return the set of task categories of this dataset."""
        return {'detection'}

    # staticmethod so that access through an instance yields the plain
    # one-argument function instead of a method bound to the dataset
    collate_fn = staticmethod(penn_collate_fn)


def _test_PennFudanDataset(*args, **kwargs):
    """Build a PennFudanDataset cut down to its first 16 samples.

    All arguments are forwarded to `PennFudanDataset`. The split sizes are
    overridden to 8 train, 4 test and 4 validation samples.
    """
    from torch.utils.data import Subset

    small = PennFudanDataset(*args, **kwargs)

    # keep only the first 16 samples of the underlying dataset
    first_samples = list(range(16))
    small.dataset = Subset(small.dataset, indices=first_samples)

    small._train_size, small._test_size, small._valid_size = 8, 4, 4
    return small


# Maps a dataset name to the callable that builds it
builders = dict(
    pennfudan=PennFudanDataset,
    test_pennfudan=_test_PennFudanDataset,
)