# master / scripts / eval_cityscapes / cityscapes.py
#
# cityscapes.py @0013db4
#
# 0013db4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# The following code is modified from https://github.com/shelhamer/clockwork-fcn
import sys
import os
import glob
import numpy as np
from PIL import Image


class cityscapes:
    """
    Helper for reading Cityscapes images and labels rooted at `data_path`.

    Provides image/label loading, raw-ID -> train-ID label mapping,
    preprocessing for Caffe, train-ID -> color rendering, boundary mask
    extraction and enumeration of labeled frames / frame sequences.
    """

    def __init__(self, data_path):
        """
        Store the dataset root and build the label lookup tables.

        data_path: dataset root, something like /data2/cityscapes. It must
        contain `scripts/helpers/labels.py` (the cityscapes label helper),
        which is imported here.
        """
        self.dir = data_path
        self.classes = ['road', 'sidewalk', 'building', 'wall', 'fence',
                        'pole', 'traffic light', 'traffic sign', 'vegetation', 'terrain',
                        'sky', 'person', 'rider', 'car', 'truck',
                        'bus', 'train', 'motorcycle', 'bicycle']
        # per-channel mean subtracted in preprocess() after the RGB -> BGR flip
        self.mean = np.array((72.78044, 83.21195, 73.45286), dtype=np.float32)
        # import cityscapes label helper and set up label mappings
        sys.path.insert(0, '{}/scripts/helpers/'.format(self.dir))
        labels = __import__('labels')
        # dictionary mapping from raw IDs to train IDs
        self.id2trainId = {label.id: label.trainId for label in labels.labels}
        # dictionary mapping train IDs to colors as 3-tuples
        # (when several labels share a train ID, the last one listed wins)
        self.trainId2color = {label.trainId: label.color for label in labels.labels}

    def get_dset(self, split):
        """
        List images as (city, id) for the specified split

        Reads `ImageSets/segFine/train.txt` when split is 'train'; any other
        value falls back to `ImageSets/segFine/val.txt`. Each non-blank line
        is expected to look like `city/id`.

        TODO(shelhamer) generate splits from cityscapes itself, instead of
        relying on these separately made text files.
        """
        list_name = 'train' if split == 'train' else 'val'
        list_path = '{}/ImageSets/segFine/{}.txt'.format(self.dir, list_name)
        # context manager so the file handle is closed deterministically
        with open(list_path) as list_file:
            entries = list_file.read().splitlines()
        pairs = []
        for entry in entries:
            if not entry.strip():
                # tolerate blank (e.g. trailing) lines instead of raising IndexError
                continue
            parts = entry.split('/')
            pairs.append((parts[0], parts[1]))
        return pairs

    def load_image(self, split, city, idx):
        """
        Open the frame `idx` of `city` in `split` and return it as a PIL Image.

        The image is read from the `leftImg8bit_sequence` tree; PIL loads the
        pixel data lazily, so the returned image owns an open file.
        """
        im = Image.open('{}/leftImg8bit_sequence/{}/{}/{}_leftImg8bit.png'.format(self.dir, split, city, idx))
        return im

    def assign_trainIds(self, label):
        """
        Map the given label IDs to the train IDs appropriate for training
        Use the label mapping provided in labels.py from the cityscapes scripts

        Returns a new float32 array of the same shape as `label`. IDs that
        are not keys of the mapping are left unchanged.
        """
        raw = np.array(label, dtype=np.float32)
        mapped = raw.copy()
        # Build every mask from the untouched raw IDs: remapping in place and
        # comparing against the partially remapped array would chain mappings
        # (an ID mapped to v would be remapped again when v is itself a key),
        # making the result depend on dict iteration order.
        for raw_id, train_id in self.id2trainId.items():
            mapped[raw == raw_id] = train_id
        return mapped

    def load_label(self, split, city, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.

        Raw label IDs are converted to train IDs and stored as uint8.
        """
        label_path = '{}/gtFine/{}/{}/{}_gtFine_labelIds.png'.format(self.dir, split, city, idx)
        # the pixel data is copied into a numpy array inside the block,
        # so the underlying file can be closed right away
        with Image.open(label_path) as label_im:
            label = self.assign_trainIds(label_im)  # get proper labels for eval
        label = np.array(label, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label

    def preprocess(self, im):
        """
        Preprocess loaded image (by load_image) for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def palette(self, label):
        """
        Map trainIds to colors as specified in labels.py

        `label` is either height x width or 1 x height x width (the leading
        singleton dimension is dropped). Returns a height x width x 3 float
        array; pixels whose train ID has no color entry are left at 0.
        """
        if label.ndim == 3:
            label = label[0]
        # zeros rather than empty: unmapped pixels must not expose
        # uninitialized memory
        color = np.zeros((label.shape[0], label.shape[1], 3))
        for train_id, rgb in self.trainId2color.items():
            color[label == train_id, :] = rgb
        return color

    @staticmethod
    def make_boundaries(label, thickness=None):
        """
        Input is an image label, output is a numpy array mask encoding the boundaries of the objects
        Extract pixels at the true boundary by dilation - erosion of label.
        Don't just pick the void label as it is not exclusive to the boundaries.

        `label` is indexed with [0] after masking, i.e. it is expected to
        carry a leading singleton dimension as produced by load_label.
        `thickness` is the radius of the disk structuring element and must
        be given; ValueError is raised otherwise.

        Declared as a staticmethod (it takes no `self`) so it can be called
        on the class as well as on an instance.
        """
        if thickness is None:
            raise ValueError('thickness must be specified')
        # imported lazily so scikit-image is only required for this method
        import skimage.morphology as skm
        void = 255
        mask = np.logical_and(label > 0, label != void)[0]
        selem = skm.disk(thickness)
        boundaries = np.logical_xor(skm.dilation(mask, selem),
                                    skm.erosion(mask, selem))
        return boundaries

    def list_label_frames(self, split):
        """
        Select labeled frames from a split for evaluation
        collected as "city_shot_frame" strings, ordered by city and then
        by label file name.
        """
        def file2idx(f):
            """Helper to convert file path into frame ID"""
            city, shot, frame = (os.path.basename(f).split('_')[:3])
            return "_".join([city, shot, frame])
        frames = []
        # sorted: glob order is filesystem dependent
        cities = sorted(os.path.basename(f)
                        for f in glob.glob('{}/gtFine/{}/*'.format(self.dir, split)))
        for c in cities:
            files = sorted(glob.glob('{}/gtFine/{}/{}/*labelIds.png'.format(self.dir, split, c)))
            frames.extend([file2idx(f) for f in files])
        return frames

    def collect_frame_sequence(self, split, idx, length):
        """
        Collect sequence of frames preceding (and including) a labeled frame
        as a list of Images.

        `idx` is a "city_shot_frame" string; the result holds `length` + 1
        images, from frame - length up to the labeled frame itself, read
        from the `leftImg8bit_sequence` tree of the given split.

        Note: 19 preceding frames are provided for each labeled frame.
        """
        city, shot, frame = idx.split('_')
        frame = int(frame)
        frame_seq = []
        for i in range(frame - length, frame + 1):
            # use the requested split instead of a hard-coded 'val'
            frame_path = '{0}/leftImg8bit_sequence/{1}/{2}/{2}_{3}_{4:0>6d}_leftImg8bit.png'.format(
                self.dir, split, city, shot, i)
            frame_seq.append(Image.open(frame_path))
        return frame_seq