Source code for gala.imio

# built-ins
import os
import json
from os.path import split as split_path, join as join_path
from fnmatch import filter as fnfilter
import logging
import itertools as it
import subprocess
import tempfile as tmp

# libraries
import h5py
from PIL import Image

from scipy.ndimage.measurements import label

from numpy import array, uint8, uint16, uint32, uint64, zeros, \
    zeros_like, squeeze, fromstring, ndim, concatenate, newaxis, swapaxes, \
    savetxt, unique, double, cumsum, ndarray
import numpy as np

from skimage.io.collection import alphanumeric_key
from skimage.io import imread

# local files
from . import evaluate
from . import morpho

### Auto-detect file format

supported_image_extensions = ['png', 'tif', 'tiff', 'jpg', 'jpeg']

def read_image_stack(fn, *args, **kwargs):
    """Read a 3D volume of images in image or .h5 format into a numpy.ndarray.

    This function attempts to automatically determine input file types and
    wraps specific image-reading functions.

    Parameters
    ----------
    fn : filename (string)
        A file path or glob pattern specifying one or more valid image files.
        The file format is automatically determined from this argument.
    *args : filenames (string, optional)
        More than one positional argument will be interpreted as a list of
        filenames pointing to all the 2D images in the stack.
    **kwargs : keyword arguments (optional)
        Arguments to be passed to the underlying functions. A 'crop'
        keyword argument is supported, as a list of length 6:
        [xmin, xmax, ymin, ymax, zmin, zmax]. Use 'None' for no crop in
        that coordinate.

    Returns
    -------
    stack : 3-dimensional numpy ndarray

    Notes
    -----
    If reading in .h5 format, keyword arguments are passed through to
    read_h5_stack().

    Automatic file type detection may be deprecated in the future.
    """
    # TODO: Refactor. Rather than have implicit designation of stack format
    # based on filenames (*_boundpred.h5, etc), require explicit parameters
    # in config JSON files.
    if os.path.isdir(fn):
        fn += '/'
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0:
        d = '.'
    crop = kwargs.get('crop', [None]*6)
    if crop is None:
        crop = [None]*6
    if len(crop) == 4:
        crop.extend([None]*2)
    elif len(crop) == 2:
        crop = [None]*4 + crop
    kwargs['crop'] = crop
    if any(fn.endswith(ext) for ext in supported_image_extensions):
        # image types, such as a set of pngs or a multi-page tiff
        xmin, xmax, ymin, ymax, zmin, zmax = crop
        if len(args) > 0 and type(args[0]) == str and \
                args[0].endswith(fn[-3:]):
            # input is a list of filenames
            fns = [fn] + [split_path(f)[1] for f in args]
        else:
            # input is a filename pattern to match
            fns = fnfilter(os.listdir(d), fn)
        if len(fns) == 1 and fns[0].endswith('.tif'):
            stack = read_multi_page_tif(join_path(d, fns[0]), crop)
        else:
            fns.sort(key=alphanumeric_key)  # sort filenames numerically
            fns = fns[zmin:zmax]
            im0 = imread(join_path(d, fns[0]))
            ars = (imread(join_path(d, fn)) for fn in fns)
            im0 = im0[xmin:xmax, ymin:ymax]
            dtype = im0.dtype
            stack = zeros((len(fns),) + im0.shape, dtype)
            for i, im in enumerate(ars):
                stack[i] = im[xmin:xmax, ymin:ymax]
    elif fn.endswith('_boundpred.h5') or fn.endswith('_processed.h5'):
        # Ilastik batch prediction output file
        stack = read_prediction_from_ilastik_batch(os.path.join(d, fn),
                                                   **kwargs)
    elif fn.endswith('.h5'):
        # other HDF5 file
        stack = read_h5_stack(join_path(d, fn), *args, **kwargs)
    elif os.path.isfile(os.path.join(d, 'superpixel_to_segment_map.txt')):
        # Raveler export
        stack = raveler_to_labeled_volume(d, *args, **kwargs)
    return squeeze(stack)
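
# A minimal usage sketch (added for illustration; the paths below are
# hypothetical). The glob pattern selects the 2D planes and `crop`
# restricts the volume to [xmin:xmax, ymin:ymax, zmin:zmax].
def _demo_read_image_stack():
    vol = read_image_stack('/data/sections/*.png',
                           crop=[0, 512, 0, 512, None, None])
    return vol.shape  # e.g. (num_planes, 512, 512)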
def write_image_stack(npy_vol, fn, **kwargs):
    """Write a numpy.ndarray 3D volume to a stack of images or an HDF5 file.

    Parameters
    ----------
    npy_vol : numpy ndarray
        The volume to be written to disk.
    fn : string
        The filename to be written, or a format string when writing a 3D
        stack to a 2D format (e.g. a png image stack).
    **kwargs : keyword arguments
        Keyword arguments to be passed to wrapped functions. See
        corresponding docs for valid arguments.

    Returns
    -------
    out : None

    Examples
    --------
    >>> import numpy as np
    >>> from gala.imio import write_image_stack
    >>> im = 255 * np.array([
    ...     [[0, 1, 0], [1, 0, 1], [0, 1, 0]],
    ...     [[1, 0, 1], [0, 1, 0], [1, 0, 1]]], dtype=np.uint8)
    >>> im.shape
    (2, 3, 3)
    >>> write_image_stack(im, 'image-example-%02i.png', axis=0)
    >>> import os
    >>> fns = sorted(filter(lambda x: x.endswith('.png'), os.listdir('.')))
    >>> fns  # two 3x3 images
    ['image-example-00.png', 'image-example-01.png']
    >>> os.remove(fns[0]); os.remove(fns[1])  # doctest cleanup
    """
    fn = os.path.expanduser(fn)
    if fn.endswith('.png'):
        write_png_image_stack(npy_vol, fn, **kwargs)
    elif fn.endswith('.h5'):
        write_h5_stack(npy_vol, fn, **kwargs)
    elif fn.endswith('.vtk'):
        write_vtk(npy_vol, fn, **kwargs)
    else:
        raise ValueError('Image format not supported: ' + fn + '\n')
### Standard image formats (png, tiff, etc.)
def pil_to_numpy(img):
    """Convert an Image object to a numpy array.

    Parameters
    ----------
    img : Image object (from the Python Imaging Library)

    Returns
    -------
    ar : numpy ndarray
        The corresponding numpy array (same shape as the image)
    """
    ar = squeeze(array(img.getdata()).reshape((img.size[1], img.size[0], -1)))
    return ar
def read_multi_page_tif(fn, crop=[None]*6):
    """Read a multi-page tif file into a numpy array.

    Parameters
    ----------
    fn : string
        The filename of the image file being read.
    crop : list of int, optional (default: no crop)
        A crop of the volume, as [xmin, xmax, ymin, ymax, zmin, zmax].
        Currently, only zmin and zmax (the page range) are honored.

    Returns
    -------
    ar : numpy ndarray
        The image stack in array format.

    Notes
    -----
    Currently, only grayscale images are supported.
    """
    xmin, xmax, ymin, ymax, zmin, zmax = crop
    img = Image.open(fn)
    pages = []
    if zmin is not None and zmin > 0:
        img.seek(zmin)
    eof = False
    while not eof and img.tell() != zmax:
        pages.append(pil_to_numpy(img)[..., newaxis])
        try:
            img.seek(img.tell() + 1)
        except EOFError:
            eof = True
    return concatenate(pages, axis=-1)
def write_png_image_stack(npy_vol, fn, axis=-1, bitdepth=None):
    """Write a numpy.ndarray 3D volume to a stack of .png images.

    Parameters
    ----------
    npy_vol : numpy ndarray, shape (M, N, P)
        The volume to be written to disk.
    fn : format string
        The file pattern to which to write the volume.
    axis : int, optional (default = -1)
        The axis along which to output the images. If the input array has
        shape (M, N, P), and axis is 1, the function will write N images of
        shape (M, P) to disk. In keeping with Python convention, -1 specifies
        the last axis.
    bitdepth : int, optional (default: None)
        The bit depth of the output images; inferred from the data when not
        provided.

    Returns
    -------
    None : None
        No value is returned.

    Notes
    -----
    Only 8-bit and 16-bit single-channel images are currently supported.
    """
    npy_vol = swapaxes(npy_vol, 0, axis)
    fn = os.path.expanduser(fn)
    if 0 <= npy_vol.max() <= 1 and npy_vol.dtype == double:
        bitdepth = 16 if bitdepth is None else bitdepth
        imdtype = uint16 if bitdepth == 16 else uint8
        npy_vol = ((2**bitdepth - 1) * npy_vol).astype(imdtype)
    if (1 < npy_vol.max() < 256 and bitdepth is None) or bitdepth == 8:
        mode = 'L'
        mode_base = 'L'
        npy_vol = uint8(npy_vol)
    elif ((256 <= np.max(npy_vol) < 2**16 and bitdepth is None)
            or bitdepth == 16):
        mode = 'I;16'
        mode_base = 'I'
        npy_vol = uint16(npy_vol)
    else:
        mode = 'RGBA'
        mode_base = 'RGBA'
        npy_vol = uint32(npy_vol)
    for z, pl in enumerate(npy_vol):
        im = Image.new(mode_base, pl.T.shape)
        im.frombytes(pl.tobytes(), 'raw', mode)
        im.save(fn % z)
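
# Usage sketch (hypothetical filename): write a uint16 volume as one 16-bit
# png per plane along axis 0. `fn` must contain a format specifier for the
# plane index.
def _demo_write_png_stack(vol16):
    write_png_image_stack(vol16, '/tmp/plane.%04d.png', axis=0, bitdepth=16)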
### VTK structured points array format
def extract_segments(seg, ids):
    """Get a uint8 volume containing only the specified segment ids.

    Parameters
    ----------
    seg : array of int
        The input segmentation.
    ids : list of int, maximum length 255
        A list of segments to extract from `seg`.

    Returns
    -------
    segs : array of uint8
        A volume with 1, 2, ..., ``len(ids)`` labels where the required
        segments were, and 0 elsewhere.

    Notes
    -----
    This function is designed to output volumes to VTK format for viewing
    in ITK-SNAP.

    Examples
    --------
    >>> segments = array([[45, 45, 51, 51],
    ...                   [45, 83, 83, 51]])
    >>> extract_segments(segments, [83, 45])
    array([[2, 2, 0, 0],
           [2, 1, 1, 0]], dtype=uint8)
    """
    segs = np.zeros(seg.shape, dtype=np.uint8)
    for i, s in enumerate(ids):
        segs[seg == s] = i + 1
    return segs
numpy_type_to_vtk_string = {
    np.uint8: 'unsigned_char',   np.int8: 'char',
    np.uint16: 'unsigned_short', np.int16: 'short',
    np.uint32: 'unsigned_int',   np.int32: 'int',
    np.uint64: 'unsigned_long',  np.int64: 'long',
    np.float32: 'float',         np.float64: 'double'
}

vtk_string_to_numpy_type = \
    {v: k for k, v in numpy_type_to_vtk_string.items()}
def write_vtk(ar, fn, spacing=[1.0, 1.0, 1.0]):
    """Write 3D volume to VTK structured points format file.

    Code adapted from Erik Vidholm's writeVTK.m Matlab implementation.

    Parameters
    ----------
    ar : a numpy array, shape (M, N, P)
        The array to be written to disk.
    fn : string
        The desired output filename.
    spacing : iterable of float, optional (default: [1.0, 1.0, 1.0])
        The voxel spacing in x, y, and z.

    Returns
    -------
    None : None
        This function does not have a return value.
    """
    # write header
    f = open(fn, 'wb')
    f.write(b'# vtk DataFile Version 3.0\n')
    f.write(b'created by write_vtk (Python implementation by JNI)\n')
    f.write(b'BINARY\n')
    f.write(b'DATASET STRUCTURED_POINTS\n')
    f.write(str.encode(' '.join(['DIMENSIONS'] +
                                list(map(str, ar.shape[-1::-1]))) + '\n'))
    f.write(str.encode(' '.join(['ORIGIN'] +
                                list(map(str, zeros(3)))) + '\n'))
    f.write(str.encode(' '.join(['SPACING'] +
                                list(map(str, spacing))) + '\n'))
    f.write(str.encode('POINT_DATA ' + str(ar.size) + '\n'))
    f.write(str.encode('SCALARS image_data ' +
                       numpy_type_to_vtk_string[ar.dtype.type] + '\n'))
    f.write(b'LOOKUP_TABLE default\n')
    f.close()
    # write data as binary
    f = open(fn, 'ab')
    f.write(ar.tobytes())
    f.close()
def read_vtk(fin):
    """Read a numpy volume from a VTK structured points file.

    Code adapted from Erik Vidholm's readVTK.m Matlab implementation.

    Parameters
    ----------
    fin : string
        The input filename.

    Returns
    -------
    ar : numpy ndarray
        The array contained in the file.
    """
    f = open(fin, 'rb')
    num_lines_in_header = 10
    lines = [bytes.decode(f.readline()) for i in range(num_lines_in_header)]
    shape_line = [line for line in lines
                  if line.startswith('DIMENSIONS')][0]
    type_line = [line for line in lines
                 if line.startswith('SCALARS')
                 or line.startswith('VECTORS')][0]
    ar_shape = [int(b) for b in shape_line.rstrip().split(' ')[-1:0:-1]]
    ar_type = vtk_string_to_numpy_type[type_line.rstrip().split(' ')[2]]
    if type_line.startswith('VECTORS'):
        ar_shape.append(-1)
    ar = fromstring(f.read(), ar_type).reshape(ar_shape)
    return ar
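
# A minimal sketch (not part of the original module) round-tripping a small
# volume through write_vtk/read_vtk; the filename is hypothetical. write_vtk
# emits exactly the 10 header lines that read_vtk expects.
def _demo_vtk_roundtrip(fn='/tmp/demo.vtk'):
    vol = np.arange(24, dtype=np.uint8).reshape((2, 3, 4))
    write_vtk(vol, fn)
    assert (read_vtk(fn) == vol).all()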
### HDF5 format
def read_h5_stack(fn, group='stack', crop=[None]*6, **kwargs):
    """Read a volume in HDF5 format into numpy.ndarray.

    Parameters
    ----------
    fn : string
        The filename of the input HDF5 file.
    group : string, optional (default 'stack')
        The group within the HDF5 file containing the dataset.
    crop : list of int, optional (default '[None]*6', no crop)
        A crop to get of the volume of interest. Only available for 2D and
        3D volumes.

    Returns
    -------
    stack : numpy ndarray
        The stack contained in fn, possibly cropped.
    """
    fn = os.path.expanduser(fn)
    dset = h5py.File(fn, 'r')
    if group not in dset:
        raise ValueError("HDF5 file (%s) doesn't have group (%s)!" %
                         (fn, group))
    a = dset[group]
    if ndim(a) == 2:
        xmin, xmax, ymin, ymax = crop[:4]
        a = a[xmin:xmax, ymin:ymax]
    elif ndim(a) == 3:
        xmin, xmax, ymin, ymax, zmin, zmax = crop
        a = a[xmin:xmax, ymin:ymax, zmin:zmax]
    stack = array(a)
    dset.close()
    return stack
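
# Usage sketch (hypothetical file and group names): read a cropped
# subvolume from an HDF5 dataset stored under group 'stack', assuming the
# dataset is at least (100, 100, 20) in size.
def _demo_read_h5():
    sub = read_h5_stack('/data/volume.h5', group='stack',
                        crop=[0, 100, 0, 100, 10, 20])
    return sub.shape  # (100, 100, 10)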
def compute_sp_to_body_map(sps, bodies):
    """Return unique (sp, body) pairs from a superpixel map and segmentation.

    Parameters
    ----------
    sps : numpy ndarray, arbitrary shape
        The superpixel (supervoxel) map.
    bodies : numpy ndarray, same shape as sps
        The corresponding segmentation.

    Returns
    -------
    sp_to_body : numpy ndarray, shape (NUM_SPS, 2)

    Notes
    -----
    No checks are made for sane inputs. This means that incorrect input,
    such as non-matching shapes, or superpixels mapping to more than one
    segment, will result in undefined behavior downstream with no warning.
    """
    # unique *rows* of (sp, body); np.unique's `axis` keyword (numpy >= 1.13)
    # replaces the older reliance on passing a list of tuples.
    pairs = array(list(zip(sps.ravel(), bodies.ravel())))
    sp_to_body = unique(pairs, axis=0).astype(uint64)
    return sp_to_body
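
# Worked example (illustrative values): each distinct (superpixel, body)
# pair appears once in the output, sorted lexicographically.
def _demo_sp_to_body():
    sps = np.array([1, 1, 2, 2, 3])
    bodies = np.array([7, 7, 7, 7, 9])
    return compute_sp_to_body_map(sps, bodies)
    # array([[1, 7],
    #        [2, 7],
    #        [3, 9]], dtype=uint64)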
def write_mapped_segmentation(superpixel_map, sp_to_body_map, fn,
                              sp_group='stack',
                              sp_to_body_group='transforms'):
    """Write a mapped segmentation to an HDF5 file.

    Parameters
    ----------
    superpixel_map : numpy ndarray, arbitrary shape
    sp_to_body_map : numpy ndarray, shape (NUM_SPS, 2)
        A many-to-one map of superpixels to bodies (segments), specified as
        rows of (superpixel, body) pairs.
    fn : string
        The output filename.
    sp_group : string, optional (default 'stack')
        The group within the HDF5 file to store the superpixel map.
    sp_to_body_group : string, optional (default 'transforms')
        The group within the HDF5 file to store the superpixel to body map.

    Returns
    -------
    None
    """
    fn = os.path.expanduser(fn)
    fout = h5py.File(fn, 'w')
    fout.create_dataset(sp_group, data=superpixel_map)
    fout.create_dataset(sp_to_body_group, data=sp_to_body_map)
    fout.close()
def read_mapped_segmentation(fn, sp_group='stack',
                             sp_to_body_group='transforms'):
    """Read a volume in mapped HDF5 format into a numpy.ndarray pair.

    Parameters
    ----------
    fn : string
        The filename to open.
    sp_group : string, optional (default 'stack')
        The group within the HDF5 file where the superpixel map is stored.
    sp_to_body_group : string, optional (default 'transforms')
        The group within the HDF5 file where the superpixel to body map is
        stored.

    Returns
    -------
    segmentation : numpy ndarray, same shape as 'superpixels', int type
        The segmentation induced by the superpixels and map.
    """
    sps, sp2body = read_mapped_segmentation_raw(fn, sp_group,
                                                sp_to_body_group)
    segmentation = apply_segmentation_map(sps, sp2body)
    return segmentation
def apply_segmentation_map(superpixels, sp_to_body_map):
    """Return a segmentation from superpixels and a superpixel to body map.

    Parameters
    ----------
    superpixels : numpy ndarray, arbitrary shape, int type
        A superpixel (or supervoxel) map (aka label field).
    sp_to_body_map : numpy ndarray, shape (NUM_SUPERPIXELS, 2), int type
        An array of (superpixel, body) map pairs.

    Returns
    -------
    segmentation : numpy ndarray, same shape as 'superpixels', int type
        The segmentation induced by the superpixels and map.
    """
    forward_map = np.zeros(sp_to_body_map[:, 0].max() + 1,
                           sp_to_body_map.dtype)
    forward_map[sp_to_body_map[:, 0]] = sp_to_body_map[:, 1]
    segmentation = forward_map[superpixels]
    return segmentation
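
# Worked example of the forward-map indexing used above (illustrative
# values): superpixels {1, 2, 3} map to bodies {7, 7, 9}.
def _demo_apply_segmentation_map():
    superpixels = np.array([[1, 1, 2],
                            [3, 3, 2]])
    sp_to_body = np.array([[1, 7], [2, 7], [3, 9]])
    return apply_segmentation_map(superpixels, sp_to_body)
    # array([[7, 7, 7],
    #        [9, 9, 7]])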
def read_mapped_segmentation_raw(fn, sp_group='stack',
                                 sp_to_body_group='transforms'):
    """Read a volume in mapped HDF5 format into a numpy.ndarray pair.

    Parameters
    ----------
    fn : string
        The filename to open.
    sp_group : string, optional (default 'stack')
        The group within the HDF5 file where the superpixel map is stored.
    sp_to_body_group : string, optional (default 'transforms')
        The group within the HDF5 file where the superpixel to body map is
        stored.

    Returns
    -------
    sp_map : numpy ndarray, arbitrary shape
        The superpixel (or supervoxel) map.
    sp_to_body_map : numpy ndarray, shape (NUM_SUPERPIXELS, 2)
        The superpixel to body (segment) map, as (superpixel, body) pairs.
    """
    fn = os.path.expanduser(fn)
    dset = h5py.File(fn, 'r')
    if sp_group not in dset:
        raise ValueError(
            "HDF5 file (%s) doesn't have group (%s)!" % (fn, sp_group))
    if sp_to_body_group not in dset:
        raise ValueError(
            "HDF5 file (%s) doesn't have group (%s)!" %
            (fn, sp_to_body_group))
    sp_map = array(dset[sp_group])
    sp_to_body_map = array(dset[sp_to_body_group])
    dset.close()
    return sp_map, sp_to_body_map
def write_h5_stack(npy_vol, fn, group='stack', compression=None,
                   chunks=None, shuffle=None, attrs=None):
    """Write a numpy.ndarray 3D volume to an HDF5 file.

    Parameters
    ----------
    npy_vol : numpy ndarray
        The array to be saved to HDF5.
    fn : string
        The output filename.
    group : string, optional (default: 'stack')
        The group within the HDF5 file to write to.
    compression : {None, 'gzip', 'szip', 'lzf'}, optional (default: None)
        The compression to use, if any. Note that 'lzf' is only available
        through h5py, so implementations in other languages will not be
        able to read files created with this compression.
    chunks : tuple, True, or None (default: None)
        Whether to use chunking in the HDF5 dataset. Default is None. True
        lets h5py choose a chunk size automatically. Otherwise, use a tuple
        of int of the same length as `npy_vol.ndim`. From the h5py
        documentation: "In the real world, chunks of size 10kB - 300kB work
        best, especially for compression. Very small chunks lead to lots of
        overhead in the file, while very large chunks can result in
        inefficient I/O."
    shuffle : bool, optional
        Shuffle the bytes on disk to improve compression efficiency.
    attrs : dict, optional
        A dictionary, keyed by string, of attributes to append to the
        dataset.
    """
    fn = os.path.expanduser(fn)
    fout = h5py.File(fn, 'a')
    if group in fout:
        del fout[group]
    fout.create_dataset(group, data=npy_vol, compression=compression,
                        chunks=chunks, shuffle=shuffle)
    if attrs is not None:
        for attr, value in attrs.items():
            fout[group].attrs[attr] = value
    fout.close()
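
# Usage sketch (hypothetical filename): a gzip-compressed, auto-chunked
# dataset with a resolution attribute attached.
def _demo_write_h5(vol):
    write_h5_stack(vol, '/tmp/out.h5', group='stack', compression='gzip',
                   chunks=True, shuffle=True,
                   attrs={'resolution': (40.0, 4.0, 4.0)})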
### Raveler format
def ucm_to_raveler(ucm, sp_threshold=0.0, body_threshold=0.1, **kwargs):
    """Return Raveler map from a UCM.

    Parameters
    ----------
    ucm : numpy ndarray, shape (M, N, P)
        An ultrametric contour map. This is a map of scored segment
        boundaries such that if A, B, and C are segments, then
        score(A, B) = score(B, C) >= score(A, C), for some permutation of
        A, B, and C. A hierarchical agglomeration process produces a UCM.
    sp_threshold : float, optional (default: 0.0)
        The value at which to threshold the UCM to obtain the superpixels.
    body_threshold : float, optional (default: 0.1)
        The value at which to threshold the UCM to obtain the
        segments/bodies. The condition `body_threshold >= sp_threshold`
        should hold in order to obtain sensible results.
    **kwargs : dict, optional
        Keyword arguments to be passed through to `segs_to_raveler`.

    Returns
    -------
    superpixels : numpy ndarray, shape (M, N, P)
        The superpixel map. Non-zero superpixels are unique to each plane.
        That is, `np.unique(superpixels[i])` and `np.unique(superpixels[j])`
        have only 0 as their intersection.
    sp_to_segment : numpy ndarray, shape (Q, 3)
        The superpixel to segment map. Segments are unique to each plane.
        The first number on each line is the plane number.
    segment_to_body : numpy ndarray, shape (R, 2)
        The segment to body map.
    """
    sps = label(ucm < sp_threshold)[0]
    bodies = label(ucm <= body_threshold)[0]
    return segs_to_raveler(sps, bodies, **kwargs)
def segs_to_raveler(sps, bodies, min_size=0, do_conn_comp=False,
                    sps_out=None):
    """Return a Raveler tuple from 3D superpixel and body maps.

    Parameters
    ----------
    sps : numpy ndarray, shape (M, N, P)
        The supervoxel map.
    bodies : numpy ndarray, shape (M, N, P)
        The body map. Superpixels should not map to more than one body.
    min_size : int, optional (default: 0)
        Superpixels smaller than this size on a particular plane are
        blacked out.
    do_conn_comp : bool (default: False)
        Whether to do a connected components operation on each plane. This
        is required if we want superpixels to be contiguous on each plane,
        since 3D-contiguous superpixels are not guaranteed to be contiguous
        along a slice.
    sps_out : numpy ndarray, shape (M, N, P), optional (default: None)
        A Raveler-compatible superpixel map, meaning that superpixels are
        unique to each plane along axis 0. (See `superpixels` in the return
        values.) If provided, this saves significant computation time.

    Returns
    -------
    superpixels : numpy ndarray, shape (M, N, P)
        The superpixel map. Non-zero superpixels are unique to each plane.
        That is, `np.unique(superpixels[i])` and `np.unique(superpixels[j])`
        have only 0 as their intersection.
    sp_to_segment : numpy ndarray, shape (Q, 3)
        The superpixel to segment map. Segments are unique to each plane.
        The first number on each line is the plane number.
    segment_to_body : numpy ndarray, shape (R, 2)
        The segment to body map.
    """
    if sps_out is None:
        sps_out = raveler_serial_section_map(sps, min_size, do_conn_comp,
                                             False)
    segment_map = raveler_serial_section_map(bodies, min_size, do_conn_comp)
    # unique (segment, body) rows; np.unique's `axis` keyword (numpy >= 1.13)
    # replaces the older reliance on passing a list of tuples.
    segment_to_body = unique(
        array(list(zip(segment_map.ravel(), bodies.ravel()))), axis=0)
    segment_to_body = segment_to_body[segment_to_body[:, 0] != 0]
    segment_to_body = concatenate((array([[0, 0]]), segment_to_body), axis=0)
    sp_to_segment = []
    for i, (sp_map_i, segment_map_i, body_map_i) in \
            enumerate(zip(sps_out, segment_map, bodies)):
        segment_map_i *= sp_map_i.astype(bool)
        valid = (sp_map_i != 0) | (segment_map_i == 0)
        sp_to_segment.append(unique(
            array(list(zip(it.repeat(i), sp_map_i[valid],
                           segment_map_i[valid]))), axis=0))
        logging.debug('plane %i done' % i)
    logging.info('total superpixels before: ' + str(len(unique(sps))) +
                 ' total superpixels after: ' + str(len(unique(sps_out))))
    sp_to_segment = concatenate(sp_to_segment, axis=0)
    return sps_out, sp_to_segment, segment_to_body
def raveler_serial_section_map(nd_map, min_size=0, do_conn_comp=False,
                               globally_unique_ids=True):
    """Produce `serial_section_map` and label one corner of each plane as 0.

    Raveler chokes when there are no pixels with label 0 on a plane, so
    this function produces the serial section map as normal but then adds
    a 0 to the [0, 0] corner of each plane, IF the volume doesn't already
    have 0 pixels.

    Notes
    -----
    See `serial_section_map` for more info.
    """
    nd_map = serial_section_map(nd_map, min_size, do_conn_comp,
                                globally_unique_ids)
    if not (nd_map == 0).any():
        nd_map[:, 0, 0] = 0
    return nd_map
def serial_section_map(nd_map, min_size=0, do_conn_comp=False,
                       globally_unique_ids=True):
    """Produce a plane-by-plane superpixel map with unique IDs.

    Raveler requires sps to be unique and different on each plane. This
    function converts a fully 3D superpixel map to a serial-2D superpixel
    map compatible with Raveler.

    Parameters
    ----------
    nd_map : np.ndarray, int, shape (M, N, P)
        The original superpixel map.
    min_size : int (optional, default 0)
        Remove superpixels smaller than this size (on each plane).
    do_conn_comp : bool (optional, default False)
        In some cases, a single supervoxel may result in two disconnected
        superpixels in 2D. Set to True to force these to have different
        IDs.
    globally_unique_ids : bool (optional, default True)
        If True, every plane has unique IDs, with plane n having IDs {i1,
        i2, ..., in} and plane n+1 having IDs {in+1, in+2, ..., in+ip},
        and so on.

    Returns
    -------
    relabeled_planes : np.ndarray, int, shape (M, N, P)
        A volume equal to nd_map but with superpixels relabeled along axis
        0. That is, the input volume is reinterpreted as M slices of shape
        (N, P).
    """
    if do_conn_comp:
        label_fct = label
    else:
        def label_fct(a):
            relabeled, fmap, imap = evaluate.relabel_from_one(a)
            return relabeled, len(imap)

    def remove_small(a):
        return morpho.remove_small_connected_components(a, min_size)

    mplanes = map(remove_small, nd_map)
    relabeled_planes, nids_per_plane = zip(*map(label_fct, mplanes))
    start_ids = concatenate((array([0], int),
                             cumsum(nids_per_plane)[:-1])) \
        if globally_unique_ids else [0] * len(nids_per_plane)
    relabeled_planes = [(relabeled_plane + start_id)[newaxis, ...]
                        for relabeled_plane, start_id
                        in zip(relabeled_planes, start_ids)]
    return concatenate(relabeled_planes, axis=0)
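
# Illustrative sketch (toy data, added for clarity): a supervoxel spanning
# two planes receives a distinct ID on each plane when
# globally_unique_ids=True, since each plane's IDs are offset by the number
# of IDs in all preceding planes.
def _demo_serial_section_map():
    sv = np.array([[[1, 1], [2, 2]],
                   [[1, 1], [2, 2]]])
    return serial_section_map(sv)
    # plane 0 keeps its relabeled IDs; plane 1's IDs are disjoint from
    # plane 0's.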
def write_to_raveler(sps, sp_to_segment, segment_to_body, directory,
                     gray=None, raveler_dir='/usr/local/raveler-hdf',
                     nproc_contours=16, body_annot=None):
    """Output a segmentation to Raveler format.

    Parameters
    ----------
    sps : np.ndarray, int, shape (nplanes, nx, ny)
        The superpixel map. Superpixels can only occur on one plane.
    sp_to_segment : np.ndarray, int, shape (nsps + nplanes, 3)
        Superpixel-to-segment map as a 3 column list of (plane number,
        superpixel id, segment id). Segments must be unique to a plane,
        and each plane must contain the map {0: 0}.
    segment_to_body : np.ndarray, int, shape (nsegments, 2)
        The segment to body map.
    directory : string
        The directory in which to write the stack. This directory and all
        necessary subdirectories will be created.
    gray : np.ndarray, uint8 or uint16, shape (nplanes, nx, ny) (optional)
        The grayscale images corresponding to the superpixel maps.
    raveler_dir : string (optional, default '/usr/local/raveler-hdf')
        Where Raveler is installed.
    nproc_contours : int (optional, default 16)
        How many processes to use when generating the Raveler contours.
    body_annot : dict or np.ndarray (optional)
        Either a dictionary to write to JSON in Raveler body annotation
        format, or a numpy ndarray of the segmentation from which to
        compute orphans and non-traversing bodies (which then get written
        out as body annotations).

    Returns
    -------
    None

    Notes
    -----
    Raveler is the EM segmentation proofreading tool developed in-house at
    Janelia for the FlyEM project.
    """
    sp_path = os.path.join(directory, 'superpixel_maps')
    im_path = os.path.join(directory, 'grayscale_maps')
    tile_path = os.path.join(directory, 'tiles')

    if not os.path.exists(directory):
        os.makedirs(directory)

    # write superpixel->segment->body maps
    savetxt(os.path.join(directory, 'superpixel_to_segment_map.txt'),
            sp_to_segment, '%i')
    savetxt(os.path.join(directory, 'segment_to_body_map.txt'),
            segment_to_body, '%i')

    # write superpixels
    if not os.path.exists(sp_path):
        os.mkdir(sp_path)
    write_png_image_stack(sps, os.path.join(sp_path, 'sp_map.%05i.png'),
                          bitdepth=16, axis=0)

    # write grayscale
    if gray is not None:
        if not os.path.exists(im_path):
            os.mkdir(im_path)
        write_png_image_stack(gray, os.path.join(im_path, 'img.%05d.png'),
                              axis=0)

    # body annotations
    if body_annot is not None:
        if type(body_annot) == ndarray:
            orphans = morpho.orphans(body_annot)
            non_traversing = morpho.non_traversing_segments(body_annot)
            body_annot = raveler_body_annotations(orphans, non_traversing)
        write_json(body_annot,
                   os.path.join(directory, 'annotations-body.json'))

    # make tiles, bounding boxes, and contours, and compile HDF5 stack info.
    with tmp.TemporaryFile() as tmp_stdout:
        # initialize return codes so the warning below cannot raise a
        # NameError if a call fails before assignment
        r1 = r2 = r3 = None
        try:
            def call(arglist):
                return subprocess.call(arglist, stdout=tmp_stdout)
            r1 = call(['createtiles', im_path, sp_path, tile_path])
            r2 = call(['bounds', directory])
            r3 = call(['compilestack', directory])
        except Exception:
            logging.warning(
                'Error during Raveler export post-processing step. '
                'Possible causes are that you do not have Raveler '
                'installed or you did not specify the correct '
                'installation path.')
            logging.warning('Return codes: %s, %s, %s' % (r1, r2, r3))

    # make permissions friendly for proofreaders.
    try:
        subprocess.call(['chmod', '-R', 'go=u', directory])
    except Exception:
        logging.warning('Could not change Raveler export permissions.')
def raveler_output_shortcut(svs, seg, gray, outdir, sps_out=None):
    """Compute the Raveler format and write to directory, all at once.

    Parameters
    ----------
    svs : np.ndarray, int, shape (M, N, P)
        The supervoxel map.
    seg : np.ndarray, int, shape (M, N, P)
        The segmentation map. It is assumed that no supervoxel crosses
        any segment boundary.
    gray : np.ndarray, uint8, shape (M, N, P)
        The grayscale EM images corresponding to the above segmentations.
    outdir : string
        The export directory for the Raveler volume.
    sps_out : np.ndarray, int, shape (M, N, P) (optional)
        The precomputed serial section 2D superpixel map. Output will be
        much faster if this is provided.

    Returns
    -------
    sps_out : np.ndarray, int, shape (M, N, P)
        The computed serial section 2D superpixel map. Keep this when
        making multiple calls to `raveler_output_shortcut` with the same
        supervoxel map.
    """
    sps_out, sp2seg, seg2body = segs_to_raveler(svs, seg, sps_out=sps_out)
    write_to_raveler(sps_out, sp2seg, seg2body, outdir, gray,
                     body_annot=seg)
    return sps_out
def raveler_body_annotations(orphans, non_traversing=None):
    """Return a Raveler body annotation dictionary of orphan segments.

    Orphans are labeled as body annotations with `not sure` status and a
    string indicating `orphan` in the comments field.

    Non-traversing segments have only one contact with the surface of the
    volume, and are labeled `does not traverse` in the comments.

    Parameters
    ----------
    orphans : iterable of int
        The ID numbers corresponding to orphan segments.
    non_traversing : iterable of int (optional, default None)
        The ID numbers of segments having only one exit point in the
        volume.

    Returns
    -------
    body_annotations : dict
        A dictionary containing entries for 'data' and 'metadata' as
        specified in the Raveler body annotations format [1]_, [2]_.

    References
    ----------
    .. [1] https://wiki.janelia.org/wiki/display/flyem/body+annotation+file+format
    .. [2] https://wiki.janelia.org/wiki/display/flyem/generic+file+format
    """
    data = [{'status': 'not sure', 'comment': 'orphan', 'body ID': int(o)}
            for o in orphans]
    if non_traversing is not None:
        data.extend([{'status': 'not sure',
                      'comment': 'does not traverse',
                      'body ID': int(n)} for n in non_traversing])
    metadata = {'description': 'body annotations', 'file version': 2}
    return {'data': data, 'metadata': metadata}
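
# Example of the resulting structure (illustrative IDs); the returned
# dictionary is ready to pass to write_json.
def _demo_body_annotations():
    annot = raveler_body_annotations([101], non_traversing=[202])
    # annot['data'][0] == {'status': 'not sure', 'comment': 'orphan',
    #                      'body ID': 101}
    # annot['metadata'] == {'description': 'body annotations',
    #                       'file version': 2}
    return annot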
def write_json(annot, fn='annotations-body.json', directory=None):
    """Write an annotation dictionary in Raveler format to a JSON file.

    The annotation file format is described in:
    https://wiki.janelia.org/wiki/display/flyem/body+annotation+file+format
    and:
    https://wiki.janelia.org/wiki/display/flyem/generic+file+format

    Parameters
    ----------
    annot : dict
        A body annotations dictionary (described in pages above).
    fn : string (optional, default 'annotations-body.json')
        The filename to which to write the file.
    directory : string (optional, default None, or '.')
        A directory in which to write the file.

    Returns
    -------
    None
    """
    if directory is not None:
        fn = join_path(directory, fn)
    with open(fn, 'w') as f:
        json.dump(annot, f, indent=2)
def raveler_rgba_to_int(im, ignore_alpha=True):
    """Convert a volume using Raveler's RGBA encoding to int. [1]_

    Parameters
    ----------
    im : np.ndarray, shape (M, N, P, 4)
        The image stack to be converted.
    ignore_alpha : bool, optional
        By default, the alpha channel does not encode anything. However,
        if we ever need 32 bits, it would be used. This function supports
        that with `ignore_alpha=False`. (default is True.)

    Returns
    -------
    im_int : np.ndarray, shape (M, N, P)
        The label volume.

    References
    ----------
    .. [1] https://wiki.janelia.org/wiki/display/flyem/Proofreading+data+and+formats
    """
    if im.ndim == 4 and im.shape[3] == 4:
        if ignore_alpha:
            im = im[..., :3]
        im_int = (im * 255 ** np.arange(im.shape[3])).sum(axis=3)
    else:
        im_int = im
    return im_int
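
# Worked example of the arithmetic implemented above (illustrative values):
# with this encoding, a pixel (r, g, b) decodes to r + 255*g + 255**2*b.
def _demo_rgba_decode():
    im = np.zeros((1, 1, 1, 4), dtype=np.uint32)
    im[0, 0, 0] = (1, 2, 0, 255)  # r=1, g=2 -> 1 + 255*2 = 511
    return raveler_rgba_to_int(im)  # array([[[511]]])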
def raveler_to_labeled_volume(rav_export_dir, get_glia=False,
                              use_watershed=False, probability_map=None,
                              crop=None):
    """Import a raveler export stack into a labeled segmented volume.

    Parameters
    ----------
    rav_export_dir : string
        The directory containing the Raveler stack.
    get_glia : bool (optional, default False)
        Return the segment numbers corresponding to glia, if available.
    use_watershed : bool (optional, default False)
        Fill in 0-labeled voxels using watershed.
    probability_map : np.ndarray, same shape as volume to be read (optional)
        If `use_watershed` is True, use `probability_map` as the landscape.
        If this is not provided, it uses a flat landscape.
    crop : tuple of int (optional, default None)
        A 6-tuple of [xmin, xmax, ymin, ymax, zmin, zmax].

    Returns
    -------
    output_volume : np.ndarray, shape (Z, X, Y)
        The segmentation in the Raveler volume.
    glia : list of int (optional, only returned if `get_glia` is True)
        The IDs in the segmentation corresponding to glial cells.
    """
    from . import morpho
    spmap = read_image_stack(
        os.path.join(rav_export_dir, 'superpixel_maps', '*.png'), crop=crop)
    spmap = raveler_rgba_to_int(spmap)
    sp2seg_list = np.loadtxt(
        os.path.join(rav_export_dir, 'superpixel_to_segment_map.txt'),
        uint32)
    seg2bod_list = np.loadtxt(
        os.path.join(rav_export_dir, 'segment_to_body_map.txt'), uint32)
    sp2seg = {}
    max_sp = sp2seg_list[:, 1].max()
    start_plane = sp2seg_list[:, 0].min()
    for z, sp, seg in sp2seg_list:
        if z not in sp2seg:
            sp2seg[z] = zeros(max_sp + 1, uint32)
        sp2seg[z][sp] = seg
    max_seg = seg2bod_list[:, 0].max()
    seg2bod = zeros(max_seg + 1, uint32)
    seg2bod[seg2bod_list[:, 0]] = seg2bod_list[:, 1]
    initial_output_volume = zeros_like(spmap)
    for i, m in enumerate(spmap):
        j = start_plane + i
        initial_output_volume[i] = seg2bod[sp2seg[j][m]]
    if use_watershed:
        probs = np.ones_like(spmap) if probability_map is None \
            else probability_map
        output_volume = morpho.watershed(probs, seeds=initial_output_volume)
    else:
        output_volume = initial_output_volume
    if ((output_volume[:, 0, 0] == 0).all() and
            (output_volume == 0).sum() == output_volume.shape[0]):
        output_volume[:, 0, 0] = output_volume[:, 0, 1]
    if get_glia:
        annots = json.load(
            open(os.path.join(rav_export_dir, 'annotations-body.json'),
                 'r'))
        glia = [a['body ID'] for a in annots['data']
                if a.get('comment', None) == 'glia']
        return output_volume, glia
    else:
        return output_volume
### Ilastik formats

# obtained from Ilastik 0.5.4
ilastik_label_colors = \
    [0xffff0000, 0xff00ff00, 0xffffff00, 0xff0000ff,
     0xffff00ff, 0xff808000, 0xffc0c0c0, 0xfff2022d]
def write_ilastik_project(images, labels, fn, label_names=None):
    """Write one or more image volumes and corresponding labels to Ilastik.

    Parameters
    ----------
    images : np.ndarray or list of np.ndarray, shapes (M_i, N_i[, P_i])
        The grayscale images to be saved.
    labels : np.ndarray or list of np.ndarray, same shapes as `images`
        The label maps corresponding to the images.
    fn : string
        The filename to save the project in.
    label_names : list of string (optional)
        The names corresponding to each label in `labels`. (Not
        implemented!)

    Returns
    -------
    None

    Notes
    -----
    Limitations: Assumes the same labels are used for all images. Supports
    only grayscale images and volumes, and a maximum of 8 labels. Requires
    at least one unlabeled voxel in the label field.
    """
    f = h5py.File(fn, 'w')
    if type(images) != list:
        images = [images]
        labels = [labels]
    ulbs = unique(concatenate(list(map(unique, labels))))[1:]
    colors = array(ilastik_label_colors[:len(ulbs)])
    names = ['Label %i' % i for i in ulbs]
    names = array(names, '|S%i' % max(map(len, names)))
    label_attributes = {'color': colors, 'name': names, 'number': ulbs}
    for i, (im, lb) in enumerate(zip(images, labels)):
        if im.ndim == 2:
            new_shape = (1, 1) + im.shape + (1,)
        elif im.ndim == 3:
            new_shape = (1,) + im.shape + (1,)
        else:
            raise ValueError('Unsupported number of dimensions in image.')
        im = im.reshape(new_shape)
        lb = lb.reshape(new_shape)
        root = 'DataSets/dataItem%02i/' % i
        f[root + 'data'] = im
        f[root + 'labels'] = lb
        for k, v in label_attributes.items():
            f[root + 'labels'].attrs[k] = v
        f[root].attrs['Name'] = ''
        f[root].attrs['fileName'] = ''
    for subgroup in ['Description', 'Labeler', 'Name']:
        f['Project/%s' % subgroup] = array('', dtype='|S1')
    f['ilastikVersion'] = array(0.5)
    f.close()
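
# Usage sketch (hypothetical data and path): save one grayscale volume and
# its label map to an Ilastik 0.5 project file. Per the limitations above,
# `labels` must contain at least one unlabeled (0) voxel.
def _demo_ilastik_project(im, labels):
    write_ilastik_project(im, labels, '/tmp/project.ilp')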
def write_ilastik_batch_volume(im, fn):
    """Write a volume to an HDF5 file for Ilastik batch processing.

    Parameters
    ----------
    im : np.ndarray, shape (M, N[, P])
        The image volume to be saved.
    fn : string
        The filename in which to save the volume.

    Returns
    -------
    None
    """
    if im.ndim == 2:
        im = im.reshape((1, 1) + im.shape + (1,))
    elif im.ndim == 3:
        im = im.reshape((1,) + im.shape + (1,))
    else:
        raise ValueError('Unsupported number of dimensions in image.')
    write_h5_stack(im, fn, group='/volume/data')
def read_prediction_from_ilastik_batch(fn, **kwargs):
    """Read the prediction produced by Ilastik from batch processing.

    Parameters
    ----------
    fn : string
        The filename to read from.
    group : string (optional, default '/volume/prediction')
        Where to read from in the HDF5 file hierarchy.
    single_channel : bool (optional, default True)
        Read only the 0th channel (final dimension) from the volume.

    Returns
    -------
    a : np.ndarray
        The prediction volume.
    """
    if 'group' not in kwargs:
        kwargs['group'] = '/volume/prediction'
    a = squeeze(read_h5_stack(fn, **kwargs))
    if kwargs.get('single_channel', True):
        a = a[..., 0]
    return a
def read_cremi(fn, datasets=['/volumes/raw', '/volumes/labels/neuron_ids']):
    """Read volume formatted as described in CREMI data challenge [1]_.

    The format is HDF5, with:
        - raw image data (uint8) in: /volumes/raw
        - (optional) membrane prediction data (uint8, inverted) in:
          /volumes/membrane
        - synaptic cleft annotations in: /volumes/labels/clefts
        - neuron ids (uint64) in: /volumes/labels/neuron_ids
        - (optional) fragment data (uint64) in: /volumes/labels/fragments

    We currently ignore the synaptic cleft annotations, and return only
    the raw image and the neuron ids.

    Parameters
    ----------
    fn : string
        The input filename.
    datasets : list of string, optional
        The HDF5 groups to read (default: the raw image and the neuron
        ids).

    Returns
    -------
    datasets : list of array
        The arrays corresponding to the requested datasets.

    References
    ----------
    .. [1] https://cremi.org/data/
    """
    out = [read_h5_stack(fn, group=ds) for ds in datasets]
    return out
def write_cremi(data_dict, fn, resolution=(40., 4., 4.)):
    """Write a volume formatted as described in CREMI data challenge [1]_.

    Parameters
    ----------
    data_dict : dictionary of string to arrays
        The data dictionary mapping HDF groups to arrays.
    fn : string
        The filename to write to.
    resolution : tuple of float, optional
        The resolution along each axis of the datasets. Currently, this is
        the same for each dataset written.

    References
    ----------
    .. [1] https://cremi.org/data/
    """
    for group, data in data_dict.items():
        write_h5_stack(data, fn, group=group, compression='gzip',
                       attrs={'resolution': resolution})
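
# Roundtrip sketch (hypothetical filename and toy data): write raw data and
# neuron ids in the CREMI layout, then read them back with read_cremi's
# default dataset list.
def _demo_cremi_roundtrip(fn='/tmp/sample.hdf'):
    raw = np.zeros((10, 100, 100), dtype=np.uint8)
    ids = np.ones((10, 100, 100), dtype=np.uint64)
    write_cremi({'/volumes/raw': raw,
                 '/volumes/labels/neuron_ids': ids}, fn)
    raw2, ids2 = read_cremi(fn)
    return raw2.shape, ids2.dtype  # (10, 100, 100), dtype('uint64')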