# Source code for nanshe.misc.random_dictionary_learning_data
"""
``random_dictionary_learning_data`` supports generating synthetic data.
===============================================================================
Overview
===============================================================================
The module ``random_dictionary_learning_data`` provides a way of generating
synthetic data for testing the segmentation algorithm against. Moving forward
the useful content in here will be refactored and moved into the ``data``
module. So, depending on this module is unwise.
===============================================================================
API
===============================================================================
"""
__author__ = "John Kirkham <kirkhamj@janelia.hhmi.org>"
__date__ = "Apr 17, 2014 11:27:08 EDT$"
import warnings
warnings.warn(
"The module `random_dictionary_learning_data` is deprecated." +
"Please consider using `data` instead." +
"Relevant content from this module is being refactored and moved there.",
DeprecationWarning
)
import numpy
import scipy
import scipy.ndimage
import nanshe.util.prof
import nanshe.util.iters
# Get the module-level trace logger from the project's profiling utilities
# (used to trace calls within this module).
trace_logger = nanshe.util.prof.getTraceLogger(__name__)
class MappingDiscreteGeometricDistributionGenerator(object):
    """
    A random generator of groups. Each group has a size that is
    geometrically distributed. However, the individuals chosen for the
    group are all equally likely.
    """

    def __init__(self, *args):
        """
        Sets the arguments for use to compose the groups.

        Args:
            *args:          Any variety of useful items for drawing.
        """
        self.args = args

    def __call__(self, p, size=1):
        """
        Generates a number of groups equal to ``size`` with each group
        size being distributed geometrically by ``p``.

        Args:
            p(float):       the probability of success for a geometric
                            distribution (starts with 1 so has mean 1/p).

            size(int):      the number of groups to make.

        Returns:
            results(list):  a list of groups of arguments drawn (None if
                            no arguments).
        """
        # Get a uniform distribution over the elements to fill each group.
        uni_gen = MappingDiscreteUniformDistributionGenerator(*self.args)

        # Draw the sizes for each group.
        group_sizes = numpy.random.geometric(p, size)

        # Using the sizes, draw elements to fill groups up to the right
        # size.
        results = [
            uni_gen(group_sizes[i]) for i in nanshe.util.iters.irange(size)
        ]

        return(results)
class DictionaryLearningRandomDataSample(object):
    """
    Essentially a struct with its values set at runtime by
    DictionaryLearningRandomDataGenerator calls.
    """

    def __init__(self):
        """
        Default constructor just to establish the attribute slots; each
        is filled in later by the generator.
        """
        # Array marking the locations of the synthetic objects.
        self.points = None
        # Stack of frames marking which points are active per group.
        self.centroid_activation_frames = None
        # Generated frames before background noise is added.
        self.noiseless_frames = None
        # Final frames (noiseless frames plus background noise).
        self.frames = None
class DictionaryLearningRandomDataGenerator(object):
    """
    A Random Generator that builds pseudo-data similar in nature to that
    which the ADINA algorithm is run on.
    """

    def __init__(self,
                 frame_shape,
                 num_objects,
                 num_groups,
                 num_frames,
                 mean_group_size,
                 object_spread,
                 object_max_intensity,
                 object_min_intensity,
                 background_noise_intensity):
        """
        Builds a DictionaryLearningRandomDataGenerator for draws.

        Args:
            frame_shape(tuple):                 a tuple of ints for
                                                constructing a numpy
                                                array.

            num_objects(int):                   the number of objects that
                                                can possibly be active
                                                (i.e. neurons present
                                                whether active or not).

            num_groups(int):                    number of groups of
                                                objects that will be
                                                active (i.e. number of
                                                groups of neurons seen to
                                                be active).

            num_frames(int):                    number of frames for any
                                                group to be active in the
                                                pseudo-video.

            mean_group_size(float):             average group size
                                                (average for a geometric
                                                distribution).

            object_spread(float):               how big an object is on
                                                average.

            object_max_intensity(float):        the highest intensity
                                                possible.

            object_min_intensity(float):        the lowest intensity
                                                possible.

            background_noise_intensity(float):  how much noise there is in
                                                the background.
        """
        self.frame_shape = frame_shape
        self.num_objects = num_objects
        self.num_groups = num_groups
        self.num_frames = num_frames
        self.mean_group_size = mean_group_size
        self.object_spread = object_spread
        self.object_max_intensity = object_max_intensity
        self.object_min_intensity = object_min_intensity
        self.background_noise_intensity = background_noise_intensity

        # Width of the intensity interval that active points are rescaled
        # into (see __call__).
        self.object_intensity_range = self.object_max_intensity - \
            self.object_min_intensity

    def __call__(self, num_runs=1, seed=None):
        """
        Constructs a series of pseudo-videos.

        Args:
            num_runs(int):      number of pseudo-videos to generate.

            seed(int):          uses the seed for numpy.random.seed if
                                provided.

        Returns:
            results(list):      a list of
                                DictionaryLearningRandomDataSample
                                instances with relevant data from
                                generation included.
        """
        # Use the seed provided. (numpy.random.seed(None) reseeds from the
        # OS, so passing no seed gives non-reproducible draws.)
        numpy.random.seed(seed)

        # A list of DictionaryLearningRandomDataSample instances.
        results = []
        for i in nanshe.util.iters.irange(num_runs):
            # Where the result will be stored.
            each_result = DictionaryLearningRandomDataSample()

            # Generates a numpy array that has a shape of self.frame_shape
            # with a fixed number of randomly selected (equally likely)
            # non-zero entries.
            each_result.points = NumpyRandomArrayDiscreteUniformDistributionGenerator(
                self.frame_shape)(self.num_objects).astype(float)

            # Creates a point generator that selects from the non-zero
            # points generated for activation to create groups, starting
            # from an index array (tuple of 1D numpy.ndarrays).
            selected_points = each_result.points.nonzero()
            # Convert to a single numpy.ndarray.
            selected_points = numpy.array(selected_points)
            # Simpler, lightweight way of doing zip(*selected_points).
            selected_points = selected_points.T
            selected_points = selected_points.tolist()

            point_groups_gen = MappingDiscreteGeometricDistributionGenerator(
                *selected_points
            )

            # Using a mean group size and the number of groups, creates
            # point groups (these should in some way relate to the basis
            # images).
            point_groups = point_groups_gen(
                1.0 / float(self.mean_group_size), self.num_groups)

            # Will store the essential frames that indicate which points
            # will be active in each frame.
            each_result.centroid_activation_frames = []
            for each_point_group in point_groups:
                # Get an index array for this group's points.
                each_point_group_index_array = nanshe.util.iters.list_indices_to_index_array(
                    each_point_group
                )

                # Create an empty activation frame.
                each_centroid_activation_frame = numpy.zeros(self.frame_shape)

                # Set the active points to uniformly random values.
                each_centroid_activation_frame_points_shape = each_centroid_activation_frame[each_point_group_index_array].shape
                each_centroid_activation_frame[each_point_group_index_array] = numpy.random.random(
                    each_centroid_activation_frame_points_shape
                )

                # Rescale the active points into the configured intensity
                # range...
                each_centroid_activation_frame[each_point_group_index_array] *= self.object_intensity_range
                # ...and translate them by the minimum intensity.
                each_centroid_activation_frame[each_point_group_index_array] += self.object_min_intensity

                # Add to the stack of centroid activations.
                each_result.centroid_activation_frames.append(
                    each_centroid_activation_frame
                )

            # Convert to a numpy array.
            each_result.centroid_activation_frames = numpy.array(
                each_result.centroid_activation_frames
            )

            # Holds the frames without noise.
            each_result.noiseless_frames = []
            # Takes each centroid activation frame and creates objects that
            # dim over time.
            for each_centroid_activation_frame in each_result.centroid_activation_frames:
                # Determines how much to spread each active point
                # (self.object_spread is like the average spread).
                sigma = 2 * self.object_spread * numpy.random.random()

                for each_frame_num in nanshe.util.iters.irange(self.num_frames):
                    # Determines a linear rescaling of each image (where
                    # they slowly become dimmer).
                    rescale = float(
                        self.num_frames - each_frame_num
                    ) / float(self.num_frames)

                    # Convolves each frame to generate a frame with objects
                    # (uses the same spread for each; it simply dims over
                    # time). NOTE: uses scipy.ndimage.gaussian_filter
                    # directly since the scipy.ndimage.filters namespace is
                    # deprecated (removed in SciPy >= 1.10).
                    each_matrix_convolved = scipy.ndimage.gaussian_filter(
                        rescale * each_centroid_activation_frame, sigma
                    )

                    # Adds to the stack of frames.
                    each_result.noiseless_frames.append(each_matrix_convolved)

            # Converts the form of the noiseless frames.
            each_result.noiseless_frames = numpy.array(
                each_result.noiseless_frames
            )

            # Creates frames that contain some background noise from a
            # normal distribution.
            each_result.frames = each_result.noiseless_frames.copy()
            each_result.frames += numpy.random.normal(
                scale=self.background_noise_intensity,
                size=each_result.frames.shape
            )

            # Append to our list of results.
            results.append(each_result)

        return(results)