# Source code for nanshe.misc.random_dictionary_learning_data
"""
``random_dictionary_learning_data`` supports generating synthetic data.
===============================================================================
Overview
===============================================================================
The module ``random_dictionary_learning_data`` provides a way of generating
synthetic data for testing the segmentation algorithm against. Moving forward
the useful content in here will be refactored and moved into the ``data``
module. So, depending on this module is unwise.
===============================================================================
API
===============================================================================
"""
__author__ = "John Kirkham <kirkhamj@janelia.hhmi.org>"
__date__ = "Apr 17, 2014 11:27:08 EDT$"
import warnings
warnings.warn(
"The module `random_dictionary_learning_data` is deprecated." +
"Please consider using `data` instead." +
"Relevant content from this module is being refactored and moved there.",
DeprecationWarning
)
import numpy
import scipy
import scipy.ndimage
import nanshe.util.prof
import nanshe.util.iters
# Get the module-level trace logger from the project's profiling utilities
# (used to trace calls within this module).
trace_logger = nanshe.util.prof.getTraceLogger(__name__)
class MappingDiscreteGeometricDistributionGenerator(object):
    """
    A random generator of groups. Each group has a size that is
    geometrically distributed. However, the individuals chosen for the
    group are all equally likely.
    """

    def __init__(self, *args):
        """
        Sets the arguments for use to compose the groups.

        Args:
            *args:          Any variety of useful items for drawing.
        """
        self.args = args

    def __call__(self, p, size=1):
        """
        Generates a number of groups equal to ``size`` with each group
        size being distributed geometrically by ``p``.

        Args:
            p(float):       the probability of success for a geometric
                            distribution (starts with 1 so has mean 1/p).

            size(int):      the number of groups to make.

        Returns:
            results(list):  a list of groups of arguments drawn (None if
                            no arguments).
        """
        # Get a uniform distribution over the elements to fill each group.
        uni_gen = MappingDiscreteUniformDistributionGenerator(*self.args)

        # Draw the sizes for each group.
        group_sizes = numpy.random.geometric(p, size)

        # Using the sizes, draw elements to fill groups up to the right
        # size.
        results = [
            uni_gen(group_sizes[i]) for i in nanshe.util.iters.irange(size)
        ]

        return(results)
class DictionaryLearningRandomDataSample(object):
    """
    Essentially a struct with its values set at runtime by
    DictionaryLearningRandomDataGenerator calls.
    """

    def __init__(self):
        """
        Default constructor just to establish the attribute slots; each
        is filled in later by the generator.
        """
        # Array marking the locations of the synthetic objects.
        self.points = None
        # Stack of frames marking which points are active per group.
        self.centroid_activation_frames = None
        # Generated frames before background noise is added.
        self.noiseless_frames = None
        # Final frames (noiseless frames plus background noise).
        self.frames = None
class DictionaryLearningRandomDataGenerator(object):
    """
    A Random Generator that builds pseudo-data similar in nature to that
    which the ADINA algorithm is run on.
    """

    def __init__(self,
                 frame_shape,
                 num_objects,
                 num_groups,
                 num_frames,
                 mean_group_size,
                 object_spread,
                 object_max_intensity,
                 object_min_intensity,
                 background_noise_intensity):
        """
        Builds a DictionaryLearningRandomDataGenerator for draws.

        Args:
            frame_shape(tuple):                 a tuple of ints for
                                                constructing a numpy
                                                array.

            num_objects(int):                   the number of objects that
                                                can possibly be active
                                                (i.e. neurons present
                                                whether active or not).

            num_groups(int):                    number of groups of
                                                objects that will be
                                                active (i.e. number of
                                                groups of neurons seen to
                                                be active).

            num_frames(int):                    number of frames for any
                                                group to be active in the
                                                pseudo-video.

            mean_group_size(float):             average group size
                                                (average for a geometric
                                                distribution).

            object_spread(float):               how big an object is on
                                                average.

            object_max_intensity(float):        the highest intensity
                                                possible.

            object_min_intensity(float):        the lowest intensity
                                                possible.

            background_noise_intensity(float):  how much noise there is in
                                                the background.
        """
        self.frame_shape = frame_shape
        self.num_objects = num_objects
        self.num_groups = num_groups
        self.num_frames = num_frames
        self.mean_group_size = mean_group_size
        self.object_spread = object_spread
        self.object_max_intensity = object_max_intensity
        self.object_min_intensity = object_min_intensity
        self.background_noise_intensity = background_noise_intensity

        # Width of the intensity interval that active points are rescaled
        # into (see __call__).
        self.object_intensity_range = self.object_max_intensity - \
            self.object_min_intensity

    def __call__(self, num_runs=1, seed=None):
        """
        Constructs a series of pseudo-videos.

        Args:
            num_runs(int):      number of pseudo-videos to generate.

            seed(int):          uses the seed for numpy.random.seed if
                                provided.

        Returns:
            results(list):      a list of
                                DictionaryLearningRandomDataSample
                                instances with relevant data from
                                generation included.
        """
        # Use the seed provided. (numpy.random.seed(None) reseeds from the
        # OS, so passing no seed gives non-reproducible draws.)
        numpy.random.seed(seed)

        # A list of DictionaryLearningRandomDataSample instances.
        results = []
        for i in nanshe.util.iters.irange(num_runs):
            # Where the result will be stored.
            each_result = DictionaryLearningRandomDataSample()

            # Generates a numpy array that has a shape of self.frame_shape
            # with a fixed number of randomly selected (equally likely)
            # non-zero entries.
            each_result.points = NumpyRandomArrayDiscreteUniformDistributionGenerator(
                self.frame_shape)(self.num_objects).astype(float)

            # Creates a point generator that selects from the non-zero
            # points generated for activation to create groups, starting
            # from an index array (tuple of 1D numpy.ndarrays).
            selected_points = each_result.points.nonzero()
            # Convert to a single numpy.ndarray.
            selected_points = numpy.array(selected_points)
            # Simpler, lightweight way of doing zip(*selected_points).
            selected_points = selected_points.T
            selected_points = selected_points.tolist()

            point_groups_gen = MappingDiscreteGeometricDistributionGenerator(
                *selected_points
            )

            # Using a mean group size and the number of groups, creates
            # point groups (these should in some way relate to the basis
            # images).
            point_groups = point_groups_gen(
                1.0 / float(self.mean_group_size), self.num_groups)

            # Will store the essential frames that indicate which points
            # will be active in each frame.
            each_result.centroid_activation_frames = []
            for each_point_group in point_groups:
                # Get an index array for this group's points.
                each_point_group_index_array = nanshe.util.iters.list_indices_to_index_array(
                    each_point_group
                )

                # Create an empty activation frame.
                each_centroid_activation_frame = numpy.zeros(self.frame_shape)

                # Set the active points to uniformly random values.
                each_centroid_activation_frame_points_shape = each_centroid_activation_frame[each_point_group_index_array].shape
                each_centroid_activation_frame[each_point_group_index_array] = numpy.random.random(
                    each_centroid_activation_frame_points_shape
                )

                # Rescale the active points into the configured intensity
                # range...
                each_centroid_activation_frame[each_point_group_index_array] *= self.object_intensity_range
                # ...and translate them by the minimum intensity.
                each_centroid_activation_frame[each_point_group_index_array] += self.object_min_intensity

                # Add to the stack of centroid activations.
                each_result.centroid_activation_frames.append(
                    each_centroid_activation_frame
                )

            # Convert to a numpy array.
            each_result.centroid_activation_frames = numpy.array(
                each_result.centroid_activation_frames
            )

            # Holds the frames without noise.
            each_result.noiseless_frames = []
            # Takes each centroid activation frame and creates objects that
            # dim over time.
            for each_centroid_activation_frame in each_result.centroid_activation_frames:
                # Determines how much to spread each active point
                # (self.object_spread is like the average spread).
                sigma = 2 * self.object_spread * numpy.random.random()

                for each_frame_num in nanshe.util.iters.irange(self.num_frames):
                    # Determines a linear rescaling of each image (where
                    # they slowly become dimmer).
                    rescale = float(
                        self.num_frames - each_frame_num
                    ) / float(self.num_frames)

                    # Convolves each frame to generate a frame with objects
                    # (uses the same spread for each; it simply dims over
                    # time). NOTE: uses scipy.ndimage.gaussian_filter
                    # directly since the scipy.ndimage.filters namespace is
                    # deprecated (removed in SciPy >= 1.10).
                    each_matrix_convolved = scipy.ndimage.gaussian_filter(
                        rescale * each_centroid_activation_frame, sigma
                    )

                    # Adds to the stack of frames.
                    each_result.noiseless_frames.append(each_matrix_convolved)

            # Converts the form of the noiseless frames.
            each_result.noiseless_frames = numpy.array(
                each_result.noiseless_frames
            )

            # Creates frames that contain some background noise from a
            # normal distribution.
            each_result.frames = each_result.noiseless_frames.copy()
            each_result.frames += numpy.random.normal(
                scale=self.background_noise_intensity,
                size=each_result.frames.shape
            )

            # Append to our list of results.
            results.append(each_result)

        return(results)