# tests/test_cl_routines.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
test_mot
----------------------------------

Tests for the `mot` module.
"""
import unittest

import numpy as np

import mot
from mot import configuration
from mot.cl_routines.mapping.residual_calculator import ResidualCalculator
from mot.cl_routines.optimizing.nmsimplex import NMSimplex
from mot.cl_routines.optimizing.levenberg_marquardt import LevenbergMarquardt
from mot.cl_routines.optimizing.powell import Powell
from mot.cl_routines.filters.gaussian import GaussianFilter
from mot.cl_routines.filters.mean import MeanFilter
from mot.cl_routines.filters.median import MedianFilter
from mot.model_building.models_examples import Rosenbrock, MatlabLSQNonlinExample


class CLRoutineTestCase(unittest.TestCase):
    """Base test case that disables the '-cl-single-precision-constant' compile flag for the duration of a test."""

    def __init__(self, *args, **kwargs):
        super(CLRoutineTestCase, self).__init__(*args, **kwargs)
        self._old_config_value = mot.configuration._config['compile_flags']['general'][
            '-cl-single-precision-constant']

    def setUp(self):
        mot.configuration._config['compile_flags']['general'].update(
            {'-cl-single-precision-constant': False})

    def tearDown(self):
        mot.configuration._config['compile_flags']['general'].update(
            {'-cl-single-precision-constant': self._old_config_value})


class TestRosenbrock(CLRoutineTestCase):

    def setUp(self):
        super(TestRosenbrock, self).setUp()
        self.model = Rosenbrock(5)
        self.optimizers = (NMSimplex(), Powell(patience=10))

    def test_model(self):
        for optimizer in self.optimizers:
            v = optimizer.minimize(self.model).get_optimization_result()[0]
            for ind in range(self.model.get_nmr_inst_per_problem()):
                self.assertAlmostEqual(float(v[ind]), 1.0, places=3)


class TestLSQNonLinExample(CLRoutineTestCase):

    def setUp(self):
        super(TestLSQNonLinExample, self).setUp()
        self.model = MatlabLSQNonlinExample()
        self.optimizers = (LevenbergMarquardt(),)
        self.residual_calc = ResidualCalculator()

    def test_model(self):
        for optimizer in self.optimizers:
            v = optimizer.minimize(self.model).get_optimization_result()
            res = self.residual_calc.calculate(self.model, v)

            # sum of squared residuals of the first problem instance
            s = 0
            for i in range(res.shape[1]):
                s += res[0, i] ** 2

            self.assertAlmostEqual(s, 124.3622, places=4)


class TestFilters(CLRoutineTestCase):

    def setUp(self):
        super(TestFilters, self).setUp()
        self.d1 = np.array([1, 2, 4, 2, 1], dtype=np.float64)
        self.d2 = np.eye(4)

    def test_median(self):
        filt = MedianFilter(2)

        s1 = filt.filter(self.d1)
        np.testing.assert_almost_equal(s1, np.array([2, 2, 2, 2, 2]))

        s2 = filt.filter(self.d2)
        np.testing.assert_almost_equal(s2, np.zeros((4, 4)))

    def test_mean(self):
        filt = MeanFilter(2)

        s1 = filt.filter(self.d1)
        np.testing.assert_almost_equal(s1, np.array([2 + 1 / 3.0, 2.25, 2, 2.25, 2 + 1 / 3.0]))

        s2 = filt.filter(self.d2)
        expected = np.ones((4, 4)) * 0.25
        expected[0, 0] = 1 / 3.0
        expected[0, 3] = 2 / 9.0
        expected[3, 0] = 2 / 9.0
        expected[3, 3] = 1 / 3.0
        np.testing.assert_almost_equal(s2, expected)

    def test_gaussian(self):
        filt = GaussianFilter(2, sigma=1.0)

        s1 = filt.filter(self.d1, mask=np.array([1, 1, 1, 1, 0]))
        s2 = filt.filter(self.d2)

        np.testing.assert_almost_equal(s1, [1.1089774, 2.135224, 2.6417738, 1.8910226, 0])

        expected = np.array([[0.22470613, 0.20994687, 0.10351076, 0.02661242],
                             [0.20994687, 0.28434043, 0.22325308, 0.10351076],
                             [0.10351076, 0.22325308, 0.28434043, 0.20994687],
                             [0.02661242, 0.10351076, 0.20994687, 0.22470613]])
        np.testing.assert_almost_equal(s2, expected)


if __name__ == '__main__':
    unittest.main()
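# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original archive): it mirrors the
# optimization pattern exercised by TestRosenbrock above, assuming only the
# classes and methods the tests already call (Rosenbrock, NMSimplex,
# minimize(), get_optimization_result()). Treat it as a hedged example rather
# than authoritative package documentation.
# ---------------------------------------------------------------------------
import numpy as np

from mot.cl_routines.optimizing.nmsimplex import NMSimplex
from mot.model_building.models_examples import Rosenbrock


def optimize_rosenbrock_example(nmr_dimensions=5):
    """Optimize the example Rosenbrock model and return the estimated parameters."""
    model = Rosenbrock(nmr_dimensions)
    optimizer = NMSimplex()

    # minimize() returns a results object; get_optimization_result() yields the
    # estimated parameters as an array, exactly as used in TestRosenbrock.
    results = optimizer.minimize(model)
    parameters = np.asarray(results.get_optimization_result())

    # For the Rosenbrock function the optimum lies at all ones, which
    # TestRosenbrock asserts to three decimal places.
    return parameters


if __name__ == '__main__':
    print(optimize_rosenbrock_example())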
# tests/model_interfaces.py
import inspect
import unittest

from mot.model_interfaces import OptimizeModelInterface, SampleModelInterface

__author__ = 'Robbert Harms'
__date__ = "2017-03-28"
__maintainer__ = "Robbert Harms"
__email__ = "robbert.harms@maastrichtuniversity.nl"


class test_OptimizeModelInterface(unittest.TestCase):

    def test_for_not_implemented_error(self):
        interface = OptimizeModelInterface()

        self.assertRaises(NotImplementedError, lambda: interface.name)
        self.assertRaises(NotImplementedError, lambda: interface.double_precision)

        functions = inspect.getmembers(OptimizeModelInterface, predicate=inspect.isfunction)
        for function in functions:
            sig = inspect.signature(function[1])
            extra_args = [None] * (len(sig.parameters) - 1)
            self.assertRaises(NotImplementedError, function[1], interface, *extra_args)


class test_SampleModelInterface(unittest.TestCase):

    def test_for_not_implemented_error(self):
        interface = SampleModelInterface()

        functions = inspect.getmembers(SampleModelInterface, predicate=inspect.isfunction)
        for function in functions:
            sig = inspect.signature(function[1])
            extra_args = [None] * (len(sig.parameters) - 1)
            self.assertRaises(NotImplementedError, function[1], interface, *extra_args)
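# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original archive): the two test cases
# above repeat the same introspection pattern, so a shared helper could express
# it once. It relies only on the behaviour already asserted above: every
# function on an interface class raises NotImplementedError when called with
# placeholder arguments.
# ---------------------------------------------------------------------------
import inspect


def assert_all_methods_unimplemented(test_case, interface_class):
    """Call every function of ``interface_class`` with None placeholders and
    assert that each raises NotImplementedError (mirrors the tests above)."""
    instance = interface_class()
    for _, function in inspect.getmembers(interface_class, predicate=inspect.isfunction):
        signature = inspect.signature(function)
        placeholder_args = [None] * (len(signature.parameters) - 1)  # every parameter except 'self'
        test_case.assertRaises(NotImplementedError, function, instance, *placeholder_args)

# Example use inside a test method: assert_all_methods_unimplemented(self, OptimizeModelInterface)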
# tests/utils.py
import unittest
from textwrap import dedent

import numpy as np
import pyopencl as cl
from numpy.testing import assert_array_equal

from mot.utils import device_type_from_string, device_supports_double, results_to_dict, get_float_type_def, \
    is_scalar, all_elements_equal, get_single_value, topological_sort

__author__ = 'Robbert Harms'
__date__ = "2017-03-28"
__maintainer__ = "Robbert Harms"
__email__ = "robbert.harms@maastrichtuniversity.nl"


class test_device_type_from_string(unittest.TestCase):

    def test_gpu(self):
        assert(device_type_from_string('GPU') == cl.device_type.GPU)

    def test_cpu(self):
        assert(device_type_from_string('CPU') == cl.device_type.CPU)

    def test_accelerator(self):
        assert(device_type_from_string('ACCELERATOR') == cl.device_type.ACCELERATOR)

    def test_custom(self):
        assert(device_type_from_string('CUSTOM') == cl.device_type.CUSTOM)

    def test_none(self):
        assert(device_type_from_string('') is None)


class test_device_supports_double(unittest.TestCase):

    def test_has_double(self):
        for platform in cl.get_platforms():
            for device in platform.get_devices():
                has_double = device.get_info(cl.device_info.DOUBLE_FP_CONFIG) == 63
                assert(device_supports_double(device) == has_double)


class test_results_to_dict(unittest.TestCase):

    def test_mismatch(self):
        results = np.zeros((2, 3, 4))
        param_names = ['only_one_name_for_three_params']
        self.assertRaises(ValueError, results_to_dict, results, param_names)

    def test_2d_matrix(self):
        results = np.random.rand(2, 3)
        param_names = ['p1', 'p2', 'p3']
        results_dict = results_to_dict(results, param_names)

        assert(all(name in results_dict for name in param_names))
        for ind, name in enumerate(param_names):
            assert_array_equal(results_dict[name], results[:, ind])

    def test_3d_matrix(self):
        results = np.random.rand(2, 3, 4)
        param_names = ['p1', 'p2', 'p3']
        results_dict = results_to_dict(results, param_names)

        assert(all(name in results_dict for name in param_names))
        for ind, name in enumerate(param_names):
            assert_array_equal(results_dict[name], results[:, ind, :])


class test_get_float_type_def(unittest.TestCase):

    def test_float(self):
        known_good_value = '''
            #if __OPENCL_VERSION__ <= CL_VERSION_1_1
                #pragma OPENCL EXTENSION cl_khr_fp64 : enable
            #endif

            #define mot_float_type float
            #define mot_float_type2 float2
            #define mot_float_type4 float4
            #define mot_float_type8 float8
            #define mot_float_type16 float16

            #define MOT_EPSILON FLT_EPSILON
            #define MOT_MIN FLT_MIN
            #define MOT_MAX FLT_MAX
            #define MOT_INT_CMP_TYPE int
        '''

        value = get_float_type_def(False)
        assert(dedent(value) == dedent(known_good_value))

    def test_double(self):
        known_good_value = '''
            #if __OPENCL_VERSION__ <= CL_VERSION_1_1
                #pragma OPENCL EXTENSION cl_khr_fp64 : enable
            #endif

            #define mot_float_type double
            #define mot_float_type2 double2
            #define mot_float_type4 double4
            #define mot_float_type8 double8
            #define mot_float_type16 double16

            #define MOT_EPSILON DBL_EPSILON
            #define MOT_MIN DBL_MIN
            #define MOT_MAX DBL_MAX
            #define MOT_INT_CMP_TYPE long
        '''

        value = get_float_type_def(True)
        assert(dedent(value) == dedent(known_good_value))


class test_is_scalar(unittest.TestCase):

    def test_is_not_scalar(self):
        self.assertFalse(is_scalar(np.zeros((2, 2))))

    def test_is_scalar(self):
        self.assertTrue(is_scalar(np.zeros((1,))[:, None]))
        self.assertTrue(is_scalar(np.zeros((1,))))
        self.assertTrue(is_scalar(-1))
        self.assertTrue(is_scalar(0))
        self.assertTrue(is_scalar(1))
        self.assertTrue(is_scalar(-1.0))
        self.assertTrue(is_scalar(0.0))
        self.assertTrue(is_scalar(1.0))


class test_all_elements_equal(unittest.TestCase):

    def test_scalar(self):
        self.assertTrue(all_elements_equal(np.zeros((1,))[:, None]))
        self.assertTrue(all_elements_equal(np.zeros((1,))))
        self.assertTrue(all_elements_equal(-1))
        self.assertTrue(all_elements_equal(0))
        self.assertTrue(all_elements_equal(1))
        self.assertTrue(all_elements_equal(-1.0))
        self.assertTrue(all_elements_equal(0.0))
        self.assertTrue(all_elements_equal(1.0))

    def test_false(self):
        self.assertFalse(all_elements_equal(np.random.rand(2, 2)))

    def test_matrix(self):
        self.assertTrue(all_elements_equal(np.zeros((2,))))
        self.assertTrue(all_elements_equal(np.zeros((2, 3))))
        self.assertTrue(all_elements_equal(np.zeros((2, 3, 4))))


class test_get_single_value(unittest.TestCase):

    def test_exception(self):
        self.assertRaises(ValueError, get_single_value, np.random.rand(2, 2))

    def test_true(self):
        self.assertTrue(get_single_value(np.ones((2, 2))[:, None]) == 1)
        self.assertTrue(get_single_value(np.zeros((1,))[:, None]) == 0)
        self.assertTrue(get_single_value(np.zeros((1,))) == 0)
        self.assertTrue(get_single_value(-1) == -1)
        self.assertTrue(get_single_value(0) == 0)
        self.assertTrue(get_single_value(1) == 1)
        self.assertTrue(get_single_value(-1.0) == -1.0)
        self.assertTrue(get_single_value(0.0) == 0.0)
        self.assertTrue(get_single_value(1.0) == 1.0)


class test_topological_sort(unittest.TestCase):

    def test_auto_dependency(self):
        circular = {'a': ('a',), 'b': ('a',)}
        self.assertRaises(ValueError, topological_sort, circular)

    def test_cyclic_dependency(self):
        circular = {'a': ('b',), 'b': ('a',)}
        self.assertRaises(ValueError, topological_sort, circular)

    def test_sorting(self):
        data = {'a': (), 'm': ('c',), 'e': ('m',), '!': ('a', 'e', 'c')}
        assert(list(topological_sort(data)) == ['a', 'c', 'm', 'e', '!'])

    def test_unsortables(self):
        class A:
            pass

        class B:
            pass

        a = A()
        b = B()
        data = {'a': (a, b), 'b': ('a',)}
        assert(len(list(topological_sort(data))) == 4)

    def test_empty_input(self):
        data = {}
        self.assertFalse(topological_sort(data))


# tests/__init__.py
# -*- coding: utf-8 -*-


# mot/model_interfaces.py
"""The interfaces needed for models.

Since a lot of information about a model is needed to be able to optimize or sample it, we encapsulate all that
information in an interface. Only objects that successfully implement the interfaces in this module can be
optimized or sampled using one of the optimization or sampling routines in MOT.

These interfaces expose data and modeling code.
The data is represented as numpy arrays and the CL code as strings. """ __author__ = 'Robbert Harms' __date__ = "2014-03-14" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class OptimizeModelInterface(object): @property def name(self): """Get the name of this model. This should be overwritten by the implementing model. Returns: str: A string with the name of this model. """ raise NotImplementedError() @property def double_precision(self): """Flag to signal if we should use the double float type during calculations. By default we ask the cl routines to use the single precision float type, you can overwrite this with your own flags. Returns: boolean: if we would like to use double precision floating point during the calculations """ raise NotImplementedError() def get_data(self): """Get the data this model needs inside the CL kernels. This data should be buffered to CL without changes, since the kernel arguments are generated by the model. At one point the model was also creating the buffers, but this did not work out since it was not clear where the buffers would then be freed. It is better to let the CLRoutines manage the buffers and let the model just supply the data. Returns: list of ndarray: the arrays that need to be buffered. """ raise NotImplementedError() def get_kernel_data_struct(self, device): """Get the CL code for the data structure in the kernel. In combination with the :meth:`get_data_buffers`, this returns the data structure matching the buffers. This should generates something like: .. code-block: c typedef struct{ ... } ; with the struct containing all the data needed in the model. The name can of course be chosen by yourself. Args: device (pyopencl.Device): the device for which to generate the data structure Returns: str: the kernel data structure CL code """ raise NotImplementedError() def get_kernel_param_names(self, device): """Get for all the data buffers the kernel parameter arguments. In combination with the :meth:`get_data_buffers`, this returns a list with kernel arguments for the buffers For example: .. code-block: python list = ['global float* observations', ...] That is, each element is one of the kernel parameter names. Args: device (pyopencl.Device): the device for which to generate the data structure Returns: list: the kernel parameter names """ raise NotImplementedError() def get_kernel_data_struct_initialization(self, device, variable_name, problem_id_name): """The assignment code for the data structure. The data structure needs to be generated given the kernel arguments, this function returns the initialization assignment. For example: .. code-block: c = {}; Args: device (pyopencl.Device): the device for which to generate the data structure variable_name (str): the name for the generated struct variable problem_id_name (str): the name of the variable holding the problem id, commonly set to get_global_id() Returns: str: the initialization assignment for the data structure. """ raise NotImplementedError() def get_kernel_data_struct_type(self): """Get the CL type of the kernel datastruct. Returns: str: the CL type of the data struct """ raise NotImplementedError() def get_parameter_decode_function(self, fname='decodeParameters'): """Get a CL function that can transform the model parameters from encoded space to model space. The signature of the CL function is: .. 
code-block:: c void (const void* data, const mot_float_type* x); Args: fname (str): The CL function name to use Returns: str: An OpenCL function that is used in the CL kernel to transform the parameters from encoded space to model space so they can be used as input to the model. """ raise NotImplementedError() def get_parameter_encode_function(self, fname='encodeParameters'): """Get a CL function that can transform the model parameters from model space to an encoded space. The signature of the CL function is: .. code-block:: c void (const void* data, const mot_float_type* x); Args: fname (str): The CL function name to use Returns: str: An OpenCL function that is used in the CL kernel to transform the parameters from model space to encoded space so they can be used as input to an CL routine. """ raise NotImplementedError() def get_nmr_problems(self): """Get the number of problems we need to analyze. Returns: int: A single integer specifying the number of problem instances """ raise NotImplementedError() def get_model_eval_function(self, func_name='evaluateModel'): """Get the evaluation function that evaluates the model at the given parameters. This returned function should not do any error calculations, it should merely return the result of evaluating the model for the given parameters. Please make sure the sign of the return value is correct given the following. The minimization routines may make use of this function and get_observation_return_function to build their own objective function. This is always done as: observation() - evaluation(). This means that if you want to optimize a function without observation data you need to make sure the evaluation function returns the answers with the right sign. Args: func_name (string): specifies the name of the function. Returns: str: An CL function with the signature: .. code-block:: c double (const void* const data, const mot_float_type* const x, const uint observation_index); """ raise NotImplementedError() def get_observation_return_function(self, func_name='getObservation'): """Get the CL function that returns the observation for the given problem. Args: func_name (string): specifies the name of the function. Returns: str: An CL function with the signature: .. code-block:: c double (const void* const data, const uint observation_index); """ raise NotImplementedError() def get_objective_function(self, func_name="calculateObjective"): """Get the objective function that evaluates the entire problem instance under a noise model. This CL function should return a double (instead of a mot_float_type) for accuracy reasons. Args: func_name (string): specifies the name of the function. Returns: str: A CL function with signature: .. code-block:: c double (const void* const data, mot_float_type* const x); """ raise NotImplementedError() def get_objective_per_observation_function(self, func_name="getObjectiveInstanceValue"): """Get the objective function that returns the objective value at the given instance point. This function is used by some evaluation routines (like for example LevenbergMarquardt) that need a list of objective values (one per instance point), instead of a single objective function scalar. This function is called with the index of the observation index to evaluate. Args: func_name (str): the name of the function Returns: str: A CL function with signature: .. 
code-block:: c double (const void* const data, mot_float_type* const x, uint observation_index); """ raise NotImplementedError() def get_initial_parameters(self, results_dict=None): """Get a two dimensional matrix with the initial parameters (starting points) for every voxel. Optionally, one may specify a list of previously calculated results which may be applicable to the model. If a parameter is found in the results_dict, those values are used for the initial parameters. Args: results_dict (dict): a dictionary with for every parameter name, a value per voxel which is (for example) the result of a previous calculation. Returns: ndarray: A two dimensional matrix with on the first axis the problem instances and on the second the parameter values per problem instance """ raise NotImplementedError() def get_lower_bounds(self): """Get for each estimable parameter the lower bounds. Returns: list: For every estimable parameter a scalar or vector with the the lower bound(s) for that parameter. This value can also be the literal string '-inf' for infinity. """ raise NotImplementedError() def get_upper_bounds(self): """Get for each estimable parameter the upper bounds. Returns: list: For every estimable parameter a scalar or vector with the the upper bound(s) for that parameter. This value can also be the literal string '-inf' for infinity. """ raise NotImplementedError() def get_free_param_names(self): """Get a list of names with the free parameter names (the parameters that are estimated by the routines). The function get_optimization_output_param_names() returns the names of all the parameter names, including fixed and static parameters. This should only return the names of the parameters that are actually used in the optimization. Returns: list of str: A list with the parameter names (in dot format) of all the estimated (free) parameters. """ raise NotImplementedError() def get_optimization_output_param_names(self): """Get a list with the names of the parameters, this is the list of keys to the titles and results. See get_free_param_names() for getting the names of the parameters that are actually being optimized. This should be a complete overview of all the maps returned from optimizing this model. Returns: list of str: a list with the parameter names """ raise NotImplementedError() def get_nmr_inst_per_problem(self): """Get the number of instances/data points per problem. The minimum is one instance per problem. This number represents the number of data points Returns: int: the number of instances per problem. """ raise NotImplementedError() def get_nmr_estimable_parameters(self): """Get the number of estimable parameters. Returns: int: the number of estimable parameters """ raise NotImplementedError() class SampleModelInterface(OptimizeModelInterface): """Extends the OptimizeModelInterface with information for sampling purposes. This specific interface is tied to sampling with the Metropolis Hastings Random Walk sampler as implement in :class:`mot.cl_routines.sampling.metropolis_hastings.MetropolisHastings`. To be able to sample a model we (in principle) need to have: * a log likelihood function; * a proposal function; * and a prior function Proposal functions can be symmetric (if it holds that ``q(x|x') == q(x'|x)``) or non symmetric (i.e. ``q(x|x') != q(x'|x)``). In the case of non-symmetric proposals we need to have a function to get the probability log likelihood of the proposal. 
This indicates the need for two more pieces of information: * test if the proposal is symmetric * proposal log PDF function A trick in sampling is to have auto-adapting proposals. These proposals commonly have a distribution with a standard deviation that varies in time. The idea is that if the distribution is too tight (low std) only a few of the proposed samples are accepted and we need to broaden the distribution (increase the std). On the other hand, if the std is too high the jumps might not get accepted. This leads us to the following additional functionality: * proposal state update function Since OpenCL < 2.1 does not allow state variables in functions and also does not support classes, we need to find a way to store the state of the proposal distribution inside the kernel function. For that, each proposal CL function has as additional parameter the ``proposal_state``. The initial state can be obtained from this class and needs to be handed to the proposal functions in the kernels. Finally, this interface requires to you specify a :class:`mot.cl_routines.sampling.metropolis_hastings.MHState` that specifies the current state of the sampler. This can be set to a default state when starting sampling or to the output of a previous run to continue sampling. """ def get_proposal_state(self): """Get for every problem instance the list of parameter values to use in the the adaptable proposal. Returns: ndarray: per problem instance the proposal parameter values that are adaptable. """ raise NotImplementedError() def get_log_likelihood_function(self, func_name="getLogLikelihood", evaluation_model=None, full_likelihood=True): """Get the CL Log Likelihood function that evaluates the entire problem instance under a noise model Args: func_name (string): specifies the name of the function. evaluation_model (EvaluationModel): the evaluation model to use for the log likelihood. If not given we use the one defined in the model. full_likelihood (boolean): if we want the complete likelihood, or if we can drop the constant terms. The default is the complete likelihood. Disable for speed. Returns: str: A function of the kind: .. code-block:: c double (const void* const data, mot_float_type* const x); """ raise NotImplementedError() def get_log_likelihood_per_observation_function(self, func_name="getLogLikelihoodPerObservation", evaluation_model=None, full_likelihood=True): """Get the CL Log Likelihood function that evaluates the given instance under a noise model. Args: func_name (string): specifies the name of the function. evaluation_model (EvaluationModel): the evaluation model to use for the log likelihood. If not given we use the one defined in the model. full_likelihood (boolean): if we want the complete likelihood, or if we can drop the constant terms. The default is the complete likelihood. Disable for speed. Returns: str: A function of the kind: .. code-block:: c double (const void* const data, mot_float_type* const x, const uint observation_index); """ raise NotImplementedError() def is_proposal_symmetric(self): """Check if the entire proposal distribution is symmetric: ``q(x|x') == q(x'|x)``. Returns: boolean: True if the proposal distribution is symmetric, false otherwise. """ raise NotImplementedError() def get_proposal_logpdf(self, func_name='getProposalLogPDF', address_space_proposal_state='private'): """Get the probability density function of the proposal in log space (as a CL string). This density function is used if the proposal is not symmetric. 
Args: func_name (str): the CL function name of the returned function address_space_proposal_state (str): the CL address space of the proposal state vector. Defaults to ``private``. Returns: str: A function with the signature: .. code-block:: c double (const uint param_ind, const mot_float_type proposal, const mot_float_type current, mot_float_type* const proposal_state); Where ``param_ind`` is the index of the parameter we would like to get the proposal from, ``current`` is the current value of that parameter and ``proposal`` the proposal value of the parameter. The final argument ``proposal_state`` are the current settings of the proposal function. It should return for the requested parameter a value ``q(proposal | current)``, the log Probability Density Function (log PDF) of the proposal given the current value. """ raise NotImplementedError() def get_proposal_function(self, func_name='getProposal', address_space_proposal_state='private'): """Get a proposal function that returns proposals for a requested parameter. Args: func_name (str): the CL function name of the returned function address_space_proposal_state (str): the CL address space of the proposal state vector. Defaults to ``private``. Returns: str: A function with the signature: .. code-block:: c mot_float_type ( const uint param_ind, const mot_float_type current, void* rng_data, mot_float_type* const proposal_state); Where ``param_ind`` is the index of the parameter for which we want the proposal and ``current`` is the current value of that parameter. The argument ``proposal_state`` is the state of the proposal distribution. One can obtain random numbers with: .. code-block:: c float randomnr = frand(rng_data); """ raise NotImplementedError() def get_proposal_state_update_function(self, func_name='updateProposalState', address_space='private'): """Get the function to update the proposal parameters Args: func_name (str): the CL function name of the returned function address_space (str): the address space of (all) the given arguments, defaults to ``private`` Returns: str: A function with the signature: .. code-block:: c void ( mot_float_type* const proposal_state, ulong* const sampling_counter, ulong* const acceptance_counter); The ``proposal_state`` holds the current value of all the adaptable proposal parameters and is of length equal to the number of adaptable parameters. The ``sampling_counter`` holds the number of samples drawn since last update (per parameter) and ``acceptance_counter`` holds the number of samples that where accepted since the last update. Both are of length equal to the total number of parameters in the model (!). The implementing function is free to overwrite the values in each array. """ raise NotImplementedError() def proposal_state_update_uses_variance(self): """Check if the proposal state update function requires the variance of each of the parameters. If none of the proposal update functions require the parameter variance then we can save memory in the kernel by not calculating them. Returns: boolean: if at least one parameter proposal state update function requires the parameter variance return True, else return False. """ raise NotImplementedError() def get_log_prior_function(self, func_name='getLogPrior', address_space_parameter_vector='private'): """Get the prior function that returns the prior information about the given parameters. The prior function must be in log space. 
Args: func_name (str): the CL function name of the returned function address_space_parameter_vector (str): the address space to use for the parameter vector by default this is set to ``private``. Returns: str: A function with the signature: .. code-block:: c mot_float_type ( const void* data_void, const mot_float_type* const x ); Which is called by the sampling routine to calculate the posterior probability. """ raise NotImplementedError() def get_metropolis_hastings_state(self): """Get the current state of the Metropolis Hastings sampler. This can be used to continue execution of an MH sampling from a previous point in time. Returns: mot.cl_routines.sampling.metropolis_hastings.MHState: the current Metropolis Hastings state """ raise NotImplementedError() def samples_to_statistics(self, samples_dict): """Create statistics out of the given set of samples (in a dictionary). Args: samples_dict (dict): Keys being the parameter names, values the roi list in 2d (1st dim. is voxel, 2nd dim. is samples). Returns: dict: The same dictionary but with statistical maps (mean, avg etc.) for each parameter, instead of the raw samples. In essence this is where one can place the logic to go from samples to meaningful maps. """ raise NotImplementedError() PKJk O Omot/load_balance_strategies.py"""Supports hardware level load balancing over multiple CL enabled devices. This load balancing consists of three players, :class:`~mot.cl_environments.CLEnvironment`, :class:`LoadBalanceStrategy` and :class:`~mot.cl_routines.base.CLRoutine`. Every :class:`~mot.cl_routines.base.CLRoutine` (such as the Optimizers and Samplers) requires, in order to do computations, a list of :class:`~mot.cl_environments.CLEnvironment` and a :class:`LoadBalanceStrategy` implementation. The :class:`~mot.cl_environments.CLEnvironment` encapsulate all information needed to run computations on its contained device. The :class:`LoadBalanceStrategy` chooses which environments (i.e. devices) to use for the computations and how to use them. The load balancing itself is done by appointing subsets of problems (voxels) to specific devices. """ import logging import math import time import timeit import warnings import pyopencl as cl from six import string_types from .utils import device_type_from_string __author__ = 'Robbert Harms' __date__ = "2014-06-23" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class LoadBalanceStrategy(object): """Basic interface of a load balancing strategy. Every load balancer has the option to run the calculations in batches. The advantage of batches is that it is interruptable and it may prevent memory errors since we run with smaller buffers. The disadvantage is that it may be slower due to constant waiting to load the new kernel and due to GPU thread starvation. """ def process(self, workers, nmr_items, run_in_batches=None, single_batch_length=None): """Process all of the items using the callback function in the work packages. The idea is that a strategy can be chosen on the fly by for example testing the execution time of the callback functions. Alternatively, a strategy can be determined based on the available environments (in the WorkPackages) and/or by the total number of items to be processed. Args: workers (Worker): a list of workers nmr_items (int): an integer specifying the total number of items to be processed run_in_batches (boolean): a implementing class may overwrite run_in_batches with this parameter. If None the value is not used. 
single_batch_length (int): a implementing class may overwrite single_batch_length with this parameter. If None the value is not used. """ raise NotImplementedError() def get_used_cl_environments(self, cl_environments): """Get a subset of CL environments that this strategy plans on using. The strategy can decide on which workers to use based on the CL environment of the worker. To prevent the calling function from generating workers that will not be used by this strategy, the calling function can ask this function which CL environments it will use. Args: cl_environments (list): the CL environments we were planning on using and were planning on generating workers for Returns: list: A proper subset of the CL environments or all of them. This should reflect the list of Cl environment we will use in :meth:`process`. """ raise NotImplementedError() class Worker(object): def __init__(self, cl_environment): """Create a new worker. Workload strategies use workers to perform the calculations, in a distributed way determined by the strategy. All computed results should be stored internally by the worker. Args: cl_environment (CLEnvironment): The cl environment, can be used to determine the load """ self._cl_environment = cl_environment self._cl_run_context = self._cl_environment.get_cl_context() @property def cl_environment(self): """Get the used CL environment. Returns: cl_environment (CLEnvironment): The cl environment to use for calculations. """ return self._cl_environment def get_used_queues(self): """Get the queues this worker is using for its GPU computations. The load balancing routine will use these queues to flush and finish the computations. Returns: list of pyopencl queues: the list of queues """ return [self._cl_run_context.queue] def calculate(self, range_start, range_end): """Calculate for this problem the given range. The results of the computations must be stored internally. Args: range_start (int): The start of the processing range range_end (int): The end of the processing range """ def post_process(self, range_start, range_end): """Apply post processing at the end of the calculation. This is called after event.wait() has finished for every worker working per batch. One can use this function to post-process data after kernel execution. Args: range_start (int): The start of the processing range range_end (int): The end of the processing range """ def _build_kernel(self, compile_flags=()): """Build the kernel for this worker. This assumes that the implementer implements the function _get_kernel_source() to get the source. Returns: cl.Program: a compiled CL kernel """ kernel_source = self._get_kernel_source() from mot import configuration if configuration.should_ignore_kernel_compile_warnings(): warnings.simplefilter("ignore") return cl.Program(self._cl_run_context.context, kernel_source).build(' '.join(compile_flags)) def _get_kernel_source(self): """Calculate the kernel source for this worker. Returns: str: the kernel """ def _enqueue_readout(self, buffer, host_array, range_start, range_end, wait_for=None): """Enqueue a readout for a buffer created with use_host_ptr. This encapsulates all the low level details needed to readout the given range of values. 
Args: buffer: the buffer on the device host_array (ndarray): the host side array of the given buffer range_start (int): the start of the range to read out (in the first dimension) range_end (int): the end of the range to read out (in the first dimension) wait_for (list of event): the list of events to wait for Returns: event; the event of the readout """ nmr_problems = range_end - range_start return cl.enqueue_map_buffer( self._cl_run_context.queue, buffer, cl.map_flags.READ, range_start * host_array.strides[0], (nmr_problems, ) + host_array.shape[1:], host_array.dtype, order="C", wait_for=wait_for, is_blocking=False)[1] class SimpleLoadBalanceStrategy(LoadBalanceStrategy): def __init__(self, run_in_batches=True, single_batch_length=1e6): """An abstract class for quickly implementing load balancing strategies. Args: run_in_batches (boolean): If we want to run the load per worker in batches or in one large run. single_batch_length (float): The length of a single batch, only used if run_in_batches is set to True. This will create batches this size and run each of them one after the other. Attributes: run_in_batches (boolean); See above. single_batch_length (boolean); See above. """ self._logger = logging.getLogger(__name__) self._run_in_batches = run_in_batches self._single_batch_length = single_batch_length @property def run_in_batches(self): return self._run_in_batches @property def single_batch_length(self): return self._single_batch_length def process(self, workers, nmr_items, run_in_batches=None, single_batch_length=None): raise NotImplementedError() def get_used_cl_environments(self, cl_environments): raise NotImplementedError() def _create_batches(self, range_start, range_end, run_in_batches=None, single_batch_length=None): """Created batches in the given range. If self.run_in_batches is False we will only return one batch covering the entire range. If self.run_in_batches is True we will create batches the size of self.single_batch_length. Args: range_start (int): the start of the range to create batches for range_end (int): the end of the range to create batches for run_in_batches (boolean): if other than None, use this as run_with_batches single_batch_length (int): if other than None, use this as single_batch_length Returns: list of list: list of batches which are (start, end) pairs """ if run_in_batches is None: run_in_batches = self.run_in_batches if single_batch_length is None: single_batch_length = self.single_batch_length if run_in_batches: batches = [] for start_pos in range(int(range_start), int(range_end), int(single_batch_length)): batches.append((start_pos, int(min(start_pos + single_batch_length, range_end)))) return batches return [(range_start, range_end)] def _run_batches(self, workers, batches): """Run a list of batches on each of the workers. This will enqueue on all the workers the batches in sequence and waits for completion of each batch before enqueueing the next one. 
Args: workers (list of Worker): the workers to use in the processing batches (list of lists): for each worker a list with the batches in format (start, end) """ self._logger.debug('Preparing to run on {0} device(s)'.format(len(workers))) total_nmr_problems = 0 most_nmr_batches = 0 for workers_batches in batches: if len(workers_batches) > most_nmr_batches: most_nmr_batches = len(workers_batches) for batch in workers_batches: total_nmr_problems += batch[1] - batch[0] problems_seen = 0 start_time = timeit.default_timer() for batch_nmr in range(most_nmr_batches): for worker_ind, worker in enumerate(workers): if batch_nmr < len(batches[worker_ind]): self._logger.debug('Going to run batch {0} on device {1} with range ({2}, {3})'.format( batch_nmr, worker_ind, *batches[worker_ind][batch_nmr])) worker.calculate(int(batches[worker_ind][batch_nmr][0]), int(batches[worker_ind][batch_nmr][1])) problems_seen += batches[worker_ind][batch_nmr][1] - batches[worker_ind][batch_nmr][0] for queue in worker.get_used_queues(): queue.flush() for worker in workers: for queue in worker.get_used_queues(): queue.finish() for worker_ind, worker in enumerate(workers): if batch_nmr < len(batches[worker_ind]): self._logger.debug('Post processing batch {0} on device {1} with range ({2}, {3})'.format( batch_nmr, worker_ind, *batches[worker_ind][batch_nmr])) worker.post_process(int(batches[worker_ind][batch_nmr][0]), int(batches[worker_ind][batch_nmr][1])) run_time = timeit.default_timer() - start_time current_percentage = problems_seen / float(total_nmr_problems) remaining_time = (run_time / current_percentage) - run_time self._logger.info('Processing is at {0:.2%}, time spent: {1}, time left: {2} (h:m:s).'.format( current_percentage, time.strftime('%H:%M:%S', time.gmtime(run_time)), time.strftime('%H:%M:%S', time.gmtime(remaining_time)))) self._logger.debug('Ran all batches.') class MetaLoadBalanceStrategy(SimpleLoadBalanceStrategy): def __init__(self, lb_strategy): """ Create a load balance strategy that uses another strategy to do the actual computations. Args: lb_strategy (SimpleLoadBalanceStrategy): The load balance strategy this class uses. """ super(MetaLoadBalanceStrategy, self).__init__() self._lb_strategy = lb_strategy or EvenDistribution() def process(self, workers, nmr_items, run_in_batches=None, single_batch_length=None): raise NotImplementedError() def get_used_cl_environments(self, cl_environments): raise NotImplementedError() @property def run_in_batches(self): """ Returns the value for the load balance strategy this class uses. """ return self._lb_strategy.run_in_batches @property def single_batch_length(self): """ Returns the value for the load balance strategy this class uses. 
""" return self._lb_strategy.single_batch_length class EvenDistribution(SimpleLoadBalanceStrategy): """Give each worker exactly 1/nth of the work.""" def process(self, workers, nmr_items, run_in_batches=None, single_batch_length=None): items_per_worker = int(round(nmr_items / float(len(workers)))) batches = [] current_pos = 0 for worker_ind in range(len(workers)): if worker_ind == len(workers) - 1: batches.append(self._create_batches(current_pos, nmr_items, run_in_batches=run_in_batches, single_batch_length=single_batch_length)) else: batches.append(self._create_batches(current_pos, current_pos + items_per_worker, run_in_batches=run_in_batches, single_batch_length=single_batch_length)) current_pos += items_per_worker self._run_batches(workers, batches) def get_used_cl_environments(self, cl_environments): return cl_environments class RuntimeLoadBalancing(SimpleLoadBalanceStrategy): def __init__(self, test_percentage=10, run_in_batches=True, single_batch_length=1e6): """Distribute the work by trying to minimize the runtime. This first runs a batch of a small size to estimate the runtime per devices. Afterwards the problem instances are distributed such to minimize the overall time. Args: test_percentage (float): The total percentage of items to use for the run time duration test """ super(RuntimeLoadBalancing, self).__init__(run_in_batches=run_in_batches, single_batch_length=single_batch_length) self.test_percentage = test_percentage def process(self, workers, nmr_items, run_in_batches=None, single_batch_length=None): durations = [] start = 0 for worker in workers: end = start + int(math.floor(nmr_items * (self.test_percentage / len(workers)) / 100)) durations.append(self._test_duration(worker, start, end)) start = end total_d = sum(durations) nmr_items_left = nmr_items - start batches = [] for i in range(len(workers)): if i == len(workers) - 1: batches.append(self._create_batches(start, nmr_items, run_in_batches=run_in_batches, single_batch_length=single_batch_length)) else: items = int(math.floor(nmr_items_left * (1 - (durations[i] / total_d)))) batches.append(self._create_batches(start, start + items, run_in_batches=run_in_batches, single_batch_length=single_batch_length)) start += items self._run_batches(workers, batches) def _test_duration(self, worker, start, end): s = timeit.default_timer() self._run_batches([worker], [self._create_batches(start, end)]) return timeit.default_timer() - s def get_used_cl_environments(self, cl_environments): return cl_environments class PreferSingleDeviceType(MetaLoadBalanceStrategy): def __init__(self, lb_strategy=None, device_type=None): """This is a meta load balance strategy, it uses the given strategy and prefers the use of the indicated device. Args: lb_strategy (SimpleLoadBalanceStrategy): The strategy this class uses in the background. device_type (str or cl.device_type): either a cl device type or a string like ('gpu', 'cpu' or 'apu'). This variable indicates the type of device we want to use. 
""" super(PreferSingleDeviceType, self).__init__(lb_strategy) self._device_type = device_type or cl.device_type.CPU if isinstance(device_type, string_types): self._device_type = device_type_from_string(device_type) def process(self, workers, nmr_items, run_in_batches=None, single_batch_length=None): specific_workers = [worker for worker in workers if worker.cl_environment.device_type == self._device_type] if specific_workers: self._lb_strategy.process(specific_workers, nmr_items, run_in_batches=run_in_batches, single_batch_length=single_batch_length) else: self._lb_strategy.process(workers, nmr_items, run_in_batches=run_in_batches, single_batch_length=single_batch_length) def get_used_cl_environments(self, cl_environments): specific_envs = [cl_env for cl_env in cl_environments if cl_env.device_type == self._device_type] if specific_envs: return specific_envs else: return cl_environments class PreferGPU(PreferSingleDeviceType): def __init__(self, lb_strategy=None): """This is a meta load balance strategy, it uses the given strategy and prefers the use of GPU's. Args: lb_strategy (SimpleLoadBalanceStrategy): The strategy this class uses in the background. """ super(PreferGPU, self).__init__(device_type='GPU', lb_strategy=lb_strategy) class PreferCPU(PreferSingleDeviceType): def __init__(self, lb_strategy=None): """This is a meta load balance strategy, it uses the given strategy and prefers the use of CPU's. Args: lb_strategy (SimpleLoadBalanceStrategy): The strategy this class uses in the background. """ super(PreferCPU, self).__init__(device_type='CPU', lb_strategy=lb_strategy) class PreferSpecificEnvironment(MetaLoadBalanceStrategy): def __init__(self, lb_strategy=None, environment_nmr=0): """This is a meta load balance strategy, it prefers the use of a specific CL environment. Use this only when you are sure how the list of CL devices will look like. For example in use with parallel optimization of multiple subjects with each on a specific device. Args: lb_strategy (SimpleLoadBalanceStrategy): The strategy this class uses in the background. 
environment_nmr (int): the specific environment to use in the list of CL environments """ super(PreferSpecificEnvironment, self).__init__(lb_strategy) self.environment_nmr = environment_nmr def process(self, workers, nmr_items, run_in_batches=None, single_batch_length=None): self._lb_strategy.process(workers, nmr_items, run_in_batches=run_in_batches, single_batch_length=single_batch_length) def get_used_cl_environments(self, cl_environments): return [cl_environments[self.environment_nmr]] PKrvJ3 mot/factory.pyfrom mot.cl_routines.optimizing.multi_step_optimizer import MultiStepOptimizer from mot.cl_routines.optimizing.random_restart import RandomRestart from mot.cl_routines.sampling.metropolis_hastings import MetropolisHastings from mot.model_building.parameter_functions.proposal_updates import NoOperationUpdateFunction, AcceptanceRateScaling, \ FSLAcceptanceRateScaling, SingleComponentAdaptiveMetropolis from .cl_routines.optimizing.levenberg_marquardt import LevenbergMarquardt from .cl_routines.filters.gaussian import GaussianFilter from .cl_routines.filters.mean import MeanFilter from .cl_routines.filters.median import MedianFilter from .cl_routines.optimizing.nmsimplex import NMSimplex from .cl_routines.optimizing.powell import Powell from .load_balance_strategies import EvenDistribution, RuntimeLoadBalancing, PreferGPU, PreferCPU, \ PreferSpecificEnvironment __author__ = 'Robbert Harms' __date__ = "2015-07-06" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" optimizers = [LevenbergMarquardt, Powell, NMSimplex, MultiStepOptimizer, RandomRestart] samplers = [MetropolisHastings] filters = [GaussianFilter, MeanFilter, MedianFilter] load_balance_strategies = [EvenDistribution, RuntimeLoadBalancing, PreferGPU, PreferCPU, PreferSpecificEnvironment] proposal_updates = [NoOperationUpdateFunction, AcceptanceRateScaling, FSLAcceptanceRateScaling, SingleComponentAdaptiveMetropolis] def get_optimizer_by_name(name): """ Get the class by the given name. This does not instantiate the class, only returns a reference to it. Args: name: the name of the optimizer we want to return Returns: class: the class of the optimizer requested """ return _get_item(name, optimizers, 'optimizers') def get_sampler_by_name(name): """ Get the class by the given name. This does not instantiate the class, only returns a reference to it. Args: name: the name of the optimizer we want to return Returns: class: the class of the sampler requested """ return _get_item(name, samplers, 'samplers') def get_filter_by_name(name): """ Get the class by the given name. This does not instantiate the class, only returns a reference to it. Args: name: the name of the filter routine we want to return Returns: class: the class of the filter routine requested """ return _get_item(name, filters, 'smoothers') def get_load_balance_strategy_by_name(name): """ Get the class by the given name. This does not instantiate the class, only returns a reference to it. Args: name: the name of the load balance strategy we want to return Returns: class: the class of the load balance strategy requested """ return _get_item(name, load_balance_strategies, 'load balancers') def get_proposal_update_by_name(name): """ Get the class by the given name. This does not instantiate the class, only returns a reference to it. 
Args: name: the name of the proposal update function we want to return Returns: class: the class of the requested proposal update function """ return _get_item(name, proposal_updates, 'proposal updates') def _get_item(name, item_list, factory_type): for item in item_list: if item.__name__ == name: return item raise ValueError('The item with the name {0} could not be found in the {1} factory.'.format(name, factory_type)) PKqJmot/cl_data_type.pyimport six __author__ = 'Robbert Harms' __date__ = "2015-03-21" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class CLDataType(object): """Interface for CL data type containers. Basically this encapsulates the type and its qualifiers that define a variable in CL. """ def get_declaration(self): """Get the complete CL declaration for this datatype. Returns: str: the declaration for this data type. """ raise NotImplementedError() @property def cl_type(self): """Get the type of this parameter in CL language This only returns the parameter type (like ``double`` or ``int*`` or ``float4*`` ...). It does not include other qualifiers. Returns: str: The name of this data type """ raise NotImplementedError() @property def is_vector_type(self): """Check if this data type is a vector type Returns: boolean: True if it is a vector type, false otherwise """ raise NotImplementedError() class SimpleCLDataType(CLDataType): def __init__(self, raw_data_type, is_pointer_type=False, vector_length=None, address_space_qualifier=None, pre_data_type_type_qualifiers=None, post_data_type_type_qualifier=None): """Create a new CL data type container. The CL type can either be a CL native type (``half``, ``double``, ``int``, ...) or the special ``mot_float_type`` type. Args: raw_data_type (str): the specific data type without the vector number and asterisks is_pointer_type (boolean): If this parameter is a pointer type (appended by a ``*``) vector_length (int or None): If None this data type is not a CL vector type. If it is an integer it is the vector length of this data type (2, 3, 4, ...) address_space_qualifier (str or None): the address space qualifier or None if not used. One of: {``__local``, ``local``, ``__global``, ``global``, ``__constant``, ``constant``, ``__private``, ``private``} or None. pre_data_type_type_qualifiers (list of str or None): the type qualifiers to use before the data type. One of {const, restrict, volatile} post_data_type_type_qualifier (str or None): the type qualifier to use after the data type. Can only be 'const' """ self.raw_data_type = str(raw_data_type) self.is_pointer_type = is_pointer_type self.vector_length = vector_length if self.vector_length: self.vector_length = int(self.vector_length) self.address_space_qualifier = address_space_qualifier self.pre_data_type_type_qualifiers = pre_data_type_type_qualifiers if isinstance(self.pre_data_type_type_qualifiers, six.string_types): self.pre_data_type_type_qualifiers = [self.pre_data_type_type_qualifiers] self.post_data_type_type_qualifier = post_data_type_type_qualifier @classmethod def from_string(cls, parameter_declaration): """Parse the parameter declaration into a CLDataType Args: parameter_declaration (str): the CL parameter declaration. 
Example: ``global const float4*`` const Returns: mot.cl_data_type.SimpleCLDataType: the CL data type for this parameter declaration """ from mot.parsers.cl.CLDataTypeParser import parse return parse(parameter_declaration) def get_declaration(self): declaration = '' if self.address_space_qualifier: declaration += str(self.address_space_qualifier) + ' ' if self.pre_data_type_type_qualifiers: declaration += str(' '.join(self.pre_data_type_type_qualifiers)) + ' ' declaration += str(self.cl_type) if self.post_data_type_type_qualifier: declaration += ' ' + str(self.post_data_type_type_qualifier) return declaration @property def cl_type(self): s = self.raw_data_type if self.vector_length is not None: s += str(self.vector_length) if self.is_pointer_type: s += '*' return str(s) @property def is_vector_type(self): return self.vector_length is not None def __str__(self): return self.get_declaration() PKJHrccmot/mcmc_diagnostics.py"""This module contains some diagnostic functions to diagnose the performance of MCMC sampling. The two most important functions are :func:`multivariate_ess` and :func:`univariate_ess` to calculate the effective sample size of your samples. """ import os from collections import Mapping import multiprocessing import itertools import numpy as np from numpy.linalg import det from scipy.special import gammaln from scipy.stats import chi2 __author__ = 'Robbert Harms' __date__ = "2017-03-07" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" def multivariate_ess(samples, batch_size_generator=None): r"""Estimate the multivariate Effective Sample Size for the samples of every problem. This essentially applies :func:`estimate_multivariate_ess` to every problem. Args: samples (ndarray, dict or generator): either an matrix of shape (d, p, n) with d problems, p parameters and n samples, or a dictionary with for every parameter a matrix with shape (d, n) or, finally, a generator function that yields sample arrays of shape (p, n). batch_size_generator (MultiVariateESSBatchSizeGenerator): the batch size generator, tells us how many batches and of which size we use in estimating the minimum ESS. Returns: ndarray: the multivariate ESS per problem """ samples_generator = _get_sample_generator(samples) if os.name == 'nt': # In Windows there is no fork. return np.array(list(map(_MultivariateESSMultiProcessing(batch_size_generator), samples_generator()))) try: p = multiprocessing.Pool() return_data = np.array(list(p.imap(_MultivariateESSMultiProcessing(batch_size_generator), samples_generator()))) p.close() p.join() return return_data except OSError: return np.array(list(map(_MultivariateESSMultiProcessing(batch_size_generator), samples_generator()))) class _MultivariateESSMultiProcessing(object): def __init__(self, batch_size_generator): """Used in the function :func:`multivariate_ess` to estimate the multivariate ESS using multiprocessing.""" self._batch_size_generator = batch_size_generator def __call__(self, samples): return estimate_multivariate_ess(samples, batch_size_generator=self._batch_size_generator) def univariate_ess(samples, method='standard_error', **kwargs): r"""Estimate the univariate Effective Sample Size for the samples of every problem. This essentially applies the chosen univariate ESS method on every problem. 
Args: samples (ndarray, dict or generator): either an matrix of shape (d, p, n) with d problems, p parameters and n samples, or a dictionary with for every parameter a matrix with shape (d, n) or, finally, a generator function that yields sample arrays of shape (p, n). method (str): one of 'autocorrelation' or 'standard_error' defaults to 'standard_error'. If 'autocorrelation' is chosen we apply the function: :func:`estimate_univariate_ess_autocorrelation`, if 'standard_error` is choosen we apply the function: :func:`estimate_univariate_ess_standard_error`. **kwargs: passed to the chosen compute method Returns: ndarray: a matrix of size (d, p) with for every problem and every parameter an ESS. """ samples_generator = _get_sample_generator(samples) if os.name == 'nt': # In Windows there is no fork. return np.array(list(map(_UnivariateESSMultiProcessing(method, **kwargs), samples_generator()))) p = multiprocessing.Pool() return_data = np.array(list(p.imap(_UnivariateESSMultiProcessing(method, **kwargs), samples_generator()))) p.close() p.join() return return_data class _UnivariateESSMultiProcessing(object): def __init__(self, method, **kwargs): """Used in the function :func:`univariate_ess` to estimate the univariate ESS using multiprocessing.""" self._method = method self._kwargs = kwargs def __call__(self, samples): if self._method == 'autocorrelation': compute_func = estimate_univariate_ess_autocorrelation else: compute_func = estimate_univariate_ess_standard_error result = np.zeros(samples.shape[0]) for param_ind in range(samples.shape[0]): result[param_ind] = compute_func(samples[param_ind], **self._kwargs) return result def _get_sample_generator(samples): """Get a sample generator from the given polymorphic input. Args: samples (ndarray, dict or generator): either an matrix of shape (d, p, n) with d problems, p parameters and n samples, or a dictionary with for every parameter a matrix with shape (d, n) or, finally, a generator function that yields sample arrays of shape (p, n). Returns: generator: a generator that yields a matrix of size (p, n) for every problem in the input. """ if isinstance(samples, Mapping): def samples_generator(): for ind in range(samples[list(samples.keys())[0]].shape[0]): yield np.array([samples[s][ind, :] for s in sorted(samples)]) elif isinstance(samples, np.ndarray): def samples_generator(): for ind in range(samples.shape[0]): yield samples[ind] else: samples_generator = samples return samples_generator def get_auto_correlation(chain, lag): r"""Estimates the auto correlation for the given chain (1d vector) with the given lag. Given a lag :math:`k`, the auto correlation coefficient :math:`\rho_{k}` is estimated as: .. math:: \hat{\rho}_{k} = \frac{E[(X_{t} - \mu)(X_{t + k} - \mu)]}{\sigma^{2}} Please note that this equation only works for lags :math:`k < n` where :math:`n` is the number of samples in the chain. Args: chain (ndarray): the vector with the samples lag (int): the lag to use in the autocorrelation computation Returns: float: the autocorrelation with the given lag """ normalized_chain = chain - np.mean(chain, dtype=np.float64) lagged_mean = np.mean(normalized_chain[:len(chain) - lag] * normalized_chain[lag:], dtype=np.float64) return lagged_mean / np.var(chain, dtype=np.float64) def get_auto_correlation_time(chain, max_lag=None): r"""Compute the auto correlation time up to the given lag for the given chain (1d vector). 
This will halt when the maximum lag :math:`m` is reached or when the sum of two consecutive lags for any odd lag is lower or equal to zero. The auto correlation sum is estimated as: .. math:: \tau = 1 + 2 * \sum_{k=1}^{m}{\rho_{k}} Where :math:`\rho_{k}` is estimated as: .. math:: \hat{\rho}_{k} = \frac{E[(X_{t} - \mu)(X_{t + k} - \mu)]}{\sigma^{2}} Args: chain (ndarray): the vector with the samples max_lag (int): the maximum lag to use in the autocorrelation computation. If not given we use: :math:`min(n/3, 1000)`. """ max_lag = max_lag or min(len(chain) // 3, 1000) normalized_chain = chain - np.mean(chain, dtype=np.float64) previous_accoeff = 0 auto_corr_sum = 0 for lag in range(1, max_lag): auto_correlation_coeff = np.mean(normalized_chain[:len(chain) - lag] * normalized_chain[lag:], dtype=np.float64) if lag % 2 == 0: if previous_accoeff + auto_correlation_coeff <= 0: break auto_corr_sum += auto_correlation_coeff previous_accoeff = auto_correlation_coeff return auto_corr_sum / np.var(chain, dtype=np.float64) def estimate_univariate_ess_autocorrelation(chain, max_lag=None): r"""Estimate effective sample size (ESS) using the autocorrelation of the chain. The ESS is an estimate of the size of an iid sample with the same variance as the current sample. This function implements the ESS as described in Kass et al. (1998) and Robert and Casella (2004; p. 500): .. math:: ESS(X) = \frac{n}{\tau} = \frac{n}{1 + 2 * \sum_{k=1}^{m}{\rho_{k}}} where :math:`\rho_{k}` is estimated as: .. math:: \hat{\rho}_{k} = \frac{E[(X_{t} - \mu)(X_{t + k} - \mu)]}{\sigma^{2}} References: * Kass, R. E., Carlin, B. P., Gelman, A., and Neal, R. (1998) Markov chain Monte Carlo in practice: A roundtable discussion. The American Statistician, 52, 93--100. * Robert, C. P. and Casella, G. (2004) Monte Carlo Statistical Methods. New York: Springer. * Geyer, C. J. (1992) Practical Markov chain Monte Carlo. Statistical Science, 7, 473--483. Args: chain (ndarray): the chain for which to calculate the ESS, assumes a vector of length ``n`` samples max_lag (int): the maximum lag used in the variance calculations. If not given defaults to :math:`min(n/3, 1000)`. Returns: float: the estimated ESS """ return len(chain) / (1 + 2 * get_auto_correlation_time(chain, max_lag)) def estimate_univariate_ess_standard_error(chain, batch_size_generator=None, compute_method=None): r"""Compute the univariate ESS using the standard error method. This computes the ESS using: .. math:: ESS(X) = n * \frac{\lambda^{2}}{\sigma^{2}} Where :math:`\lambda` is the variance of the chain and :math:`\sigma` is estimated using the monte carlo standard error (which in turn is by default estimated using a batch means estimator). Args: chain (ndarray): the Markov chain batch_size_generator (UniVariateESSBatchSizeGenerator): the method that generates that batch sizes we will use. Per default it uses the :class:`SquareRootSingleBatch` method. compute_method (ComputeMonteCarloStandardError): the method used to compute the standard error. By default we will use the :class:`BatchMeansMCSE` method Returns: float: the estimated ESS """ sigma = (monte_carlo_standard_error(chain, batch_size_generator=batch_size_generator, compute_method=compute_method) ** 2 * len(chain)) lambda_ = np.var(chain, dtype=np.float64) return len(chain) * (lambda_ / sigma) def minimum_multivariate_ess(nmr_params, alpha=0.05, epsilon=0.05): r"""Calculate the minimum multivariate Effective Sample Size you will need to obtain the desired precision. This implements the inequality from Vats et al. 
(2016): .. math:: \widehat{ESS} \geq \frac{2^{2/p}\pi}{(p\Gamma(p/2))^{2/p}} \frac{\chi^{2}_{1-\alpha,p}}{\epsilon^{2}} Where :math:`p` is the number of free parameters. Args: nmr_params (int): the number of free parameters in the model alpha (float): the level of confidence of the confidence region. For example, an alpha of 0.05 means that we want to be in a 95% confidence region. epsilon (float): the level of precision in our multivariate ESS estimate. An epsilon of 0.05 means that we expect that the Monte Carlo error is 5% of the uncertainty in the target distribution. Returns: float: the minimum multivariate Effective Sample Size that one should aim for in MCMC sampling to obtain the desired confidence region with the desired precision. References: Vats D, Flegal J, Jones G (2016). Multivariate Output Analysis for Markov Chain Monte Carlo. arXiv:1512.07713v2 [math.ST] """ tmp = 2.0 / nmr_params log_min_ess = tmp * np.log(2) + np.log(np.pi) - tmp * (np.log(nmr_params) + gammaln(nmr_params / 2)) \ + np.log(chi2.ppf(1 - alpha, nmr_params)) - 2 * np.log(epsilon) return int(round(np.exp(log_min_ess))) def multivariate_ess_precision(nmr_params, multi_variate_ess, alpha=0.05): r"""Calculate the precision given your multivariate Effective Sample Size. Given that you obtained :math:`ESS` multivariate effective samples in your estimate you can calculate the precision with which you approximated your desired confidence region. This implements the inequality from Vats et al. (2016), slightly restructured to give :math:`\epsilon` back instead of the minimum ESS. .. math:: \epsilon = \sqrt{\frac{2^{2/p}\pi}{(p\Gamma(p/2))^{2/p}} \frac{\chi^{2}_{1-\alpha,p}}{\widehat{ESS}}} Where :math:`p` is the number of free parameters and ESS is the multivariate ESS from your samples. Args: nmr_params (int): the number of free parameters in the model multi_variate_ess (int): the number of iid samples you obtained in your sampling results. alpha (float): the level of confidence of the confidence region. For example, an alpha of 0.05 means that we want to be in a 95% confidence region. Returns: float: the minimum multivariate Effective Sample Size that one should aim for in MCMC sampling to obtain the desired confidence region with the desired precision. References: Vats D, Flegal J, Jones G (2016). Multivariate Output Analysis for Markov Chain Monte Carlo. arXiv:1512.07713v2 [math.ST] """ tmp = 2.0 / nmr_params log_min_ess = tmp * np.log(2) + np.log(np.pi) - tmp * (np.log(nmr_params) + gammaln(nmr_params / 2)) \ + np.log(chi2.ppf(1 - alpha, nmr_params)) - np.log(multi_variate_ess) return np.sqrt(np.exp(log_min_ess)) def estimate_multivariate_ess_sigma(samples, batch_size): r"""Calculates the Sigma matrix which is part of the multivariate ESS calculation. This implementation is based on the Matlab implementation found at: https://github.com/lacerbi/multiESS The Sigma matrix is defined as: .. math:: \Sigma = \Lambda + 2 * \sum_{k=1}^{\infty}{Cov(Y_{1}, Y_{1+k})} Where :math:`Y` are our samples and :math:`\Lambda` is the covariance matrix of the samples. This implementation computes the :math:`\Sigma` matrix using a Batch Mean estimator using the given batch size. The batch size has to be :math:`1 \le b_n \le n` and a typical value is either :math:`\lfloor n^{1/2} \rfloor` for slow mixing chains or :math:`\lfloor n^{1/3} \rfloor` for reasonable mixing chains. 
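The two sample-size helpers above are (up to rounding) each other's inverse, which gives a quick sanity check (a sketch):

.. code-block:: python

    from mot.mcmc_diagnostics import minimum_multivariate_ess, multivariate_ess_precision

    nmr_params = 10

    # effective samples needed for a 95% confidence region at 5% precision
    min_ess = minimum_multivariate_ess(nmr_params, alpha=0.05, epsilon=0.05)

    # plugging that ESS back in recovers (approximately) the requested precision
    epsilon = multivariate_ess_precision(nmr_params, min_ess, alpha=0.05)
    print(min_ess, epsilon)  # epsilon is close to 0.05 again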
If the length of the chain is longer than the sum of the length of all the batches, this implementation calculates :math:`\Sigma` for every offset and returns the average of those offsets. Args: samples (ndarray): the samples for which we compute the sigma matrix. Expects an (p, n) array with p the number of parameters and n the sample size batch_size (int): the batch size used in the approximation of the correlation covariance Returns: ndarray: an pxp array with p the number of parameters in the samples. References: Vats D, Flegal J, Jones G (2016). Multivariate Output Analysis for Markov Chain Monte Carlo. arXiv:1512.07713v2 [math.ST] """ sample_means = np.mean(samples, axis=1, dtype=np.float64) nmr_params, chain_length = samples.shape nmr_batches = int(np.floor(chain_length / batch_size)) sigma = np.zeros((nmr_params, nmr_params)) nmr_offsets = chain_length - nmr_batches * batch_size + 1 for offset in range(nmr_offsets): batches = np.reshape(samples[:, np.array(offset + np.arange(0, nmr_batches * batch_size), dtype=np.int)].T, [batch_size, nmr_batches, nmr_params], order='F') batch_means = np.squeeze(np.mean(batches, axis=0, dtype=np.float64)) Z = batch_means - sample_means for x, y in itertools.product(range(nmr_params), range(nmr_params)): sigma[x, y] += np.sum(Z[:, x] * Z[:, y]) return sigma * batch_size / (nmr_batches - 1) / nmr_offsets def estimate_multivariate_ess(samples, batch_size_generator=None, full_output=False): r"""Compute the multivariate Effective Sample Size of your (single instance set of) samples. This multivariate ESS is defined in Vats et al. (2016) and is given by: .. math:: ESS = n \bigg(\frac{|\Lambda|}{|\Sigma|}\bigg)^{1/p} Where :math:`n` is the number of samples, :math:`p` the number of parameters, :math:`\Lambda` is the covariance matrix of the parameters and :math:`\Sigma` captures the covariance structure in the target together with the covariance due to correlated samples. :math:`\Sigma` is estimated using :func:`estimate_multivariate_ess_sigma`. In the case of NaN in any part of the computation the ESS is set to 0. To compute the multivariate ESS for multiple problems, please use :func:`multivariate_ess`. Args: samples (ndarray): an pxn matrix with for p parameters and n samples. batch_size_generator (MultiVariateESSBatchSizeGenerator): the batch size generator, tells us how many batches and of which size we use for estimating the minimum ESS. Defaults to :class:`SquareRootSingleBatch` full_output (boolean): set to True to return the estimated :math:`\Sigma` and the optimal batch size. Returns: float or tuple: when full_output is set to True we return a tuple with the estimated multivariate ESS, the estimated :math:`\Sigma` matrix and the optimal batch size. When full_output is False (the default) we only return the ESS. References: Vats D, Flegal J, Jones G (2016). Multivariate Output Analysis for Markov Chain Monte Carlo. 
arXiv:1512.07713v2 [math.ST] """ batch_size_generator = batch_size_generator or SquareRootSingleBatch() batch_sizes = batch_size_generator.get_multivariate_ess_batch_sizes(*samples.shape) nmr_params, chain_length = samples.shape nmr_batches = len(batch_sizes) det_lambda = det(np.cov(samples)) ess_estimates = np.zeros(nmr_batches) sigma_estimates = np.zeros((nmr_params, nmr_params, nmr_batches)) for i in range(0, nmr_batches): sigma = estimate_multivariate_ess_sigma(samples, int(batch_sizes[i])) ess = chain_length * (det_lambda**(1.0 / nmr_params) / det(sigma)**(1.0 / nmr_params)) ess_estimates[i] = ess sigma_estimates[..., i] = sigma ess_estimates = np.nan_to_num(ess_estimates) if nmr_batches > 1: idx = np.argmin(ess_estimates) else: idx = 0 if full_output: return ess_estimates[idx], sigma_estimates[..., idx], batch_sizes[idx] return ess_estimates[idx] def monte_carlo_standard_error(chain, batch_size_generator=None, compute_method=None): """Compute Monte Carlo standard errors for the expectations This is a convenience function that calls the compute method for each batch size and returns the lowest ESS over the used batch sizes. Args: chain (ndarray): the Markov chain batch_size_generator (UniVariateESSBatchSizeGenerator): the method that generates that batch sizes we will use. Per default it uses the :class:`SquareRootSingleBatch` method. compute_method (ComputeMonteCarloStandardError): the method used to compute the standard error. By default we will use the :class:`BatchMeansMCSE` method """ batch_size_generator = batch_size_generator or SquareRootSingleBatch() compute_method = compute_method or BatchMeansMCSE() batch_sizes = batch_size_generator.get_univariate_ess_batch_sizes(len(chain)) return np.min(list(compute_method.compute_standard_error(chain, b) for b in batch_sizes)) class MultiVariateESSBatchSizeGenerator(object): """Objects of this class are used as input to the multivariate ESS function. The multivariate ESS function needs to have at least one batch size to use during the computations. More batch sizes are also possible and the batch size with the lowest ESS is then preferred. Objects of this class implement the logic behind choosing batch sizes. """ def get_multivariate_ess_batch_sizes(self, nmr_params, chain_length): r"""Get the batch sizes to use for the calculation of the Effective Sample Size (ESS). This should return a list of batch sizes that the ESS calculation will use to determine :math:`\Sigma` Args: nmr_params (int): the number of parameters in the samples chain_length (int): the length of the chain Returns: list: the batches of the given sizes we will test in the ESS calculations """ class UniVariateESSBatchSizeGenerator(object): """Objects of this class are used as input to the univariate ESS function that uses the batch means. The univariate batch means ESS function needs to have at least one batch size to use during the computations. More batch sizes are also possible and the batch size with the lowest ESS is then preferred. Objects of this class implement the logic behind choosing batch sizes. """ def get_univariate_ess_batch_sizes(self, chain_length): r"""Get the batch sizes to use for the calculation of the univariate Effective Sample Size (ESS). 
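As a usage sketch of :func:`estimate_multivariate_ess` for a single problem (synthetic, nearly independent samples, so the estimate should come out close to the chain length):

.. code-block:: python

    import numpy as np
    from mot.mcmc_diagnostics import estimate_multivariate_ess

    samples = np.random.randn(4, 5000)  # 4 parameters, 5000 samples

    ess = estimate_multivariate_ess(samples)

    # full output additionally returns the estimated Sigma matrix and the chosen batch size
    ess, sigma, batch_size = estimate_multivariate_ess(samples, full_output=True)
    print(ess, sigma.shape, batch_size)  # sigma is a 4 x 4 matrix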
This should return a list of batch sizes that the ESS calculation will use to determine :math:`\sigma` Args: chain_length (int): the length of the chain Returns: list: the batches of the given sizes we will test in the ESS calculations """ class SquareRootSingleBatch(MultiVariateESSBatchSizeGenerator, UniVariateESSBatchSizeGenerator): r"""Returns :math:`\sqrt(n)`.""" def get_multivariate_ess_batch_sizes(self, nmr_params, chain_length): return [np.floor(chain_length**(1/2.0))] def get_univariate_ess_batch_sizes(self, chain_length): return [np.floor(chain_length ** (1 / 2.0))] class CubeRootSingleBatch(MultiVariateESSBatchSizeGenerator, UniVariateESSBatchSizeGenerator): r"""Returns :math:`n^{1/3}`.""" def get_multivariate_ess_batch_sizes(self, nmr_params, chain_length): return [np.floor(chain_length**(1/3.0))] def get_univariate_ess_batch_sizes(self, chain_length): return [np.floor(chain_length ** (1 / 3.0))] class LinearSpacedBatchSizes(MultiVariateESSBatchSizeGenerator): def __init__(self, nmr_batches=200): r"""Returns a number of batch sizes from which the ESS algorithm will select the one with the lowest ESS. This is a conservative choice since the lowest ESS of all batch sizes is chosen. The batch sizes are generated as linearly spaced values in: .. math:: \Big[ n^{1/4}, max(\lfloor x/max(20,p) \rfloor, \lfloor \sqrt{n} \rfloor) \Big] where :math:`n` is the chain length and :math:`p` is the number of parameters. Args: nmr_batches (int): the number of linearly spaced batches we will generate. """ self._nmr_batches = nmr_batches def get_multivariate_ess_batch_sizes(self, nmr_params, chain_length): b_min = np.floor(chain_length**(1 / 4.0)) b_max = np.max((np.floor(chain_length / np.max((nmr_params, 20))), np.floor(chain_length**(1 / 2.0)))) return list(np.unique(np.round(np.exp(np.linspace(np.log(b_min), np.log(b_max), self._nmr_batches))))) class ComputeMonteCarloStandardError(object): """Method to compute the Monte Carlo Standard error.""" def compute_standard_error(self, chain, batch_size): """Compute the standard error of the given chain and the given batch size. 
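A custom batch size policy only needs to override the relevant method(s) of the generator base classes above. The ``FixedBatchSizes`` class below is a hypothetical illustration, not part of the module:

.. code-block:: python

    import numpy as np
    from mot.mcmc_diagnostics import (MultiVariateESSBatchSizeGenerator,
                                      UniVariateESSBatchSizeGenerator,
                                      estimate_multivariate_ess)

    class FixedBatchSizes(MultiVariateESSBatchSizeGenerator, UniVariateESSBatchSizeGenerator):
        """Hypothetical generator that always proposes the same batch sizes."""

        def __init__(self, batch_sizes=(25, 50, 100)):
            self._batch_sizes = list(batch_sizes)

        def get_multivariate_ess_batch_sizes(self, nmr_params, chain_length):
            return [b for b in self._batch_sizes if b < chain_length]

        def get_univariate_ess_batch_sizes(self, chain_length):
            return [b for b in self._batch_sizes if b < chain_length]

    # used exactly like the built-in generators
    ess = estimate_multivariate_ess(np.random.randn(3, 2000),
                                    batch_size_generator=FixedBatchSizes())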
Args: chain (ndarray): the chain for which to compute the SE batch_size (int): batch size or window size to use in the computations Returns: float: the Monte Carlo Standard Error """ raise NotImplementedError() class BatchMeansMCSE(ComputeMonteCarloStandardError): """Computes the Monte Carlo Standard Error using simple batch means.""" def compute_standard_error(self, chain, batch_size): nmr_batches = int(np.floor(len(chain) / batch_size)) batch_means = np.zeros(nmr_batches) for batch_index in range(nmr_batches): batch_means[batch_index] = np.mean( chain[int(batch_index * batch_size):int((batch_index + 1) * batch_size)], dtype=np.float64) var_hat = batch_size * sum((batch_means - np.mean(chain, dtype=np.float64))**2) / (nmr_batches - 1) return np.sqrt(var_hat / len(chain)) class OverlappingBatchMeansMCSE(ComputeMonteCarloStandardError): """Computes the Monte Carlo Standard Error using overlapping batch means.""" def compute_standard_error(self, chain, batch_size): nmr_batches = int(len(chain) - batch_size + 1) batch_means = np.zeros(nmr_batches) for batch_index in range(nmr_batches): batch_means[batch_index] = np.mean(chain[int(batch_index):int(batch_index + batch_size)], dtype=np.float64) var_hat = (len(chain) * batch_size * sum((batch_means - np.mean(chain, dtype=np.float64))**2)) / (nmr_batches - 1) / nmr_batches return np.sqrt(var_hat / len(chain)) PKJmot/__version__.pyVERSION = '0.2.41' _items = VERSION.split('-') VERSION_NUMBER_PARTS = tuple(int(i) for i in _items[0].split('.')) if len(_items) > 1: VERSION_STATUS = _items[1] else: VERSION_STATUS = '' __version__ = VERSION PKR}JU| ! ! mot/utils.pyfrom functools import reduce import numpy as np import pyopencl as cl __author__ = 'Robbert Harms' __date__ = "2014-05-13" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" def device_type_from_string(cl_device_type_str): """Converts values like ``gpu`` to a pyopencl device type string. Supported values are: ``accelerator``, ``cpu``, ``custom``, ``gpu``. If ``all`` is given, None is returned. Args: cl_device_type_str (str): The string we want to convert to a device type. Returns: cl.device_type: the pyopencl device type. """ cl_device_type_str = cl_device_type_str.upper() if hasattr(cl.device_type, cl_device_type_str): return getattr(cl.device_type, cl_device_type_str) return None def device_supports_double(cl_device): """Check if the given CL device supports double Args: cl_device (pyopencl cl device): The device to check if it supports double. Returns: boolean: True if the given cl_device supports double, false otherwise. """ return cl_device.get_info(cl.device_info.DOUBLE_FP_CONFIG) == 63 def results_to_dict(results, param_names): """Create a dictionary out of the results. This basically splits the given nd-matrix into sub matrices based on the second dimension. The length of the parameter names should match the length of the second dimension. If a two dimensional matrix of shape (d, p) is given we return a matrix of shape (d,). If a matrix of shape (d, p, s_1, s_2, ..., s_n) is given, we return a matrix of shape (d, s_1, s_2, ..., s_n). Args: results: a multidimensional matrix we index based on the second dimension. 
param_names (list of str): the names of the parameters, one per column Returns: dict: the results packed in a dictionary """ if results.shape[1] != len(param_names): raise ValueError('The number of columns ({}) in the matrix does not match ' 'the number of dictionary keys provided ({}).'.format(results.shape[1], len(param_names))) return {name: results[:, i, ...] for i, name in enumerate(param_names)} def get_float_type_def(double_precision): """Get the model floating point type definition. The MOT_INT_CMP_TYPE is meant for the select() function where you need a long in the case of double precision. Args: double_precision (boolean): if True we will use the double type for the mot_float_type type. Else, we will use the single precision float type for the mot_float_type type. Returns: str: defines the mot_float_type types, the epsilon and the MIN and MAX values. """ if double_precision: return ''' #if __OPENCL_VERSION__ <= CL_VERSION_1_1 #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif #define mot_float_type double #define mot_float_type2 double2 #define mot_float_type4 double4 #define mot_float_type8 double8 #define mot_float_type16 double16 #define MOT_EPSILON DBL_EPSILON #define MOT_MIN DBL_MIN #define MOT_MAX DBL_MAX #define MOT_INT_CMP_TYPE long ''' else: return ''' #if __OPENCL_VERSION__ <= CL_VERSION_1_1 #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif #define mot_float_type float #define mot_float_type2 float2 #define mot_float_type4 float4 #define mot_float_type8 float8 #define mot_float_type16 float16 #define MOT_EPSILON FLT_EPSILON #define MOT_MIN FLT_MIN #define MOT_MAX FLT_MAX #define MOT_INT_CMP_TYPE int ''' def topological_sort(data): """Topological sort the given dictionary structure. Args: data (dict); dictionary structure where the value is a list of dependencies for that given key. For example: ``{'a': (), 'b': ('a',)}``, where ``a`` depends on nothing and ``b`` depends on ``a``. Returns: tuple: the dependencies in constructor order """ def check_self_dependencies(input_data): """Check if there are self dependencies within a node. Self dependencies are for example: ``{'a': ('a',)}``. Args: input_data (dict): the input data. Of a structure similar to {key: (list of values), ...}. Raises: ValueError: if there are indeed self dependencies """ for k, v in input_data.items(): if k in v: raise ValueError('Self-dependency, {} depends on itself.'.format(k)) def prepare_input_data(input_data): """Prepares the input data by making sets of the dependencies. This automatically removes redundant items. Args: input_data (dict): the input data. Of a structure similar to {key: (list of values), ...}. Returns: dict: a copy of the input dict but with sets instead of lists for the dependencies. """ return {k: set(v) for k, v in input_data.items()} def find_items_without_dependencies(input_data): """This searches the dependencies of all the items for items that have no dependencies. For example, suppose the input is: ``{'a': ('b',)}``, then ``a`` depends on ``b`` and ``b`` depends on nothing. This class returns ``(b,)`` in this example. Args: input_data (dict): the input data. Of a structure similar to {key: (list of values), ...}. Returns: list: the list of items without any dependency. 
""" return list(reduce(set.union, input_data.values()) - set(input_data.keys())) def add_empty_dependencies(data): items_without_dependencies = find_items_without_dependencies(data) data.update({item: set() for item in items_without_dependencies}) def get_sorted(input_data): data = input_data while True: ordered = set(item for item, dep in data.items() if len(dep) == 0) if not ordered: break yield ordered data = {item: (dep - ordered) for item, dep in data.items() if item not in ordered} if len(data) != 0: raise ValueError('Cyclic dependencies exist ' 'among these items: {}'.format(', '.join(repr(x) for x in data.items()))) check_self_dependencies(data) if not len(data): return [] data_copy = prepare_input_data(data) add_empty_dependencies(data_copy) result = [] for d in get_sorted(data_copy): try: d = sorted(d) except TypeError: d = list(d) result.extend(d) return result def is_scalar(value): """Test if the given value is a scalar. This function also works with memory mapped array values, in contrast to the numpy is_scalar method. Args: value: the value to test for being a scalar value Returns: boolean: if the given value is a scalar or not """ return np.isscalar(value) or (isinstance(value, np.ndarray) and (len(np.squeeze(value).shape) == 0)) def all_elements_equal(value): """Checks if all elements in the given value are equal to each other. If the input is a single value the result is trivial. If not, we compare all the values to see if they are exactly the same. Args: value (ndarray or number): a numpy array or a single number. Returns: bool: true if all elements are equal to each other, false otherwise """ if is_scalar(value): return True return (value == value[0]).all() def get_single_value(value): """Get a single value out of the given value. This is meant to be used after a call to :func:`all_elements_equal` that returned True. With this function we return a single number from the input value. Args: value (ndarray or number): a numpy array or a single number. Returns: number: a single number from the input Raises: ValueError: if not all elements are equal """ if not all_elements_equal(value): raise ValueError('Not all values are equal to each other.') if is_scalar(value): return value return value.item(0) PK$ZI{ ##mot/cl_environments.pyimport pyopencl as cl from six import string_types from .utils import device_supports_double, device_type_from_string __author__ = 'Robbert Harms' __date__ = "2014-11-14" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class CLEnvironment(object): def __init__(self, platform, device): """Storage unit for an OpenCL environment. Args: platform (pyopencl platform): An PyOpenCL platform. device (pyopencl device): An PyOpenCL device """ self._platform = platform self._device = device self._cl_context = CLRunContext(self) def get_cl_context(self): """Get a CL context from this environment. Returns: CLContext: a CL context for the computations """ return self._cl_context @property def supports_double(self): """Check if the device listed by this environment supports double Returns: boolean: True if the device supports double, false otherwise. """ return device_supports_double(self.device) @property def platform(self): """Get the platform associated with this environment. Returns: pyopencl platform: The platform associated with this environment. """ return self._platform @property def device(self): """Get the device associated with this environment. 
Returns: pyopencl device: The device associated with this environment. """ return self._device @property def is_gpu(self): """Check if the device associated with this environment is a GPU. Returns: boolean: True if the device is an GPU, false otherwise. """ return self._device.get_info(cl.device_info.TYPE) == cl.device_type.GPU @property def is_cpu(self): """Check if the device associated with this environment is a CPU. Returns: boolean: True if the device is an CPU, false otherwise. """ return self._device.get_info(cl.device_info.TYPE) == cl.device_type.CPU @property def device_type(self): """Get the device type of the device in this environment. Returns: the device type of this device. """ return self._device.get_info(cl.device_info.TYPE) def __str__(self): s = 'GPU' if self.is_gpu else 'CPU' s += ' - ' + self.device.name + ' (' + self.platform.name + ')' return s def __repr__(self): s = 75*"=" + "\n" s += repr(self._platform) + "\n" s += 75*"=" + "\n" s += self._print_info(self._platform, cl.platform_info) s += 75*"-" + "\n" s += repr(self._device) + "\n" s += 75*"-" + "\n" s += self._print_info(self._device, cl.device_info) return s def _print_info(self, obj, info_cls): s = '' def format_title(title_str): title_str = title_str.lower() title_str = title_str.replace('_', ' ') return title_str for info_name in sorted(dir(info_cls)): if not info_name.startswith("_") and info_name != "to_string": info = getattr(info_cls, info_name) try: info_value = obj.get_info(info) except cl.LogicError: info_value = "" if info_cls == cl.device_info and info_name == "PARTITION_TYPES_EXT" and isinstance(info_value, list): prop_value = [cl.device_partition_property_ext.to_string(v, "") for v in info_value] s += ("%s: %s" % (format_title(info_name), prop_value)) + "\n" else: try: s += ("%s: %s" % (format_title(info_name), info_value)) + "\n" except cl.LogicError: s += ("%s: " % info_name) + "\n" s += "\n" return s def __eq__(self, other): """A device is equal to another if the platform and the device are equal.""" if isinstance(other, CLEnvironment): return other.platform == self.platform and other.device == self.device return False class CLRunContext(object): def __init__(self, cl_environment): """Context for single run use Arguments: cl_environment (CLEnvironment): The environment for which to create a context and queue. """ self.context = cl.Context([cl_environment.device]) self.queue = cl.CommandQueue(self.context, device=cl_environment.device) class CLEnvironmentFactory(object): @staticmethod def single_device(cl_device_type='GPU', platform=None, fallback_to_any_device_type=False): """Get a list containing a single device environment, for a device of the given type on the given platform. This will only fetch devices that support double (possibly only double with a pragma defined, but still, it should support double). Args: cl_device_type (cl.device_type.* or string): The type of the device we want, can be a opencl device type or a string matching 'GPU', 'CPU' or 'ALL'. platform (opencl platform): The opencl platform to select the devices from fallback_to_any_device_type (boolean): If True, try to fallback to any possible device in the system. Returns: list of CLEnvironment: List with one element, the CL runtime environment requested. 
""" if isinstance(cl_device_type, string_types): cl_device_type = device_type_from_string(cl_device_type) device = None if platform is None: platforms = cl.get_platforms() else: platforms = [platform] for platform in platforms: devices = platform.get_devices(device_type=cl_device_type) for dev in devices: if device_supports_double(dev): try: env = CLEnvironment(platform, dev) return [env] except cl.RuntimeError: pass if not device: if fallback_to_any_device_type: return cl.get_platforms()[0].get_devices() else: raise ValueError('No devices of the specified type ({}) found.'.format( cl.device_type.to_string(cl_device_type))) raise ValueError('No suitable OpenCL device found.') @staticmethod def all_devices(cl_device_type=None, platform=None): """Get multiple device environments, optionally only of the indicated type. This will only fetch devices that support double point precision. Args: cl_device_type (cl.device_type.* or string): The type of the device we want, can be a opencl device type or a string matching 'GPU' or 'CPU'. platform (opencl platform): The opencl platform to select the devices from Returns: list of CLEnvironment: List with the CL device environments. """ if isinstance(cl_device_type, string_types): cl_device_type = device_type_from_string(cl_device_type) runtime_list = [] if platform is None: platforms = cl.get_platforms() else: platforms = [platform] for platform in platforms: if cl_device_type: devices = platform.get_devices(device_type=cl_device_type) else: devices = platform.get_devices() for device in devices: if device_supports_double(device): env = CLEnvironment(platform, device) runtime_list.append(env) return runtime_list @staticmethod def smart_device_selection(): """Get a list of device environments that is suitable for use in MOT. Basically this gets the total list of devices using all_devices() and applies a filter on it. This filter does the following: 1) if the 'AMD Accelerated Parallel Processing' is available remove all environments using the 'Clover' platform. More things may be implemented in the future. Returns: list of CLEnvironment: List with the CL device environments. """ cl_environments = CLEnvironmentFactory.all_devices() platform_names = [env.platform.name for env in cl_environments] has_amd_pro_platform = any('AMD Accelerated Parallel Processing' in name for name in platform_names) if has_amd_pro_platform: return list(filter(lambda env: 'Clover' not in env.platform.name, cl_environments)) return cl_environments PK$ZIqmot/__init__.pyimport logging from .__version__ import VERSION, VERSION_STATUS, __version__ __author__ = 'Robbert Harms' __date__ = '2015-01-01' __email__ = 'robbert.harms@maastrichtuniversity.nl' __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" try: from logging import NullHandler except ImportError: class NullHandler(logging.Handler): def emit(self, record): pass logging.getLogger(__name__).addHandler(NullHandler()) def smart_device_selection(): """Get a list of device environments that is suitable for use in MOT. Returns: list of CLEnvironment: List with the CL device environments. """ from mot.cl_environments import CLEnvironmentFactory return CLEnvironmentFactory.smart_device_selection() PKxJ$Smot/configuration.py"""Contains the runtime configuration of MOT. This consists of two parts, functions to get the current runtime settings and configuration actions to update these settings. 
To set a new configuration, create a new :py:class:`ConfigAction` and use this within a context environment using :py:func:`config_context`. Example: .. code-block:: python from mot.configuration import RuntimeConfigurationAction, config_context with config_context(RuntimeConfigurationAction(...)): ... """ from contextlib import contextmanager from copy import copy from mot.model_building.parameter_functions.proposal_updates import AcceptanceRateScaling from .cl_environments import CLEnvironmentFactory from .load_balance_strategies import PreferGPU __author__ = 'Robbert Harms' __date__ = "2015-07-22" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" """The runtime configuration, this can be overwritten at run time. For any of the AbstractCLRoutines it holds that if no suitable defaults are given we use the ones provided by this module. This entire module acts as a singleton containing the current runtime configuration. """ _config = { 'cl_environments': CLEnvironmentFactory.smart_device_selection(), 'load_balancer': PreferGPU(), 'compile_flags': { 'general': { '-cl-single-precision-constant': True, '-cl-denorms-are-zero': True, '-cl-mad-enable': True, '-cl-no-signed-zeros': True }, # CL Routine specific flags 'cl_routine_specific': {}, # The flags to disable when running in double mode 'disable_in_double_precision': ['-cl-single-precision-constant'] }, 'ignore_kernel_compile_warnings': True, # The default proposal update function to use during sampling 'default_proposal_update': AcceptanceRateScaling() } def should_ignore_kernel_compile_warnings(): """Check if we should ignore kernel compile warnings or not. Returns: boolean: True if we should ignore the kernel compile warnings, false if not. """ return _config['ignore_kernel_compile_warnings'] def get_cl_environments(): """Get the current CL environment to use during CL calculations. Returns: list of CLEnvironment: the current list of CL environments. """ return _config['cl_environments'] def set_cl_environments(cl_environments): """Set the current CL environments to the given list Args: cl_environments (list of CLEnvironment): the new list of CL environments. Raises: ValueError: if the list of environments is empty """ if not cl_environments: raise ValueError('The list of CL Environments is empty.') _config['cl_environments'] = cl_environments def get_load_balancer(): """Get the current load balancer to use during CL calculations. Returns: SimpleLoadBalanceStrategy: the current load balancer to use """ return _config['load_balancer'] def set_load_balancer(load_balancer): """Set the current CL environments to the given list Args: load_balancer (SimpleLoadBalanceStrategy): the current load balancer to use """ _config['load_balancer'] = load_balancer def get_compile_flags(cl_routine_name=None): """Get the default compile flags to use in a CL routine. Args: cl_routine_name (str): the name of the CL routine for which we want the compile flags. If not given we return the default flags. If given we return the default flags updated with the routine specific flags. Returns: dict: the default list of compile flags we wish to use """ flags = copy(_config['compile_flags']['general']) if cl_routine_name in _config['compile_flags']['cl_routine_specific']: flags.update(_config['compile_flags']['cl_routine_specific'][cl_routine_name]) return flags def get_compile_flags_to_disable_in_double_precision(): """Get the list of compile flags we want to disable when running in double precision. 
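Routine specific flags override the general flags only for the named routine. A sketch of how that merge behaves; since no dedicated setter for these flags is provided here, the module level ``_config`` dictionary is updated directly, and the routine name used is just an illustration:

.. code-block:: python

    from mot import configuration
    from mot.configuration import get_compile_flags

    # register an extra flag for a single CL routine only
    configuration._config['compile_flags']['cl_routine_specific']['Powell'] = {
        '-cl-fast-relaxed-math': True,
    }

    general_flags = get_compile_flags()         # the general defaults
    powell_flags = get_compile_flags('Powell')  # general defaults plus the routine specific flag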
Returns: boolean: the list of flags we want to disable when running in double mode """ return copy(_config['compile_flags']['disable_in_double_precision']) def get_default_proposal_update(): """Get the default proposal update function to use in sampling. Returns: mot.model_building.parameter_functions.proposal_updates.ProposalUpdate: the proposal update function to use by default if no specific one is provided. """ return _config['default_proposal_update'] def set_default_proposal_update(proposal_update): """Set the default proposal update function to use in sampling. Args: mot.model_building.parameter_functions.proposal_updates.ProposalUpdate: the new proposal update function to use by default if no specific one is provided. """ _config['default_proposal_update'] = proposal_update @contextmanager def config_context(config_action): """Creates a context in which the config action is applied and unapplies the configuration after execution. Args: config_action (ConfigAction): the configuration action to use """ config_action.apply() yield config_action.unapply() class ConfigAction(object): def __init__(self): """Defines a configuration action for use in a configuration context. This should define an apply and unapply function that sets and unsets the configuration options. The applying action needs to remember the state before the application of the action. """ def apply(self): """Apply the current action to the current runtime configuration.""" def unapply(self): """Reset the current configuration to the previous state.""" class SimpleConfigAction(ConfigAction): def __init__(self): """Defines a default implementation of a configuration action. This simple config implements a default ``apply()`` method that saves the current state and a default ``unapply()`` that restores the previous state. For developers, it is easiest to implement ``_apply()`` such that you do not manually need to store the old configuraration. """ super(SimpleConfigAction, self).__init__() self._old_config = {} def apply(self): """Apply the current action to the current runtime configuration.""" self._old_config = {k: v for k, v in _config.items()} self._apply() def unapply(self): """Reset the current configuration to the previous state.""" for key, value in self._old_config.items(): _config[key] = value def _apply(self): """Implement this function add apply() logic after this class saves the current config.""" class RuntimeConfigurationAction(SimpleConfigAction): def __init__(self, cl_environments=None, load_balancer=None): """Updates the runtime settings. Args: cl_environments (list of CLEnvironment): the new CL environments we wish to use for future computations load_balancer (SimpleLoadBalanceStrategy): the load balancer to use """ super(RuntimeConfigurationAction, self).__init__() self._cl_environments = cl_environments self._load_balancer = load_balancer def _apply(self): if self._cl_environments is not None: set_cl_environments(self._cl_environments) if self._load_balancer is not None: set_load_balancer(self._load_balancer) class VoidConfigurationAction(ConfigAction): def __init__(self): """Does nothing, useful as a default config action. 
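A custom configuration action only needs to implement ``_apply()``; the surrounding machinery saves and restores the previous state. A minimal sketch (the ``TemporaryLoadBalancer`` name is illustrative, not part of the module):

.. code-block:: python

    from mot.configuration import SimpleConfigAction, config_context, set_load_balancer
    from mot.load_balance_strategies import PreferGPU

    class TemporaryLoadBalancer(SimpleConfigAction):
        """Illustrative action that switches the load balancer within a context."""

        def __init__(self, load_balancer):
            super(TemporaryLoadBalancer, self).__init__()
            self._load_balancer = load_balancer

        def _apply(self):
            set_load_balancer(self._load_balancer)

    with config_context(TemporaryLoadBalancer(PreferGPU())):
        pass  # computations inside this block use the temporary load balancer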
""" super(VoidConfigurationAction, self).__init__() PKJmot/cl_routines/base.pyfrom mot import configuration from mot.configuration import get_compile_flags_to_disable_in_double_precision __author__ = 'Robbert Harms' __date__ = "2014-04-26" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class CLRoutine(object): def __init__(self, cl_environments=None, load_balancer=None, compile_flags=None, **kwargs): """Base class for CL routines. Im Args: cl_environments (list of CLEnvironment): The list of CL environments using by this routine. If None is given we use the defaults in the current configuration. load_balancer (LoadBalancingStrategy): The load balancing strategy to be used by this routine. If None is given we use the defaults in the current configuration. compile_flags (dict): the list of compile flags to use during model fitting. As values use the flag name, as keys a boolean flag indicating if that one is active. """ self._cl_environments = cl_environments self._load_balancer = load_balancer self.compile_flags = compile_flags if self._cl_environments is None: self._cl_environments = configuration.get_cl_environments() if self._load_balancer is None: self._load_balancer = configuration.get_load_balancer() if self.compile_flags is None: self.compile_flags = configuration.get_compile_flags(self.__class__.__name__) def set_compile_flag(self, compile_flag, enable): """Enable or disable the given compile flag. Args: compile_flag (str): the compile flag we want to enable or disable enable (boolean): if we enable (True) or disable (False) this compile flag """ self.compile_flags.update({compile_flag: enable}) def get_compile_flags_list(self, double_precision=True): """Get a list of the enabled compile flags. Args: double_precision (boolean): if this is set to True we remove some of the Flags that are only applicable when running in float mode. More specifically, this will set cl-single-precision-constant to False. Set this to False to disable this behaviour and use the flags as specified in the config. Returns: list: the list of enabled compile flags. """ elements = [flag for flag, enabled in self.compile_flags.items() if enabled] if double_precision: elements_to_remove = get_compile_flags_to_disable_in_double_precision() elements = list(filter(lambda e: e not in elements_to_remove, elements)) return elements @property def cl_environments(self): return self._cl_environments @cl_environments.setter def cl_environments(self, cl_environments): if cl_environments is not None: self._cl_environments = cl_environments @property def load_balancer(self): return self._load_balancer @load_balancer.setter def load_balancer(self, load_balancer): self._load_balancer = load_balancer def _create_workers(self, worker_generating_cb): """Create workers for all the CL environments in current use. Args: worker_generating_cb (python function): the callback function that we use to generate the worker for a specific CL environment. This should accept as single argument a CL environment and should return a Worker instance for use in CL computations. 
""" cl_environments = self.load_balancer.get_used_cl_environments(self.cl_environments) return [worker_generating_cb(env) for env in cl_environments] PKxJ/B6&&"mot/cl_routines/generate_random.pyfrom random import Random import numpy as np import pyopencl as cl from mot.cl_routines.base import CLRoutine from mot.load_balance_strategies import Worker from mot.model_building.cl_functions.library_functions import Rand123 __author__ = 'Robbert Harms' __date__ = "2014-10-29" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" def generate_uniform(nmr_samples, minimum=0, maximum=1, dtype=None, seed=None): """Draw random samples from the uniform distribution. Args: nmr_samples (int): The number of samples to draw minimum (double): The minimum value of the random numbers maximum (double): The minimum value of the random numbers dtype (np.dtype): the numpy datatype, either one of float32 (default) or float64. seed (float): the seed, if not given a random seed is used. Returns: ndarray: A numpy array with nmr_samples random samples drawn from the uniform distribution. """ generator = Random123GeneratorBase(seed=seed) return generator.generate_uniform(nmr_samples, minimum=minimum, maximum=maximum, dtype=dtype) def generate_gaussian(nmr_samples, mean=0, std=1, dtype=None, seed=None): """Draw random samples from the Gaussian distribution. Args: nmr_samples (int): The number of samples to draw mean (double): The mean of the distribution std (double): The standard deviation or the distribution dtype (np.dtype): the numpy datatype, either one of float32 (default) or float64. seed (float): the seed, if not given a random seed is used. Returns: ndarray: A numpy array with nmr_samples random samples drawn from the Gaussian distribution. """ generator = Random123GeneratorBase(seed=seed) return generator.generate_gaussian(nmr_samples, mean=mean, std=std, dtype=dtype) class Random123GeneratorBase(CLRoutine): def __init__(self, seed=None, **kwargs): """Create the random123 basis for generating a list of random numbers. *From the Random123 documentation:* Unlike conventional RNGs, counter-based RNGs are stateless functions (or function classes i.e. functors) whose arguments are a counter, and a key and returns a result of the same type as the counter. .. code-block: c result = CBRNGname(counter, key) The returned result is a deterministic function of the key and counter, i.e. a unique (counter, key) tuple will always produce the same result. The result is highly sensitive to small changes in the inputs, so that the sequence of values produced by simply incrementing the counter (or key) is effectively indistinguishable from a sequence of samples of a uniformly distributed random variable. All the Random123 generators are counter-based RNGs that use integer multiplication, xor and permutation of W-bit words to scramble its N-word input key. *Implementation note: In this implementation we generate a counter and key automatically from a single seed. Args: seed (float): the seed, if not given a random seed is used. 
""" super(Random123GeneratorBase, self).__init__(**kwargs) self.context = self.cl_environments[0].get_cl_context().context self._rng_state = self._get_rng_state(seed) def _get_rng_state(self, seed): if seed is None: seed = Random().randint(0, 2 ** 31) rng = Random(seed) dtype_info = np.iinfo(np.uint32) return np.array(list(rng.randrange(dtype_info.min, dtype_info.max + 1) for _ in range(6)), dtype=np.uint32) def generate_uniform(self, nmr_samples, minimum=0, maximum=1, dtype=None): """Draw random samples from the uniform distribution. Args: nmr_samples (int): The number of samples to draw minimum (double): The minimum value of the random numbers maximum (double): The minimum value of the random numbers dtype (np.dtype): the numpy datatype, either one of float32 (default) or float64. Returns: ndarray: A numpy array with nmr_samples random samples drawn from the uniform distribution. """ dtype = dtype or np.float32 if dtype not in (np.float32, np.float64): raise ValueError('The given dtype should be either float32 or float64, {} given.'.format( dtype.__class__.__name__)) c_type = 'float' if dtype == np.float64: c_type = "double" return self._generate_samples(nmr_samples, self._get_uniform_kernel(minimum, maximum, c_type)) def generate_gaussian(self, nmr_samples, mean=0, std=1, dtype=None): """Draw random samples from the Gaussian distribution. Args: nmr_samples (int): The number of samples to draw mean (double): The mean of the distribution std (double): The standard deviation or the distribution dtype (np.dtype): the numpy datatype, either one of float32 (default) or float64. Returns: ndarray: A numpy array with nmr_samples random samples drawn from the Gaussian distribution. """ dtype = dtype or np.float32 if dtype not in (np.float32, np.float64): raise ValueError('The given dtype should be either float32 or float64, {} given.'.format( dtype.__class__.__name__)) c_type = 'float' if dtype == np.float64: c_type = "double" return self._generate_samples(nmr_samples, self._get_gaussian_kernel(mean, std, c_type)) def _generate_samples(self, nmr_samples, kernel_source): padding = (-nmr_samples) % 4 nmr_samples += padding samples = np.zeros((nmr_samples,), dtype=np.float32) workers = self._create_workers(lambda cl_environment: _Random123Worker(cl_environment, samples, kernel_source, self._rng_state)) self.load_balancer.process(workers, nmr_samples // 4) if padding: return samples[:-padding] return samples def _get_uniform_kernel(self, min_val, max_val, c_type): random_library = Rand123() src = random_library.get_cl_code() # By setting the rand123 state as kernel arguments the kernel does not need to be recompiled for a new state. 
src += ''' __kernel void generate(constant uint* rng_state, global ''' + c_type + '''* samples){ rand123_data rng_data = rand123_initialize_data( (uint[]){rng_state[0], rng_state[1], rng_state[2], rng_state[3], rng_state[4], rng_state[5]}); ''' + c_type + '''4 randomnr = rand123_uniform_''' + c_type + '''4(&rng_data); ulong gid = get_global_id(0); samples[gid * 4] = ''' + str(min_val) + ''' + randomnr.x * ''' + str(max_val - min_val) + '''; samples[gid * 4 + 1] = ''' + str(min_val) + ''' + randomnr.y * ''' + str(max_val - min_val) + '''; samples[gid * 4 + 2] = ''' + str(min_val) + ''' + randomnr.z * ''' + str(max_val - min_val) + '''; samples[gid * 4 + 3] = ''' + str(min_val) + ''' + randomnr.w * ''' + str(max_val - min_val) + '''; } ''' return src def _get_gaussian_kernel(self, mean, std, c_type): random_library = Rand123() src = random_library.get_cl_code() # By setting the rand123 state as kernel arguments the kernel does not need to be recompiled for a new state. src += ''' __kernel void generate(constant uint* rng_state, global ''' + c_type + '''* samples){ rand123_data rng_data = rand123_initialize_data( (uint[]){rng_state[0], rng_state[1], rng_state[2], rng_state[3], rng_state[4], rng_state[5]}); ''' + c_type + '''4 randomnr = rand123_normal_''' + c_type + '''4(&rng_data); ulong gid = get_global_id(0); samples[gid * 4] = ''' + str(mean) + ''' + randomnr.x * ''' + str(std) + '''; samples[gid * 4 + 1] = ''' + str(mean) + ''' + randomnr.y * ''' + str(std) + '''; samples[gid * 4 + 2] = ''' + str(mean) + ''' + randomnr.z * ''' + str(std) + '''; samples[gid * 4 + 3] = ''' + str(mean) + ''' + randomnr.w * ''' + str(std) + '''; } ''' return src class _Random123Worker(Worker): def __init__(self, cl_environment, samples, kernel_source, rng_state): super(_Random123Worker, self).__init__(cl_environment) self._samples = samples self._nmr_samples = self._samples.shape[0] self._kernel_source = kernel_source self._rng_state = rng_state self._samples_buf = cl.Buffer(self._cl_run_context.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._samples) self._rng_state_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self._rng_state) self._kernel = self._build_kernel() def calculate(self, range_start, range_end): nmr_problems = range_end - range_start kernel_args = [self._rng_state_buffer, self._samples_buf] self._kernel.generate(self._cl_run_context.queue, (int(nmr_problems), ), None, *kernel_args, global_offset=(range_start,)) self._enqueue_readout(self._samples_buf, self._samples, range_start * 4, range_end * 4) def _get_kernel_source(self): return self._kernel_source PKH1mot/cl_routines/__init__.py__author__ = 'Robbert Harms' __date__ = "2014-05-21" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl"PKvJm/)mot/cl_routines/mapping/error_measures.pyimport numpy as np from ...cl_routines.base import CLRoutine __author__ = 'Robbert Harms' __date__ = "2014-02-05" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class ErrorMeasures(CLRoutine): def __init__(self, cl_environments=None, load_balancer=None, double_precision=False): """Given a set of raw errors per voxel, calculate some interesting error measures.""" super(ErrorMeasures, self).__init__(cl_environments=cl_environments, load_balancer=load_balancer) self._double_precision = double_precision def calculate(self, errors): """Calculate some error measures 
given the residuals per problem instance. Args: errors (ndarray): An (d, r) matrix with for d problems r residuals. Returns: dict: A dictionary containing (for each voxel): - Errors.l2: the l2 norm (square root of sum of squares) - Errors.mse: the mean sum of squared errors """ sse = np.sum(np.power(errors, 2), axis=1) return {'Errors.l2': np.linalg.norm(errors, axis=1), 'Errors.mse': sse/errors.shape[1]} PKxJ;;3mot/cl_routines/mapping/loglikelihood_calculator.pyfrom collections import Mapping import pyopencl as cl import numpy as np from ...utils import get_float_type_def from ...cl_routines.base import CLRoutine from ...load_balance_strategies import Worker __author__ = 'Robbert Harms' __date__ = "2014-02-05" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class LogLikelihoodCalculator(CLRoutine): def calculate(self, model, parameters, evaluation_model=None): """Calculate and return the log likelihood of the given model under the given parameters. Args: model (AbstractModel): The model to calculate the full log likelihood for. parameters (dict or ndarray): The parameters to use in the evaluation of the model If a dict is given we assume it is with values for a set of parameters If an ndarray is given we assume that we have data for all parameters. evaluation_model (EvaluationModel): the evaluation model to use for the log likelihood. If not given we use the one defined in the model. Returns: Return per voxel the log likelihood. """ parameters = self._initialize_parameters(parameters, model) log_likelihoods = self._initialize_result_array(model) workers = self._create_workers( lambda cl_environment: _LogLikelihoodCalculatorWorker(cl_environment, self.get_compile_flags_list(model.double_precision), model, parameters, log_likelihoods, evaluation_model)) self.load_balancer.process(workers, model.get_nmr_problems()) return log_likelihoods def _initialize_parameters(self, parameters, model): np_dtype = np.float32 if model.double_precision: np_dtype = np.float64 if isinstance(parameters, Mapping): return np.require(model.get_initial_parameters(parameters), np_dtype, requirements=['C', 'A', 'O']) return np.require(parameters, np_dtype, requirements=['C', 'A', 'O']) def _initialize_result_array(self, model): nmr_problems = model.get_nmr_problems() np_dtype = np.float32 if model.double_precision: np_dtype = np.float64 return np.zeros((nmr_problems,), dtype=np_dtype, order='C') class _LogLikelihoodCalculatorWorker(Worker): def __init__(self, cl_environment, compile_flags, model, parameters, log_likelihoods, evaluation_model): super(_LogLikelihoodCalculatorWorker, self).__init__(cl_environment) self._model = model self._double_precision = model.double_precision self._log_likelihoods = log_likelihoods self._parameters = parameters self._evaluation_model = evaluation_model self._all_buffers, self._likelihoods_buffer = self._create_buffers() self._kernel = self._build_kernel(compile_flags) def __del__(self): for buffer in self._all_buffers: buffer.release() def calculate(self, range_start, range_end): nmr_problems = range_end - range_start self._kernel.run_kernel(self._cl_run_context.queue, (int(nmr_problems), ), None, *self._all_buffers, global_offset=(int(range_start),)) self._enqueue_readout(self._likelihoods_buffer, self._log_likelihoods, range_start, range_end) def _create_buffers(self): likelihoods_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._log_likelihoods) 
params_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._parameters) all_buffers = [params_buffer, likelihoods_buffer] for data in self._model.get_data(): all_buffers.append(cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)) return all_buffers, likelihoods_buffer def _get_kernel_source(self): cl_func = self._model.get_log_likelihood_function('getLogLikelihood', evaluation_model=self._evaluation_model) nmr_params = self._parameters.shape[1] kernel_param_names = ['global mot_float_type* params', 'global mot_float_type* log_likelihoods'] kernel_param_names.extend(self._model.get_kernel_param_names(self._cl_environment.device)) kernel_source = '' kernel_source += get_float_type_def(self._double_precision) kernel_source += self._model.get_kernel_data_struct(self._cl_environment.device) kernel_source += cl_func kernel_source += ''' __kernel void run_kernel( ''' + ",\n".join(kernel_param_names) + ''' ){ ulong gid = get_global_id(0); ''' + self._model.get_kernel_data_struct_initialization(self._cl_environment.device, 'data') + ''' mot_float_type x[''' + str(nmr_params) + ''']; for(uint i = 0; i < ''' + str(nmr_params) + '''; i++){ x[i] = params[gid * ''' + str(nmr_params) + ''' + i]; } log_likelihoods[gid] = getLogLikelihood((void*)&data, x); } ''' return kernel_source PKxJ%:R}}'mot/cl_routines/mapping/codec_runner.pyimport logging import pyopencl as cl import numpy as np from ...utils import get_float_type_def from ...cl_routines.base import CLRoutine from ...load_balance_strategies import Worker __author__ = 'Robbert Harms' __date__ = "2014-05-18" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class CodecRunner(CLRoutine): def __init__(self, **kwargs): """This class can run the parameter encoding and decoding transformations. These transformations are used to transform the parameters to and from optimization space. """ super(CodecRunner, self).__init__(**kwargs) self._logger = logging.getLogger(__name__) def decode(self, model, data): """Decode the given parameters using the given model. This transforms the data from optimization space to model space. Args: model (mot.model_interfaces.OptimizeModelInterface): The model to use data (ndarray): The parameters to transform to model space Returns: ndarray: The array with the transformed parameters. """ if len(data.shape) > 1: from_width = data.shape[1] else: from_width = 1 return self._transform_parameters(model.get_parameter_decode_function('decodeParameters'), 'decodeParameters', data, from_width, model) def encode(self, model, data): """Encode the given parameters using the given model. This transforms the data from model space to optimization space. Args: model (mot.model_interfaces.OptimizeModelInterface): The model to use data (ndarray): The parameters to transform to optimization space Returns: ndarray: The array with the transformed parameters. 
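A round trip usage sketch (``my_model`` is assumed to be an object implementing :class:`mot.model_interfaces.OptimizeModelInterface` with the encode and decode transformations defined, and ``nmr_params`` its number of optimized parameters; both are placeholders here):

.. code-block:: python

    import numpy as np
    from mot.cl_routines.mapping.codec_runner import CodecRunner

    codec = CodecRunner()

    # one row of parameters per problem instance, in model space
    params = np.random.rand(100, nmr_params)

    encoded = codec.encode(my_model, params)   # model space -> optimization space
    decoded = codec.decode(my_model, encoded)  # optimization space -> model space
    # for a well-defined codec, decoded should be close to params again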
""" if len(data.shape) > 1: from_width = data.shape[1] else: from_width = 1 return self._transform_parameters(model.get_parameter_encode_function('encodeParameters'), 'encodeParameters', data, from_width, model) def _transform_parameters(self, cl_func, cl_func_name, data, nmr_params, model): np_dtype = np.float32 if model.double_precision: np_dtype = np.float64 data = np.require(data, np_dtype, requirements=['C', 'A', 'O', 'W']) workers = self._create_workers(lambda cl_environment: _CodecWorker( cl_environment, self.get_compile_flags_list(model.double_precision), cl_func, cl_func_name, data, nmr_params, model)) self.load_balancer.process(workers, data.shape[0]) return data class _CodecWorker(Worker): def __init__(self, cl_environment, compile_flags, cl_func, cl_func_name, data, nmr_params, model): super(_CodecWorker, self).__init__(cl_environment) self._cl_func = cl_func self._cl_func_name = cl_func_name self._data = data self._nmr_params = nmr_params self._model = model self._param_buf = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR, hostbuf=self._data) self._all_buffers = [self._param_buf] for data in self._model.get_data(): self._all_buffers.append(cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)) self._kernel = self._build_kernel(compile_flags) def __del__(self): for buffer in self._all_buffers: buffer.release() def calculate(self, range_start, range_end): nmr_problems = range_end - range_start self._kernel.transformParameterSpace(self._cl_run_context.queue, (int(nmr_problems), ), None, *self._all_buffers, global_offset=(int(range_start),)) self._enqueue_readout(self._param_buf, self._data, range_start, range_end) def _get_kernel_source(self): kernel_param_names = ['global mot_float_type* x_global'] + \ self._model.get_kernel_param_names(self._cl_environment.device) kernel_source = '' kernel_source += get_float_type_def(self._model.double_precision) kernel_source += str(self._model.get_kernel_data_struct(self._cl_environment.device)) kernel_source += self._cl_func kernel_source += ''' __kernel void transformParameterSpace( ''' + ",\n".join(kernel_param_names) + '''){ ulong gid = get_global_id(0); ''' + self._model.get_kernel_data_struct_initialization(self._cl_environment.device, 'data') + ''' mot_float_type x[''' + str(self._nmr_params) + ''']; for(uint i = 0; i < ''' + str(self._nmr_params) + '''; i++){ x[i] = x_global[gid * ''' + str(self._nmr_params) + ''' + i]; } ''' + self._cl_func_name + '''((void*)&data, x); for(uint i = 0; i < ''' + str(self._nmr_params) + '''; i++){ x_global[gid * ''' + str(self._nmr_params) + ''' + i] = x[i]; } } ''' return kernel_source PKxJ0mot/cl_routines/mapping/calc_dependent_params.pyimport pyopencl as cl from ...utils import results_to_dict, get_float_type_def from ...cl_routines.base import CLRoutine from ...load_balance_strategies import Worker import numpy as np __author__ = 'Robbert Harms' __date__ = "2014-02-05" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class CalculateDependentParameters(CLRoutine): def __init__(self, double_precision=False, **kwargs): """CL code for calculating the dependent parameters. Some of the models may contain parameter dependencies. We would like to return the maps for these parameters as well as all the other maps. Since the dependencies are specified in CL, we have to recourse to CL to calculate these maps. 
Args: double_precision (boolean): if we will use the double (True) or single floating (False) type for the calculations """ super(CalculateDependentParameters, self).__init__(**kwargs) self._double_precision = double_precision def calculate(self, model, estimated_parameters_list, parameters_listing, dependent_parameter_names): """Calculate the dependent parameters This uses the calculated parameters in the results dictionary to run the parameters_listing in CL to obtain the maps for the dependent parameters. Args: model (mot.model_interfaces.OptimizeModelInterface): the model for which to get the dependent parameters estimated_parameters_list (list of ndarray): The list with the one-dimensional ndarray of estimated parameters parameters_listing (str): The parameters listing in CL dependent_parameter_names (list of list of str): Per parameter we would like to obtain the CL name and the result map name. For example: (('Wball_w', 'Wball.w'),) Returns: dict: A dictionary with the calculated maps for the dependent parameters. """ np_dtype = np.float32 if self._double_precision: np_dtype = np.float64 results_list = np.zeros( (estimated_parameters_list[0].shape[0], len(dependent_parameter_names)), dtype=np_dtype, order='C') estimated_parameters = np.require(np.dstack(estimated_parameters_list), np_dtype, requirements=['C', 'A', 'O'])[0, ...] workers = self._create_workers( lambda cl_environment: _CDPWorker(cl_environment, self.get_compile_flags_list(self._double_precision), model, len(estimated_parameters_list), estimated_parameters, parameters_listing, dependent_parameter_names, results_list, self._double_precision)) self.load_balancer.process(workers, estimated_parameters_list[0].shape[0]) return results_to_dict(results_list, [n[1] for n in dependent_parameter_names]) class _CDPWorker(Worker): def __init__(self, cl_environment, compile_flags, model, nmr_estimated_params, estimated_parameters, parameters_listing, dependent_parameter_names, results_list, double_precision): super(_CDPWorker, self).__init__(cl_environment) self._nmr_estimated_params = nmr_estimated_params self._parameters_listing = parameters_listing self._dependent_parameter_names = dependent_parameter_names self._results_list = results_list self._double_precision = double_precision self._model = model self._estimated_parameters = estimated_parameters self._all_buffers, self._results_list_buffer = self._create_buffers() self._kernel = self._build_kernel(compile_flags) def __del__(self): for buffer in self._all_buffers: buffer.release() def calculate(self, range_start, range_end): nmr_problems = int(range_end - range_start) self._kernel.transform(self._cl_run_context.queue, (int(nmr_problems), ), None, *self._all_buffers, global_offset=(int(range_start),)) self._enqueue_readout(self._results_list_buffer, self._results_list, range_start, range_end) def _create_buffers(self): estimated_parameters_buf = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._estimated_parameters) results_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._results_list) data_buffers = [estimated_parameters_buf, results_buffer] for data in self._model.get_data(): data_buffers.append(cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)) return data_buffers, results_buffer def _get_kernel_source(self): dependent_parameter_names = [n[0] for n in self._dependent_parameter_names] 
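# Example (added for illustration, toy data only): how the list of one-dimensional
# estimated parameter arrays is combined into a single (d, p) matrix above.
import numpy as np

d = 5                                           # number of problem instances
estimated_parameters_list = [np.arange(d, dtype=np.float64),        # parameter 1
                             np.arange(d, dtype=np.float64) * 10]   # parameter 2

stacked = np.dstack(estimated_parameters_list)  # shape (1, d, 2)
matrix = stacked[0, ...]                        # shape (d, 2): one row per problem

assert matrix.shape == (d, len(estimated_parameters_list))
assert np.all(matrix[:, 1] == estimated_parameters_list[1])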
parameter_write_out = '' for i, p in enumerate(dependent_parameter_names): parameter_write_out += 'results[gid * ' + str(len(dependent_parameter_names)) + \ ' + ' + str(i) + '] = ' + p + ";\n" kernel_param_names = ['global mot_float_type* params', 'global mot_float_type* results'] kernel_param_names.extend(self._model.get_kernel_param_names(self._cl_environment.device)) kernel_source = '' kernel_source += get_float_type_def(self._double_precision) kernel_source += self._model.get_kernel_data_struct(self._cl_environment.device) kernel_source += ''' __kernel void transform( ''' + ",\n".join(kernel_param_names) + ''' ){ ulong gid = get_global_id(0); ''' + self._model.get_kernel_data_struct_initialization(self._cl_environment.device, 'data_var') + ''' ''' + self._model.get_kernel_data_struct_type() + '''* data = &data_var; mot_float_type x[''' + str(self._nmr_estimated_params) + ''']; for(uint i = 0; i < ''' + str(self._nmr_estimated_params) + '''; i++){ x[i] = params[gid * ''' + str(self._nmr_estimated_params) + ''' + i]; } ''' + self._parameters_listing + ''' ''' + parameter_write_out + ''' } ''' return kernel_source PKxJ܈/mot/cl_routines/mapping/objective_calculator.pyfrom collections import Mapping import pyopencl as cl import numpy as np from ...utils import get_float_type_def from ...cl_routines.base import CLRoutine from ...load_balance_strategies import Worker __author__ = 'Robbert Harms' __date__ = "2014-02-05" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class ObjectiveCalculator(CLRoutine): def calculate(self, model, parameters): """Calculate and return the objective function of the given model for the given parameters. This evaluates the model and compares it to the problem data to get objective values. This returns the objective value per problem instance, for an objective function value per observation per problem use the :class:`~.objective_list_calculator.ObjectiveListCalculator`. Args: model (AbstractModel): The model to calculate the objective function of. parameters (dict or ndarray): The parameters to use in the evaluation of the model If a dict is given we assume it is with values for a set of parameters If an ndarray is given we assume that we have data for all parameters. 
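# Usage sketch (illustrative; `my_model` is a hypothetical OptimizeModelInterface
# implementation, not part of the original sources): the calculator accepts either a
# dict of starting values or a full (d, p) parameter matrix and returns one objective
# function value per problem instance.
import numpy as np
from mot.cl_routines.mapping.objective_calculator import ObjectiveCalculator

def objectives_for(my_model, parameters):
    calculator = ObjectiveCalculator()
    values = calculator.calculate(my_model, parameters)  # ndarray of shape (d,)
    return np.asarray(values)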
Returns: Return per voxel the objective function value """ parameters = self._initialize_parameters(parameters, model) objective_values = self._initialize_result_array(model) workers = self._create_workers( lambda cl_environment: _ObjectiveCalculatorWorker( cl_environment, self.get_compile_flags_list(model.double_precision), model, parameters, objective_values)) self.load_balancer.process(workers, model.get_nmr_problems()) return objective_values def _initialize_parameters(self, parameters, model): np_dtype = np.float32 if model.double_precision: np_dtype = np.float64 if isinstance(parameters, Mapping): return np.require(model.get_initial_parameters(parameters), np_dtype, requirements=['C', 'A', 'O']) return np.require(parameters, np_dtype, requirements=['C', 'A', 'O']) def _initialize_result_array(self, model): nmr_problems = model.get_nmr_problems() np_dtype = np.float32 if model.double_precision: np_dtype = np.float64 return np.zeros((nmr_problems,), dtype=np_dtype, order='C') class _ObjectiveCalculatorWorker(Worker): def __init__(self, cl_environment, compile_flags, model, parameters, objective_values): super(_ObjectiveCalculatorWorker, self).__init__(cl_environment) self._model = model self._double_precision = model.double_precision self._objective_values = objective_values self._parameters = parameters self._all_buffers, self._objective_values_buffer = self._create_buffers() self._kernel = self._build_kernel(compile_flags) def __del__(self): for buffer in self._all_buffers: buffer.release() def calculate(self, range_start, range_end): nmr_problems = range_end - range_start self._kernel.run_kernel(self._cl_run_context.queue, (int(nmr_problems), ), None, *self._all_buffers, global_offset=(int(range_start),)) self._enqueue_readout(self._objective_values_buffer, self._objective_values, range_start, range_end) def _create_buffers(self): objective_value_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._objective_values) params_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._parameters) all_buffers = [params_buffer, objective_value_buffer] for data in self._model.get_data(): all_buffers.append(cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)) return all_buffers, objective_value_buffer def _get_kernel_source(self): nmr_params = self._parameters.shape[1] kernel_param_names = ['global mot_float_type* params', 'global mot_float_type* objective_values'] kernel_param_names.extend(self._model.get_kernel_param_names(self._cl_environment.device)) kernel_source = '' kernel_source += get_float_type_def(self._double_precision) kernel_source += self._model.get_kernel_data_struct(self._cl_environment.device) kernel_source += self._model.get_objective_function('calculateObjective') kernel_source += ''' __kernel void run_kernel( ''' + ",\n".join(kernel_param_names) + ''' ){ ulong gid = get_global_id(0); ''' + self._model.get_kernel_data_struct_initialization(self._cl_environment.device, 'data') + ''' mot_float_type x[''' + str(nmr_params) + ''']; for(uint i = 0; i < ''' + str(nmr_params) + '''; i++){ x[i] = params[gid * ''' + str(nmr_params) + ''' + i]; } objective_values[gid] = calculateObjective((void*)&data, x); } ''' return kernel_source PKH1#mot/cl_routines/mapping/__init__.py__author__ = 'Robbert Harms' __date__ = "2014-05-21" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = 
"robbert.harms@maastrichtuniversity.nl"PKxJeUU.mot/cl_routines/mapping/residual_calculator.pyimport pyopencl as cl import numpy as np from ...utils import get_float_type_def from ...cl_routines.base import CLRoutine from ...load_balance_strategies import Worker __author__ = 'Robbert Harms' __date__ = "2014-02-05" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class ResidualCalculator(CLRoutine): def __init__(self, cl_environments=None, load_balancer=None): """Calculate the residuals, that is the errors, per problem instance per data point.""" super(ResidualCalculator, self).__init__(cl_environments=cl_environments, load_balancer=load_balancer) def calculate(self, model, parameters_dict, model_estimates=None): """Calculate and return the residuals. Args: model (AbstractModel): The model to calculate the residuals of. parameters_dict (dict): The parameters to use in the evaluation of the model model_estimates (ndarray): The model estimates of the model if available. If not given we calculate the model estimates using the initial parameters from the model. Returns: Return per voxel the errors (eval - data) per protocol item """ np_dtype = np.float32 if model.double_precision: np_dtype = np.float64 nmr_inst_per_problem = model.get_nmr_inst_per_problem() nmr_problems = model.get_nmr_problems() residuals = np.zeros((nmr_problems, nmr_inst_per_problem), dtype=np_dtype, order='C') parameters = np.require(model.get_initial_parameters(parameters_dict), np_dtype, requirements=['C', 'A', 'O']) if model_estimates is not None: model_estimates = np.require(model_estimates, np_dtype, requirements=['C', 'A', 'O']) workers = self._create_workers(lambda cl_environment: _ResidualCalculatorWorker( cl_environment, self.get_compile_flags_list(model.double_precision), model, parameters, residuals, model_estimates)) self.load_balancer.process(workers, model.get_nmr_problems()) return residuals class _ResidualCalculatorWorker(Worker): def __init__(self, cl_environment, compile_flags, model, parameters, residuals, model_estimates=None): super(_ResidualCalculatorWorker, self).__init__(cl_environment) self._model = model self._double_precision = model.double_precision self._residuals = residuals self._parameters = parameters self._model_estimates = model_estimates self._all_buffers, self._residuals_buffer = self._create_buffers() self._kernel = self._build_kernel(compile_flags) def __del__(self): for buffer in self._all_buffers: buffer.release() def calculate(self, range_start, range_end): nmr_problems = range_end - range_start self._kernel.get_errors(self._cl_run_context.queue, (int(nmr_problems), ), None, *self._all_buffers, global_offset=(int(range_start),)) self._enqueue_readout(self._residuals_buffer, self._residuals, range_start, range_end) def _create_buffers(self): errors_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._residuals) if self._model_estimates is None: all_buffers = [cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._parameters), errors_buffer] else: all_buffers = [cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._model_estimates), errors_buffer] for data in self._model.get_data(): all_buffers.append(cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)) return all_buffers, errors_buffer def 
_get_kernel_source(self): nmr_inst_per_problem = self._model.get_nmr_inst_per_problem() nmr_params = self._parameters.shape[1] if self._model_estimates is None: kernel_param_names = ['global mot_float_type* params', 'global mot_float_type* errors'] else: kernel_param_names = ['global mot_float_type* model_estimates', 'global mot_float_type* errors'] kernel_param_names.extend(self._model.get_kernel_param_names(self._cl_environment.device)) kernel_source = ''' #define NMR_INST_PER_PROBLEM ''' + str(nmr_inst_per_problem) + ''' ''' kernel_source += get_float_type_def(self._double_precision) kernel_source += self._model.get_kernel_data_struct(self._cl_environment.device) kernel_source += self._model.get_observation_return_function('getObservation') if self._model_estimates is None: kernel_source += self._model.get_model_eval_function('evaluateModel') kernel_source += ''' __kernel void get_errors( ''' + ",\n".join(kernel_param_names) + ''' ){ ulong gid = get_global_id(0); ''' + self._model.get_kernel_data_struct_initialization(self._cl_environment.device, 'data') + ''' mot_float_type x[''' + str(nmr_params) + ''']; for(uint i = 0; i < ''' + str(nmr_params) + '''; i++){ x[i] = params[gid * ''' + str(nmr_params) + ''' + i]; } global mot_float_type* result = errors + gid * NMR_INST_PER_PROBLEM; for(uint i = 0; i < NMR_INST_PER_PROBLEM; i++){ result[i] = getObservation((void*)&data, i) - evaluateModel((void*)&data, x, i); } } ''' else: kernel_source += ''' __kernel void get_errors( ''' + ",\n".join(kernel_param_names) + ''' ){ ulong gid = get_global_id(0); ''' + self._model.get_kernel_data_struct_initialization(self._cl_environment.device, 'data') + ''' global mot_float_type* result = errors + gid * NMR_INST_PER_PROBLEM; for(uint i = 0; i < NMR_INST_PER_PROBLEM; i++){ result[i] = getObservation(&data, i) - model_estimates[i + gid * NMR_INST_PER_PROBLEM]; } } ''' return kernel_source PKxJW9vv4mot/cl_routines/mapping/calculate_model_estimates.pyfrom collections import Mapping import pyopencl as cl import numpy as np from ...utils import get_float_type_def from ...cl_routines.base import CLRoutine from ...load_balance_strategies import Worker __author__ = 'Robbert Harms' __date__ = "2014-02-05" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class CalculateModelEstimates(CLRoutine): def calculate(self, model, parameters): """Evaluate the model for every problem and every observation and return the estimates. This only evaluates the model at the given data points. It does not use the problem data to calculate objective values. Args: model (AbstractModel): The model to evaluate. parameters (dict or ndarray): The parameters to use in the evaluation of the model If a dict is given we assume it is with values for a set of parameters If an ndarray is given we assume that we have data for all parameters. Returns: ndarray: Return per problem instance the evaluation per data point. 
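# Relation sketch (illustrative, made-up arrays): the residual kernel above computes
# observation minus model estimate per data point, so when the model estimates are
# already available the same result follows directly on the host with NumPy.
import numpy as np

observations = np.array([[1.0, 2.0, 3.0],
                         [2.0, 2.0, 2.0]])       # (d, n) observed data
model_estimates = np.array([[0.9, 2.1, 2.8],
                            [2.2, 1.9, 2.0]])    # (d, n) model evaluations

residuals = observations - model_estimates       # one residual per observation
assert residuals.shape == observations.shape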
""" np_dtype = np.float32 if model.double_precision: np_dtype = np.float64 nmr_inst_per_problem = model.get_nmr_inst_per_problem() if isinstance(parameters, Mapping): parameters = np.require(model.get_initial_parameters(parameters), np_dtype, requirements=['C', 'A', 'O']) else: parameters = np.require(parameters, np_dtype, requirements=['C', 'A', 'O']) nmr_problems = parameters.shape[0] evaluations = np.zeros((nmr_problems, nmr_inst_per_problem), dtype=np_dtype, order='C') workers = self._create_workers(lambda cl_environment: _EvaluateModelWorker( cl_environment, self.get_compile_flags_list(model.double_precision), model, parameters, evaluations)) self.load_balancer.process(workers, nmr_problems) return evaluations class _EvaluateModelWorker(Worker): def __init__(self, cl_environment, compile_flags, model, parameters, evaluations): super(_EvaluateModelWorker, self).__init__(cl_environment) self._model = model self._double_precision = model.double_precision self._evaluations = evaluations self._parameters = parameters self._all_buffers, self._evaluations_buffer = self._create_buffers() self._kernel = self._build_kernel(compile_flags) def calculate(self, range_start, range_end): nmr_problems = range_end - range_start self._kernel.get_estimates(self._cl_run_context.queue, (int(nmr_problems), ), None, *self._all_buffers, global_offset=(int(range_start),)) self._enqueue_readout(self._evaluations_buffer, self._evaluations, range_start, range_end) def _create_buffers(self): evaluations_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._evaluations) all_buffers = [cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._parameters), evaluations_buffer] for data in self._model.get_data(): all_buffers.append(cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)) return all_buffers, evaluations_buffer def _get_kernel_source(self): cl_func = self._model.get_model_eval_function('evaluateModel') nmr_params = self._parameters.shape[1] kernel_param_names = ['global mot_float_type* params', 'global mot_float_type* estimates'] kernel_param_names.extend(self._model.get_kernel_param_names(self._cl_environment.device)) kernel_source = ''' #define NMR_INST_PER_PROBLEM ''' + str(self._model.get_nmr_inst_per_problem()) + ''' ''' kernel_source += get_float_type_def(self._model.double_precision) kernel_source += self._model.get_kernel_data_struct(self._cl_environment.device) kernel_source += cl_func kernel_source += ''' __kernel void get_estimates( ''' + ",\n".join(kernel_param_names) + ''' ){ ulong gid = get_global_id(0); ''' + self._model.get_kernel_data_struct_initialization(self._cl_environment.device, 'data') + ''' mot_float_type x[''' + str(nmr_params) + ''']; for(uint i = 0; i < ''' + str(nmr_params) + '''; i++){ x[i] = params[gid * ''' + str(nmr_params) + ''' + i]; } global mot_float_type* result = estimates + gid * NMR_INST_PER_PROBLEM; for(uint i = 0; i < NMR_INST_PER_PROBLEM; i++){ result[i] = evaluateModel((void*)&data, x, i); } } ''' return kernel_source PKxJDD  4mot/cl_routines/mapping/objective_list_calculator.pyfrom collections import Mapping import pyopencl as cl import numpy as np from ...utils import get_float_type_def from ...cl_routines.base import CLRoutine from ...load_balance_strategies import Worker __author__ = 'Robbert Harms' __date__ = "2014-02-05" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = 
"robbert.harms@maastrichtuniversity.nl" class ObjectiveListCalculator(CLRoutine): def __init__(self, **kwargs): """Calculate the objective list, that is it can compute the get_objective_list_function of the given model. This evaluates the model and compares it to the problem data to get objective values. This returns objective values per observation, for a complete objective function summarized over the observations use the :class:`~.objective_calculator.ObjectiveCalculator`. This returns a value per problem instance per data point. """ super(ObjectiveListCalculator, self).__init__(**kwargs) def calculate(self, model, parameters): """Calculate and return the objective lists. Args: model (AbstractModel): The model to calculate the residuals of. parameters (dict or ndarray): The parameters to use in the evaluation of the model If a dict is given we assume it is with values for a set of parameters If an ndarray is given we assume that we have data for all parameters. Returns: Return per voxel the objective value (application of the function: "noise_model(eval - data)") per protocol item. """ np_dtype = np.float32 if model.double_precision: np_dtype = np.float64 nmr_inst_per_problem = model.get_nmr_inst_per_problem() nmr_problems = model.get_nmr_problems() if isinstance(parameters, Mapping): parameters = np.require(model.get_initial_parameters(parameters), np_dtype, requirements=['C', 'A', 'O']) else: parameters = np.require(parameters, np_dtype, requirements=['C', 'A', 'O']) objectives = np.zeros((nmr_problems, nmr_inst_per_problem), dtype=np_dtype, order='C') workers = self._create_workers(lambda cl_environment: _ObjectiveListCalculatorWorker( cl_environment, self.get_compile_flags_list(model.double_precision), model, parameters, objectives)) self.load_balancer.process(workers, model.get_nmr_problems()) return objectives class _ObjectiveListCalculatorWorker(Worker): def __init__(self, cl_environment, compile_flags, model, parameters, objectives): super(_ObjectiveListCalculatorWorker, self).__init__(cl_environment) self._model = model self._double_precision = model.double_precision self._objectives = objectives self._parameters = parameters self._all_buffers, self._residuals_buffer = self._create_buffers() self._kernel = self._build_kernel(compile_flags) def __del__(self): for buffer in self._all_buffers: buffer.release() def calculate(self, range_start, range_end): nmr_problems = range_end - range_start self._kernel.get_objectives(self._cl_run_context.queue, (int(nmr_problems), ), None, *self._all_buffers, global_offset=(int(range_start),)) self._enqueue_readout(self._residuals_buffer, self._objectives, range_start, range_end) def _create_buffers(self): objectives_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._objectives) all_buffers = [cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._parameters), objectives_buffer] for data in self._model.get_data(): all_buffers.append(cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)) return all_buffers, objectives_buffer def _get_kernel_source(self): nmr_inst_per_problem = self._model.get_nmr_inst_per_problem() nmr_params = self._parameters.shape[1] kernel_param_names = ['global mot_float_type* params', 'global mot_float_type* objectives'] kernel_param_names.extend(self._model.get_kernel_param_names(self._cl_environment.device)) kernel_source = ''' #define 
NMR_INST_PER_PROBLEM ''' + str(nmr_inst_per_problem) + ''' ''' kernel_source += get_float_type_def(self._double_precision) kernel_source += self._model.get_kernel_data_struct(self._cl_environment.device) kernel_source += self._model.get_objective_per_observation_function('getObjectiveInstanceValue') kernel_source += ''' __kernel void get_objectives( ''' + ",\n".join(kernel_param_names) + ''' ){ ulong gid = get_global_id(0); ''' + self._model.get_kernel_data_struct_initialization(self._cl_environment.device, 'data') + ''' mot_float_type x[''' + str(nmr_params) + ''']; for(uint i = 0; i < ''' + str(nmr_params) + '''; i++){ x[i] = params[gid * ''' + str(nmr_params) + ''' + i]; } global mot_float_type* result = objectives + gid * NMR_INST_PER_PROBLEM; for(uint i = 0; i < NMR_INST_PER_PROBLEM; i++){ result[i] = getObjectiveInstanceValue((void*)&data, x, i); } } ''' return kernel_source PK]J!_G_G"mot/cl_routines/optimizing/base.pyimport logging import numpy as np import pyopencl as cl from mot.cl_routines.mapping.error_measures import ErrorMeasures from mot.cl_routines.mapping.residual_calculator import ResidualCalculator from ...utils import get_float_type_def from ...cl_routines.base import CLRoutine from ...load_balance_strategies import Worker from ...cl_routines.mapping.codec_runner import CodecRunner from ...__version__ import __version__ __author__ = 'Robbert Harms' __date__ = "2014-05-18" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" return_code_labels = { 0: ['default', 'no return code specified'], 1: ['found zero', 'sum of squares/evaluation below underflow limit'], 2: ['converged', 'the relative error in the sum of squares/evaluation is at most tol'], 3: ['converged', 'the relative error of the parameter vector is at most tol'], 4: ['converged', 'both errors are at most tol'], 5: ['trapped', 'by degeneracy; increasing epsilon might help'], 6: ['exhausted', 'number of function calls exceeding preset patience'], 7: ['failed', 'ftol 0 0 < beta < 1 gamma > 1 gamma > alpha 0 < delta < 1 """ patience = patience or self.default_patience optimizer_settings = optimizer_settings or {} keyword_values = {} keyword_values['scale'] = scale keyword_values['alpha'] = alpha keyword_values['beta'] = beta keyword_values['gamma'] = gamma keyword_values['delta'] = delta keyword_values['adaptive_scales'] = adaptive_scales option_defaults = {'alpha': 1.0, 'beta': 0.5, 'gamma': 2.0, 'delta': 0.5, 'scale': 1.0, 'adaptive_scales': True} def get_value(option_name): value = keyword_values.get(option_name) if value is None: value = optimizer_settings.get(option_name) if value is None: value = option_defaults[option_name] return value for option in option_defaults: optimizer_settings.update({option: get_value(option)}) super(NMSimplex, self).__init__(patience=patience, optimizer_settings=optimizer_settings, **kwargs) def minimize(self, model, init_params=None): if self._optimizer_settings.get('adaptive_scales', True): nmr_params = model.get_nmr_estimable_parameters() self._optimizer_settings.update( {'alpha': 1, 'beta': 0.75 - 1.0 / (2 * nmr_params), 'gamma': 1 + 2.0 / nmr_params, 'delta': 1 - 1.0 / nmr_params} ) return super(NMSimplex, self).minimize(model, init_params=init_params) def _get_worker_generator(self, *args): return lambda cl_environment: NMSimplexWorker(cl_environment, *args) class NMSimplexWorker(AbstractParallelOptimizerWorker): def _get_optimization_function(self): params = {'NMR_PARAMS': self._nmr_params, 'PATIENCE': 
self._parent_optimizer.patience} for option, value in self._optimizer_settings.items(): if option == 'scale': params['INITIAL_SIMPLEX_SCALES'] = '{' + ', '.join([str(value)] * self._nmr_params) + '}' else: params.update({option.upper(): value}) body = open(os.path.abspath(resource_filename('mot', 'data/opencl/nmsimplex.cl')), 'r').read() if params: body = body % params return body def _get_optimizer_call_name(self): return 'nmsimplex' PK*vJe̳,mot/cl_routines/optimizing/random_restart.pyimport numpy as np from mot.cl_routines.optimizing.base import AbstractOptimizer, SimpleOptimizationResult __author__ = 'Robbert Harms' __date__ = "2016-11-22" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class StartingPointGenerator(object): def next(self, model, previous_results): """Returns a next starting point given the model and the previous results. This method can return None which means that no next starting point is available. Args: model: the model for which we are generating points previous_results (ndarray): the previous results, an (d, p) array with for every d problems and n parameters the (semi-)optimum value Returns: ndarray: array of same type and shape as the input but with the new starting points """ return NotImplementedError class PointsFromGrid(StartingPointGenerator): def __init__(self, grid): """Uses the given points as a starting point for all problem instances per run. This accepts a grid with per row a starting point that we will use for all the problem instances for that optimization run. The number of rows determines the number of iterations. Args: grid (ndarray): the grid with one starting point per iteration. """ self.grid = grid self._iteration_counter = 0 def next(self, model, previous_results): if self._iteration_counter == self.grid.shape[0]: return None new_points = np.zeros_like(previous_results) for param_ind in range(new_points.shape[1]): new_points[:, param_ind] = np.ones(model.get_nmr_problems()) * self.grid[self._iteration_counter, param_ind] self._iteration_counter += 1 return new_points class UsePrevious(StartingPointGenerator): def __init__(self, number_of_runs=3): """A strategy that uses the previous results without alterations for the next optimization run. Args: number_of_runs (int): the number of times we optimize using this strategy """ self.number_of_runs = number_of_runs self._iteration_counter = 0 def next(self, model, previous_results): if self._iteration_counter == self.number_of_runs: return None self._iteration_counter += 1 return previous_results class RandomStartingPoint(StartingPointGenerator): def __init__(self, number_of_runs=3): """A strategy that generates uniformly random starting points for every parameter. Per run this generates for each parameter a uniformly distributed random number between the lower and upper bounds and uses that single random value for all problem instances. Hence, this does not generate a unique random point per problem instance, but uses a single random point for all problem instances per iteration. 
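# Sketch of the strategy described above (toy bounds, added for illustration): one
# uniform draw per parameter, shared by all problem instances within that run.
import numpy as np

nmr_problems = 4
lower_bounds = np.array([0.0, -1.0])
upper_bounds = np.array([1.0, 1.0])

new_points = np.zeros((nmr_problems, lower_bounds.shape[0]))
for param_ind in range(lower_bounds.shape[0]):
    draw = np.random.uniform(lower_bounds[param_ind], upper_bounds[param_ind])
    new_points[:, param_ind] = draw               # the same value for every problem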
Args: number_of_runs (int): the number of times we optimize using this strategy """ self.number_of_runs = number_of_runs self._iteration_counter = 0 def next(self, model, previous_results): if self._iteration_counter == self.number_of_runs: return None self._iteration_counter += 1 lower_bounds = model.get_lower_bounds() upper_bounds = model.get_upper_bounds() new_points = np.zeros_like(previous_results) for param_ind in range(new_points.shape[1]): new_points[:, param_ind] = np.ones(model.get_nmr_problems()) * \ np.random.uniform(np.min(lower_bounds[param_ind]), np.max(upper_bounds[param_ind])) return new_points class GaussianPerturbation(StartingPointGenerator): def __init__(self, number_of_runs=3): """A strategy that perturbates the previous results using a Normal distribution Per run this generates for each parameter and for each problem instance a new starting position using the previous parameter as a mean and for standard deviation it uses the standard deviation over the problem instances. Hence, this generates a unique random point per problem instance, in contrast to some of the other strategies. Args: number_of_runs (int): the number of times we optimize using this strategy """ self.number_of_runs = number_of_runs self._iteration_counter = 0 def next(self, model, previous_results): if self._iteration_counter == self.number_of_runs: return None self._iteration_counter += 1 lower_bounds = model.get_lower_bounds() upper_bounds = model.get_upper_bounds() new_points = np.zeros_like(previous_results) for param_ind in range(new_points.shape[1]): std = np.std(previous_results[:, param_ind]) for problem_ind in range(model.get_nmr_problems()): new_points[problem_ind, param_ind] = np.clip( np.random.normal(previous_results[problem_ind, param_ind], std), lower_bounds[param_ind], upper_bounds[param_ind]) return new_points class RandomRestart(AbstractOptimizer): def __init__(self, optimizer, starting_point_generator, **kwargs): """A meta optimization routine that allows multiple random restarts. This meta optimizer runs the given optimizer multiple times using different starting positions for each run. The starting positions are obtained using a starting point generator which returns new starting points given the initial starting points. Please note that the initial starting point is always optimized first as a baseline reference. The returned results contain per problem instance the parameter that resulted in the lowest function value. Args: optimizer (AbstractOptimizer): the optimization routines to run one after another. 
starting_point_generator (StartingPointGenerator): the randomizer instance we use to randomize the starting point """ super(RandomRestart, self).__init__(**kwargs) self._optimizer = optimizer self._starting_point_generator = starting_point_generator def minimize(self, model, init_params=None): opt_output = self._optimizer.minimize(model, init_params) l2_errors = opt_output.get_error_measures()['Errors.l2'] results = opt_output.get_optimization_result() return_codes = opt_output.get_return_codes() starting_points = self._starting_point_generator.next(model, results) while starting_points is not None: new_opt_output = self._optimizer.minimize(model, starting_points) new_results = new_opt_output.get_optimization_result() new_l2_errors = new_opt_output.get_error_measures()['Errors.l2'] return_codes = new_opt_output.get_return_codes() results, l2_errors = self._get_best_results(results, new_results, l2_errors, new_l2_errors) starting_points = self._starting_point_generator.next(model, results) return SimpleOptimizationResult(model, results, return_codes) def _get_best_results(self, previous_results, new_results, previous_l2_errors, new_l2_errors): result_choice = np.argmin([previous_l2_errors, new_l2_errors], axis=0) results = np.zeros_like(previous_results) for param_ind in range(previous_results.shape[1]): choices = np.array([previous_results[:, param_ind], new_results[:, param_ind]]) results[:, param_ind] = choices[(result_choice, range(result_choice.shape[0]))] resulting_l2_errors = np.array([previous_l2_errors, new_l2_errors])[(result_choice, range(result_choice.shape[0]))] return results, resulting_l2_errors PKH1&mot/cl_routines/optimizing/__init__.py__author__ = 'Robbert Harms' __date__ = "2014-05-21" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl"PK8wJ{o mot/cl_routines/sampling/base.pyimport logging from ...cl_routines.base import CLRoutine __author__ = 'Robbert Harms' __date__ = "2014-05-18" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class AbstractSampler(CLRoutine): def __init__(self, cl_environments=None, load_balancer=None, **kwargs): super(AbstractSampler, self).__init__(cl_environments=cl_environments, load_balancer=load_balancer, **kwargs) self._logger = logging.getLogger(__name__) def sample(self, model, init_params=None): """Sample the given model with the given codec using the given environments. Args: model (SampleModelInterface): the model to sample init_params (dict): a dictionary containing the results of a previous run, provides the starting point Returns: SamplingOutput: the sampling output object """ raise NotImplementedError() class SamplingOutput(object): def get_samples(self): """Get the matrix containing the sampling results. Returns: ndarray: the sampled parameter maps, an (d, p, n) array with for d problems and p parameters n samples. 
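# Sketch (toy numbers, added for illustration) of the selection rule used by
# RandomRestart._get_best_results above: per problem instance keep the parameters of
# whichever run produced the lower l2 error.
import numpy as np

previous_l2 = np.array([1.0, 0.2, 3.0])
new_l2 = np.array([0.5, 0.9, 4.0])
choice = np.argmin([previous_l2, new_l2], axis=0)    # 0 -> keep previous, 1 -> keep new

previous_results = np.array([[1.0], [2.0], [3.0]])   # (d, p) with p = 1
new_results = np.array([[9.0], [8.0], [7.0]])

best = np.where(choice[:, None] == 0, previous_results, new_results)
# -> rows [9.0], [2.0], [3.0]: the new result is kept only where it lowered the l2 error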
""" raise NotImplementedError() class SimpleSampleOutput(SamplingOutput): def __init__(self, samples): """Simple storage container for the sampling output""" self._samples = samples def get_samples(self): return self._samples PK]J鬾88/mot/cl_routines/sampling/metropolis_hastings.pyfrom random import Random import warnings import numpy as np import pyopencl as cl from mot.model_building.cl_functions.library_functions import Rand123 from ...cl_routines.sampling.base import AbstractSampler, SamplingOutput from ...load_balance_strategies import Worker from ...utils import get_float_type_def __author__ = 'Robbert Harms' __date__ = "2014-02-05" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class MetropolisHastings(AbstractSampler): def __init__(self, nmr_samples=None, burn_length=None, sample_intervals=None, use_adaptive_proposals=True, **kwargs): """An CL implementation of Metropolis Hastings. This implementation uses a random walk single component updating strategy for the sampling. Args: nmr_samples (int): The length of the (returned) chain per voxel, defaults to 0 burn_length (int): The length of the burn in (per voxel), these are extra samples, jump is set to 1 (no thinning) sample_intervals (int): how many sample we wait before storing one. This will draw extra samples (chain_length * sample_intervals). If set to zero we store every sample after the burn in. use_adaptive_proposals (boolean): if we use the adaptive proposals (set to True) or not (set to False). """ super(MetropolisHastings, self).__init__(**kwargs) self._nmr_samples = nmr_samples or 500 self.burn_length = burn_length self.sample_intervals = sample_intervals self.use_adaptive_proposals = use_adaptive_proposals if self.burn_length is None: self.burn_length = 0 if self.sample_intervals is None: self.sample_intervals = 0 if self.burn_length < 0: raise ValueError('The burn length can not be smaller than 0, {} given'.format(self.burn_length)) if self.sample_intervals < 0: raise ValueError('The sampling interval can not be smaller than 0, {} given'.format(self.sample_intervals)) if self._nmr_samples < 1: raise ValueError('The number of samples to draw can ' 'not be smaller than 1, {} given'.format(self._nmr_samples)) @property def nmr_samples(self): return self._nmr_samples def sample(self, model, init_params=None): """Sample the given model with Metropolis Hastings Returns: MHSampleOutput: extension of the default output with some more data """ float_dtype = np.float32 if model.double_precision: float_dtype = np.float64 self._do_initial_logging(model) current_chain_position = np.require(model.get_initial_parameters(init_params), float_dtype, requirements=['C', 'A', 'O', 'W']) samples = np.zeros((model.get_nmr_problems(), current_chain_position.shape[1], self.nmr_samples), dtype=float_dtype, order='C') proposal_state = np.require(model.get_proposal_state(), float_dtype, requirements=['C', 'A', 'O', 'W']) mh_state = _prepare_mh_state(model.get_metropolis_hastings_state(), float_dtype) self._logger.info('Starting sampling with method {0}'.format(self.__class__.__name__)) workers = self._create_workers(lambda cl_environment: _MHWorker( cl_environment, self.get_compile_flags_list(model.double_precision), model, current_chain_position, samples, proposal_state, mh_state, self.nmr_samples, self.burn_length, self.sample_intervals, self.use_adaptive_proposals)) self.load_balancer.process(workers, model.get_nmr_problems()) self._logger.info('Finished sampling') new_mh_state = 
mh_state.with_nmr_samples_drawn( mh_state.nmr_samples_drawn + self._nmr_samples * (self.sample_intervals + 1) + self.burn_length) return MHSampleOutput(samples, proposal_state, new_mh_state, current_chain_position) def _do_initial_logging(self, model): self._logger.info('Entered sampling routine.') self._logger.info('We will use a {} precision float type for the calculations.'.format( 'double' if model.double_precision else 'single')) for env in self.load_balancer.get_used_cl_environments(self.cl_environments): self._logger.info('Using device \'{}\'.'.format(str(env))) self._logger.info('Using compile flags: {}'.format(self.get_compile_flags_list(model.double_precision))) self._logger.info('The parameters we will sample are: {0}'.format(model.get_free_param_names())) sample_settings = dict(nmr_samples=self.nmr_samples, burn_length=self.burn_length, sample_intervals=self.sample_intervals, use_adaptive_proposals=self.use_adaptive_proposals) self._logger.info('Sample settings: nmr_samples: {nmr_samples}, burn_length: {burn_length}, ' 'sample_intervals: {sample_intervals}, ' 'use_adaptive_proposals: {use_adaptive_proposals}. '.format(**sample_settings)) samples_drawn = dict(samples_drawn=(self.burn_length + (self.sample_intervals + 1) * self.nmr_samples), samples_returned=self.nmr_samples) self._logger.info('Total samples drawn: {samples_drawn}, total samples returned: ' '{samples_returned} (per problem).'.format(**samples_drawn)) class MHSampleOutput(SamplingOutput): def __init__(self, samples, proposal_state, mh_state, current_chain_position): """Simple storage container for the sampling output Args: samples (ndarray): an (d, p, n) matrix with d problems, p parameters and n samples proposal_state (ndarray): (d, p) matrix with for d problems and p parameters the proposal state mh_state (MHState): the current MH state current_chain_position (ndarray): (d, p) matrix with for d observations and p parameters the current chain position. If the samples are not empty the last element in the samples (``samples[..., -1]``) should equal this matrix. """ self._samples = samples self._proposal_state = proposal_state self._mh_state = mh_state self._current_chain_position = current_chain_position def get_samples(self): return self._samples def get_proposal_state(self): """Get the proposal state at the end of this sampling Returns: ndarray: a (d, p) array with for d problems and p parameters the proposal state """ return self._proposal_state def get_mh_state(self): """Get the Metropolis Hastings state as it was at the end of this sampling run. Returns: MHState: the current MH state """ return self._mh_state def get_current_chain_position(self): """Get the current chain position current_chain_position Returns: ndarray: (d, p) matrix with for d observations and p parameters the current chain position. If the samples are not empty the last element in the samples (``samples[..., -1]``) should equal this matrix. 
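# Arithmetic sketch (example numbers, added for illustration) of the bookkeeping
# logged above: with burn-in and thinning the sampler draws more samples than it
# returns per problem instance.
nmr_samples = 500        # samples stored per problem
burn_length = 100        # discarded burn-in samples
sample_intervals = 2     # samples skipped between two stored samples

samples_drawn = burn_length + (sample_intervals + 1) * nmr_samples
samples_returned = nmr_samples
assert samples_drawn == 1600 and samples_returned == 500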
""" return self._current_chain_position class _MHWorker(Worker): def __init__(self, cl_environment, compile_flags, model, current_chain_position, samples, proposal_state, mh_state, nmr_samples, burn_length, sample_intervals, use_adaptive_proposals): super(_MHWorker, self).__init__(cl_environment) self._model = model self._current_chain_position = current_chain_position self._nmr_params = current_chain_position.shape[1] self._samples = samples self._proposal_state = proposal_state self._mh_state = mh_state self._mh_state_dict = self._get_mh_state_dict() self._nmr_samples = nmr_samples self._burn_length = burn_length self._sample_intervals = sample_intervals # We split the kernel into multiple batches to (try to) prevent screen freezing. # This does not mean that the computations are interruptable. We enqueue all operations at once # and they will run until completion. self._max_samples_per_batch = np.ceil(5000 / (self._sample_intervals + 1)) self._max_iterations_per_batch = self._max_samples_per_batch * (self._sample_intervals + 1) kernel_builder = _MCMCKernelBuilder( compile_flags, self._mh_state_dict, cl_environment, model, nmr_samples, burn_length, sample_intervals, use_adaptive_proposals, self._nmr_params) self._burnin_kernel = kernel_builder.build_burnin_kernel() self._sampling_kernel = kernel_builder.build_sampling_kernel() def calculate(self, range_start, range_end): nmr_problems = range_end - range_start workgroup_size = cl.Kernel(self._sampling_kernel, 'sample').get_work_group_info( cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE, self._cl_environment.device) data_buffers, readout_items = self._get_buffers(workgroup_size) iteration_offset = self._mh_state.nmr_samples_drawn if self._burn_length > 0: iteration_offset = self._enqueue_burnin(range_start, range_end, workgroup_size, data_buffers, iteration_offset) samples_buf = cl.Buffer(self._cl_run_context.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR, hostbuf=self._samples) data_buffers.append(samples_buf) readout_items.append([samples_buf, self._samples]) iteration_batch_sizes = self._get_sampling_batch_sizes( self._nmr_samples * (self._sample_intervals + 1), self._max_iterations_per_batch) for nmr_iterations in iteration_batch_sizes: self._sampling_kernel.sample(self._cl_run_context.queue, (int(nmr_problems * workgroup_size),), (int(workgroup_size),), np.uint64(nmr_iterations), np.uint64(iteration_offset), *data_buffers, global_offset=(range_start * workgroup_size,)) iteration_offset += nmr_iterations for buffer, host_array in readout_items: self._enqueue_readout(buffer, host_array, range_start, range_end) def _enqueue_burnin(self, range_start, range_end, workgroup_size, data_buffers, iteration_offset): nmr_problems = range_end - range_start batch_sizes = self._get_sampling_batch_sizes(self._burn_length, self._max_iterations_per_batch) for nmr_iterations in batch_sizes: self._burnin_kernel.sample( self._cl_run_context.queue, (int(nmr_problems * workgroup_size),), (int(workgroup_size),), np.uint64(nmr_iterations), np.uint64(iteration_offset), *data_buffers, global_offset=(range_start * workgroup_size,)) iteration_offset += nmr_iterations return iteration_offset def _get_buffers(self, workgroup_size): data_buffers = [] readout_items = [] current_chain_position_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR, hostbuf=self._current_chain_position) data_buffers.append(current_chain_position_buffer) readout_items.append([current_chain_position_buffer, 
self._current_chain_position]) proposal_buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR, hostbuf=self._proposal_state) data_buffers.append(proposal_buffer) readout_items.append([proposal_buffer, self._proposal_state]) mcmc_state_buffers = {} for mcmc_state_element in sorted(self._mh_state_dict): host_array = self._mh_state_dict[mcmc_state_element]['data'] buffer = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR, hostbuf=host_array) mcmc_state_buffers[mcmc_state_element] = buffer data_buffers.append(buffer) readout_items.append([buffer, host_array]) data_buffers.append(cl.LocalMemory(workgroup_size * np.dtype('double').itemsize)) for data in self._model.get_data(): data_buffers.append(cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)) return data_buffers, readout_items def _get_sampling_batch_sizes(self, total_nmr_samples, max_batch_length): """Cuts the total number of samples into smaller batches. This returns the size of every batch, which can be used as the ``nmr_samples`` input in running a kernel. Examples: self._get_sampling_batch_sizes(30, 8) -> [8, 8, 8, 6] Returns: list: the list of batch sizes """ batch_sizes = [max_batch_length] * (total_nmr_samples // max_batch_length) if total_nmr_samples % max_batch_length > 0: batch_sizes.append(total_nmr_samples % max_batch_length) return batch_sizes def _get_mh_state_dict(self): """Get a dictionary with the MH state kernel arrays""" state_dict = { 'rng_state': {'data': self._mh_state.get_rng_state(), 'cl_type': 'uint'}, 'proposal_state_sampling_counter': {'data': self._mh_state.get_proposal_state_sampling_counter(), 'cl_type': 'ulong'}, 'proposal_state_acceptance_counter': {'data': self._mh_state.get_proposal_state_acceptance_counter(), 'cl_type': 'ulong'} } if self._model.proposal_state_update_uses_variance(): state_dict.update({ 'online_parameter_variance': {'data': self._mh_state.get_online_parameter_variance(), 'cl_type': 'mot_float_type'}, 'online_parameter_variance_update_m2': { 'data': self._mh_state.get_online_parameter_variance_update_m2(), 'cl_type': 'mot_float_type'}, 'online_parameter_mean': {'data': self._mh_state.get_online_parameter_mean(), 'cl_type': 'mot_float_type'}, }) return state_dict class _MCMCKernelBuilder(object): def __init__(self, compile_flags, mh_state_dict, cl_environment, model, nmr_samples, burn_length, sample_intervals, use_adaptive_proposals, nmr_params): self._cl_run_context = cl_environment.get_cl_context() self._compile_flags = compile_flags self._mh_state_dict = mh_state_dict self._cl_environment = cl_environment self._model = model self._nmr_params = nmr_params self._nmr_samples = nmr_samples self._burn_length = burn_length self._sample_intervals = sample_intervals self._use_adaptive_proposals = use_adaptive_proposals self._update_parameter_variances = self._model.proposal_state_update_uses_variance() def build_burnin_kernel(self): """Build the kernel for the burn in. This kernel only updates the MC state and does not store the samples. Returns: cl.Program: a compiled CL kernel """ return self._compile_kernel(self._get_kernel_source(store_samples=False)) def build_sampling_kernel(self): """Build the kernel for the sampling. 
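# Standalone sketch (added for illustration) of the batching helper documented above:
# the same division into full batches plus one remainder batch, written as a free
# function so the docstring example can be checked directly.
def sampling_batch_sizes(total_nmr_samples, max_batch_length):
    batch_sizes = [max_batch_length] * (total_nmr_samples // max_batch_length)
    if total_nmr_samples % max_batch_length > 0:
        batch_sizes.append(total_nmr_samples % max_batch_length)
    return batch_sizes

assert sampling_batch_sizes(30, 8) == [8, 8, 8, 6]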
Returns: cl.Program: a compiled CL kernel """ return self._compile_kernel(self._get_kernel_source(store_samples=True)) def _compile_kernel(self, kernel_source): from mot import configuration if configuration.should_ignore_kernel_compile_warnings(): warnings.simplefilter("ignore") return cl.Program(self._cl_run_context.context, kernel_source).build(' '.join(self._compile_flags)) def _get_kernel_source(self, store_samples=True): kernel_param_names = [ 'ulong nmr_iterations', 'ulong iteration_offset'] kernel_param_names.extend([ 'global mot_float_type* current_chain_position', 'global mot_float_type* global_proposal_state']) for mcmc_state_element in sorted(self._mh_state_dict): cl_type = self._mh_state_dict[mcmc_state_element]['cl_type'] kernel_param_names.append('global {}* global_{}'.format(cl_type, mcmc_state_element)) kernel_param_names.append('local double* log_likelihood_tmp') kernel_param_names.extend(self._model.get_kernel_param_names(self._cl_environment.device)) if store_samples: kernel_param_names.append('global mot_float_type* samples') proposal_state_size = self._model.get_proposal_state().shape[1] kernel_source = ''' #define NMR_INST_PER_PROBLEM ''' + str(self._model.get_nmr_inst_per_problem()) + ''' ''' kernel_source += get_float_type_def(self._model.double_precision) kernel_source += self._get_rng_functions() kernel_source += self._model.get_kernel_data_struct(self._cl_environment.device) kernel_source += self._model.get_log_prior_function('getLogPrior', address_space_parameter_vector='local') kernel_source += self._model.get_proposal_function('getProposal', address_space_proposal_state='global') if self._use_adaptive_proposals: kernel_source += self._model.get_proposal_state_update_function('updateProposalState', address_space='global') if not self._model.is_proposal_symmetric(): kernel_source += self._model.get_proposal_logpdf('getProposalLogPDF', address_space_proposal_state='global') kernel_source += self._get_log_likelihood_functions() kernel_source += self._get_state_update_functions() if self._update_parameter_variances: kernel_source += self._chain_statistics_update_function() kernel_source += ''' void _sample(local mot_float_type* const x_local, void* rng_data, local double* const current_likelihood, local mot_float_type* const current_prior, const void* const data, ulong nmr_iterations, ulong iteration_offset, global mot_float_type* const proposal_state, global ulong* const sampling_counter, global ulong* const acceptance_counter, ''' + ('''global mot_float_type* parameter_mean, global mot_float_type* parameter_variance, global mot_float_type* parameter_variance_update_m2,''' if self._update_parameter_variances else '') + ''' ''' + ('global mot_float_type* samples, ' if store_samples else '') + ''' local double* log_likelihood_tmp){ ulong i; uint j; ulong problem_ind = (ulong)(get_global_id(0) / get_local_size(0)); bool is_first_work_item = get_local_id(0) == 0; for(i = 0; i < nmr_iterations; i++){ ''' if self._update_parameter_variances: kernel_source += ''' if(is_first_work_item){ for(j = 0; j < ''' + str(self._nmr_params) + '''; j++){ _update_chain_statistics(i + iteration_offset, x_local[j], parameter_mean + j, parameter_variance + j, parameter_variance_update_m2 + j); } } ''' kernel_source += ''' _update_state(x_local, rng_data, current_likelihood, current_prior, data, proposal_state, acceptance_counter, log_likelihood_tmp); if(is_first_work_item){ for(j = 0; j < ''' + str(self._nmr_params) + '''; j++){ sampling_counter[j]++; } ''' + 
('updateProposalState(proposal_state, sampling_counter, acceptance_counter' + (', parameter_variance' if self._update_parameter_variances else '') + ');' if self._use_adaptive_proposals else '') + ''' ''' if store_samples: kernel_source += ''' if((i - ''' + str(self._burn_length) + ''' + iteration_offset) % ''' + \ str(self._sample_intervals + 1) + ''' == 0){ for(j = 0; j < ''' + str(self._nmr_params) + '''; j++){ samples[(ulong)((i - ''' + str(self._burn_length) + ''' + iteration_offset) / ''' + str(self._sample_intervals + 1) + ''') // remove the interval + j * ''' + str(self._nmr_samples) + ''' // parameter index + problem_ind * ''' + str(self._nmr_params * self._nmr_samples) + ''' ] = x_local[j]; } } ''' kernel_source += ''' } } } ''' kernel_source += ''' __kernel void sample( ''' + ",\n".join(kernel_param_names) + ''' ){ ulong problem_ind = (ulong)(get_global_id(0) / get_local_size(0)); ''' + self._model.get_kernel_data_struct_initialization(self._cl_environment.device, 'data', 'problem_ind') + ''' rand123_data rand123_rng_data = _rng_data_from_array(global_rng_state); void* rng_data = (void*)&rand123_rng_data; local mot_float_type x_local[''' + str(self._nmr_params) + ''']; local double current_likelihood; local mot_float_type current_prior; global mot_float_type* proposal_state = global_proposal_state + problem_ind * ''' + str(proposal_state_size) + '''; global ulong* sampling_counter = global_proposal_state_sampling_counter + problem_ind * ''' + str(self._nmr_params) + '''; global ulong* acceptance_counter = global_proposal_state_acceptance_counter + problem_ind * ''' + str(self._nmr_params) + '''; ''' if self._update_parameter_variances: kernel_source += ''' global mot_float_type* parameter_mean = global_online_parameter_mean + problem_ind * ''' + str(self._nmr_params) + '''; global mot_float_type* parameter_variance = global_online_parameter_variance + problem_ind * ''' + str(self._nmr_params) + '''; global mot_float_type* parameter_variance_update_m2 = global_online_parameter_variance_update_m2 + problem_ind * ''' + str(self._nmr_params) + '''; ''' kernel_source += ''' if(get_local_id(0) == 0){ for(uint i = 0; i < ''' + str(self._nmr_params) + '''; i++){ x_local[i] = current_chain_position[problem_ind * ''' + str(self._nmr_params) + ''' + i]; } current_prior = getLogPrior((void*)&data, x_local); } _fill_log_likelihood_tmp((void*)&data, x_local, log_likelihood_tmp); _sum_log_likelihood_tmp_local(log_likelihood_tmp, ¤t_likelihood); _sample(x_local, rng_data, ¤t_likelihood, ¤t_prior, (void*)&data, nmr_iterations, iteration_offset, proposal_state, sampling_counter, acceptance_counter, ''' + ('parameter_mean, parameter_variance, parameter_variance_update_m2,' if self._update_parameter_variances else '') + ''' ''' + ('samples, ' if store_samples else '') + ''' log_likelihood_tmp); if(get_local_id(0) == 0){ for(uint i = 0; i < ''' + str(self._nmr_params) + '''; i++){ current_chain_position[problem_ind * ''' + str(self._nmr_params) + ''' + i] = x_local[i]; } _rng_data_to_array(rand123_rng_data, global_rng_state); } } ''' return kernel_source def _get_rng_functions(self): random_library = Rand123() kernel_source = random_library.get_cl_code() kernel_source += ''' rand123_data _rng_data_from_array(global uint* rng_state){ ulong problem_ind = (ulong)(get_global_id(0) / get_local_size(0)); return rand123_initialize_data( (uint[]){rng_state[0 + problem_ind * 6], rng_state[1 + problem_ind * 6], rng_state[2 + problem_ind * 6], rng_state[3 + problem_ind * 6], rng_state[4 + problem_ind * 6], 
rng_state[5 + problem_ind * 6]}); } void _rng_data_to_array(rand123_data data, global uint* rng_state){ ulong problem_ind = (ulong)(get_global_id(0) / get_local_size(0)); uint state[6]; rand123_data_to_array(data, state); for(uint i = 0; i < 6; i++){ rng_state[i + problem_ind * 6] = state[i]; } } ''' return kernel_source def _get_log_likelihood_functions(self): kernel_source = self._model.get_log_likelihood_per_observation_function('getLogLikelihoodPerObservation', full_likelihood=False) kernel_source += ''' void _fill_log_likelihood_tmp(const void* const data, local mot_float_type* const x_local, local double* log_likelihood_tmp){ ulong observation_ind; ulong local_id = get_local_id(0); log_likelihood_tmp[local_id] = 0; uint workgroup_size = get_local_size(0); mot_float_type x_private[''' + str(self._nmr_params) + ''']; for(uint i = 0; i < ''' + str(self._nmr_params) + '''; i++){ x_private[i] = x_local[i]; } for(uint i = 0; i < ceil(NMR_INST_PER_PROBLEM / (mot_float_type)workgroup_size); i++){ observation_ind = i * workgroup_size + local_id; if(observation_ind < NMR_INST_PER_PROBLEM){ log_likelihood_tmp[local_id] += getLogLikelihoodPerObservation( data, x_private, observation_ind); } } barrier(CLK_LOCAL_MEM_FENCE); } void _sum_log_likelihood_tmp_local(local double* log_likelihood_tmp, local double* log_likelihood){ *log_likelihood = 0; for(uint i = 0; i < get_local_size(0); i++){ *log_likelihood += log_likelihood_tmp[i]; } } void _sum_log_likelihood_tmp_private(local double* log_likelihood_tmp, private double* log_likelihood){ *log_likelihood = 0; for(uint i = 0; i < get_local_size(0); i++){ *log_likelihood += log_likelihood_tmp[i]; } } ''' return kernel_source def _get_state_update_functions(self): kernel_source = ''' void _update_state(local mot_float_type* const x_local, void* rng_data, local double* const current_likelihood, local mot_float_type* const current_prior, const void* const data, global mot_float_type* const proposal_state, global ulong * const acceptance_counter, local double* log_likelihood_tmp){ float4 random_nmr; mot_float_type new_prior = 0; double new_likelihood; double bayesian_f; mot_float_type old_x; bool is_first_work_item = get_local_id(0) == 0; #pragma unroll 1 for(uint k = 0; k < ''' + str(self._nmr_params) + '''; k++){ if(is_first_work_item){ random_nmr = frand(rng_data); old_x = x_local[k]; x_local[k] = getProposal(k, x_local[k], rng_data, proposal_state); new_prior = getLogPrior(data, x_local); } if(exp(new_prior) > 0){ _fill_log_likelihood_tmp(data, x_local, log_likelihood_tmp); if(is_first_work_item){ _sum_log_likelihood_tmp_private(log_likelihood_tmp, &new_likelihood); ''' if self._model.is_proposal_symmetric(): kernel_source += ''' bayesian_f = exp((new_likelihood + new_prior) - (*current_likelihood + *current_prior)); ''' else: kernel_source += ''' mot_float_type x_to_prop = getProposalLogPDF(k, old_x, x_local[k], proposal_state); mot_float_type prop_to_x = getProposalLogPDF(k, x_local[k], x_local[k], proposal_state); bayesian_f = exp((new_likelihood + new_prior + x_to_prop) - (*current_likelihood + *current_prior + prop_to_x)); ''' kernel_source += ''' if(random_nmr.x < bayesian_f){ *current_likelihood = new_likelihood; *current_prior = new_prior; acceptance_counter[k]++; } else{ x_local[k] = old_x; } } } else{ // prior returned 0 if(is_first_work_item){ x_local[k] = old_x; } } } } ''' return kernel_source def _chain_statistics_update_function(self): kernel_source = ''' /** Online variance algorithm by Welford * B. P. 
Welford (1962)."Note on a method for calculating corrected sums of squares * and products". Technometrics 4(3):419–420. * * Also studied in: * Chan, Tony F.; Golub, Gene H.; LeVeque, Randall J. (1983). * Algorithms for Computing the Sample Variance: Analysis and Recommendations. * The American Statistician 37, 242-247. http://www.jstor.org/stable/2683386 */ void _update_chain_statistics(const ulong chain_count, const mot_float_type new_param_value, global mot_float_type* const parameter_mean, global mot_float_type* const parameter_variance, global mot_float_type* const parameter_variance_update_m2){ mot_float_type previous_mean = *parameter_mean; *parameter_mean += (new_param_value - *parameter_mean) / (chain_count + 1); *parameter_variance_update_m2 += (new_param_value - previous_mean) * (new_param_value - *parameter_mean); if(chain_count > 1){ *parameter_variance = *parameter_variance_update_m2 / (chain_count - 1); } } ''' return kernel_source class MHState(object): """The Metropolis Hastings state is used to initialize the state of the MH sampler. The state is stored at the end of every MH run and can be used to continue sampling again from the previous end point. """ @property def nmr_samples_drawn(self): """Get the amount of samples already drawn, i.e. at what point in time is this state. Returns: uint64: the current number of samples already drawn before this state """ raise NotImplementedError() def get_proposal_state_sampling_counter(self): """Get the current state of the sampling counter that can be used by the adaptive proposals. This value is per problem instance passed to the adaptive proposals which may reset the value. Returns: ndarray: a (d, p) array with for d problems and p parameters the current sampling counter, should be of a np.uint64 type. """ raise NotImplementedError() def get_proposal_state_acceptance_counter(self): """Get the current state of the acceptance counter that can be used by the adaptive proposals. This value is per problem instance passed to the adaptive proposals which may reset the value. Returns: ndarray: a (d, p) array with for d problems and p parameters the current acceptance counter, should be of a np.uint64 type. """ raise NotImplementedError() def get_online_parameter_variance(self): """Get the current state of the online parameter variance that can be used by the adaptive proposals. This value is updated while sampling and is passed as a constant to the adaptive proposals. Returns: ndarray: a (d, p) array with for d problems and p parameters the current parameter variance, should be of a np.float32 or np.float64 type (it will still be auto-converted to the current double type in the MCMC function). """ raise NotImplementedError() def get_online_parameter_variance_update_m2(self): """A helper variable used in updating the online parameter variance. Returns: ndarray: a (d, p) array with for d problems and p parameters the current M2 state, should be of a np.float32 or np.float64 type (it will still be auto-converted to the current double type in the MCMC function). """ raise NotImplementedError() def get_online_parameter_mean(self): """Get the current state of the online parameter mean, a helper variance in updating the variance. Returns: ndarray: a (d, p) array with for d problems and p parameters the current parameter mean, should be of a np.float32 or np.float64 type (it will still be auto-converted to the current double type in the MCMC function). 
""" raise NotImplementedError() def get_rng_state(self): """Get the RNG state array for every problem instance. Returns: ndarray: a (d, *) state array with for every d problem the state of size > 0 """ raise NotImplementedError() class DefaultMHState(MHState): def __init__(self, nmr_problems, nmr_params, double_precision=False): """Creates a initial (default) MCMC state. Args: nmr_problems (int): the number of problems we are optimizing, used to create the default state. nmr_params (int): the number of parameters in the model, used to create the default state. double_precision (boolean): used when auto-creating some of the default state items. """ self._nmr_problems = nmr_problems self._nmr_params = nmr_params self._double_precision = double_precision self._float_dtype = np.float32 if double_precision: self._float_dtype = np.float64 @property def nmr_samples_drawn(self): return 0 def get_proposal_state_sampling_counter(self): return np.zeros((self._nmr_problems, self._nmr_params), dtype=np.uint64, order='C') def get_proposal_state_acceptance_counter(self): return np.zeros((self._nmr_problems, self._nmr_params), dtype=np.uint64, order='C') def get_online_parameter_variance(self): return np.zeros((self._nmr_problems, self._nmr_params), dtype=self._float_dtype, order='C') def get_online_parameter_variance_update_m2(self): return np.zeros((self._nmr_problems, self._nmr_params), dtype=self._float_dtype, order='C') def get_online_parameter_mean(self): return np.zeros((self._nmr_problems, self._nmr_params), dtype=self._float_dtype, order='C') def get_rng_state(self): rng = Random() dtype_info = np.iinfo(np.uint32) starting_point = np.array(list(rng.randrange(dtype_info.min, dtype_info.max + 1) for _ in range(6)), dtype=np.uint32) return np.tile(np.hstack([starting_point]), (self._nmr_problems, 1)).astype(np.uint32) class SimpleMHState(MHState): def __init__(self, nmr_samples_drawn, proposal_state_sampling_counter, proposal_state_acceptance_counter, online_parameter_variance, online_parameter_variance_update_m2, online_parameter_mean, rng_state): """A simple MCMC state containing provided items Args: nmr_samples_drawn (ndarray): the current number of samples already drawn to reach this state. proposal_state_sampling_counter (ndarray): a (d, p) array with for d problems and p parameters the current sampling counter. proposal_state_acceptance_counter (ndarray): a (d, p) array with for d problems and p parameters the current acceptance counter. online_parameter_variance (ndarray): a (d, p) array with for d problems and p parameters the current state of the online parameter variance online_parameter_variance_update_m2 (ndarray): a (d, p) array with for d problems and p parameters the current state of the online parameter variance update variable. 
online_parameter_mean (ndarray): a (d, p) array with for d problems and p parameters the current state of the online parameter mean rng_state (ndarray): a (d, *) array with for d problems the rng state vector """ self._nmr_samples_drawn = nmr_samples_drawn self._proposal_state_sampling_counter = proposal_state_sampling_counter self._proposal_state_acceptance_counter = proposal_state_acceptance_counter self._online_parameter_variance = online_parameter_variance self._online_parameter_variance_update_m2 = online_parameter_variance_update_m2 self._online_parameter_mean = online_parameter_mean self._rng_state = rng_state @property def nmr_samples_drawn(self): return self._nmr_samples_drawn def with_nmr_samples_drawn(self, nmr_samples_drawn): """Recreate this object and set the number of samples drawn to the specified value.""" return type(self)( nmr_samples_drawn, self.get_proposal_state_sampling_counter(), self.get_proposal_state_acceptance_counter(), self.get_online_parameter_variance(), self.get_online_parameter_variance_update_m2(), self.get_online_parameter_mean(), self.get_rng_state() ) def get_proposal_state_sampling_counter(self): return self._proposal_state_sampling_counter def get_proposal_state_acceptance_counter(self): return self._proposal_state_acceptance_counter def get_online_parameter_variance(self): return self._online_parameter_variance def get_online_parameter_variance_update_m2(self): return self._online_parameter_variance_update_m2 def get_online_parameter_mean(self): return self._online_parameter_mean def get_rng_state(self): return self._rng_state def _prepare_mh_state(mh_state, float_dtype): """Return a new MH state in which all the state variables are sanitized to the correct data type. Args: mh_state (MHState): the MH state we wish to sanitize float_dtype (np.dtype): the numpy dtype for the floats, either np.float32 or np.float64 Returns: SimpleMHState: MH state with the same data only then possibly sanitized """ proposal_state_sampling_counter = np.require(np.copy(mh_state.get_proposal_state_sampling_counter()), np.uint64,requirements=['C', 'A', 'O', 'W']) proposal_state_acceptance_counter = np.require(np.copy(mh_state.get_proposal_state_acceptance_counter()), np.uint64, requirements=['C', 'A', 'O', 'W']) online_parameter_variance = np.require(np.copy(mh_state.get_online_parameter_variance()), float_dtype, requirements=['C', 'A', 'O', 'W']) online_parameter_variance_update_m2 = np.require(np.copy(mh_state.get_online_parameter_variance_update_m2()), float_dtype, requirements=['C', 'A', 'O', 'W']) online_parameter_mean = np.require(np.copy(mh_state.get_online_parameter_mean()), float_dtype, requirements=['C', 'A', 'O', 'W']) rng_state = np.require(np.copy(mh_state.get_rng_state()), np.uint32, requirements=['C', 'A', 'O', 'W']) return SimpleMHState(mh_state.nmr_samples_drawn, proposal_state_sampling_counter, proposal_state_acceptance_counter, online_parameter_variance, online_parameter_variance_update_m2, online_parameter_mean, rng_state ) PKH1$mot/cl_routines/sampling/__init__.py__author__ = 'Robbert Harms' __date__ = "2014-05-21" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl"PKjJM?R'R'mot/cl_routines/filters/base.pyimport logging import numbers import numpy as np import pyopencl as cl from mot.cl_routines.base import CLRoutine from mot.load_balance_strategies import Worker __author__ = 'Robbert Harms' __date__ = "2014-04-26" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = 
"robbert.harms@maastrichtuniversity.nl" class AbstractFilter(CLRoutine): def __init__(self, size, cl_environments=None, load_balancer=None): """Initialize the filter routine. Args: size (int or tuple): Either a single dimension size for all dimensions or one value for each dimension of the input data to the filter function. Maximum number of dimensions is 3. Either way this value is the distance to the left and to the right of each value. That means that the total kernel size is the product of 1 + 2*s for each size s of each dimension. cl_environments: The cl environments load_balancer: The load balancer to use Attributes: size (int or tuple): (x, y, z, ...). Either a single dimension size for all dimensions or one value for each dimension of the input data to the filter function. Either way this value is the distance to the left and to the right of each value. That means that the total kernel size is the product of 1 + 2*s for each size s of each dimension. """ super(AbstractFilter, self).__init__(cl_environments=cl_environments, load_balancer=load_balancer) self.size = size self._logger = logging.getLogger(__name__) def filter(self, value, mask=None, double_precision=False, nmr_of_times=1): """Filter the given volumes in the given dictionary. If a dict is given as a value the filtering is applied to every value in the dictionary. This can be spread over the different devices. If a single ndarray is given the filtering is performed only on one device. Args: value (dict or array like): an single array to filter (dimensions must match the size specified in the constructor). Can also be a dictionary with a list of ndarrays. mask (array like): A single array of the same dimension as the input value. This can be used to mask values from being used by the filtering routines. They are not used for filtering other values and are not filtered themselves. double_precision (boolean): if we will use double or float nmr_of_times (int): how many times we would like to repeat the filtering (per input volume). Returns: The same type as the input. A new set of volumes with the same keys, or a single array. All filtered. """ if nmr_of_times < 1: nmr_of_times = 1 if nmr_of_times == 1: self._logger.info('Applying filtering with filter {0}'.format(self.__class__.__name__)) if isinstance(value, dict): return self._filter(value, mask, double_precision) else: return self._filter({'value': value}, mask, double_precision)['value'] else: filtered = self.filter(value, mask, double_precision, 1) return self.filter(filtered, mask, double_precision, nmr_of_times - 1) def _filter(self, volumes_dict, mask, double_precision): results_dict = {} np_dtype = np.float32 if double_precision: np_dtype = np.float64 for key, value in volumes_dict.items(): if len(value.shape) > 3 and value.shape[3] > 1: raise ValueError('The given volume {} is a 4d volume with a 4th dimension >1. We can not use this.') volumes_dict[key] = value.astype(dtype=np_dtype, copy=False, order='C') results_dict[key] = np.zeros_like(volumes_dict[key], dtype=np_dtype, order='C') if mask is not None: mask = mask.astype(np.int8, order='C', copy=True) volumes_list = list(volumes_dict.items()) workers = self._create_workers(self._get_worker_generator(self, results_dict, volumes_list, mask, double_precision)) self._load_balancer.process(workers, len(volumes_list)) return results_dict def _get_worker_generator(self, *args): """Generate the worker generator callback for the function _create_workers() This is supposed to be overwritten by the implementing filterer. 
Returns: the python callback for generating the worker """ return lambda cl_environment: AbstractFilterWorker(cl_environment, self.get_compile_flags_list(args[-1]), *args) class AbstractFilterWorker(Worker): def __init__(self, cl_environment, compile_flags, parent_filter, results_dict, volumes_list, mask, double_precision): """Create a filter worker. Args: nmr_of_times: the number of times we want to apply the filter per dataset. """ super(AbstractFilterWorker, self).__init__(cl_environment) self._parent_filter = parent_filter self._size = self._parent_filter.size self._results_dict = results_dict self._volumes_list = volumes_list self._volume_shape = volumes_list[0][1].shape[0:3] self._double_precision = double_precision self._mask = mask self._logger = logging.getLogger(__name__) if mask is None: self._use_mask = False else: self._use_mask = True self._mask_buf = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self._mask) self._kernel = self._build_kernel(compile_flags) def calculate(self, range_start, range_end): volumes_to_run = [self._volumes_list[i] for i in range(len(self._volumes_list)) if range_start <= i < range_end] volume_buf = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=volumes_to_run[0][1]) results_buf = cl.Buffer(self._cl_run_context.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self._results_dict[volumes_to_run[0][0]]) for key, value in volumes_to_run: cl.enqueue_copy(self._cl_run_context.queue, volume_buf, value, is_blocking=False) cl.enqueue_copy(self._cl_run_context.queue, results_buf, self._results_dict[key], is_blocking=False) buffers = [volume_buf] if self._use_mask: buffers.append(self._mask_buf) buffers.append(results_buf) self._kernel.filter(self._cl_run_context.queue, self._volume_shape, None, *buffers) cl.enqueue_copy(self._cl_run_context.queue, self._results_dict[key], results_buf, is_blocking=False) def _get_kernel_source(self): """Get the kernel source for this filtering kernel. This should be implemented by the subclass. """ def _get_size_in_dimension(self, dimension): if isinstance(self._size, numbers.Number): return self._size else: return self._size[dimension] def _calculate_kernel_size_in_dimension(self, dimension): return self._get_size_in_dimension(dimension) * 2 + 1 def _get_ks_dimension_inits(self, nmr_dimensions): """Get the kernel source part for the dimension initializations""" s = '' for i in range(nmr_dimensions): s += 'long dim' + str(i) + ' = get_global_id(' + str(i) + ');' + "\n" return s def _get_ks_sub2ind_func(self, volume_shape): """Get the kernel source part for converting array subscripts to indices""" s = 'long sub2ind(' for i in range(len(volume_shape)): s += 'const long dim' + str(i) + ', ' s = s[0:-2] + '){' + "\n" s += 'return ' for i, d in enumerate(volume_shape): stride = '' for ds in volume_shape[(i + 1):]: stride += ' * ' + str(ds) s += 'dim' + str(i) + stride + ' + ' s = s[0:-3] + ';' + "\n" s += '}' + "\n" return s def _get_ks_sub2ind_func_call(self, nmr_dimensions): """Get the kernel source part for the function call for converting array subscripts to indices""" s = 'sub2ind(' for i in range(nmr_dimensions): s += 'dim' + str(i) + ', ' return s[0:-2] + ')' def _calculate_length(self, nmr_dimensions): """Calculate the length of the array given the number of dimensions. The kernel size is determined by the global size tuple. 
For each dimension this specifies the number of values we look to the right and to the left to calculate the new value. This means that the total kernel size of all dimensions together is the multiplication of 2 * n + 1 for each dimension: left + right + current. Args: nmr_dimensions (int): the number of dimensions """ n = 1 for dimension in range(nmr_dimensions): s = self._get_size_in_dimension(dimension) n *= (s * 2 + 1) return n def _get_ks_dimension_sizes(self, volume_shape): """Get the kernel source for the start and end of each of the dimensions""" s = '' for i, d in enumerate(volume_shape): s += 'long dim' + str(i) + '_start = max((long)0, dim' + str(i) + ' - ' + \ str(self._get_size_in_dimension(i)) + ');' + "\n" s += 'long dim' + str(i) + '_end = min((long)' + str(d) + ', dim' + str(i) + ' + ' + \ str(self._get_size_in_dimension(i)) + ' + 1);' + "\n" return s PKyJk(  mot/cl_routines/filters/mean.pyfrom mot.cl_routines.filters.base import AbstractFilter, AbstractFilterWorker from mot.utils import get_float_type_def __author__ = 'Robbert Harms' __date__ = "2014-04-26" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class MeanFilter(AbstractFilter): def _get_worker_generator(self, *args): return lambda cl_environment: _MeanFilterWorker( cl_environment, self.get_compile_flags_list(double_precision=True), *args) class _MeanFilterWorker(AbstractFilterWorker): def _get_kernel_source(self): kernel_source = '' kernel_source += get_float_type_def(self._double_precision) kernel_source += self._get_ks_sub2ind_func(self._volume_shape) kernel_source += ''' __kernel void filter( global mot_float_type* volume, ''' + ('global char* mask,' if self._use_mask else '') + ''' global mot_float_type* results ){ ''' + self._get_ks_dimension_inits(len(self._volume_shape)) + ''' long ind = ''' + self._get_ks_sub2ind_func_call(len(self._volume_shape)) + '''; ''' + ('if(mask[ind] > 0){' if self._use_mask else 'if(true){') + ''' ''' + self._get_ks_dimension_sizes(self._volume_shape) + ''' mot_float_type sum = 0.0; uint count = 0; ''' + self._get_ks_loop(self._volume_shape) + ''' results[ind] = sum/count; } } ''' return kernel_source def _get_ks_loop(self, volume_shape): s = '' for i in range(len(volume_shape)): s += 'for(dim' + str(i) + ' = dim' + str(i) + '_start; dim' + str(i) + \ ' < dim' + str(i) + '_end; dim' + str(i) + '++){' + "\n" if self._use_mask: s += 'if(mask[' + self._get_ks_sub2ind_func_call(len(volume_shape)) + '] > 0){' + "\n" s += 'sum += volume[' + self._get_ks_sub2ind_func_call(len(volume_shape)) + '];' + "\n" s += 'count++;' + "\n" if self._use_mask: s += '}' + "\n" for i in range(len(volume_shape)): s += '}' + "\n" return s PKyJ9n>88!mot/cl_routines/filters/median.pyfrom mot.cl_routines.filters.base import AbstractFilter, AbstractFilterWorker from mot.utils import get_float_type_def __author__ = 'Robbert Harms' __date__ = "2014-04-26" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class MedianFilter(AbstractFilter): def _get_worker_generator(self, *args): return lambda cl_environment: _MedianFilterWorker( cl_environment, self.get_compile_flags_list(double_precision=True), *args) class _MedianFilterWorker(AbstractFilterWorker): def _get_kernel_source(self): kernel_source = '' kernel_source += get_float_type_def(self._double_precision) kernel_source += self._get_ks_sub2ind_func(self._volume_shape) kernel_source += ''' __kernel void filter( global mot_float_type* volume, 
''' + ('global char* mask,' if self._use_mask else '') + ''' global mot_float_type* results ){ ''' + self._get_ks_dimension_inits(len(self._volume_shape)) + ''' const long ind = ''' + self._get_ks_sub2ind_func_call(len(self._volume_shape)) + '''; ''' + ('if(mask[ind] > 0){' if self._use_mask else 'if(true){') + ''' ''' + self._get_ks_dimension_sizes(self._volume_shape) + ''' mot_float_type guess; mot_float_type maxltguess; mot_float_type mingtguess; mot_float_type less; mot_float_type greater; mot_float_type equal; mot_float_type minv = volume[ind]; mot_float_type maxv = volume[ind]; uint number_of_items = 0; mot_float_type tmp_val = 0.0; ''' + self._loop_encapsulate(''' tmp_val = volume[''' + self._get_ks_sub2ind_func_call(len(self._volume_shape)) + ''']; if(tmp_val < minv){ minv = tmp_val; } if(tmp_val > maxv){ maxv = tmp_val; } number_of_items++; ''') + ''' while(1){ guess = (minv+maxv)/2.0; less = 0; greater = 0; equal = 0; maxltguess = minv; mingtguess = maxv; ''' + self._loop_encapsulate(''' tmp_val = volume[''' + self._get_ks_sub2ind_func_call(len(self._volume_shape)) + ''']; if(tmp_val < guess){ less += 1; if(tmp_val > maxltguess){ maxltguess = tmp_val; } } else if (tmp_val > guess) { greater += 1; if(tmp_val < mingtguess){ mingtguess = tmp_val; } } else{ equal += 1; } ''') + ''' if(less <= (number_of_items + 1)/2 && greater <= (number_of_items + 1)/2){ break; } else if(less > greater){ maxv = maxltguess; } else{ minv = mingtguess; } } if(less >= (number_of_items + 1)/2){ guess = maxltguess; } else if(less + equal >= (number_of_items + 1)/2){} else{ guess = mingtguess; } results[ind] = guess; } } ''' return kernel_source def _loop_encapsulate(self, body): s = '' for i in range(len(self._volume_shape)): s += 'for(dim' + str(i) + ' = dim' + str(i) + '_start; dim' + str(i) + \ ' < dim' + str(i) + '_end; dim' + str(i) + '++){' + "\n" if self._use_mask: s += 'if(mask[' + self._get_ks_sub2ind_func_call(len(self._volume_shape)) + '] > 0){' + "\n" s += body if self._use_mask: s += '}' + "\n" for i in range(len(self._volume_shape)): s += '}' + "\n" return s PKyJ&n))#mot/cl_routines/filters/gaussian.pyimport numbers import numpy as np import pyopencl as cl from mot.cl_routines.filters.base import AbstractFilter, AbstractFilterWorker from mot.utils import get_float_type_def __author__ = 'Robbert Harms' __date__ = "2014-04-26" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class GaussianFilter(AbstractFilter): def __init__(self, size, sigma=None, cl_environments=None, load_balancer=None): """Initialize a Gaussian filter. Args: size (int or tuple): (x, y, z, ...). Either a single dimension size for all dimensions or one value for each dimension of the input data to the filter function. Either way this value is the distance to the left and to the right of each value. That means that the total kernel size is the product of 1 + 2*s for each size s of each dimension. sigma (double or list of double): Either a single double or a list of doubles, one for each size. This parameter defines the sigma of the Gaussian distribution used for creating the Gaussian filtering kernel. If None, the sigma is calculated using size / 3.0. 
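Example:
    A minimal usage sketch, assuming a 3D numpy volume and a working OpenCL environment:

        import numpy as np
        from mot.cl_routines.filters.gaussian import GaussianFilter

        volume = np.random.rand(10, 10, 10)
        smoothed = GaussianFilter(2, sigma=1.0).filter(volume)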
""" super(GaussianFilter, self).__init__(size, cl_environments=cl_environments, load_balancer=load_balancer) self.sigma = sigma def _get_worker_generator(self, *args): return lambda cl_environment: _GaussianFilterWorker( cl_environment, self.get_compile_flags_list(double_precision=True), *args) class _GaussianFilterWorker(AbstractFilterWorker): def calculate(self, range_start, range_end): volumes_to_run = [self._volumes_list[i] for i in range(len(self._volumes_list)) if range_start <= i < range_end] volume_buf = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=volumes_to_run[0][1]) results_buf = cl.Buffer(self._cl_run_context.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self._results_dict[volumes_to_run[0][0]]) for volume_name, volume in volumes_to_run: cl.enqueue_copy(self._cl_run_context.queue, volume_buf, volume, is_blocking=False) cl.enqueue_copy(self._cl_run_context.queue, results_buf, self._results_dict[volume_name], is_blocking=False) for dimension in range(len(self._volume_shape)): kernel_length = self._calculate_kernel_size_in_dimension(dimension) kernel_sigma = self._get_sigma_in_dimension(dimension) filter_kernel = self._get_1d_gaussian_kernel_array(kernel_length, kernel_sigma) filter_kernel_buf = cl.Buffer(self._cl_run_context.context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=filter_kernel) kernel_source = self._get_gaussian_kernel_source(dimension) kernel = cl.Program(self._cl_run_context.context, kernel_source).build() if dimension % 2 == 0: buffers_list = self._list_all_buffers(volume_buf, filter_kernel_buf, results_buf) results_buf_ptr = results_buf else: buffers_list = self._list_all_buffers(results_buf, filter_kernel_buf, volume_buf) results_buf_ptr = volume_buf kernel.filter(self._cl_run_context.queue, self._volume_shape, None, *buffers_list) if dimension == len(self._volume_shape) - 1: cl.enqueue_copy(self._cl_run_context.queue, self._results_dict[volume_name], results_buf_ptr, is_blocking=False) def _build_kernel(self, compile_flags=()): pass def _list_all_buffers(self, input_buffer, filter_kernel_buffer, output_buffer): """Helper function of calculate(). This creates a list with buffers and inserts the mask buffer if needed. 
""" buffers_list = [input_buffer] if self._use_mask: buffers_list.append(self._mask_buf) buffers_list.extend([filter_kernel_buffer, output_buffer]) return buffers_list def _get_gaussian_kernel_source(self, dimension): left_right = self._get_size_in_dimension(dimension) working_dim = 'dim' + str(dimension) kernel_source = '' kernel_source += get_float_type_def(self._double_precision) kernel_source += self._get_ks_sub2ind_func(self._volume_shape) kernel_source += ''' __kernel void filter( global mot_float_type* volume, ''' + ('global char* mask,' if self._use_mask else '') + ''' global mot_float_type* filter, global mot_float_type* results ){ ''' + self._get_ks_dimension_inits(len(self._volume_shape)) + ''' const long ind = ''' + self._get_ks_sub2ind_func_call(len(self._volume_shape)) + '''; ''' + ('if(mask[ind] > 0){' if self._use_mask else 'if(true){') + ''' long filter_index = 0; mot_float_type filtered_value = 0; const long start = dim''' + str(dimension) + ''' - ''' + str(left_right) + '''; const long end = dim''' + str(dimension) + ''' + ''' + str(left_right) + '''; long tmp_ind = 0; for(''' + working_dim + ''' = start; ''' + working_dim + ''' <= end; ''' + working_dim + '''++){ tmp_ind = ''' + self._get_ks_sub2ind_func_call(len(self._volume_shape)) + '''; if(''' + working_dim + ''' >= 0 && ''' + working_dim + ''' < ''' + \ str(self._volume_shape[dimension]) + '''){ ''' + ('if(mask[tmp_ind] > 0){' if self._use_mask else 'if(true){') + ''' filtered_value += filter[filter_index] * volume[tmp_ind]; } } filter_index++; } results[ind] = filtered_value; } } ''' return kernel_source def _get_sigma_in_dimension(self, dimension): if self._parent_filter.sigma is None: return self._get_size_in_dimension(dimension) / 3.0 elif isinstance(self._parent_filter.sigma, numbers.Number): return self._parent_filter.sigma else: return self._parent_filter.sigma[dimension] def _get_1d_gaussian_kernel_array(self, kernel_length, sigma): """Generate a new gaussian kernel of length kernel_length and with the given sigma in one dimension. Args: kernel_length (integer): odd integer defining the length of the kernel (in one dimension). sigma (double): The sigma used in constructing the kernel. Returns: A list of the indicated length filled with a Gaussian filtering kernel. The kernel is normalized to sum to 1. """ r = range(-int(kernel_length/2), int(kernel_length/2)+1) kernel = np.array([1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-x**2.0 / (2 * sigma**2)) for x in r]) np_dtype = np.float32 if self._double_precision: np_dtype = np.float64 kernel = kernel.astype(dtype=np_dtype, order='C', copy=False) return kernel / sum(kernel) PKHWq#mot/cl_routines/filters/__init__.py__author__ = 'Robbert Harms' __date__ = "2015-03-31" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl"PK=cJ`mot/data/opencl/lmmin.cl#ifndef LMMIN_CL #define LMMIN_CL /* * Library: lmfit (Levenberg-Marquardt least squares fitting) * File: lmmin.c * Contents: Levenberg-Marquardt minimization. * Copyright: MINPACK authors, The University of Chikago (1980-1999) * Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013) * Robbert Harms (2013) * License: see ../COPYING (FreeBSD) * Homepage: apps.jcns.fz-juelich.de/lmfit Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * Adapted by = Robbert Harms * Date = 2014-02-05 * License = LGPL v3 * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ /* function declarations. */ void lm_lmpar( const int n, global mot_float_type * const r, int ldr, const int* const Pivot, mot_float_type *const diag, mot_float_type* const qtb, const mot_float_type delta, mot_float_type * const par, mot_float_type * const x, mot_float_type * const Sdiag, mot_float_type * const aux, mot_float_type * const xdi ); void lm_qrfac( const int m, const int n, global mot_float_type * const A, int* const Pivot, mot_float_type* const Rdiag, mot_float_type * const Acnorm, mot_float_type* const W ); void lm_qrsolv( const int n, global mot_float_type * const r, const int ldr, const int * const Pivot, const mot_float_type * const diag, const mot_float_type * const qtb, mot_float_type * const x, mot_float_type * const Sdiag, mot_float_type * const W ); double lm_euclidian_norm(const mot_float_type* const x, const int n); double lm_euclidian_norm_global(global const mot_float_type* const x, const int n); /*****************************************************************************/ /* Numeric constants */ /*****************************************************************************/ /* machine-dependent constants from float.h */ #define LM_MACHEP MOT_EPSILON /* resolution of arithmetic */ #define LM_DWARF MOT_MIN /* smallest nonzero number */ #define LM_SQRT_DWARF sqrt(MOT_MIN) /* square should not underflow */ #define LM_SQRT_GIANT sqrt(MOT_MAX) /* square should not overflow */ #define LM_USERTOL (%(USERTOL_MULT)r * LM_MACHEP) /* users are recommended to require this */ #define FTOL LM_USERTOL /* Relative error desired in the sum of squares. Termination occurs when both the actual and predicted relative reductions in the sum of squares are at most ftol. */ #define XTOL LM_USERTOL /* Relative error between last two approximations. Termination occurs when the relative error between two consecutive iterates is at most xtol. */ #define GTOL LM_USERTOL /* Orthogonality desired between fvec and its derivs. Termination occurs when the cosine of the angle between fvec and any column of the Jacobian is at most gtol in absolute value. */ #define EPSILON LM_USERTOL /* Step used to calculate the Jacobian, should be slightly larger than the relative error in the user-supplied functions. */ #define STEP_BOUND %(STEP_BOUND)r /* Used in determining the initial step bound. This bound is set to the product of stepbound and the Euclidean norm of diag*x if nonzero, or else to stepbound itself. 
In most cases stepbound should lie in the interval (0.1,100.0). Generally, the value 100.0 is recommended. */ #define PATIENCE %(PATIENCE)r /* Used to set the maximum number of function evaluations to patience*(number_of_parameters+1). */ #define SCALE_DIAG %(SCALE_DIAG)r /* If 1, the variables will be rescaled internally. Recommended value is 1. */ #define MAXFEV (PATIENCE * (%(NMR_PARAMS)s+1)) /** the maximum number of evaluations */ #define EPS ((mot_float_type)sqrt(max(EPSILON, LM_MACHEP))) /* for forward differences */ #define LM_ENORM_SQRT_GIANT LM_SQRT_GIANT /* square should not overflow */ #define LM_ENORM_SQRT_DWARF LM_SQRT_DWARF /* square should not underflow */ /** * Make sure that the following holds: * %(NMR_PARAMS)s > 0 * %(NMR_INST_PER_PROBLEM)s >= %(NMR_PARAMS)s * FTOL >= 0. && XTOL >= 0. && GTOL >= 0. * MAXFEV > 0 * STEP_BOUND > 0. * SCALE_DIAG == 0 || SCALE_DIAG == 1 */ /******************************************************************************/ /* lmmin (main minimization routine) */ /******************************************************************************/ int lmmin(mot_float_type * const x, const void* const data, global mot_float_type* fjac){ int j, i; mot_float_type actred, dirder, fnorm, fnorm1, gnorm, pnorm, prered, ratio, step, temp, temp1, temp2, temp3; double sum; bool done_first = false; /* loop counters, for monitoring */ bool inner_success; /* flag for loop control */ mot_float_type lmpar = 0; /* Levenberg-Marquardt parameter */ mot_float_type delta = 0; mot_float_type xnorm = 0; int nfev = 0; /*** Allocate work space. ***/ mot_float_type fvec[%(NMR_INST_PER_PROBLEM)s]; mot_float_type diag[%(NMR_PARAMS)s]; mot_float_type qtf[%(NMR_PARAMS)s]; mot_float_type wa1[%(NMR_PARAMS)s]; mot_float_type wa2[%(NMR_PARAMS)s]; mot_float_type wa3[%(NMR_PARAMS)s]; mot_float_type wf[%(NMR_INST_PER_PROBLEM)s]; int Pivot[%(NMR_PARAMS)s]; /* Initialize diag. */ if (!SCALE_DIAG) { for (j = 0; j < %(NMR_PARAMS)s; j++) diag[j] = 1; } /*** Evaluate function at starting point and calculate norm. ***/ evaluate(x, data, fvec); nfev = 1; fnorm = lm_euclidian_norm(fvec, %(NMR_INST_PER_PROBLEM)s); if (!isfinite(fnorm)) { return 10; /* nan */ } else if (fnorm <= LM_DWARF) { return 1; } /*** The outer loop: compute gradient, then descend. ***/ while(true){ /** Calculate the Jacobian. **/ for (j = 0; j < %(NMR_PARAMS)s; j++) { temp = x[j]; step = max(EPS*EPS, EPS * fabs(temp)); x[j] += step; /* replace temporarily */ evaluate(x, data, wf); ++nfev; for (i = 0; i < %(NMR_INST_PER_PROBLEM)s; i++){ fjac[j*%(NMR_INST_PER_PROBLEM)s+i] = (wf[i] - fvec[i]) / step; } x[j] = temp; /* restore */ } /** Compute the QR factorization of the Jacobian. **/ /* fjac is an m by n array. The upper n by n submatrix of fjac is made * to contain an upper triangular matrix R with diagonal elements of * nonincreasing magnitude such that * * P^T*(J^T*J)*P = R^T*R * * (NOTE: ^T stands for matrix transposition), * * where P is a permutation matrix and J is the final calculated * Jacobian. Column j of P is column Pivot(j) of the identity matrix. * The lower trapezoidal part of fjac contains information generated * during the computation of R. * * Pivot is an integer array of length n. It defines a permutation * matrix P such that jac*P = Q*R, where jac is the final calculated * Jacobian, Q is orthogonal (not stored), and R is upper triangular * with diagonal elements of nonincreasing magnitude. Column j of P * is column Pivot(j) of the identity matrix. 
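* Concretely, after the lm_qrfac call below, the strict upper triangle of fjac holds the
* strict upper triangle of R, its lower trapezoidal part holds the Householder vectors
* (a factored form of Q), wa1 holds the diagonal of R (copied onto fjac's diagonal a few
* lines further down), and wa2 holds the original column norms of the Jacobian.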
*/ lm_qrfac(%(NMR_INST_PER_PROBLEM)s, %(NMR_PARAMS)s, fjac, Pivot, wa1, wa2, wa3); /* return values are Pivot, wa1=rdiag, wa2=acnorm */ /** Form Q^T * fvec, and store first n components in qtf. **/ for (i = 0; i < %(NMR_INST_PER_PROBLEM)s; i++){ wf[i] = fvec[i]; } for(j = 0; j < %(NMR_PARAMS)s; j++){ temp3 = fjac[j*%(NMR_INST_PER_PROBLEM)s+j]; if (temp3 != 0) { sum = 0; for (i = j; i < %(NMR_INST_PER_PROBLEM)s; i++){ sum += fjac[j*%(NMR_INST_PER_PROBLEM)s+i] * wf[i]; } temp = -sum / temp3; for (i = j; i < %(NMR_INST_PER_PROBLEM)s; i++){ wf[i] += fjac[j*%(NMR_INST_PER_PROBLEM)s+i] * temp; } } fjac[j*%(NMR_INST_PER_PROBLEM)s+j] = wa1[j]; qtf[j] = wf[j]; } /** Compute norm of scaled gradient and detect degeneracy. **/ gnorm = 0; for (j = 0; j < %(NMR_PARAMS)s; j++) { if(wa2[Pivot[j]] == 0){ } else{ sum = 0; for (i = 0; i <= j; i++){ sum += fjac[j*%(NMR_INST_PER_PROBLEM)s+i] * qtf[i]; } gnorm = max((double)gnorm, fabs(sum / wa2[Pivot[j]] / fnorm)); } } if (gnorm <= GTOL) { return 5; } /** Initialize or update diag and delta. **/ if(!done_first){ /* first iteration only */ if (SCALE_DIAG) { /* diag := norms of the columns of the initial Jacobian */ for (j = 0; j < %(NMR_PARAMS)s; j++){ diag[j] = wa2[j] ? wa2[j] : 1; } /* xnorm := || D x || */ for (j = 0; j < %(NMR_PARAMS)s; j++){ wa3[j] = diag[j] * x[j]; } xnorm = lm_euclidian_norm(wa3, %(NMR_PARAMS)s); } else { xnorm = lm_euclidian_norm(x, %(NMR_PARAMS)s); } if(!isfinite(xnorm)){ return 10; } /* initialize the step bound delta. */ if(xnorm){ delta = STEP_BOUND * xnorm; } else{ delta = STEP_BOUND; } } else { if (SCALE_DIAG) { for (j = 0; j < %(NMR_PARAMS)s; j++){ diag[j] = max( diag[j], wa2[j] ); } } } /** The inner loop. **/ do { /** Determine the Levenberg-Marquardt parameter. **/ lm_lmpar(%(NMR_PARAMS)s, fjac, %(NMR_INST_PER_PROBLEM)s, Pivot, diag, qtf, delta, &lmpar, wa1, wa2, wf, wa3 ); /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */ /* Predict scaled reduction */ pnorm = lm_euclidian_norm(wa3, %(NMR_PARAMS)s); if(!isfinite(pnorm)) { return 10; } temp2 = lmpar * ((pnorm / fnorm)*(pnorm / fnorm)); for (j = 0; j < %(NMR_PARAMS)s; j++) { wa3[j] = 0; for (i = 0; i <= j; i++){ wa3[i] -= fjac[j*%(NMR_INST_PER_PROBLEM)s+i] * wa1[Pivot[j]]; } } temp1 = lm_euclidian_norm(wa3, %(NMR_PARAMS)s) / fnorm; temp1 *= temp1; if (!isfinite(temp1)){ return 10; } prered = temp1 + 2 * temp2; dirder = -temp1 + temp2; /* scaled directional derivative */ /* At first call, adjust the initial step bound. */ if (!done_first && pnorm < delta ){ delta = pnorm; } /** Evaluate the function at x + p. **/ for (j = 0; j < %(NMR_PARAMS)s; j++){ wa2[j] = x[j] - wa1[j]; } evaluate(wa2, data, wf); ++nfev; fnorm1 = lm_euclidian_norm(wf, %(NMR_INST_PER_PROBLEM)s); if(!isfinite(fnorm1)){ return 10; } /** Evaluate the scaled reduction. **/ /* Actual scaled reduction */ actred = 1 - ((fnorm1/fnorm)*(fnorm1/fnorm)); /* Ratio of actual to predicted reduction */ ratio = prered ? actred / prered : 0; /* Update the step bound */ if( ratio <= 0.25 ) { if( actred >= 0 ){ temp = 0.5; } else if ( actred > -99 ){ /* -99 = 1-1/0.1^2 */ temp = max( dirder / (2*dirder + actred), (mot_float_type) 0.1 ); } else{ temp = 0.1; } delta = temp * min(delta, (mot_float_type)(pnorm / 0.1)); lmpar /= temp; } else if ( ratio >= 0.75 ) { delta = 2*pnorm; lmpar *= 0.5; } else if ( !lmpar ) { delta = 2*pnorm; } /** On success, update solution, and test for convergence. 
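The trial point wa2 = x - wa1 is accepted only when the ratio of actual to predicted reduction (actred / prered, computed above) is at least 1e-4; otherwise the trust region radius delta and the LM parameter lmpar keep the adjustment made above and the inner loop retries.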
**/ inner_success = ratio >= 1e-4; if ( inner_success ) { /* Update x, fvec, and their norms */ if (SCALE_DIAG) { for (j = 0; j < %(NMR_PARAMS)s; j++) { x[j] = wa2[j]; wa2[j] = diag[j] * x[j]; } } else { for (j = 0; j < %(NMR_PARAMS)s; j++){ x[j] = wa2[j]; } } for (i = 0; i < %(NMR_INST_PER_PROBLEM)s; i++){ fvec[i] = wf[i]; } xnorm = lm_euclidian_norm(wa2, %(NMR_PARAMS)s); if (!isfinite(xnorm)){ return 10; /* nan */ } fnorm = fnorm1; } /* convergence tests */ if (fnorm <= LM_DWARF){ return 1; /* success: sum of squares almost zero */ } /* test two criteria (both may be fulfilled) */ if (fabs(actred) <= FTOL && prered <= FTOL && ratio <= 2){ if (delta <= XTOL * xnorm){ return 4; /* success: sum of squares almost stable */ } return 2; /* success: x almost stable */ } /** Tests for termination and stringent tolerances. **/ if ( nfev >= MAXFEV ){ return 6; } if ( fabs(actred) <= LM_MACHEP && prered <= LM_MACHEP && ratio <= 2 ){ return 7; } if ( delta <= LM_MACHEP * xnorm ){ return 8; } if ( gnorm <= LM_MACHEP){ return 9; } /** End of the inner loop. Repeat if iteration unsuccessful. **/ } while ( !inner_success ); done_first = true; };/*** End of the loop. ***/ } /*** lmmin. ***/ /*****************************************************************************/ /* lm_lmpar (determine Levenberg-Marquardt parameter) */ /*****************************************************************************/ void lm_lmpar( const int n, global mot_float_type* const r, int ldr, const int* const Pivot, mot_float_type* const diag, mot_float_type* const qtb, const mot_float_type delta, mot_float_type* const par, mot_float_type * const x, mot_float_type * const Sdiag, mot_float_type * const aux, mot_float_type * const xdi) { /* Given an m by n matrix A, an n by n nonsingular diagonal matrix D, * an m-vector b, and a positive number delta, the problem is to * determine a parameter value par such that if x solves the system * * A*x = b and sqrt(par)*D*x = 0 * * in the least squares sense, and dxnorm is the Euclidean norm of D*x, * then either par=0 and (dxnorm-delta) < 0.1*delta, or par>0 and * abs(dxnorm-delta) < 0.1*delta. * * Using lm_qrsolv, this subroutine completes the solution of the * problem if it is provided with the necessary information from the * QR factorization, with column pivoting, of A. That is, if A*P = Q*R, * where P is a permutation matrix, Q has orthogonal columns, and R is * an upper triangular matrix with diagonal elements of nonincreasing * magnitude, then lmpar expects the full upper triangle of R, the * permutation matrix P, and the first n components of Q^T*b. On output * lmpar also provides an upper triangular matrix S such that * * P^T*(A^T*A + par*D*D)*P = S^T*S. * * S is employed within lmpar and may be of separate interest. * * Only a few iterations are generally needed for convergence of the * algorithm. If, however, the limit of 10 iterations is reached, then * the output par will contain the best value obtained so far. * * Parameters: * * n is a positive integer INPUT variable set to the order of r. * * r is an n by n array. On INPUT the full upper triangle must contain * the full upper triangle of the matrix R. On OUTPUT the full upper * triangle is unaltered, and the strict lower triangle contains the * strict upper triangle (transposed) of the upper triangular matrix S. * * ldr is a positive integer INPUT variable not less than n which * specifies the leading dimension of the array R. 
* * Pivot is an integer INPUT array of length n which defines the * permutation matrix P such that A*P = Q*R. Column j of P is column * Pivot(j) of the identity matrix. * * diag is an INPUT array of length n which must contain the diagonal * elements of the matrix D. * * qtb is an INPUT array of length n which must contain the first * n elements of the vector Q^T*b. * * delta is a positive INPUT variable which specifies an upper bound * on the Euclidean norm of D*x. * * par is a nonnegative variable. On INPUT par contains an initial * estimate of the Levenberg-Marquardt parameter. On OUTPUT par * contains the final estimate. * * x is an OUTPUT array of length n which contains the least-squares * solution of the system A*x = b, sqrt(par)*D*x = 0, for the output par. * * Sdiag is an array of length n needed as workspace; on OUTPUT it * contains the diagonal elements of the upper triangular matrix S. * * aux is a multi-purpose work array of length n. * * xdi is a work array of length n. On OUTPUT: diag[j] * x[j]. * */ // used as both iter and nsing int iter, nsing; int i, j; mot_float_type gnorm, parc; mot_float_type dxnorm, fp, fp_old, parl, paru; mot_float_type temp; mot_float_type p1 = 0.1; /*** Compute and store in x the Gauss-Newton direction. If the Jacobian is rank-deficient, obtain a least-squares solution. ***/ nsing = n; for (j = 0; j < n; j++) { aux[j] = qtb[j]; if (r[j * ldr + j] == 0 && nsing == n){ nsing = j; } if (nsing < n){ aux[j] = 0; } } for (j = nsing - 1; j >= 0; j--) { aux[j] = aux[j] / r[j + ldr * j]; temp = aux[j]; for (i = 0; i < j; i++){ aux[i] -= r[j * ldr + i] * temp; } } for (j = 0; j < n; j++){ x[Pivot[j]] = aux[j]; } /*** Initialize the iteration counter, evaluate the function at the origin, and test for acceptance of the Gauss-Newton direction. ***/ for (j = 0; j < n; j++){ xdi[j] = diag[j] * x[j]; } dxnorm = lm_euclidian_norm(xdi, n); fp = dxnorm - delta; if (fp <= p1 * delta) { *par = 0; return; } /*** If the Jacobian is not rank deficient, the Newton step provides a lower bound, parl, for the zero of the function. Otherwise set this bound to zero. ***/ parl = 0; if (nsing >= n) { for (j = 0; j < n; j++){ aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm; } for (j = 0; j < n; j++) { temp = 0; for (i = 0; i < j; i++){ temp += r[j*ldr+i] * aux[i]; } aux[j] = (aux[j] - temp) / r[j+ldr*j]; } temp = lm_euclidian_norm(aux, n); parl = fp / delta / temp / temp; } /*** Calculate an upper bound, paru, for the zero of the function. ***/ for (j = 0; j < n; j++) { temp = 0; for (i = 0; i <= j; i++){ temp += r[j*ldr+i] * qtb[i]; } aux[j] = temp / diag[Pivot[j]]; } gnorm = lm_euclidian_norm(aux, n); paru = gnorm / delta; if (paru == 0){ paru = LM_DWARF / min(delta, p1); } /*** If the input par lies outside of the interval (parl,paru), set par to the closer endpoint. ***/ *par = max(*par, parl); *par = min(*par, paru); if (*par == 0){ *par = gnorm / dxnorm; } /*** Iterate. ***/ for (iter=0; ; iter++) { /** Evaluate the function at the current value of par. **/ if (*par == 0){ *par = max((mot_float_type)LM_DWARF, (mot_float_type)(0.001 * paru)); } temp = sqrt(*par); for (j = 0; j < n; j++){ aux[j] = temp * diag[j]; } lm_qrsolv( n, r, ldr, Pivot, aux, qtb, x, Sdiag, xdi ); /* return values are r, x, Sdiag */ for (j = 0; j < n; j++){ xdi[j] = diag[j] * x[j]; /* used as output */ } dxnorm = lm_euclidian_norm(xdi, n); fp_old = fp; fp = dxnorm - delta; /** If the function is small enough, accept the current value of par. 
Also test for the exceptional cases where parl is zero or the number of iterations has reached 10. **/ if (fabs(fp) <= p1 * delta || (parl == 0 && fp <= fp_old && fp_old < 0) || iter == 10){ break; /* the only exit from the iteration. */ } /** Compute the Newton correction. **/ for (j = 0; j < n; j++){ aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm; } for (j = 0; j < n; j++) { aux[j] = aux[j] / Sdiag[j]; for (i = j+1; i < n; i++){ aux[i] -= r[j*ldr+i] * aux[j]; } } temp = lm_euclidian_norm(aux, n); parc = fp / delta / temp / temp; /** Depending on the sign of the function, update parl or paru. **/ if (fp > 0){ parl = max(parl, *par); } else{ /* fp < 0 [the case fp==0 is precluded by the break condition] */ paru = min(paru, *par); } /** Compute an improved estimate for par. **/ *par = max(parl, *par + parc); } } /*** lm_lmpar. ***/ /******************************************************************************/ /* lm_qrfac (QR factorization, from lapack) */ /******************************************************************************/ void lm_qrfac(const int m, const int n, global mot_float_type* const A, int* const Pivot, mot_float_type* const Rdiag, mot_float_type* const Acnorm, mot_float_type* const W) { /* * This subroutine uses Householder transformations with column pivoting * to compute a QR factorization of the m by n matrix A. That is, qrfac * determines an orthogonal matrix Q, a permutation matrix P, and an * upper trapezoidal matrix R with diagonal elements of nonincreasing * magnitude, such that A*P = Q*R. The Householder transformation for * column k, k = 1,2,...,n, is of the form * * I - 2*w*wT/|w|^2 * * where w has zeroes in the first k-1 positions. * * Parameters: * * m is an INPUT parameter set to the number of rows of A. * * n is an INPUT parameter set to the number of columns of A. * * A is an m by n array. On INPUT, A contains the matrix for which the * QR factorization is to be computed. On OUTPUT the strict upper * trapezoidal part of A contains the strict upper trapezoidal part * of R, and the lower trapezoidal part of A contains a factored form * of Q (the non-trivial elements of the vectors w described above). * * Pivot is an integer OUTPUT array of length n that describes the * permutation matrix P. Column j of P is column Pivot(j) of the * identity matrix. * * Rdiag is an OUTPUT array of length n which contains the diagonal * elements of R. * * Acnorm is an OUTPUT array of length n which contains the norms of * the corresponding columns of the input matrix A. If this information * is not needed, then Acnorm can share storage with Rdiag. * * W is a work array of length n. * */ int i, j, k, kmax; mot_float_type ajnorm, temp; /** Compute initial column norms; initialize Pivot with identity permutation. ***/ for (j = 0; j < n; j++) { W[j] = Rdiag[j] = Acnorm[j] = lm_euclidian_norm_global(&A[j*m], m); Pivot[j] = j; } /** Loop over columns of A. **/ for (j = 0; j < n; j++) { /** Bring the column of largest norm into the pivot position. **/ kmax = j; for (k = j+1; k < n; k++) if (Rdiag[k] > Rdiag[kmax]) kmax = k; if (kmax != j) { /* Swap columns j and kmax. */ k = Pivot[j]; Pivot[j] = Pivot[kmax]; Pivot[kmax] = k; for (i = 0; i < m; i++) { temp = A[j*m+i]; A[j*m+i] = A[kmax*m+i]; A[kmax*m+i] = temp; } /* Half-swap: Rdiag[j], W[j] won't be needed any further. */ Rdiag[kmax] = Rdiag[j]; W[kmax] = W[j]; } /** Compute the Householder reflection vector w_j to reduce the j-th column of A to a multiple of the j-th unit vector. 
**/ ajnorm = lm_euclidian_norm_global(&A[j*m+j], m-j); if (ajnorm == 0) { Rdiag[j] = 0; } else{ /* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|, where the sign +- is chosen to avoid cancellation in w_jj. */ if (A[j*m+j] < 0){ ajnorm = -ajnorm; } for (i = j; i < m; i++){ A[j*m+i] /= ajnorm; } A[j*m+j] += 1; /** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2 to the remaining columns, and update the norms. **/ for (k = j + 1; k < n; k++){ /* Compute scalar product w_j * a_j. */ temp = 0; for (i = j; i < m; i++){ temp += A[j*m+i] * A[k*m+i]; } /* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */ temp = temp / A[j*m+j]; /* Carry out transform U_w_j * a_k. */ for (i = j; i < m; i++){ A[k*m+i] -= temp * A[j*m+i]; } /* No idea what happens here. */ if (Rdiag[k] != 0) { temp = A[m*k+j] / Rdiag[k]; if (fabs(temp) < 1) { Rdiag[k] *= sqrt(1 - (temp*temp)); temp = Rdiag[k] / W[k]; } else { temp = 0; } if(temp == 0 || 0.05 * (temp * temp) <= LM_MACHEP){ Rdiag[k] = lm_euclidian_norm_global(&A[m*k+j+1], m-j-1); W[k] = Rdiag[k]; } } } Rdiag[j] = -ajnorm; } } } /*** lm_qrfac. ***/ /*****************************************************************************/ /* lm_qrsolv (linear least-squares) */ /*****************************************************************************/ void lm_qrsolv(const int n, global mot_float_type* const r, const int ldr, const int* const Pivot, const mot_float_type* const diag, const mot_float_type* const qtb, mot_float_type* const x, mot_float_type* const Sdiag, mot_float_type* const W) { /* * Given an m by n matrix A, an n by n diagonal matrix D, and an * m-vector b, the problem is to determine an x which solves the * system * * A*x = b and D*x = 0 * * in the least squares sense. * * This subroutine completes the solution of the problem if it is * provided with the necessary information from the QR factorization, * with column pivoting, of A. That is, if A*P = Q*R, where P is a * permutation matrix, Q has orthogonal columns, and R is an upper * triangular matrix with diagonal elements of nonincreasing magnitude, * then qrsolv expects the full upper triangle of R, the permutation * matrix P, and the first n components of Q^T*b. The system * A*x = b, D*x = 0, is then equivalent to * * R*z = Q^T*b, P^T*D*P*z = 0, * * where x = P*z. If this system does not have full rank, then a least * squares solution is obtained. On output qrsolv also provides an upper * triangular matrix S such that * * P^T*(A^T*A + D*D)*P = S^T*S. * * S is computed within qrsolv and may be of separate interest. * * Parameters: * * n is a positive integer INPUT variable set to the order of R. * * r is an n by n array. On INPUT the full upper triangle must contain * the full upper triangle of the matrix R. On OUTPUT the full upper * triangle is unaltered, and the strict lower triangle contains the * strict upper triangle (transposed) of the upper triangular matrix S. * * ldr is a positive integer INPUT variable not less than n which * specifies the leading dimension of the array R. * * Pivot is an integer INPUT array of length n which defines the * permutation matrix P such that A*P = Q*R. Column j of P is column * Pivot(j) of the identity matrix. * * diag is an INPUT array of length n which must contain the diagonal * elements of the matrix D. * * qtb is an INPUT array of length n which must contain the first * n elements of the vector Q^T*b. * * x is an OUTPUT array of length n which contains the least-squares * solution of the system A*x = b, D*x = 0. 
* * Sdiag is an OUTPUT array of length n which contains the diagonal * elements of the upper triangular matrix S. * * W is a work array of length n. * */ int i, kk, j, k, nsing; mot_float_type qtbpj, temp; mot_float_type _sin, _cos, _tan, _cot; /* local variables, not functions */ /*** Copy R and Q^T*b to preserve input and initialize S. In particular, save the diagonal elements of R in x. ***/ for (j = 0; j < n; j++) { for (i = j; i < n; i++) r[j*ldr+i] = r[i*ldr+j]; x[j] = r[j*ldr+j]; W[j] = qtb[j]; } /*** Eliminate the diagonal matrix D using a Givens rotation. ***/ for (j = 0; j < n; j++) { /*** Prepare the row of D to be eliminated, locating the diagonal element using P from the QR factorization. ***/ if (diag[Pivot[j]] != 0) { for (k = j; k < n; k++) Sdiag[k] = 0; Sdiag[j] = diag[Pivot[j]]; /*** The transformations to eliminate the row of D modify only a single element of Q^T*b beyond the first n, which is initially 0. ***/ qtbpj = 0; for (k = j; k < n; k++) { /** Determine a Givens rotation which eliminates the appropriate element in the current row of D. **/ if (Sdiag[k] == 0){ } else{ kk = k + ldr * k; if (fabs(r[kk]) < fabs(Sdiag[k])) { _cot = r[kk] / Sdiag[k]; _sin = 1 / hypot(1, _cot); _cos = _sin * _cot; } else { _tan = Sdiag[k] / r[kk]; _cos = 1 / hypot(1, _tan); _sin = _cos * _tan; } /** Compute the modified diagonal element of R and the modified element of (Q^T*b,0). **/ r[kk] = _cos * r[kk] + _sin * Sdiag[k]; temp = _cos * W[k] + _sin * qtbpj; qtbpj = -_sin * W[k] + _cos * qtbpj; W[k] = temp; /** Accumulate the transformation in the row of S. **/ for (i = k+1; i < n; i++) { temp = _cos * r[k * ldr + i] + _sin * Sdiag[i]; Sdiag[i] = -_sin * r[k * ldr + i] + _cos * Sdiag[i]; r[k * ldr + i] = temp; } } } } /** Store the diagonal element of S and restore the corresponding diagonal element of R. **/ Sdiag[j] = r[j * ldr + j]; r[j * ldr + j] = x[j]; } /*** Solve the triangular system for z. If the system is singular, then obtain a least-squares solution. ***/ nsing = n; for (j = 0; j < n; j++) { if (Sdiag[j] == 0 && nsing == n){ nsing = j; } if (nsing < n){ W[j] = 0; } } for (j = nsing - 1; j >= 0; j--) { temp = 0; for (i = j + 1; i < nsing; i++){ temp += r[j * ldr + i] * W[i]; } W[j] = (W[j] - temp) / Sdiag[j]; } /*** Permute the components of z back to components of x. ***/ for (j = 0; j < n; j++) x[Pivot[j]] = W[j]; } /*** lm_qrsolv. ***/ /******************************************************************************/ /* lm_enorm (Euclidean norm) */ /******************************************************************************/ double lm_euclidian_norm(const mot_float_type* const x, const int n){ /* This function calculates the Euclidean norm of an n-vector x. * * The Euclidean norm is computed by accumulating the sum of squares * in three different sums. The sums of squares for the small and large * components are scaled so that no overflows occur. Non-destructive * underflows are permitted. Underflows and overflows do not occur in * the computation of the unscaled sum of squares for the intermediate * components. The definitions of small, intermediate and large components * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main * restrictions on these constants are that LM_SQRT_DWARF**2 not underflow * and LM_SQRT_GIANT**2 not overflow. * * Parameters: * * n is a positive integer INPUT variable. * * x is an INPUT array of length n. 
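/* Illustrative host-side C sketch, not part of the MOT sources: the Givens
 * rotation lm_qrsolv uses above to eliminate Sdiag[k] against the diagonal
 * element r[kk]. Branching on the relative magnitudes keeps hypot's argument
 * near 1 and avoids overflow. The helper name givens_demo is made up here. */
#include <math.h>
#include <stdio.h>

static void givens_demo(double rkk, double sdiag, double *c, double *s){
    if(fabs(rkk) < fabs(sdiag)){
        double ct = rkk / sdiag;        /* cotangent branch */
        *s = 1.0 / hypot(1.0, ct);
        *c = *s * ct;
    } else {
        double t = sdiag / rkk;         /* tangent branch */
        *c = 1.0 / hypot(1.0, t);
        *s = *c * t;
    }
}

int main(void){
    double c, s;
    givens_demo(3.0, 4.0, &c, &s);
    /* rotating (3, 4) gives (5, 0) up to rounding: the second element is zeroed */
    printf("c=%g s=%g -> (%g, %g)\n", c, s, c*3 + s*4, -s*3 + c*4);
    return 0;
}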
*/ int i; double agiant, s1, s2, s3, xabs, x1max, x3max; s1 = 0; s2 = 0; s3 = 0; x1max = 0; x3max = 0; agiant = LM_ENORM_SQRT_GIANT / n; /** Sum squares. **/ for (i = 0; i < n; i++) { xabs = fabs(x[i]); if (xabs > LM_ENORM_SQRT_DWARF) { if (xabs < agiant) { s2 += xabs * xabs; } else if (xabs > x1max) { s1 = 1 + s1 * ((x1max / xabs) * (x1max / xabs)); x1max = xabs; } else { s1 += ((xabs / x1max) * (xabs / x1max)); } } else if (xabs > x3max) { s3 = 1 + s3 * ((x3max / xabs) * (x3max / xabs)); x3max = xabs; } else if (xabs != 0) { s3 += ((xabs / x3max) * (xabs / x3max)); } } /** Calculate the norm. **/ if (s1 != 0) return x1max * sqrt(s1 + (s2 / x1max) / x1max); else if (s2 != 0) if (s2 >= x3max) return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3))); else return sqrt(x3max * ((s2 / x3max) + (x3max * s3))); else return x3max * sqrt(s3); } /*** euclidian_norm. ***/ /******************************************************************************/ /* lm_enorm (Euclidean norm) for global memory */ /******************************************************************************/ double lm_euclidian_norm_global(global const mot_float_type* const x, const int n){ /* This function calculates the Euclidean norm of an n-vector x. * * The Euclidean norm is computed by accumulating the sum of squares * in three different sums. The sums of squares for the small and large * components are scaled so that no overflows occur. Non-destructive * underflows are permitted. Underflows and overflows do not occur in * the computation of the unscaled sum of squares for the intermediate * components. The definitions of small, intermediate and large components * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main * restrictions on these constants are that LM_SQRT_DWARF**2 not underflow * and LM_SQRT_GIANT**2 not overflow. * * Parameters: * * n is a positive integer INPUT variable. * * x is an INPUT array of length n. */ int i; double agiant, s1, s2, s3, xabs, x1max, x3max; s1 = 0; s2 = 0; s3 = 0; x1max = 0; x3max = 0; agiant = LM_ENORM_SQRT_GIANT / n; /** Sum squares. **/ for (i = 0; i < n; i++) { xabs = fabs(x[i]); if (xabs > LM_ENORM_SQRT_DWARF) { if (xabs < agiant) { s2 += xabs * xabs; } else if (xabs > x1max) { s1 = 1 + s1 * ((x1max / xabs) * (x1max / xabs)); x1max = xabs; } else { s1 += ((xabs / x1max) * (xabs / x1max)); } } else if (xabs > x3max) { s3 = 1 + s3 * ((x3max / xabs) * (x3max / xabs)); x3max = xabs; } else if (xabs != 0) { s3 += ((xabs / x3max) * (xabs / x3max)); } } /** Calculate the norm. **/ if (s1 != 0) return x1max * sqrt(s1 + (s2 / x1max) / x1max); else if (s2 != 0) if (s2 >= x3max) return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3))); else return sqrt(x3max * ((s2 / x3max) + (x3max * s3))); else return x3max * sqrt(s3); } /*** euclidian_norm. ***/ #undef LM_MACHEP #undef LM_DWARF #undef LM_SQRT_DWARF #undef LM_SQRT_GIANT #undef LM_USERTOL #undef FTOL #undef XTOL #undef GTOL #undef EPSILON #undef STEP_BOUND #undef PATIENCE #undef SCALE_DIAG #undef MAXFEV #undef EPS #undef LM_ENORM_SQRT_GIANT #undef LM_ENORM_SQRT_DWARF #endif // LMMIN_CL PK.6kJ/sLFF!mot/data/opencl/trigonometrics.cl/** * Author = Robbert Harms * Date = 2017-03-11 * License = LGPL v3 * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ /** * Computes log(cosh(x)). * * For large x this will try to estimate it without overflow. For small x we use the opencl functions log and cos. 
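/* Illustrative host-side C sketch, not part of the MOT sources: why the
 * scaled accumulation in lm_euclidian_norm above matters. A naive sum of
 * squares overflows once components get near sqrt(DBL_MAX), while dividing by
 * the largest component first (a simplified two-pass variant of the
 * three-accumulator scheme above) still returns the correct norm. */
#include <math.h>
#include <stdio.h>

static double scaled_norm_demo(const double *x, int n){
    double xmax = 0, s = 0;
    int i;
    for(i = 0; i < n; i++){ if(fabs(x[i]) > xmax) xmax = fabs(x[i]); }
    if(xmax == 0){ return 0; }
    for(i = 0; i < n; i++){ s += (x[i] / xmax) * (x[i] / xmax); }
    return xmax * sqrt(s);
}

int main(void){
    double big[2] = {1e200, 1e200};
    printf("naive:  %g\n", sqrt(big[0]*big[0] + big[1]*big[1]));  /* overflows to inf */
    printf("scaled: %g\n", scaled_norm_demo(big, 2));             /* about 1.41421e+200 */
    return 0;
}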
* * The estimation for large numbers has been taken from: * https://github.com/JaneliaSciComp/tmt/blob/master/basics/logcosh.m * */ double log_cosh(double x){ if(x < 50){ return log(cosh(x)); } return fabs(x) + log(1 + exp(-2.0 * fabs(x))) - log(2.0); } PKAJyKCCmot/data/opencl/powell.cl#ifndef POWELL_CL #define POWELL_CL /** * Creator = Robbert Harms * Date = 2014-02-05 * License = LGPL v3 * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ /** Uses the Powell's Quadratically Convergent Method of minimizing an objective function in a multidimensional space. This function is implemented in OpenCL by Robbert Harms, using the original Powell 1964 paper and the Numerical Recipes chapter on Powell. */ /* Used to set the maximum number of iterations to patience*(number_of_parameters+1). */ #define PATIENCE %(PATIENCE)r #define POWELL_MAX_ITERATIONS (PATIENCE * (%(NMR_PARAMS)r+1)) #define POWELL_FUNCTION_TOLERANCE 30*MOT_EPSILON #define BRENT_MAX_ITERATIONS (PATIENCE * (%(NMR_PARAMS)r+1)) #define BRENT_TOL 2*30*MOT_EPSILON #define BRACKET_GOLD 1.618034 /* the default ratio by which successive intervals are magnified in Bracketing */ #define GLIMIT 100.0 /* the maximum magnification allowed for a parabolic-fit step in Bracketing */ #define EPSILON 30*MOT_EPSILON #define CGOLD 0.3819660 /* golden ratio = (3 - sqrt(5))/2 */ #define ZEPS 30*MOT_EPSILON /** * Set one of the reset methods. These are used to reset the search directions after a set number of steps to * prevent linear dependence between the search vectors */ #define POWELL_RESET_METHOD_RESET_TO_IDENTITY 0 /* Resets the search vectors to the I(nxn) matrix after every cycle of N (N = number of parameters) */ #define POWELL_RESET_METHOD_EXTRAPOLATED_POINT 1 /* see Numerical Recipes */ #define POWELL_RESET_METHOD %(RESET_METHOD)s /** * A structure used to hold the data we are passing to the linear optimizer. * * The linear optimizer in turn should pass it to the linear evaluation function. */ typedef struct{ const mot_float_type* const point_0; const mot_float_type* const point_1; const void* const data; } linear_function_data; /** * Simple swapping function that swaps a and b */ void swap(mot_float_type* a, mot_float_type* b){ mot_float_type temp; temp = *b; *b = *a; *a = temp; } mot_float_type bracket_and_brent(mot_float_type *xmin, const void* const eval_data); /** * Initializes the starting vectors. * * This fills the starting vector matrix with the identity matrix ensuring every vector is linearly independent. * * Args: * search_directions (2d nxn array): the array with vectors to initialize) */ void powell_init_search_directions(mot_float_type search_directions[%(NMR_PARAMS)r][%(NMR_PARAMS)r]){ int i, j; for(i=0; i < %(NMR_PARAMS)r; i++){ for(j=0; j < %(NMR_PARAMS)r; j++){ search_directions[i][j] = (i == j ? 1.0 : 0.0); } } } /** * Checks if Powell should terminate * * Checks the stopping criteria. If the difference between the old function value and the new function value * is lower then a certain threshold, stop. * * Args: * previous_fval: the previous function value * new_fval: the new function value * * Returns: * True if the optimizer should top, False otherwise */ bool powell_fval_diff_within_threshold(mot_float_type previous_fval, mot_float_type new_fval){ return 2.0 * (previous_fval - new_fval) <= POWELL_FUNCTION_TOLERANCE * (fabs(previous_fval) + fabs(new_fval)) + EPSILON; } /** * Finds the linear minimum on the line joining the first and the second data points. 
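/* Illustrative host-side C sketch, not part of the MOT sources: the identity
 * behind the large-x branch of log_cosh above. Since cosh(x) = (e^x + e^-x)/2,
 * log(cosh(x)) = |x| + log(1 + e^(-2|x|)) - log(2), which stays finite even
 * where cosh(x) itself overflows (roughly |x| > 710 in double precision). */
#include <math.h>
#include <stdio.h>

static double log_cosh_stable(double x){
    return fabs(x) + log1p(exp(-2.0 * fabs(x))) - log(2.0);
}

int main(void){
    printf("x=5:    direct=%.12f  stable=%.12f\n", log(cosh(5.0)), log_cosh_stable(5.0));
    printf("x=1000: direct=%f  stable=%f\n", log(cosh(1000.0)), log_cosh_stable(1000.0));
    /* the direct form overflows to inf at x=1000; the stable form gives ~999.306853 */
    return 0;
}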
* * Suppose you have two points, ``p_0`` and ``p_1``, both in R^n. * This function tries to find the minimum function value on points on that line. The first point is supposed to be * fixed and we add to that a multiple of the second point. In other words, this uses a linear line search * to find the ``x`` that minimizes the function ``g(x) = f(p_0 + x * p_1)`` where f(y) is the function the user * tries to optimize with Powell and ``g(x)`` is the linear function we try to optimize in this function. * * Since OpenCL 1.2 does not have lambda expressions we can not use a lambda function to pass to the linear optimization * routine. To solve that we implement the linear optimizer here and set it up to use the * given ``powell_linear_data`` struct to pass the necessary data for optimization. * * Args: * point_0: the static point * point_1: the point we are moving towards * * Modifies: * point_0: set to ``p_0 + x * p_1`` * point_1: set to ``x * p_1`` * * Returns: * the function value at the optimum point found on the line. */ mot_float_type powell_find_linear_minimum( mot_float_type* const point_0, mot_float_type* const point_1, const void* const data){ linear_function_data eval_data = {point_0, point_1, data}; mot_float_type xmin = 0; mot_float_type fval = bracket_and_brent(&xmin, (const void*) &eval_data); for(int j=0; j < %(NMR_PARAMS)r; j++){ point_1[j] *= xmin; point_0[j] += point_1[j]; } return fval; } /** * The linear evaluation function used by the 1d line optimization routine. * * For its usage and reason of existence please check the docs of the function :ref:`powell_find_linear_minimum`. * * Args: * x: the point at which to evaluate the function * eval_data: the data used to evaluate the function. * * Returns: * the function value at the given point */ double powell_linear_eval_function(mot_float_type x, const void* const eval_data){ linear_function_data f_data = *((linear_function_data*)eval_data); mot_float_type xt[%(NMR_PARAMS)r]; for(int j=0; j < %(NMR_PARAMS)r; j++){ xt[j] = f_data.point_0[j] + x * f_data.point_1[j]; } return evaluate(xt, f_data.data); } /** * Do the line searches. This is the first step in the basic procedure in Powell (1964). * * This loops through the search vectors and finds for every search vector the point with the lowest function * value between the starting point and the search vector. During this process the starting point for the next * iteration is set to the optimum value of the current iteration. * * Args: * starting_point: the starting point for the search * search_directions: the nxn array with search directions to loop through * data: the evaluation data * fval: the current best known function value * largest_decrease: - * index_largest_decrease: - * * Modifies: * starting_point: set to the position of the new lowest function point * largest_decrease: set to the search vector that yielded the largest decrease with respect to * the at the time best found function value. 
* index_largest_decrease: the index of the search vector that yielded the largest decrease * * Returns: * the new lowest function value */ mot_float_type powell_do_line_searches( mot_float_type search_directions[%(NMR_PARAMS)r][%(NMR_PARAMS)r], const void* const data, mot_float_type fval, mot_float_type* starting_point, mot_float_type* largest_decrease, int* index_largest_decrease){ int i, j; *index_largest_decrease = 0; *largest_decrease = 0.0; mot_float_type fval_previous; mot_float_type search_vector[%(NMR_PARAMS)r]; for(i = 0; i < %(NMR_PARAMS)r; i++){ for(j = 0; j < %(NMR_PARAMS)r; j++){ search_vector[j] = search_directions[j][i]; } fval_previous = fval; fval = powell_find_linear_minimum(starting_point, search_vector, data); if(fabs(fval_previous - fval) > *largest_decrease){ *largest_decrease = fabs(fval_previous - fval); *index_largest_decrease = i; } } return fval; } #if POWELL_RESET_METHOD == POWELL_RESET_METHOD_EXTRAPOLATED_POINT /** * Evaluate the problem function at an extrapolated point lying between the best point found and the old point. * * This is a method described in Numerical Recipes as part of a way to prevent linear dependency of the search vectors. * We extrapolate the best point by a factor of two and subtract from that the old point. The new point is * evaluated and the resulting function value is returned. * * Args: * new_best_point: the currently found best point * old_point: the old point * data: problem data * * Returns: * the function value at the extrapolated point */ mot_float_type powell_evaluate_extrapolated(mot_float_type* new_best_point, mot_float_type* old_point, const void* const data){ int i; mot_float_type tmp[%(NMR_PARAMS)r]; for(i = 0; i < %(NMR_PARAMS)r; i++){ tmp[i] = 2.0 * new_best_point[i] - old_point[i]; } return evaluate(tmp, data); } /** * Test if Powell should exchange the search directions or not. * * This is the test in Numerical Recipes, other tests can also be used for the other methods. 
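/* Illustrative host-side C sketch, not part of the MOT sources: the
 * Numerical Recipes criterion implemented just below, written as a standalone
 * predicate and evaluated once with made-up numbers. f0 is the value at the
 * start of the iteration, fN the best value found by the line searches, fE
 * the value at the extrapolated point, and delta the largest
 * single-direction decrease. */
#include <stdbool.h>
#include <stdio.h>

static bool should_exchange_demo(double f0, double fN, double fE, double delta){
    if(fE >= f0){
        return false;                    /* extrapolation did not improve at all */
    }
    double lhs = 2.0 * (f0 - 2.0*fN + fE) * (f0 - fN - delta) * (f0 - fN - delta);
    double rhs = delta * (f0 - fE) * (f0 - fE);
    return lhs < rhs;                    /* exchange only if the test passes */
}

int main(void){
    /* hypothetical values: a large overall decrease dominated by one direction */
    printf("exchange? %d\n", should_exchange_demo(10.0, 4.0, 2.0, 5.0));  /* prints 1 */
    return 0;
}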
*/ bool powell_should_exchange_search_directions( mot_float_type fval_at_start_of_iteration, mot_float_type fval_best_found, mot_float_type fval_extrapolated, mot_float_type largest_decrease){ if(fval_extrapolated >= fval_at_start_of_iteration){ return false; } if((2.0 * (fval_at_start_of_iteration - 2.0 * fval_best_found + fval_extrapolated) * (pown(fval_at_start_of_iteration - fval_best_found - largest_decrease, 2))) >= largest_decrease * pown(fval_at_start_of_iteration - fval_extrapolated, 2)){ return false; } return true; } #define SHOULD_EXCHANGE_SEARCH_DIRECTION powell_should_exchange_search_directions(fval_at_start_of_iteration, fval, fval_extrapolated, largest_decrease) #elif POWELL_RESET_METHOD == POWELL_RESET_METHOD_RESET_TO_IDENTITY #define SHOULD_EXCHANGE_SEARCH_DIRECTION true #endif int powell(mot_float_type* model_parameters, const void* const data){ int i, j, index_largest_decrease; int iteration = 0; mot_float_type largest_decrease, fval_at_start_of_iteration, fval_extrapolated; mot_float_type parameters_at_start_of_iteration[%(NMR_PARAMS)r]; mot_float_type search_directions[%(NMR_PARAMS)r][%(NMR_PARAMS)r]; powell_init_search_directions(search_directions); mot_float_type fval = evaluate(model_parameters, data); while(iteration++ < POWELL_MAX_ITERATIONS){ fval_at_start_of_iteration = fval; for(i=0; i < %(NMR_PARAMS)r; i++){ parameters_at_start_of_iteration[i] = model_parameters[i]; } fval = powell_do_line_searches(search_directions, data, fval, model_parameters, &largest_decrease, &index_largest_decrease); if(powell_fval_diff_within_threshold(fval_at_start_of_iteration, fval)){ return 1; } #if POWELL_RESET_METHOD == POWELL_RESET_METHOD_EXTRAPOLATED_POINT fval_extrapolated = powell_evaluate_extrapolated(model_parameters, parameters_at_start_of_iteration, data); #endif if(SHOULD_EXCHANGE_SEARCH_DIRECTION){ #if POWELL_RESET_METHOD == POWELL_RESET_METHOD_EXTRAPOLATED_POINT for(i = 0; i < %(NMR_PARAMS)r; i++){ // remove the one with the largest increase (see Numerical Recipes) search_directions[i][index_largest_decrease] = search_directions[i][%(NMR_PARAMS)r-1]; // add p_n - p_0, see Powell 1964. search_directions[i][%(NMR_PARAMS)r-1] = model_parameters[i] - parameters_at_start_of_iteration[i]; } #elif POWELL_RESET_METHOD == POWELL_RESET_METHOD_RESET_TO_IDENTITY if((iteration + 1) %% %(NMR_PARAMS)r == 0){ powell_init_search_directions(search_directions); } else{ for(i = 0; i < %(NMR_PARAMS)r; i++){ for(j = 0; j < %(NMR_PARAMS)r - 1; j++){ search_directions[i][j] = search_directions[i][j+1]; } // add p_n - p_0, see Powell 1964. 
search_directions[i][%(NMR_PARAMS)r-1] = model_parameters[i] - parameters_at_start_of_iteration[i]; } } #endif // this uses ``parameters_at_start_of_iteration`` to find the last function minimum, this saves an array for(i = 0; i < %(NMR_PARAMS)r; i++){ parameters_at_start_of_iteration[i] = model_parameters[i] - parameters_at_start_of_iteration[i]; } fval = powell_find_linear_minimum(model_parameters, parameters_at_start_of_iteration, data); } } return 6; } mot_float_type bracket_and_brent(mot_float_type* xmin, const void* const eval_data){ mot_float_type ax = 0.0; mot_float_type bx = 1.0; mot_float_type cx; mot_float_type ulim, u, r, q, fu, tmp; mot_float_type maxarg = 0.0; mot_float_type fa = 0.0; mot_float_type fb = 0.0; mot_float_type fc = 0.0; fa = powell_linear_eval_function(ax, eval_data); fb = powell_linear_eval_function(bx, eval_data); if(fb > fa){ swap(&ax, &bx); swap(&fa, &fb); } cx = bx + BRACKET_GOLD * (bx - ax); fc = powell_linear_eval_function(cx, eval_data); while(fb > fc){ r = (bx - ax) * (fb - fc); q = (bx - cx) * (fb - fa); maxarg = fmax(fabs(q-r), (mot_float_type)EPSILON); u = (bx) - ((bx - cx) * q - (bx - ax) * r) / (2.0 * copysign(maxarg, q-r)); ulim = (bx) + GLIMIT * (cx - bx); if((bx - u) * (u - cx) > 0.0){ fu = powell_linear_eval_function(u, eval_data); if(fu < fc){ ax = bx; bx = u; fa = fb; fb = fu; break; } else if(fu > fb){ cx = u; fc = fu; break; } u = (cx) + BRACKET_GOLD * (cx - bx); fu = powell_linear_eval_function(u, eval_data); } else if((cx - u) * (u - ulim) > 0.0){ fu = powell_linear_eval_function(u, eval_data); if(fu < fc){ bx = cx; cx = u; u = cx+BRACKET_GOLD*(cx-bx); fb = fc; fc = fu; fu = powell_linear_eval_function(u, eval_data); } } else if((u - ulim) * (ulim - cx) >= 0.0){ u = ulim; fu = powell_linear_eval_function(u, eval_data); } else{ u = (cx) + BRACKET_GOLD * (cx - bx); fu = powell_linear_eval_function(u, eval_data); } ax = bx; bx = cx; cx = u; fa = fb; fb = fc; fc = fu; } /** from here starts brent */ /** I inlined this function to save memory. Please view the original implementation for the details. */ mot_float_type d, fx, fv, fw; mot_float_type p, tol1, tol2, v, w, x, xm; mot_float_type e=0.0; int iter; mot_float_type a=(ax < cx ? ax : cx); mot_float_type b=(ax > cx ? ax : cx); x=w=v=bx; fw=fv=fx=powell_linear_eval_function(x, eval_data); #pragma unroll 1 for(iter=0; iter < BRENT_MAX_ITERATIONS; iter++){ xm = 0.5 * (a + b); tol1 = BRENT_TOL * fabs(x) + ZEPS; tol2 = 2.0 * tol1; if(fabs(x - xm) <= (tol2 - 0.5 * (b - a))){ *xmin = x; return fx; } if(fabs(e) > tol1){ r = (x - w) * (fx - fv); q = (x - v) * (fx - fw); p = (x - v) * q - (x - w) * r; q = 2.0 * (q - r); if(q > 0.0){ p = -p; } q = fabs(q); tmp = e; e = d; if(fabs(p) >= fabs(0.5 * q * tmp) || p <= q * (a - x) || p >= q * (b - x)){ e = (x >= xm ? a : b) - x; d = CGOLD * e; } else { d = p / q; u = x + d; if(u - a < tol2 || b - u < tol2){ d = copysign(tol1, xm - x); } } } else{ e = (x >= xm ? a : b) - x; d = CGOLD * e; } u = (fabs(d) >= tol1 ? 
x + d : x + copysign(tol1, d)); fu = powell_linear_eval_function(u, eval_data); if(fu <= fx){ if(u >= x){ a=x; } else{ b=x; } v = w; w = x; x = u; fv = fw; fw = fx; fx = fu; } else { if(u < x){ a=u; } else{ b=u; } if (fu <= fw || w == x) { v=w; w=u; fv=fw; fw=fu; } else if(fu <= fv || v == x || v == w){ v=u; fv=fu; } } } *xmin=x; return fx; } #undef PATIENCE #undef MAX_ITERATIONS #undef POWELL_FUNCTION_TOLERANCE #undef BRENT_TOL #undef BRACKET_GOLD #undef GLIMIT #undef EPSILON #undef CGOLD #undef ZEPS #undef POWELL_RESET_METHOD_RESET_TO_IDENTITY #undef POWELL_RESET_METHOD_EXTRAPOLATED_POINT #undef POWELL_RESET_METHOD #undef SHOULD_EXCHANGE_SEARCH_DIRECTION #endif // POWELL_CL PKuI-$mot/data/opencl/firstLegendreTerm.cl#ifndef FIRST_LEGENDRE_TERM_CL #define FIRST_LEGENDRE_TERM_CL /** * Author = Robbert Harms * Date = 2014-02-01 * License = LGPL v3 * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ //////////////////////////////////////////////////////////////////////////////// // double getFirstLegendreTerm(double x, int n) // // // // Description: // // The Legendre polynomials, Pn(x), are orthogonal on the interval [-1,1] // // with weight function w(x) = 1 for -1 <= x <= 1 and 0 elsewhere. They // // are normalized so that Pn(1) = 1. The inner products are: // // = 0 if n != m, // // = 2/(2n+1) if n >= 0. // // This routine calculates Pn(x) using the following recursion: // // (k+1) P[k+1](x) = (2k+1)x P[k](x) - k P[k-1](x), k = 1,...,n-1 // // P[0](x) = 1, P[1](x) = x. // // // // Arguments: // // double x // // The argument of the Legendre polynomial Pn. // // int n // // The degree of the Legendre polynomial Pn. // // // // Return Value: // // Pn(x) if n is a nonnegative integer. If n is negative, 0 is returned. // // // // Example: // // double Pn; // // double x; // // int n; // // // // (user code to set x and n) // // // // Pn = xLegendre_Pn(x, n); // //////////////////////////////////////////////////////////////////////////////// /** * Berenger (contact at berenger dot eu) * This is the source code to construct the legendre polynome in C * This is fast but you can improve the code by using pointer instead of * accessing using index on the array and to compute (2*l-1) with a recurrence. 
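/* Illustrative host-side C sketch, not part of the MOT sources: the
 * three-term recursion used by getFirstLegendreTerm below,
 * (k+1) P[k+1](x) = (2k+1) x P[k](x) - k P[k-1](x), checked against the
 * closed form P3(x) = (5x^3 - 3x)/2. The helper name legendre_demo is made
 * up for this sketch. */
#include <stdio.h>

static double legendre_demo(double x, int n){
    if(n < 0){ return 0.0; }
    if(n == 0){ return 1.0; }
    double p0 = 1.0, p1 = x, pn = x;
    for(int k = 1; k < n; k++){
        pn = ((2*k + 1) * x * p1 - k * p0) / (k + 1);
        p0 = p1;
        p1 = pn;
    }
    return pn;
}

int main(void){
    double x = 0.5;
    printf("recursion:   P3(0.5) = %.6f\n", legendre_demo(x, 3));
    printf("closed form: (5x^3 - 3x)/2 = %.6f\n", (5*x*x*x - 3*x) / 2.0);  /* -0.4375 */
    return 0;
}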
* Ref: Fast and accurate determination of the Wigner rotation matrices in FMM * url: http://berenger.eu/blog/c-legendre-polynomial-by-recurrence-programming/ */ /** * Compute the first term of the legendre polynome for the given value x and the polynomial degree n */ double getFirstLegendreTerm(const double x, const int n){ if (n < 0){ return 0.0; } if(fabs(x) == 1.0){ if(x > 0.0 || n % 2 == 0){ return 1.0; } return -1.0; } if (n == 0){ return 1.0; } if (n == 1){ return x; } double P0 = 1.0; double P1 = x; double Pn; for(int k = 1; k < n; k++){ Pn = ((2 * k + 1) * x * P1 - (k * P0)) / (k + 1); P0 = P1; P1 = Pn; } return Pn; } #endif // FIRST_LEGENDRE_TERM_CL PKH[g !mot/data/opencl/euclidian_norm.cl#ifndef EUCLIDIAN_NORM_%(MEMSPACE)s_%(MEMTYPE)s_CL #define EUCLIDIAN_NORM_%(MEMSPACE)s_%(MEMTYPE)s_CL /** * Author = Robbert Harms * Date = 2014-02-01 * License = LGPL v3 * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ #ifndef ENORM_SQRT_GIANT #define ENORM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */ #endif #ifndef ENORM_SQRT_DWARF #define ENORM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */ #endif /*****************************************************************************/ /* euclidian_norm (Euclidean norm) */ /*****************************************************************************/ %(MEMTYPE)s euclidian_norm_%(MEMSPACE)s(const %(MEMSPACE)s %(MEMTYPE)s* const x, const int n){ /* Given an n-vector x, this function calculates the * euclidean norm of x. * * The euclidean norm is computed by accumulating the sum of * squares in three different sums. The sums of squares for the * small and large components are scaled so that no overflows * occur. Non-destructive underflows are permitted. Underflows * and overflows do not occur in the computation of the unscaled * sum of squares for the intermediate components. * The definitions of small, intermediate and large components * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main * restrictions on these constants are that LM_SQRT_DWARF**2 not * underflow and LM_SQRT_GIANT**2 not overflow. * * Parameters * * n is a positive integer input variable. * * x is an input array of length n. */ int i; %(MEMTYPE)s s1, s2, s3, xabs, x1max, x3max, sqrt_n_tmp; s1 = 0; s2 = 0; s3 = 0; x1max = 0; x3max = 0; sqrt_n_tmp = ENORM_SQRT_GIANT / n; /** sum squares. **/ for (i = 0; i < n; i++) { xabs = fabs(x[i]); if (xabs > ENORM_SQRT_DWARF) { if ( xabs < sqrt_n_tmp ) { s2 += xabs * xabs; } else if ( xabs > x1max ) { s1 = s1 * ((x1max / xabs) * (x1max / xabs)) + 1; x1max = xabs; } else { s1 += ((xabs / x1max) * (xabs / x1max)); } } else if ( xabs > x3max ) { s3 = s3 * ((x3max / xabs) * (x3max / xabs)) + 1; x3max = xabs; } else if (xabs != 0.) { s3 += ((xabs / x3max) * (xabs / x3max)); } } /** calculation of norm. **/ if (s1 != 0){ return x1max * sqrt(s1 + (s2 / x1max) / x1max); } else if(s2 != 0){ if(s2 >= x3max){ return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3))); } else{ return sqrt(x3max * ((s2 / x3max) + (x3max * s3))); } } else{ return x3max * sqrt(s3); } } /*** euclidian_norm. ***/ #endif // EUCLIDIAN_NORM_%(MEMSPACE)s_%(MEMTYPE)s_CL PK@_bJC  mot/data/opencl/nmsimplex.cl#ifndef NMSIMPLEX_CL #define NMSIMPLEX_CL /** * Author = Robbert Harms * Date = 2014-09-29 * License = see hereunder * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ /* * Program: nmsimplex.c * Author : Michael F. 
Hutt * http://www.mikehutt.com * 11/3/97 * * An implementation of the Nelder-Mead simplex method. * * Copyright (c) 1997-2011 * * (Licence: X11 license) * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * Jan. 6, 1999 * Modified to conform to the algorithm presented * in Margaret H. Wright's paper on Direct Search Methods. * * Jul. 23, 2007 * Fixed memory leak. * * Mar. 1, 2011 * Added constraints. * * 2014 * Removed constraints since MOT features parameter transformations */ #define PATIENCE %(PATIENCE)r /* Used to set the maximum number of iterations to patience * (number_of_parameters + 1). */ #define MAX_IT (PATIENCE * (%(NMR_PARAMS)r+1)) #define ALPHA %(ALPHA)r /* reflection coefficient, default 1 */ #define BETA %(BETA)r /* contraction coefficient, default 0.5 */ #define GAMMA %(GAMMA)r /* expansion coefficient default 2 */ #define DELTA %(DELTA)r /* reduction coefficient default 0.5 */ #define USER_TOL_X 30*MOT_EPSILON /** the precision we break at*/ int nmsimplex(mot_float_type* const model_parameters, const void* const data){ int return_code = 6; /** the default return code is that we exhausted our patience */ int vs; /* vertex with smallest value */ int vh; /* vertex with next smallest value */ int vg; /* vertex with largest value */ int i, j; /** helper variables */ int itr; /* track the number of iterations */ double tmp; mot_float_type fr; /* value of function at reflection point */ mot_float_type fe; /* value of function at expansion point */ mot_float_type fc; /* value of function at contraction point */ mot_float_type vm[%(NMR_PARAMS)r]; /* centroid - coordinates */ mot_float_type vr[%(NMR_PARAMS)r]; /* reflection - coordinates */ mot_float_type ve_vc[%(NMR_PARAMS)r]; /* expansion - coordinates, & contraction - coordinates, that is, we use this variable at two points for different purposes.*/ mot_float_type vertices[%(NMR_PARAMS)r + 1][%(NMR_PARAMS)r]; /* holds vertices of simplex */ mot_float_type func_vals[%(NMR_PARAMS)r + 1]; /* value of function at each vertex */ /** the scale of the initial simplex, should be set by python code as a string: {v1, v2, ...} */ mot_float_type simplex_scale[%(NMR_PARAMS)r] = %(INITIAL_SIMPLEX_SCALES)s; /* * Create the initial simplex. * We assume one of the vertices is 0,0 * Furthermore we set x_0 = x_input to allow for proper restarts. 
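/* Illustrative host-side C sketch, not part of the MOT sources: the
 * initial-simplex construction used just below produces, before the per-axis
 * scaling, a regular simplex with unit edge length. For n = 2 the two offset
 * vertices are (p, q) and (q, p) with q = (sqrt(n+1) - 1) / (n*sqrt(2)) and
 * p = q + 1/sqrt(2); every pairwise distance comes out as 1. */
#include <math.h>
#include <stdio.h>

int main(void){
    const double n = 2.0;
    const double sqrt2 = sqrt(2.0);
    double q = (sqrt(n + 1.0) - 1.0) / (n * sqrt2);
    double p = q + n / (n * sqrt2);
    double v1[2] = {p, q}, v2[2] = {q, p};          /* the third vertex is the origin */
    printf("|v1 - 0|  = %.6f\n", hypot(v1[0], v1[1]));
    printf("|v2 - 0|  = %.6f\n", hypot(v2[0], v2[1]));
    printf("|v2 - v1| = %.6f\n", hypot(v2[0] - v1[0], v2[1] - v1[1]));
    return 0;
}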
*/ for (i=0;i<%(NMR_PARAMS)r;i++) { vertices[0][i] = model_parameters[i]; } for (i=1;i<=%(NMR_PARAMS)r;i++) { for (j=0;j<%(NMR_PARAMS)r;j++) { vertices[i][j] = sqrt(%(NMR_PARAMS)r + 1.0) - 1; if (i-1 == j){ vertices[i][j] += %(NMR_PARAMS)r; } vertices[i][j] /= (%(NMR_PARAMS)r * M_SQRT2); /** vertices now contains the unit vector e_j in R^n. */ /** set x_j = x_input + h_j * e_j */ vertices[i][j] = model_parameters[j] + simplex_scale[i-1] * vertices[i][j]; } } /* find the initial function values */ for (j=0;j<=%(NMR_PARAMS)r;j++) { func_vals[j] = evaluate(vertices[j], data); } /* begin the main loop of the minimization */ for (itr=0; itr <= MAX_IT; itr++) { /* find the index of the largest and smallest value */ vg=0; vs=0; for (j=0;j<=%(NMR_PARAMS)r;j++) { /* find largest */ if (func_vals[j] > func_vals[vg]) { vg = j; } /* find smallest */ if (func_vals[j] < func_vals[vs]) { vs = j; } } /* find the index of the second largest value */ vh=vs; for (j=0;j<=%(NMR_PARAMS)r;j++) { if (func_vals[j] > func_vals[vh] && func_vals[j] < func_vals[vg]) { vh = j; } } /* calculate the centroid */ for (j=0;j<=%(NMR_PARAMS)r-1;j++) { tmp=0.0; for (i=0;i<=%(NMR_PARAMS)r;i++) { if (i!=vg) { tmp += vertices[i][j]; } } vm[j] = tmp/%(NMR_PARAMS)r; } /* reflect vg to new vertex vr */ for (j=0;j<=%(NMR_PARAMS)r-1;j++) { vr[j] = vm[j] + ALPHA * (vm[j] - vertices[vg][j]); } fr = evaluate(vr, data); if (fr < func_vals[vh] && fr >= func_vals[vs]) { for (j=0; j <= %(NMR_PARAMS)r-1; j++){ vertices[vg][j] = vr[j]; } func_vals[vg] = fr; } /* investigate a step further in this direction */ if(fr < func_vals[vs]){ for (j=0;j<=%(NMR_PARAMS)r-1;j++) { /** ve_vc here used as ve */ ve_vc[j] = vm[j] + GAMMA * (vr[j] - vm[j]); } fe = evaluate(ve_vc, data); if (fe < fr){ for (j=0;j<=%(NMR_PARAMS)r-1;j++) { vertices[vg][j] = ve_vc[j]; } func_vals[vg] = fe; } else { for (j=0;j<=%(NMR_PARAMS)r-1;j++) { vertices[vg][j] = vr[j]; } func_vals[vg] = fr; } } /* check to see if a contraction is necessary */ if (fr >= func_vals[vh]) { if (fr < func_vals[vg] && fr >= func_vals[vh]) { /* perform outside contraction */ for (j=0;j<=%(NMR_PARAMS)r-1;j++) { /** ve_vc here used as vc */ ve_vc[j] = vm[j] + BETA * (vr[j]-vm[j]); } } else { /* perform inside contraction */ for (j=0;j<=%(NMR_PARAMS)r-1;j++) { ve_vc[j] = vm[j] - BETA * (vm[j] - vertices[vg][j]); } } fc = evaluate(ve_vc, data); if (fc < func_vals[vg]) { for (j=0;j<=%(NMR_PARAMS)r-1;j++) { vertices[vg][j] = ve_vc[j]; } func_vals[vg] = fc; } else { /* at this point the contraction is not successful, we must reduce (by default halve) the distance from vs to all the vertices of the simplex and then continue. 
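/* Illustrative host-side C sketch, not part of the MOT sources: the
 * reflection step of the loop above for a two-parameter simplex. The worst
 * vertex vg is reflected through the centroid vm of the remaining vertices
 * using the default reflection coefficient ALPHA = 1. */
#include <stdio.h>

int main(void){
    double vertices[3][2] = {{0, 0}, {1, 0}, {0, 1}};
    int vg = 0;                          /* pretend (0, 0) has the worst value */
    double vm[2] = {0, 0}, vr[2];
    for(int j = 0; j < 2; j++){
        for(int i = 0; i < 3; i++){
            if(i != vg){ vm[j] += vertices[i][j]; }
        }
        vm[j] /= 2.0;                                /* centroid of the other two */
        vr[j] = vm[j] + 1.0 * (vm[j] - vertices[vg][j]);
    }
    printf("centroid (%.2f, %.2f), reflection (%.2f, %.2f)\n", vm[0], vm[1], vr[0], vr[1]);
    return 0;
}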
*/ for (i=0;i<=%(NMR_PARAMS)r;i++) { if (i != vs) { for (j=0;j<=%(NMR_PARAMS)r-1;j++) { vertices[i][j] = vertices[vs][j] + (vertices[i][j]-vertices[vs][j]) * DELTA; } } } func_vals[vg] = evaluate(vertices[vg], data); func_vals[vh] = evaluate(vertices[vh], data); } } /* test for convergence */ tmp = 0.0; for (j=0;j<=%(NMR_PARAMS)r;j++) { tmp += func_vals[j]; } /** fr here used as tmp dummy */ fr = tmp/(%(NMR_PARAMS)r+1); tmp = 0.0; for (j=0;j<=%(NMR_PARAMS)r;j++) { tmp += ((func_vals[j]-fr) * (func_vals[j]-fr)) / (%(NMR_PARAMS)r); } tmp = sqrt(tmp); if (tmp < USER_TOL_X){ return_code = 1; break; } } /* end main loop of the minimization */ /* find the index of the largest and smallest value */ vs = 0; for (j=0;j<=%(NMR_PARAMS)r;j++) { /* find smallest */ if (func_vals[j] < func_vals[vs]) { vs = j; } } for (j=0;j<%(NMR_PARAMS)r;j++) { model_parameters[j] = vertices[vs][j]; } return return_code; } #undef PATIENCE #undef MAX_IT #undef ALPHA #undef BETA #undef GAMMA #undef USER_TOL_X #endif // NMSIMPLEX_CL PKH mot/data/opencl/bessel.cl/** * Author = Robbert Harms * Date = 2016-02-09 * License = LGPL v3 * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ /* Copied from: bessel.c Copyright (c) 1998 Kapteyn Institute Groningen All Rights Reserved. */ /* #> bessel.dc2 Function: BESSEL Purpose: Evaluate Bessel function J, Y, I, K of integer order. Category: MATH File: bessel.c Author: M.G.R. Vogelaar Use: See bessj.dc2, bessy.dc2, bessi.dc2 or bessk.dc2 Description: The differential equation 2 2 d w dw 2 2 x . --- + x . --- + (x - v ).w = 0 2 dx dx has two solutions called Bessel functions of the first kind Jv(x) and Bessel functions of the second kind Yv(x). The routines bessj and bessy return the J and Y for integer v and therefore are called Bessel functions of integer order. The differential equation 2 2 d w dw 2 2 x . --- + x . --- - (x + v ).w = 0 2 dx dx has two solutions called modified Bessel functions Iv(x) and Kv(x). The routines bessi and bessk return the I and K for integer v and therefore are called Modified Bessel functions of integer order. (Abramowitz & Stegun, Handbook of mathematical functions, ch. 9, pages 358,- and 374,- ) The implementation is based on the ideas from Numerical Recipes, Press et. al. This routine is NOT callable in FORTRAN. Updates: Jun 29, 1998: VOG, Document created. #< */ /*------------------------------------------------------------*/ /* Zeroth-order modified Bessel function of the first kind. */ /*------------------------------------------------------------*/ double bessel_i0(double x){ double y; if(fabs(x) < 3.75f){ y = (x/3.75) * (x/3.75); return 1.0+y*(3.5156229+y*(3.0899424+y*(1.2067492+y*(0.2659732+y*(0.360768e-1+y*0.45813e-2))))); } y=3.75/fabs(x); return (exp(fabs(x))/sqrt(fabs(x)))*(0.39894228+y*(0.1328592e-1 +y*(0.225319e-2+y*(-0.157565e-2+y*(0.916281e-2 +y*(-0.2057706e-1+y*(0.2635537e-1+y*(-0.1647633e-1 +y*0.392377e-2)))))))); } /** * Return the log of the zeroth-order modified Bessel function of the first kind. */ double log_bessel_i0(double x){ if(x < 700){ return log(bessel_i0(x)); } return x - log(2.0 * M_PI * x)/2.0; } PK[ISF *mot/data/opencl/random123/openclfeatures.h/* Copyright 2010-2011, D. E. Shaw Research. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of D. E. Shaw Research nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __openclfeatures_dot_hpp #define __openclfeatures_dot_hpp #ifndef R123_STATIC_INLINE #define R123_STATIC_INLINE inline #endif #ifndef R123_FORCE_INLINE #define R123_FORCE_INLINE(decl) decl __attribute__((always_inline)) #endif #ifndef R123_CUDA_DEVICE #define R123_CUDA_DEVICE #endif #ifndef R123_ASSERT #define R123_ASSERT(x) #endif #ifndef R123_BUILTIN_EXPECT #define R123_BUILTIN_EXPECT(expr,likely) expr #endif #ifndef R123_USE_GNU_UINT128 #define R123_USE_GNU_UINT128 0 #endif #ifndef R123_USE_MULHILO64_ASM #define R123_USE_MULHILO64_ASM 0 #endif #ifndef R123_USE_MULHILO64_MSVC_INTRIN #define R123_USE_MULHILO64_MSVC_INTRIN 0 #endif #ifndef R123_USE_MULHILO64_CUDA_INTRIN #define R123_USE_MULHILO64_CUDA_INTRIN 0 #endif #ifndef R123_USE_MULHILO64_OPENCL_INTRIN #define R123_USE_MULHILO64_OPENCL_INTRIN 1 #endif #ifndef R123_USE_AES_NI #define R123_USE_AES_NI 0 #endif // XXX ATI APP SDK 2.4 clBuildProgram SEGVs if one uses uint64_t instead of // ulong to mul_hi. And gets lots of complaints from stdint.h // on some machines. // But these typedefs mean we cannot include stdint.h with // these headers? Do we need R123_64T, R123_32T, R123_8T? typedef ulong uint64_t; typedef uint uint32_t; typedef uchar uint8_t; #define UINT64_C(x) ((ulong)(x##UL)) #endif PK[IhzBB!mot/data/opencl/random123/array.h/* Copyright 2010-2011, D. E. Shaw Research. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of D. E. Shaw Research nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _r123array_dot_h__ #define _r123array_dot_h__ #ifndef __cplusplus #define CXXMETHODS(_N, W, T) #define CXXOVERLOADS(_N, W, T) #else #include #include #include #include #include #include /** @defgroup arrayNxW The r123arrayNxW classes Each of the r123arrayNxW is a fixed size array of N W-bit unsigned integers. It is functionally equivalent to the C++0x std::array, but does not require C++0x features or libraries. In addition to meeting most of the requirements of a Container, it also has a member function, incr(), which increments the zero-th element and carrys overflows into higher indexed elements. Thus, by using incr(), sequences of up to 2^(N*W) distinct values can be produced. If SSE is supported by the compiler, then the class r123array1xm128i is also defined, in which the data member is an array of one r123128i object. @cond HIDDEN_FROM_DOXYGEN */ template inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){ value_type v=0; for(size_t i=0; i<(3+sizeof(value_type))/4; ++i) v |= ((value_type)(*p32++)) << (32*i); return v; } // Work-alike methods and typedefs modeled on std::array: #define CXXMETHODS(_N, W, T) \ typedef T value_type; \ typedef T* iterator; \ typedef const T* const_iterator; \ typedef value_type& reference; \ typedef const value_type& const_reference; \ typedef size_t size_type; \ typedef ptrdiff_t difference_type; \ typedef T* pointer; \ typedef const T* const_pointer; \ typedef std::reverse_iterator reverse_iterator; \ typedef std::reverse_iterator const_reverse_iterator; \ /* Boost.array has static_size. 
C++11 specializes tuple_size */ \ enum {static_size = _N}; \ R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \ R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \ R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \ R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \ R123_CUDA_DEVICE size_type size() const { return _N; } \ R123_CUDA_DEVICE size_type max_size() const { return _N; } \ R123_CUDA_DEVICE bool empty() const { return _N==0; }; \ R123_CUDA_DEVICE iterator begin() { return &v[0]; } \ R123_CUDA_DEVICE iterator end() { return &v[_N]; } \ R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \ R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \ R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \ R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \ R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \ R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \ R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \ R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \ R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \ R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \ R123_CUDA_DEVICE pointer data(){ return &v[0]; } \ R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \ R123_CUDA_DEVICE reference front(){ return v[0]; } \ R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \ R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \ R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \ R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \ /* CUDA3 does not have std::equal */ \ for (size_t i = 0; i < _N; ++i) \ if (v[i] != rhs.v[i]) return false; \ return true; \ } \ R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \ /* CUDA3 does not have std::fill_n */ \ R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \ R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \ /* CUDA3 does not have std::swap_ranges */ \ for (size_t i = 0; i < _N; ++i) { \ T tmp = v[i]; \ v[i] = rhs.v[i]; \ rhs.v[i] = tmp; \ } \ } \ R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ \ /* This test is tricky because we're trying to avoid spurious \ complaints about illegal shifts, yet still be compile-time \ evaulated. 
*/ \ if(sizeof(T)>((sizeof(T)3?3:0] is to silence \ a spurious error from icpc \ */ \ ++v[_N>1?1:0]; \ if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \ ++v[_N>2?2:0]; \ if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \ ++v[_N>3?3:0]; \ for(size_t i=4; i<_N; ++i){ \ if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \ ++v[i]; \ } \ return *this; \ } \ /* seed(SeedSeq) would be a constructor if having a constructor */ \ /* didn't cause headaches with defaults */ \ template \ R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \ r123array##_N##x##W ret; \ const size_t Ngen = _N*((3+sizeof(value_type))/4); \ uint32_t u32[Ngen]; \ uint32_t *p32 = &u32[0]; \ ss.generate(&u32[0], &u32[Ngen]); \ for(size_t i=0; i<_N; ++i){ \ ret.v[i] = assemble_from_u32(p32); \ p32 += (3+sizeof(value_type))/4; \ } \ return ret; \ } \ protected: \ R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \ /* n may be greater than the maximum value of a single value_type */ \ value_type vtn; \ vtn = n; \ v[0] += n; \ const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \ for(size_t i=1; i<_N; ++i){ \ if(rshift){ \ n >>= rshift; \ }else{ \ n=0; \ } \ if( v[i-1] < vtn ) \ ++n; \ if( n==0 ) break; \ vtn = n; \ v[i] += n; \ } \ return *this; \ } \ // There are several tricky considerations for the insertion and extraction // operators: // - we would like to be able to print r123array16x8 as a sequence of 16 integers, // not as 16 bytes. // - we would like to be able to print r123array1xm128i. // - we do not want an int conversion operator in r123m128i because it causes // lots of ambiguity problems with automatic promotions. // Solution: r123arrayinsertable and r123arrayextractable template struct r123arrayinsertable{ const T& v; r123arrayinsertable(const T& t_) : v(t_) {} friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable& t){ return os << t.v; } }; template<> struct r123arrayinsertable{ const uint8_t& v; r123arrayinsertable(const uint8_t& t_) : v(t_) {} friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable& t){ return os << (int)t.v; } }; template struct r123arrayextractable{ T& v; r123arrayextractable(T& t_) : v(t_) {} friend std::istream& operator>>(std::istream& is, r123arrayextractable& t){ return is >> t.v; } }; template<> struct r123arrayextractable{ uint8_t& v; r123arrayextractable(uint8_t& t_) : v(t_) {} friend std::istream& operator>>(std::istream& is, r123arrayextractable& t){ int i; is >> i; t.v = i; return is; } }; #define CXXOVERLOADS(_N, W, T) \ \ inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \ os << r123arrayinsertable(a.v[0]); \ for(size_t i=1; i<_N; ++i) \ os << " " << r123arrayinsertable(a.v[i]); \ return os; \ } \ \ inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \ for(size_t i=0; i<_N; ++i){ \ r123arrayextractable x(a.v[i]); \ is >> x; \ } \ return is; \ } \ \ namespace r123{ \ typedef r123array##_N##x##W Array##_N##x##W; \ } #endif /* __cplusplus */ /* _r123array_tpl expands to a declaration of struct r123arrayNxW. In C, it's nothing more than a struct containing an array of N objects of type T. In C++ it's the same, but endowed with an assortment of member functions, typedefs and friends. In C++, r123arrayNxW looks a lot like std::array, has most of the capabilities of a container, and satisfies the requirements outlined in compat/Engine.hpp for counter and key types. 
ArrayNxW, in the r123 namespace is a typedef equivalent to r123arrayNxW. */ #define _r123array_tpl(_N, W, T) \ /** @ingroup arrayNxW */ \ /** @see arrayNxW */ \ struct r123array##_N##x##W{ \ T v[_N]; \ CXXMETHODS(_N, W, T) \ }; \ \ CXXOVERLOADS(_N, W, T) /** @endcond */ _r123array_tpl(1, 32, uint32_t) /* r123array1x32 */ _r123array_tpl(2, 32, uint32_t) /* r123array2x32 */ _r123array_tpl(4, 32, uint32_t) /* r123array4x32 */ _r123array_tpl(8, 32, uint32_t) /* r123array8x32 */ _r123array_tpl(1, 64, uint64_t) /* r123array1x64 */ _r123array_tpl(2, 64, uint64_t) /* r123array2x64 */ _r123array_tpl(4, 64, uint64_t) /* r123array4x64 */ _r123array_tpl(16, 8, uint8_t) /* r123array16x8 for ARSsw, AESsw */ #if R123_USE_SSE _r123array_tpl(1, m128i, r123m128i) /* r123array1x128i for ARSni, AESni */ #endif /* In C++, it's natural to use sizeof(a::value_type), but in C it's pretty convoluted to figure out the width of the value_type of an r123arrayNxW: */ #define R123_W(a) (8*sizeof(((a *)0)->v[0])) /** @namespace r123 Most of the Random123 C++ API is contained in the r123 namespace. */ #endif PK[Iyc TT"mot/data/opencl/random123/philox.h/* Copyright 2010-2011, D. E. Shaw Research. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of D. E. Shaw Research nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _philox_dot_h_ #define _philox_dot_h_ /** \cond HIDDEN_FROM_DOXYGEN */ /* // Macros _Foo_tpl are code generation 'templates' They define // inline functions with names obtained by mangling Foo and the // macro arguments. E.g., // _mulhilo_tpl(32, uint32_t, uint64_t) // expands to a definition of: // mulhilo32(uint32_t, uint32_t, uint32_t *, uint32_t *) // We then 'instantiate the template' to define // several different functions, e.g., // mulhilo32 // mulhilo64 // These functions will be visible to user code, and may // also be used later in subsequent templates and definitions. // A template for mulhilo using a temporary of twice the word-width. // Gcc figures out that this can be reduced to a single 'mul' instruction, // despite the apparent use of double-wide variables, shifts, etc. 
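/* Illustrative C sketch, not part of the Random123 sources: the double-wide
 * mulhilo idiom described in the comment above and defined by
 * _mulhilo_dword_tpl just below, written out for 32-bit words. A C99 compiler
 * reduces this to a single widening multiply; under the OpenCL feature flags
 * set in openclfeatures.h, the 64-bit variant falls through to the mul_hi
 * intrinsic instead. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t mulhilo32_demo(uint32_t a, uint32_t b, uint32_t *hip){
    uint64_t product = (uint64_t)a * (uint64_t)b;
    *hip = (uint32_t)(product >> 32);    /* high word */
    return (uint32_t)product;            /* low word */
}

int main(void){
    uint32_t hi;
    uint32_t lo = mulhilo32_demo(0xd256d193u, 7u, &hi);
    printf("hi=0x%08" PRIx32 " lo=0x%08" PRIx32 "\n", hi, lo);
    return 0;
}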
It's // obviously not guaranteed that all compilers will be that smart, so // other implementations might be preferable, e.g., using an intrinsic // or an asm block. On the other hand, for 32-bit multiplies, // this *is* perfectly standard C99 - any C99 compiler should // understand it and produce correct code. For 64-bit multiplies, // it's only usable if the compiler recognizes that it can do // arithmetic on a 128-bit type. That happens to be true for gcc on // x86-64, and powerpc64 but not much else. */ #define _mulhilo_dword_tpl(W, Word, Dword) \ R123_CUDA_DEVICE R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){ \ Dword product = ((Dword)a)*((Dword)b); \ *hip = product>>W; \ return (Word)product; \ } /* // A template for mulhilo using gnu-style asm syntax. // INSN can be "mulw", "mull" or "mulq". // FIXME - porting to other architectures, we'll need still-more conditional // branching here. Note that intrinsics are usually preferable. */ #ifdef __powerpc__ #define _mulhilo_asm_tpl(W, Word, INSN) \ R123_STATIC_INLINE Word mulhilo##W(Word ax, Word b, Word *hip){ \ Word dx = 0; \ __asm__("\n\t" \ INSN " %0,%1,%2\n\t" \ : "=r"(dx) \ : "r"(b), "r"(ax) \ ); \ *hip = dx; \ return ax*b; \ } #else #define _mulhilo_asm_tpl(W, Word, INSN) \ R123_STATIC_INLINE Word mulhilo##W(Word ax, Word b, Word *hip){ \ Word dx; \ __asm__("\n\t" \ INSN " %2\n\t" \ : "=a"(ax), "=d"(dx) \ : "r"(b), "0"(ax) \ ); \ *hip = dx; \ return ax; \ } #endif /* __powerpc__ */ /* // A template for mulhilo using MSVC-style intrinsics // For example,_umul128 is an msvc intrinsic, c.f. // http://msdn.microsoft.com/en-us/library/3dayytw9.aspx */ #define _mulhilo_msvc_intrin_tpl(W, Word, INTRIN) \ R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){ \ return INTRIN(a, b, hip); \ } /* N.B. This really should be called _mulhilo_mulhi_intrin. It just happens that CUDA was the first time we used the idiom. */ #define _mulhilo_cuda_intrin_tpl(W, Word, INTRIN) \ R123_CUDA_DEVICE R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){ \ *hip = INTRIN(a, b); \ return a*b; \ } /* // A template for mulhilo using only word-size operations and // C99 operators (no adc, no mulhi). It // requires four multiplies and a dozen or so shifts, adds // and tests. It's not clear what this is good for, other than // completeness. On 32-bit platforms, it could be used to // implement philoxNx64, but on such platforms both the philoxNx32 // and the threefryNx64 cbrngs are going to have much better // performance. It is enabled below by R123_USE_MULHILO64_C99, // but that is currently (Sep 2011) not set by any of the // features/XXfeatures.h headers. It can, of course, be // set with a compile-time -D option. */ #define _mulhilo_c99_tpl(W, Word) \ R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word *hip){ \ const unsigned WHALF = W/2; \ const Word LOMASK = ((((Word)1)<>WHALF; \ Word alo = a& LOMASK; \ Word bhi = b>>WHALF; \ Word blo = b& LOMASK; \ \ Word ahbl = ahi*blo; \ Word albh = alo*bhi; \ \ Word ahbl_albh = ((ahbl&LOMASK) + (albh&LOMASK)); \ Word hi = ahi*bhi + (ahbl>>WHALF) + (albh>>WHALF); \ hi += ahbl_albh >> WHALF; /* carry from the sum of lo(ahbl) + lo(albh) ) */ \ /* carry from the sum with alo*blo */ \ hi += ((lo >> WHALF) < (ahbl_albh&LOMASK)); \ *hip = hi; \ return lo; \ } /* // A template for mulhilo on a platform that can't do it // We could put a C version here, but is it better to run *VERY* // slowly or to just stop and force the user to find another CBRNG? 
*/ #define _mulhilo_fail_tpl(W, Word) \ R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word *hip){ \ R123_STATIC_ASSERT(0, "mulhilo" #W " is not implemented on this machine\n"); \ } /* // N.B. There's an MSVC intrinsic called _emul, // which *might* compile into better code than // _mulhilo_dword_tpl */ #if R123_USE_MULHILO32_ASM #ifdef __powerpc__ _mulhilo_asm_tpl(32, uint32_t, "mulhwu") #else _mulhilo_asm_tpl(32, uint32_t, "mull") #endif /* __powerpc__ */ #else _mulhilo_dword_tpl(32, uint32_t, uint64_t) #endif #if R123_USE_PHILOX_64BIT #if R123_USE_MULHILO64_ASM #ifdef __powerpc64__ _mulhilo_asm_tpl(64, uint64_t, "mulhdu") #else _mulhilo_asm_tpl(64, uint64_t, "mulq") #endif /* __powerpc64__ */ #elif R123_USE_MULHILO64_MSVC_INTRIN _mulhilo_msvc_intrin_tpl(64, uint64_t, _umul128) #elif R123_USE_MULHILO64_CUDA_INTRIN _mulhilo_cuda_intrin_tpl(64, uint64_t, __umul64hi) #elif R123_USE_MULHILO64_OPENCL_INTRIN _mulhilo_cuda_intrin_tpl(64, uint64_t, mul_hi) #elif R123_USE_MULHILO64_MULHI_INTRIN _mulhilo_cuda_intrin_tpl(64, uint64_t, R123_MULHILO64_MULHI_INTRIN) #elif R123_USE_GNU_UINT128 _mulhilo_dword_tpl(64, uint64_t, __uint128_t) #elif R123_USE_MULHILO64_C99 _mulhilo_c99_tpl(64, uint64_t) #else _mulhilo_fail_tpl(64, uint64_t) #endif #endif /* // The multipliers and Weyl constants are "hard coded". // To change them, you can #define them with different // values before #include-ing this file. // This isn't terribly elegant, but it works for C as // well as C++. A nice C++-only solution would be to // use template parameters in the style of */ #ifndef PHILOX_M2x64_0 #define PHILOX_M2x64_0 R123_64BIT(0xD2B74407B1CE6E93) #endif #ifndef PHILOX_M4x64_0 #define PHILOX_M4x64_0 R123_64BIT(0xD2E7470EE14C6C93) #endif #ifndef PHILOX_M4x64_1 #define PHILOX_M4x64_1 R123_64BIT(0xCA5A826395121157) #endif #ifndef PHILOX_M2x32_0 #define PHILOX_M2x32_0 ((uint32_t)0xd256d193) #endif #ifndef PHILOX_M4x32_0 #define PHILOX_M4x32_0 ((uint32_t)0xD2511F53) #endif #ifndef PHILOX_M4x32_1 #define PHILOX_M4x32_1 ((uint32_t)0xCD9E8D57) #endif #ifndef PHILOX_W64_0 #define PHILOX_W64_0 R123_64BIT(0x9E3779B97F4A7C15) /* golden ratio */ #endif #ifndef PHILOX_W64_1 #define PHILOX_W64_1 R123_64BIT(0xBB67AE8584CAA73B) /* sqrt(3)-1 */ #endif #ifndef PHILOX_W32_0 #define PHILOX_W32_0 ((uint32_t)0x9E3779B9) #endif #ifndef PHILOX_W32_1 #define PHILOX_W32_1 ((uint32_t)0xBB67AE85) #endif #ifndef PHILOX2x32_DEFAULT_ROUNDS #define PHILOX2x32_DEFAULT_ROUNDS 10 #endif #ifndef PHILOX2x64_DEFAULT_ROUNDS #define PHILOX2x64_DEFAULT_ROUNDS 10 #endif #ifndef PHILOX4x32_DEFAULT_ROUNDS #define PHILOX4x32_DEFAULT_ROUNDS 10 #endif #ifndef PHILOX4x64_DEFAULT_ROUNDS #define PHILOX4x64_DEFAULT_ROUNDS 10 #endif /* The ignored fourth argument allows us to instantiate the same macro regardless of N. 
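/* Illustrative C sketch, not part of the Random123 sources: one round of
 * Philox2x32 plus the Weyl key bump, written out in plain C by following the
 * round and bumpkey templates defined just below with the constants above
 * (multiplier 0xd256d193, Weyl constant 0x9E3779B9). Ten such rounds make up
 * the default philox2x32 bijection. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t v[2]; } ctr2x32;

static ctr2x32 philox2x32_round_demo(ctr2x32 ctr, uint32_t key){
    uint64_t product = (uint64_t)0xd256d193u * ctr.v[0];
    uint32_t hi = (uint32_t)(product >> 32);
    uint32_t lo = (uint32_t)product;
    ctr2x32 out = {{ hi ^ key ^ ctr.v[1], lo }};
    return out;
}

int main(void){
    ctr2x32 ctr = {{1u, 2u}};            /* the counter: position in the stream */
    uint32_t key = 3u;                   /* the key: selects the stream */
    for(int r = 0; r < 10; r++){
        ctr = philox2x32_round_demo(ctr, key);
        key += 0x9E3779B9u;              /* bump the key before the next round */
    }
    printf("counter-based output: 0x%08" PRIx32 " 0x%08" PRIx32 "\n", ctr.v[0], ctr.v[1]);
    return 0;
}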
*/ #define _philox2xWround_tpl(W, T) \ R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(struct r123array2x##W _philox2x##W##round(struct r123array2x##W ctr, struct r123array1x##W key)); \ R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array2x##W _philox2x##W##round(struct r123array2x##W ctr, struct r123array1x##W key){ \ T hi; \ T lo = mulhilo##W(PHILOX_M2x##W##_0, ctr.v[0], &hi); \ struct r123array2x##W out = {{hi^key.v[0]^ctr.v[1], lo}}; \ return out; \ } #define _philox2xWbumpkey_tpl(W) \ R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array1x##W _philox2x##W##bumpkey( struct r123array1x##W key) { \ key.v[0] += PHILOX_W##W##_0; \ return key; \ } #define _philox4xWround_tpl(W, T) \ R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(struct r123array4x##W _philox4x##W##round(struct r123array4x##W ctr, struct r123array2x##W key)); \ R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array4x##W _philox4x##W##round(struct r123array4x##W ctr, struct r123array2x##W key){ \ T hi0; \ T hi1; \ T lo0 = mulhilo##W(PHILOX_M4x##W##_0, ctr.v[0], &hi0); \ T lo1 = mulhilo##W(PHILOX_M4x##W##_1, ctr.v[2], &hi1); \ struct r123array4x##W out = {{hi1^ctr.v[1]^key.v[0], lo1, \ hi0^ctr.v[3]^key.v[1], lo0}}; \ return out; \ } #define _philox4xWbumpkey_tpl(W) \ R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array2x##W _philox4x##W##bumpkey( struct r123array2x##W key) { \ key.v[0] += PHILOX_W##W##_0; \ key.v[1] += PHILOX_W##W##_1; \ return key; \ } #define _philoxNxW_tpl(N, Nhalf, W, T) \ /** @ingroup PhiloxNxW */ \ enum r123_enum_philox##N##x##W { philox##N##x##W##_rounds = PHILOX##N##x##W##_DEFAULT_ROUNDS }; \ typedef struct r123array##N##x##W philox##N##x##W##_ctr_t; \ typedef struct r123array##Nhalf##x##W philox##N##x##W##_key_t; \ typedef struct r123array##Nhalf##x##W philox##N##x##W##_ukey_t; \ R123_CUDA_DEVICE R123_STATIC_INLINE philox##N##x##W##_key_t philox##N##x##W##keyinit(philox##N##x##W##_ukey_t uk) { return uk; } \ R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(philox##N##x##W##_ctr_t philox##N##x##W##_R(unsigned int R, philox##N##x##W##_ctr_t ctr, philox##N##x##W##_key_t key)); \ R123_CUDA_DEVICE R123_STATIC_INLINE philox##N##x##W##_ctr_t philox##N##x##W##_R(unsigned int R, philox##N##x##W##_ctr_t ctr, philox##N##x##W##_key_t key) { \ R123_ASSERT(R<=16); \ if(R>0){ ctr = _philox##N##x##W##round(ctr, key); } \ if(R>1){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>2){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>3){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>4){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>5){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>6){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>7){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>8){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>9){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>10){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>11){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>12){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ if(R>13){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ 
if(R>14){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
if(R>15){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
    return ctr; \
}

_philox2xWbumpkey_tpl(32)
_philox4xWbumpkey_tpl(32)
_philox2xWround_tpl(32, uint32_t) /* philo2x32round */
_philox4xWround_tpl(32, uint32_t) /* philo4x32round */
/** \endcond */
_philoxNxW_tpl(2, 1, 32, uint32_t) /* philox2x32bijection */
_philoxNxW_tpl(4, 2, 32, uint32_t) /* philox4x32bijection */
#if R123_USE_PHILOX_64BIT
/** \cond HIDDEN_FROM_DOXYGEN */
_philox2xWbumpkey_tpl(64)
_philox4xWbumpkey_tpl(64)
_philox2xWround_tpl(64, uint64_t) /* philo2x64round */
_philox4xWround_tpl(64, uint64_t) /* philo4x64round */
/** \endcond */
_philoxNxW_tpl(2, 1, 64, uint64_t) /* philox2x64bijection */
_philoxNxW_tpl(4, 2, 64, uint64_t) /* philox4x64bijection */
#endif /* R123_USE_PHILOX_64BIT */

#define philox2x32(c,k) philox2x32_R(philox2x32_rounds, c, k)
#define philox4x32(c,k) philox4x32_R(philox4x32_rounds, c, k)
#if R123_USE_PHILOX_64BIT
#define philox2x64(c,k) philox2x64_R(philox2x64_rounds, c, k)
#define philox4x64(c,k) philox4x64_R(philox4x64_rounds, c, k)
#endif /* R123_USE_PHILOX_64BIT */

#ifdef __cplusplus
#include
/** \cond HIDDEN_FROM_DOXYGEN */
#define _PhiloxNxW_base_tpl(CType, KType, N, W) \
namespace r123{ \
template <unsigned int ROUNDS> \
struct Philox##N##x##W##_R{ \
    typedef CType ctr_type; \
    typedef KType key_type; \
    typedef KType ukey_type; \
    static const unsigned int rounds=ROUNDS; \
    inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){ \
        R123_STATIC_ASSERT(ROUNDS<=16, "philox is only unrolled up to 16 rounds\n"); \
        return philox##N##x##W##_R(ROUNDS, ctr, key); \
    } \
}; \
typedef Philox##N##x##W##_R<philox##N##x##W##_rounds> Philox##N##x##W; \
} // namespace r123
/** \endcond */

_PhiloxNxW_base_tpl(r123array2x32, r123array1x32, 2, 32) // Philox2x32_R
_PhiloxNxW_base_tpl(r123array4x32, r123array2x32, 4, 32) // Philox4x32_R
#if R123_USE_PHILOX_64BIT
_PhiloxNxW_base_tpl(r123array2x64, r123array1x64, 2, 64) // Philox2x64_R
_PhiloxNxW_base_tpl(r123array4x64, r123array2x64, 4, 64) // Philox4x64_R
#endif

/* The _tpl macros don't quite work to do string-pasting inside comments,
   so we just write out the boilerplate documentation four times... */

/**
@defgroup PhiloxNxW Philox Classes and Typedefs

The PhiloxNxW classes export the member functions, typedefs and
operator overloads required by a @ref CBRNG "CBRNG" class.

As described in Parallel Random Numbers: As Easy as 1, 2, 3, the Philox
family of counter-based RNGs uses integer multiplication, xor and
permutation of W-bit words to scramble its N-word input key
(Philox is a mnemonic for Product HI LO Xor).

@class r123::Philox2x32_R
@ingroup PhiloxNxW

exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.

The template argument, ROUNDS, is the number of times the Philox round function will be applied.

As of November 2011, the authors know of no statistical flaws with ROUNDS=6 or more for Philox2x32.

@typedef r123::Philox2x32
@ingroup PhiloxNxW
Philox2x32 is equivalent to Philox2x32_R<10>.  With 10 rounds, Philox2x32 has a
considerable safety margin over the minimum number of rounds with no known
statistical flaws, but still has excellent performance.

@class r123::Philox2x64_R
@ingroup PhiloxNxW

exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.

The template argument, ROUNDS, is the number of times the Philox round function will be applied.
As of September 2011, the authors know of no statistical flaws with ROUNDS=6 or more for Philox2x64.

@typedef r123::Philox2x64
@ingroup PhiloxNxW
Philox2x64 is equivalent to Philox2x64_R<10>.  With 10 rounds, Philox2x64 has a
considerable safety margin over the minimum number of rounds with no known
statistical flaws, but still has excellent performance.

@class r123::Philox4x32_R
@ingroup PhiloxNxW

exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.

The template argument, ROUNDS, is the number of times the Philox round function will be applied.

In November 2011, the authors recorded some suspicious p-values (approximately 1.e-7) from
some very long (longer than the default BigCrush length) SimpPoker tests.  Despite the fact
that even longer tests reverted to "passing" p-values, a cloud remains over Philox4x32 with
7 rounds.  The authors know of no statistical flaws with ROUNDS=8 or more for Philox4x32.

@typedef r123::Philox4x32
@ingroup PhiloxNxW
Philox4x32 is equivalent to Philox4x32_R<10>.  With 10 rounds, Philox4x32 has a
considerable safety margin over the minimum number of rounds with no known
statistical flaws, but still has excellent performance.

@class r123::Philox4x64_R
@ingroup PhiloxNxW

exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.

The template argument, ROUNDS, is the number of times the Philox round function will be applied.

As of September 2011, the authors know of no statistical flaws with ROUNDS=7 or more for Philox4x64.

@typedef r123::Philox4x64
@ingroup PhiloxNxW
Philox4x64 is equivalent to Philox4x64_R<10>.  With 10 rounds, Philox4x64 has a
considerable safety margin over the minimum number of rounds with no known
statistical flaws, but still has excellent performance.
*/
#endif /* __cplusplus */
#endif /* _philox_dot_h_ */
PKXJBiG8#mot/data/opencl/random123/rand123.h#ifndef _RAND123_DOT_CL
#define _RAND123_DOT_CL

/**
 * This CL file is the MOT interface to the rand123 library. It contains various functions for
 * generating random numbers from uniform and Gaussian distributions.
 *
 * The rand123 library supports various modes and precisions; in this front-end we have chosen a
 * precision of 4 words of 32 bits (in rand123 terms, the 4x32 bit generators).
 */

/**
 * The information needed by the random functions to generate unique random numbers.
 * The elements of this struct are unsigned integers with N words of W bits (specified by the
 * generator function in use).
 */
typedef struct{
    %(GENERATOR_NAME)s4x32_ctr_t counter;
    %(GENERATOR_NAME)s4x32_key_t key;
} rand123_data;

/**
 * Generates the random bits used by the random functions.
 */
uint4 rand123_generate_bits(rand123_data* rng_data){
    %(GENERATOR_NAME)s4x32_ctr_t* ctr = &rng_data->counter;
    %(GENERATOR_NAME)s4x32_key_t* key = &rng_data->key;

    union {
        %(GENERATOR_NAME)s4x32_ctr_t ctr_el;
        uint4 vec_el;
    } u;

    u.ctr_el = %(GENERATOR_NAME)s4x32(*ctr, *key);
    return u.vec_el;
}

/**
 * Initializes the rand123_data structure.
 *
 * The state is implicitly extended with the global id of the work item such that every work item
 * generates its own unique random numbers.
 */
rand123_data rand123_initialize_data(uint state[6]){
    %(GENERATOR_NAME)s4x32_ctr_t c = {{state[0], state[1], state[2], state[3]}};
    %(GENERATOR_NAME)s4x32_key_t k = {{state[4], state[5], get_global_id(0), 0}};

    rand123_data rng_data = {c, k};
    return rng_data;
}

/**
 * Convert the rand123 state back into a state array.
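 *
 * The layout written here mirrors the one consumed by rand123_initialize_data:
 * rng_state[0..3] receive the four counter words and rng_state[4..5] the first two key
 * words (the third key word is the work item's global id and is not saved).
 *
 * A minimal round-trip sketch, using only the functions in this file (the values are
 * arbitrary and purely illustrative):
 *
 *     uint state[6] = {1, 2, 3, 4, 5, 6};
 *     rand123_data data = rand123_initialize_data(state);
 *     rand123_increment_counters(&data);
 *     rand123_data_to_array(data, state);  // state[0] is now 2, state[4] and state[5] unchanged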
*/ void rand123_data_to_array(rand123_data data, uint rng_state[6]){ rng_state[0] = data.counter.v[0]; rng_state[1] = data.counter.v[1]; rng_state[2] = data.counter.v[2]; rng_state[3] = data.counter.v[3]; rng_state[4] = data.key.v[0]; rng_state[5] = data.key.v[1]; } /** * Increments the rand123 state counters for the next iteration. * * One needs to call this function after every call to a random number generating function * to ensure the next number will be different. */ void rand123_increment_counters(rand123_data* rng_data){ if (++rng_data->counter.v[0] == 0){ if (++rng_data->counter.v[1] == 0){ ++rng_data->counter.v[2]; } } } /** * Applies the Box-Muller transformation on four uniformly distributed random numbers. * * This transforms uniform random numbers into Normal distributed random numbers. */ double4 rand123_box_muller_double4(double4 x){ double r0 = sqrt(-2 * log(x.x)); double c0; double s0 = sincos(((double) 2 * M_PI) * x.y, &c0); double r1 = sqrt(-2 * log(x.z)); double c1; double s1 = sincos(((double) 2 * M_PI) * x.w, &c1); return (double4) (r0*c0, r0*s0, r1*c1, r1*s1); } float4 rand123_box_muller_float4(float4 x){ float r0 = sqrt(-2 * log(x.x)); float c0; float s0 = sincos(((double) 2 * M_PI) * x.y, &c0); float r1 = sqrt(-2 * log(x.z)); float c1; float s1 = sincos(((double) 2 * M_PI) * x.w, &c1); return (float4) (r0*c0, r0*s0, r1*c1, r1*s1); } /** end of Box Muller transforms */ /** Random number generating functions in the Rand123 space */ double4 rand123_uniform_double4(rand123_data* rng_data){ uint4 generated_bits = rand123_generate_bits(rng_data); return ((double) (1/pown(2.0, 32))) * convert_double4(generated_bits) + ((double) (1/pown(2.0, 64))) * convert_double4(generated_bits); } double4 rand123_normal_double4(rand123_data* rng_data){ return rand123_box_muller_double4(rand123_uniform_double4(rng_data)); } float4 rand123_uniform_float4(rand123_data* rng_data){ uint4 generated_bits = rand123_generate_bits(rng_data); return (float)(1/pown(2.0, 32)) * convert_float4(generated_bits); } float4 rand123_normal_float4(rand123_data* rng_data){ return rand123_box_muller_float4(rand123_uniform_float4(rng_data)); } /** End of the random number generating functions */ double4 rand4(void* rng_data){ double4 val = rand123_uniform_double4((rand123_data*)rng_data); rand123_increment_counters((rand123_data*)rng_data); return val; } double4 randn4(void* rng_data){ double4 val = rand123_normal_double4((rand123_data*)rng_data); rand123_increment_counters((rand123_data*)rng_data); return val; } float4 frand4(void* rng_data){ float4 val = rand123_uniform_float4((rand123_data*)rng_data); rand123_increment_counters((rand123_data*)rng_data); return val; } float4 frandn4(void* rng_data){ float4 val = rand123_normal_float4((rand123_data*)rng_data); rand123_increment_counters((rand123_data*)rng_data); return val; } double rand(void* rng_data){ return rand4(rng_data).x; } double randn(void* rng_data){ return randn4(rng_data).x; } float frand(void* rng_data){ return frand4(rng_data).x; } float frandn(void* rng_data){ return frandn4(rng_data).x; } #endif // _RAND123_DOT_CL PK[Iqt&}gg$mot/data/opencl/random123/threefry.h/* Copyright 2010-2011, D. E. Shaw Research. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of D. E. Shaw Research nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _threefry_dot_h_ #define _threefry_dot_h_ /** \cond HIDDEN_FROM_DOXYGEN */ /* Significant parts of this file were copied from from: Skein_FinalRnd/ReferenceImplementation/skein.h Skein_FinalRnd/ReferenceImplementation/skein_block.c in http://csrc.nist.gov/groups/ST/hash/sha-3/Round3/documents/Skein_FinalRnd.zip This file has been modified so that it may no longer perform its originally intended function. If you're looking for a Skein or Threefish source code, please consult the original file. The original file had the following header: ************************************************************************** ** ** Interface declarations and internal definitions for Skein hashing. ** ** Source code author: Doug Whiting, 2008. ** ** This algorithm and source code is released to the public domain. ** *************************************************************************** */ /* See comment at the top of philox.h for the macro pre-process strategy. */ /* Rotation constants: */ enum r123_enum_threefry64x4 { /* These are the R_256 constants from the Threefish reference sources with names changed to R_64x4... */ R_64x4_0_0=14, R_64x4_0_1=16, R_64x4_1_0=52, R_64x4_1_1=57, R_64x4_2_0=23, R_64x4_2_1=40, R_64x4_3_0= 5, R_64x4_3_1=37, R_64x4_4_0=25, R_64x4_4_1=33, R_64x4_5_0=46, R_64x4_5_1=12, R_64x4_6_0=58, R_64x4_6_1=22, R_64x4_7_0=32, R_64x4_7_1=32 }; enum r123_enum_threefry64x2 { /* // Output from skein_rot_search: (srs64_B64-X1000) // Random seed = 1. BlockSize = 128 bits. sampleCnt = 1024. rounds = 8, minHW_or=57 // Start: Tue Mar 1 10:07:48 2011 // rMin = 0.136. #0325[*15] [CRC=455A682F. hw_OR=64. cnt=16384. blkSize= 128].format */ R_64x2_0_0=16, R_64x2_1_0=42, R_64x2_2_0=12, R_64x2_3_0=31, R_64x2_4_0=16, R_64x2_5_0=32, R_64x2_6_0=24, R_64x2_7_0=21 /* 4 rounds: minHW = 4 [ 4 4 4 4 ] // 5 rounds: minHW = 8 [ 8 8 8 8 ] // 6 rounds: minHW = 16 [ 16 16 16 16 ] // 7 rounds: minHW = 32 [ 32 32 32 32 ] // 8 rounds: minHW = 64 [ 64 64 64 64 ] // 9 rounds: minHW = 64 [ 64 64 64 64 ] //10 rounds: minHW = 64 [ 64 64 64 64 ] //11 rounds: minHW = 64 [ 64 64 64 64 ] */ }; enum r123_enum_threefry32x4 { /* Output from skein_rot_search: (srs-B128-X5000.out) // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28 // Start: Mon Aug 24 22:41:36 2009 // ... // rMin = 0.472. #0A4B[*33] [CRC=DD1ECE0F. hw_OR=31. cnt=16384. 
blkSize= 128].format */ R_32x4_0_0=10, R_32x4_0_1=26, R_32x4_1_0=11, R_32x4_1_1=21, R_32x4_2_0=13, R_32x4_2_1=27, R_32x4_3_0=23, R_32x4_3_1= 5, R_32x4_4_0= 6, R_32x4_4_1=20, R_32x4_5_0=17, R_32x4_5_1=11, R_32x4_6_0=25, R_32x4_6_1=10, R_32x4_7_0=18, R_32x4_7_1=20 /* 4 rounds: minHW = 3 [ 3 3 3 3 ] // 5 rounds: minHW = 7 [ 7 7 7 7 ] // 6 rounds: minHW = 12 [ 13 12 13 12 ] // 7 rounds: minHW = 22 [ 22 23 22 23 ] // 8 rounds: minHW = 31 [ 31 31 31 31 ] // 9 rounds: minHW = 32 [ 32 32 32 32 ] //10 rounds: minHW = 32 [ 32 32 32 32 ] //11 rounds: minHW = 32 [ 32 32 32 32 ] */ }; enum r123_enum_threefry32x2 { /* Output from skein_rot_search (srs32x2-X5000.out) // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28 // Start: Tue Jul 12 11:11:33 2011 // rMin = 0.334. #0206[*07] [CRC=1D9765C0. hw_OR=32. cnt=16384. blkSize= 64].format */ R_32x2_0_0=13, R_32x2_1_0=15, R_32x2_2_0=26, R_32x2_3_0= 6, R_32x2_4_0=17, R_32x2_5_0=29, R_32x2_6_0=16, R_32x2_7_0=24 /* 4 rounds: minHW = 4 [ 4 4 4 4 ] // 5 rounds: minHW = 6 [ 6 8 6 8 ] // 6 rounds: minHW = 9 [ 9 12 9 12 ] // 7 rounds: minHW = 16 [ 16 24 16 24 ] // 8 rounds: minHW = 32 [ 32 32 32 32 ] // 9 rounds: minHW = 32 [ 32 32 32 32 ] //10 rounds: minHW = 32 [ 32 32 32 32 ] //11 rounds: minHW = 32 [ 32 32 32 32 ] */ }; enum r123_enum_threefry_wcnt { WCNT2=2, WCNT4=4 }; R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint64_t RotL_64(uint64_t x, unsigned int N)); R123_CUDA_DEVICE R123_STATIC_INLINE uint64_t RotL_64(uint64_t x, unsigned int N) { return (x << (N & 63)) | (x >> ((64-N) & 63)); } R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint32_t RotL_32(uint32_t x, unsigned int N)); R123_CUDA_DEVICE R123_STATIC_INLINE uint32_t RotL_32(uint32_t x, unsigned int N) { return (x << (N & 31)) | (x >> ((32-N) & 31)); } #define SKEIN_MK_64(hi32,lo32) ((lo32) + (((uint64_t) (hi32)) << 32)) #define SKEIN_KS_PARITY64 SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) #define SKEIN_KS_PARITY32 0x1BD11BDA #ifndef THREEFRY2x32_DEFAULT_ROUNDS #define THREEFRY2x32_DEFAULT_ROUNDS 20 #endif #ifndef THREEFRY2x64_DEFAULT_ROUNDS #define THREEFRY2x64_DEFAULT_ROUNDS 20 #endif #ifndef THREEFRY4x32_DEFAULT_ROUNDS #define THREEFRY4x32_DEFAULT_ROUNDS 20 #endif #ifndef THREEFRY4x64_DEFAULT_ROUNDS #define THREEFRY4x64_DEFAULT_ROUNDS 20 #endif #define _threefry2x_tpl(W) \ typedef struct r123array2x##W threefry2x##W##_ctr_t; \ typedef struct r123array2x##W threefry2x##W##_key_t; \ typedef struct r123array2x##W threefry2x##W##_ukey_t; \ R123_CUDA_DEVICE R123_STATIC_INLINE threefry2x##W##_key_t threefry2x##W##keyinit(threefry2x##W##_ukey_t uk) { return uk; } \ R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \ R123_CUDA_DEVICE R123_STATIC_INLINE \ threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \ threefry2x##W##_ctr_t X; \ uint##W##_t ks[2+1]; \ int i; /* avoid size_t to avoid need for stddef.h */ \ R123_ASSERT(Nrounds<=32); \ ks[2] = SKEIN_KS_PARITY##W; \ for (i=0;i < 2; i++) \ { \ ks[i] = k.v[i]; \ X.v[i] = in.v[i]; \ ks[2] ^= k.v[i]; \ } \ \ /* Insert initial key before round 0 */ \ X.v[0] += ks[0]; X.v[1] += ks[1]; \ \ if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); 
X.v[1] ^= X.v[0]; } \ if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>3){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[1]; X.v[1] += ks[2]; \ X.v[1] += 1; /* X.v[2-1] += r */ \ } \ if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>7){ \ /* InjectKey(r=2) */ \ X.v[0] += ks[2]; X.v[1] += ks[0]; \ X.v[1] += 2; \ } \ if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>11){ \ /* InjectKey(r=3) */ \ X.v[0] += ks[0]; X.v[1] += ks[1]; \ X.v[1] += 3; \ } \ if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>15){ \ /* InjectKey(r=4) */ \ X.v[0] += ks[1]; X.v[1] += ks[2]; \ X.v[1] += 4; \ } \ if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>19){ \ /* InjectKey(r=5) */ \ X.v[0] += ks[2]; X.v[1] += ks[0]; \ X.v[1] += 5; \ } \ if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>23){ \ /* InjectKey(r=6) */ \ X.v[0] += ks[0]; X.v[1] += ks[1]; \ X.v[1] += 6; \ } \ if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>27){ \ /* InjectKey(r=7) */ \ X.v[0] += ks[1]; X.v[1] += ks[2]; \ X.v[1] += 7; \ } \ if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ if(Nrounds>31){ \ /* InjectKey(r=8) 
*/ \ X.v[0] += ks[2]; X.v[1] += ks[0]; \ X.v[1] += 8; \ } \ return X; \ } \ /** @ingroup ThreefryNxW */ \ enum r123_enum_threefry2x##W { threefry2x##W##_rounds = THREEFRY2x##W##_DEFAULT_ROUNDS }; \ R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \ R123_CUDA_DEVICE R123_STATIC_INLINE \ threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \ return threefry2x##W##_R(threefry2x##W##_rounds, in, k); \ } #define _threefry4x_tpl(W) \ typedef struct r123array4x##W threefry4x##W##_ctr_t; \ typedef struct r123array4x##W threefry4x##W##_key_t; \ typedef struct r123array4x##W threefry4x##W##_ukey_t; \ R123_CUDA_DEVICE R123_STATIC_INLINE threefry4x##W##_key_t threefry4x##W##keyinit(threefry4x##W##_ukey_t uk) { return uk; } \ R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \ R123_CUDA_DEVICE R123_STATIC_INLINE \ threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \ threefry4x##W##_ctr_t X; \ uint##W##_t ks[4+1]; \ int i; /* avoid size_t to avoid need for stddef.h */ \ R123_ASSERT(Nrounds<=72); \ ks[4] = SKEIN_KS_PARITY##W; \ for (i=0;i < 4; i++) \ { \ ks[i] = k.v[i]; \ X.v[i] = in.v[i]; \ ks[4] ^= k.v[i]; \ } \ \ /* Insert initial key before round 0 */ \ X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ \ if(Nrounds>0){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>1){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>2){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>3){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>3){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ X.v[4-1] += 1; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>4){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>5){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>6){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>7){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>7){ \ /* InjectKey(r=2) */ \ X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ X.v[4-1] += 2; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>8){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>9){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= 
X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>10){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>11){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>11){ \ /* InjectKey(r=3) */ \ X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ X.v[4-1] += 3; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>12){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>13){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>14){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>15){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>15){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \ X.v[4-1] += 4; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>16){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>17){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>18){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>19){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>19){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ X.v[4-1] += 5; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>20){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>21){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>22){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>23){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>23){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ X.v[4-1] += 6; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>24){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>25){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = 
RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>26){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>27){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>27){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ X.v[4-1] += 7; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>28){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>29){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>30){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>31){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>31){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ X.v[4-1] += 8; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>32){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>33){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>34){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>35){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>35){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \ X.v[4-1] += 9; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>36){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>37){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>38){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>39){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>39){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ X.v[4-1] += 10; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>40){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>41){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= 
X.v[2]; \ } \ if(Nrounds>42){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>43){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>43){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ X.v[4-1] += 11; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>44){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>45){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>46){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>47){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>47){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ X.v[4-1] += 12; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>48){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>49){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>50){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>51){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>51){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ X.v[4-1] += 13; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>52){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>53){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>54){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>55){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>55){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \ X.v[4-1] += 14; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>56){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>57){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>58){ \ X.v[0] 
+= X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>59){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>59){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ X.v[4-1] += 15; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>60){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>61){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>62){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>63){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>63){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ X.v[4-1] += 16; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>64){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>65){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>66){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>67){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>67){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ X.v[4-1] += 17; /* X.v[WCNT4-1] += r */ \ } \ \ if(Nrounds>68){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>69){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>70){ \ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ } \ if(Nrounds>71){ \ X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ } \ if(Nrounds>71){ \ /* InjectKey(r=1) */ \ X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ X.v[4-1] += 18; /* X.v[WCNT4-1] += r */ \ } \ \ return X; \ } \ /** @ingroup ThreefryNxW */ \ enum r123_enum_threefry4x##W { threefry4x##W##_rounds = THREEFRY4x##W##_DEFAULT_ROUNDS }; \ R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \ R123_CUDA_DEVICE R123_STATIC_INLINE \ threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \ return 
threefry4x##W##_R(threefry4x##W##_rounds, in, k); \
}

/** \endcond */

_threefry2x_tpl(64)
_threefry2x_tpl(32)
_threefry4x_tpl(64)
_threefry4x_tpl(32)

/* gcc4.5 and 4.6 seem to optimize a macro-ized threefryNxW better
   than a static inline function.  Why? */
#define threefry2x32(c,k) threefry2x32_R(threefry2x32_rounds, c, k)
#define threefry4x32(c,k) threefry4x32_R(threefry4x32_rounds, c, k)
#define threefry2x64(c,k) threefry2x64_R(threefry2x64_rounds, c, k)
#define threefry4x64(c,k) threefry4x64_R(threefry4x64_rounds, c, k)

#ifdef __cplusplus
/** \cond HIDDEN_FROM_DOXYGEN */
#define _threefryNxWclass_tpl(NxW) \
namespace r123{ \
template <unsigned int R> \
struct Threefry##NxW##_R{ \
    typedef threefry##NxW##_ctr_t ctr_type; \
    typedef threefry##NxW##_key_t key_type; \
    typedef threefry##NxW##_key_t ukey_type; \
    static const unsigned int rounds=R; \
    inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key)){ \
        R123_STATIC_ASSERT(R<=72, "threefry is only unrolled up to 72 rounds\n"); \
        return threefry##NxW##_R(R, ctr, key); \
    } \
}; \
typedef Threefry##NxW##_R<threefry##NxW##_rounds> Threefry##NxW; \
} // namespace r123
/** \endcond */

_threefryNxWclass_tpl(2x32)
_threefryNxWclass_tpl(4x32)
_threefryNxWclass_tpl(2x64)
_threefryNxWclass_tpl(4x64)

/* The _tpl macros don't quite work to do string-pasting inside comments,
   so we just write out the boilerplate documentation four times... */

/**
@defgroup ThreefryNxW Threefry Classes and Typedefs

The ThreefryNxW classes export the member functions, typedefs and
operator overloads required by a @ref CBRNG "CBRNG" class.

As described in Parallel Random Numbers: As Easy as 1, 2, 3, the Threefry family
is closely related to the Threefish block cipher from the Skein Hash Function.
Threefry is \b not suitable for cryptographic use.  Threefry uses integer addition,
bitwise rotation, xor and permutation of words to randomize its output.

@class r123::Threefry2x32_R
@ingroup ThreefryNxW

exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.

The template argument, ROUNDS, is the number of times the Threefry round function will be applied.

As of September 2011, the authors know of no statistical flaws with ROUNDS=13 or more for Threefry2x32.

@typedef r123::Threefry2x32
@ingroup ThreefryNxW
Threefry2x32 is equivalent to Threefry2x32_R<20>.  With 20 rounds, Threefry2x32 has a
considerable safety margin over the minimum number of rounds with no known
statistical flaws, but still has excellent performance.

@class r123::Threefry2x64_R
@ingroup ThreefryNxW

exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.

The template argument, ROUNDS, is the number of times the Threefry round function will be applied.

In November 2011, the authors discovered that 13 rounds of Threefry2x64 sequenced by strided,
interleaved key and counter increments failed a very long (longer than the default BigCrush
length) WeightDistrib test.  At the same time, it was confirmed that 14 rounds passes much
longer tests (up to 5x10^12 samples) of a similar nature.  The authors know of no statistical
flaws with ROUNDS=14 or more for Threefry2x64.

@typedef r123::Threefry2x64
@ingroup ThreefryNxW
Threefry2x64 is equivalent to Threefry2x64_R<20>.  With 20 rounds, Threefry2x64 has a
considerable safety margin over the minimum number of rounds with no known
statistical flaws, but still has excellent performance.
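
As a minimal C++ usage sketch (the seed value is arbitrary and purely illustrative;
the counter/key types are the r123arrayNxW structs this library builds on):

    r123::Threefry4x64 rng;
    r123::Threefry4x64::ctr_type ctr = {{0, 0, 0, 0}};
    r123::Threefry4x64::key_type key = {{0xdeadbeef, 0, 0, 0}};
    r123::Threefry4x64::ctr_type result = rng(ctr, key);  // four pseudo-random 64-bit words

Each call is a pure function of (ctr, key): the same counter and key always reproduce
the same output block, and incrementing the counter yields the next block of the stream.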
@class r123::Threefry4x32_R @ingroup ThreefryNxW exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. The template argument, ROUNDS, is the number of times the Threefry round function will be applied. As of September 2011, the authors know of no statistical flaws with ROUNDS=12 or more for Threefry4x32. @typedef r123::Threefry4x32 @ingroup ThreefryNxW Threefry4x32 is equivalent to Threefry4x32_R<20>. With 20 rounds, Threefry4x32 has a considerable safety margin over the minimum number of rounds with no known statistical flaws, but still has excellent performance. @class r123::Threefry4x64_R @ingroup ThreefryNxW exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. The template argument, ROUNDS, is the number of times the Threefry round function will be applied. As of September 2011, the authors know of no statistical flaws with ROUNDS=12 or more for Threefry4x64. @typedef r123::Threefry4x64 @ingroup ThreefryNxW Threefry4x64 is equivalent to Threefry4x64_R<20>. With 20 rounds, Threefry4x64 has a considerable safety margin over the minimum number of rounds with no known statistical flaws, but still has excellent performance. */ #endif #endif PK[Iee)mot/data/opencl/model_functions/Scalar.cl#ifndef CM_SCALAR_CL #define CM_SCALAR_CL /** * Author = Robbert Harms * Date = 2014-02-01 * License = LGPL v3 * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ /** * The Scalar compartment model, this just returns the input. */ mot_float_type cmScalar(const mot_float_type c){ return c; } #endif // CM_SCALAR_CLPKHfڣmot/data/opencl/cerf/dawson.cl#ifndef CERF_DAWSON_CL #define CERF_DAWSON_CL /** * Author = robbert * Date = 2014-05-17 * License = LGPL v3 * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ // sqrt(pi)/2 #define M_SQRTPI_2 0.8862269254527580 /** * Calculate the Dawson's integral for a real argument. */ double dawson(double x){ return M_SQRTPI_2 * im_w_of_x(x); } #undef M_SQRTPI_2 #endif // CERF_DAWSON_CL PKH^Amʆʆ!mot/data/opencl/cerf/im_w_of_x.cl#ifndef CERF_IM_W_OF_X_CL #define CERF_IM_W_OF_X_CL /** * Author = robbert * Date = 2014-05-17 * License = LGPL v3 * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ // sqrt(pi)/2 #define M_SQRTPI_2 0.8862269254527580 // 1 / sqrt(pi) #define M_1_SQRTPI 0.5641895835477562 // sqrt(pi) #define M_SQRTPI 1.7724538509055160 double w_im_y100(double y100, double x); /** * Main function from libcerf */ double im_w_of_x(double x){ // Steven G. Johnson, October 2012. // Uses methods similar to the erfcx calculation: // continued fractions for large |x|, // a lookup table of Chebyshev polynomials for smaller |x|, // and finally a Taylor expansion for |x|<0.01. if (x > 5e7 || x < -5e7){ // 1-term expansion, important to avoid overflow return M_1_SQRTPI / x; } if (x > 45 || x < -45) { // continued-fraction expansion is faster /* 5-term expansion (rely on compiler for CSE), simplified from: ispi / (x-0.5/(x-1/(x-1.5/(x-2/x)))) */ return M_1_SQRTPI * ((x*x) * (x*x-4.5) + 2) / (x * ((x*x) * (x*x-5) + 3.75)); } if (x >= 0) { return w_im_y100(100.0/(1+x), x); } // = -im_w_of_x(-x) return -w_im_y100(100.0/(1-x), -x); } /******************************************************************************/ /* Lookup-table for Chebyshev polynomials for smaller |x| */ /******************************************************************************/ double w_im_y100(double y100, double x) { // Steven G. 
Johnson, October 2012. // Given y100=100*y, where y = 1/(1+x) for x >= 0, compute w_im(x). // Uses a look-up table of 100 different Chebyshev polynomials // for y intervals [0,0.01], [0.01,0.02], ...., [0.99,1], generated // with the help of Maple and a little shell script. // This allows the Chebyshev polynomials to be of significantly lower // degree (about 1/30) compared to fitting the whole [0,1] interval // with a single polynomial. double t; switch ((int) y100) { case 0: { t = 2*y100 - 1; return 0.28351593328822191546e-2 + (0.28494783221378400759e-2 + (0.14427470563276734183e-4 + (0.10939723080231588129e-6 + (0.92474307943275042045e-9 + (0.89128907666450075245e-11 + 0.92974121935111111110e-13 * t) * t) * t) * t) * t) * t; } case 1: { t = 2*y100 - 3; return 0.85927161243940350562e-2 + (0.29085312941641339862e-2 + (0.15106783707725582090e-4 + (0.11716709978531327367e-6 + (0.10197387816021040024e-8 + (0.10122678863073360769e-10 + 0.10917479678400000000e-12 * t) * t) * t) * t) * t) * t; } case 2: { t = 2*y100 - 5; return 0.14471159831187703054e-1 + (0.29703978970263836210e-2 + (0.15835096760173030976e-4 + (0.12574803383199211596e-6 + (0.11278672159518415848e-8 + (0.11547462300333495797e-10 + 0.12894535335111111111e-12 * t) * t) * t) * t) * t) * t; } case 3: { t = 2*y100 - 7; return 0.20476320420324610618e-1 + (0.30352843012898665856e-2 + (0.16617609387003727409e-4 + (0.13525429711163116103e-6 + (0.12515095552507169013e-8 + (0.13235687543603382345e-10 + 0.15326595042666666667e-12 * t) * t) * t) * t) * t) * t; } case 4: { t = 2*y100 - 9; return 0.26614461952489004566e-1 + (0.31034189276234947088e-2 + (0.17460268109986214274e-4 + (0.14582130824485709573e-6 + (0.13935959083809746345e-8 + (0.15249438072998932900e-10 + 0.18344741882133333333e-12 * t) * t) * t) * t) * t) * t; } case 5: { t = 2*y100 - 11; return 0.32892330248093586215e-1 + (0.31750557067975068584e-2 + (0.18369907582308672632e-4 + (0.15761063702089457882e-6 + (0.15577638230480894382e-8 + (0.17663868462699097951e-10 + (0.22126732680711111111e-12 + 0.30273474177737853668e-14 * t) * t) * t) * t) * t) * t) * t; } case 6: { t = 2*y100 - 13; return 0.39317207681134336024e-1 + (0.32504779701937539333e-2 + (0.19354426046513400534e-4 + (0.17081646971321290539e-6 + (0.17485733959327106250e-8 + (0.20593687304921961410e-10 + (0.26917401949155555556e-12 + 0.38562123837725712270e-14 * t) * t) * t) * t) * t) * t) * t; } case 7: { t = 2*y100 - 15; return 0.45896976511367738235e-1 + (0.33300031273110976165e-2 + (0.20423005398039037313e-4 + (0.18567412470376467303e-6 + (0.19718038363586588213e-8 + (0.24175006536781219807e-10 + (0.33059982791466666666e-12 + 0.49756574284439426165e-14 * t) * t) * t) * t) * t) * t) * t; } case 8: { t = 2*y100 - 17; return 0.52640192524848962855e-1 + (0.34139883358846720806e-2 + (0.21586390240603337337e-4 + (0.20247136501568904646e-6 + (0.22348696948197102935e-8 + (0.28597516301950162548e-10 + (0.41045502119111111110e-12 + 0.65151614515238361946e-14 * t) * t) * t) * t) * t) * t) * t; } case 9: { t = 2*y100 - 19; return 0.59556171228656770456e-1 + (0.35028374386648914444e-2 + (0.22857246150998562824e-4 + (0.22156372146525190679e-6 + (0.25474171590893813583e-8 + (0.34122390890697400584e-10 + (0.51593189879111111110e-12 + 0.86775076853908006938e-14 * t) * t) * t) * t) * t) * t) * t; } case 10: { t = 2*y100 - 21; return 0.66655089485108212551e-1 + (0.35970095381271285568e-2 + (0.24250626164318672928e-4 + (0.24339561521785040536e-6 + (0.29221990406518411415e-8 + (0.41117013527967776467e-10 + (0.65786450716444444445e-12 + 
0.11791885745450623331e-13 * t) * t) * t) * t) * t) * t) * t; } case 11: { t = 2*y100 - 23; return 0.73948106345519174661e-1 + (0.36970297216569341748e-2 + (0.25784588137312868792e-4 + (0.26853012002366752770e-6 + (0.33763958861206729592e-8 + (0.50111549981376976397e-10 + (0.85313857496888888890e-12 + 0.16417079927706899860e-13 * t) * t) * t) * t) * t) * t) * t; } case 12: { t = 2*y100 - 25; return 0.81447508065002963203e-1 + (0.38035026606492705117e-2 + (0.27481027572231851896e-4 + (0.29769200731832331364e-6 + (0.39336816287457655076e-8 + (0.61895471132038157624e-10 + (0.11292303213511111111e-11 + 0.23558532213703884304e-13 * t) * t) * t) * t) * t) * t) * t; } case 13: { t = 2*y100 - 27; return 0.89166884027582716628e-1 + (0.39171301322438946014e-2 + (0.29366827260422311668e-4 + (0.33183204390350724895e-6 + (0.46276006281647330524e-8 + (0.77692631378169813324e-10 + (0.15335153258844444444e-11 + 0.35183103415916026911e-13 * t) * t) * t) * t) * t) * t) * t; } case 14: { t = 2*y100 - 29; return 0.97121342888032322019e-1 + (0.40387340353207909514e-2 + (0.31475490395950776930e-4 + (0.37222714227125135042e-6 + (0.55074373178613809996e-8 + (0.99509175283990337944e-10 + (0.21552645758222222222e-11 + 0.55728651431872687605e-13 * t) * t) * t) * t) * t) * t) * t; } case 15: { t = 2*y100 - 31; return 0.10532778218603311137e0 + (0.41692873614065380607e-2 + (0.33849549774889456984e-4 + (0.42064596193692630143e-6 + (0.66494579697622432987e-8 + (0.13094103581931802337e-9 + (0.31896187409777777778e-11 + 0.97271974184476560742e-13 * t) * t) * t) * t) * t) * t) * t; } case 16: { t = 2*y100 - 33; return 0.11380523107427108222e0 + (0.43099572287871821013e-2 + (0.36544324341565929930e-4 + (0.47965044028581857764e-6 + (0.81819034238463698796e-8 + (0.17934133239549647357e-9 + (0.50956666166186293627e-11 + (0.18850487318190638010e-12 + 0.79697813173519853340e-14 * t) * t) * t) * t) * t) * t) * t) * t; } case 17: { t = 2*y100 - 35; return 0.12257529703447467345e0 + (0.44621675710026986366e-2 + (0.39634304721292440285e-4 + (0.55321553769873381819e-6 + (0.10343619428848520870e-7 + (0.26033830170470368088e-9 + (0.87743837749108025357e-11 + (0.34427092430230063401e-12 + 0.10205506615709843189e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 18: { t = 2*y100 - 37; return 0.13166276955656699478e0 + (0.46276970481783001803e-2 + (0.43225026380496399310e-4 + (0.64799164020016902656e-6 + (0.13580082794704641782e-7 + (0.39839800853954313927e-9 + (0.14431142411840000000e-10 + 0.42193457308830027541e-12 * t) * t) * t) * t) * t) * t) * t; } case 19: { t = 2*y100 - 39; return 0.14109647869803356475e0 + (0.48088424418545347758e-2 + (0.47474504753352150205e-4 + (0.77509866468724360352e-6 + (0.18536851570794291724e-7 + (0.60146623257887570439e-9 + (0.18533978397305276318e-10 + (0.41033845938901048380e-13 - 0.46160680279304825485e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 20: { t = 2*y100 - 41; return 0.15091057940548936603e0 + (0.50086864672004685703e-2 + (0.52622482832192230762e-4 + (0.95034664722040355212e-6 + (0.25614261331144718769e-7 + (0.80183196716888606252e-9 + (0.12282524750534352272e-10 + (-0.10531774117332273617e-11 - 0.86157181395039646412e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 21: { t = 2*y100 - 43; return 0.16114648116017010770e0 + (0.52314661581655369795e-2 + (0.59005534545908331315e-4 + (0.11885518333915387760e-5 + (0.33975801443239949256e-7 + (0.82111547144080388610e-9 + (-0.12357674017312854138e-10 + (-0.24355112256914479176e-11 - 0.75155506863572930844e-13 * t) * t) * t) * t) * t) * t) * t) 
* t; } case 22: { t = 2*y100 - 45; return 0.17185551279680451144e0 + (0.54829002967599420860e-2 + (0.67013226658738082118e-4 + (0.14897400671425088807e-5 + (0.40690283917126153701e-7 + (0.44060872913473778318e-9 + (-0.52641873433280000000e-10 - 0.30940587864543343124e-11 * t) * t) * t) * t) * t) * t) * t; } case 23: { t = 2*y100 - 47; return 0.18310194559815257381e0 + (0.57701559375966953174e-2 + (0.76948789401735193483e-4 + (0.18227569842290822512e-5 + (0.41092208344387212276e-7 + (-0.44009499965694442143e-9 + (-0.92195414685628803451e-10 + (-0.22657389705721753299e-11 + 0.10004784908106839254e-12 * t) * t) * t) * t) * t) * t) * t) * t; } case 24: { t = 2*y100 - 49; return 0.19496527191546630345e0 + (0.61010853144364724856e-2 + (0.88812881056342004864e-4 + (0.21180686746360261031e-5 + (0.30652145555130049203e-7 + (-0.16841328574105890409e-8 + (-0.11008129460612823934e-9 + (-0.12180794204544515779e-12 + 0.15703325634590334097e-12 * t) * t) * t) * t) * t) * t) * t) * t; } case 25: { t = 2*y100 - 51; return 0.20754006813966575720e0 + (0.64825787724922073908e-2 + (0.10209599627522311893e-3 + (0.22785233392557600468e-5 + (0.73495224449907568402e-8 + (-0.29442705974150112783e-8 + (-0.94082603434315016546e-10 + (0.23609990400179321267e-11 + 0.14141908654269023788e-12 * t) * t) * t) * t) * t) * t) * t) * t; } case 26: { t = 2*y100 - 53; return 0.22093185554845172146e0 + (0.69182878150187964499e-2 + (0.11568723331156335712e-3 + (0.22060577946323627739e-5 + (-0.26929730679360840096e-7 + (-0.38176506152362058013e-8 + (-0.47399503861054459243e-10 + (0.40953700187172127264e-11 + 0.69157730376118511127e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 27: { t = 2*y100 - 55; return 0.23524827304057813918e0 + (0.74063350762008734520e-2 + (0.12796333874615790348e-3 + (0.18327267316171054273e-5 + (-0.66742910737957100098e-7 + (-0.40204740975496797870e-8 + (0.14515984139495745330e-10 + (0.44921608954536047975e-11 - 0.18583341338983776219e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 28: { t = 2*y100 - 57; return 0.25058626331812744775e0 + (0.79377285151602061328e-2 + (0.13704268650417478346e-3 + (0.11427511739544695861e-5 + (-0.10485442447768377485e-6 + (-0.34850364756499369763e-8 + (0.72656453829502179208e-10 + (0.36195460197779299406e-11 - 0.84882136022200714710e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 29: { t = 2*y100 - 59; return 0.26701724900280689785e0 + (0.84959936119625864274e-2 + (0.14112359443938883232e-3 + (0.17800427288596909634e-6 + (-0.13443492107643109071e-6 + (-0.23512456315677680293e-8 + (0.11245846264695936769e-9 + (0.19850501334649565404e-11 - 0.11284666134635050832e-12 * t) * t) * t) * t) * t) * t) * t) * t; } case 30: { t = 2*y100 - 61; return 0.28457293586253654144e0 + (0.90581563892650431899e-2 + (0.13880520331140646738e-3 + (-0.97262302362522896157e-6 + (-0.15077100040254187366e-6 + (-0.88574317464577116689e-9 + (0.12760311125637474581e-9 + (0.20155151018282695055e-12 - 0.10514169375181734921e-12 * t) * t) * t) * t) * t) * t) * t) * t; } case 31: { t = 2*y100 - 63; return 0.30323425595617385705e0 + (0.95968346790597422934e-2 + (0.12931067776725883939e-3 + (-0.21938741702795543986e-5 + (-0.15202888584907373963e-6 + (0.61788350541116331411e-9 + (0.11957835742791248256e-9 + (-0.12598179834007710908e-11 - 0.75151817129574614194e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 32: { t = 2*y100 - 65; return 0.32292521181517384379e0 + (0.10082957727001199408e-1 + (0.11257589426154962226e-3 + (-0.33670890319327881129e-5 + (-0.13910529040004008158e-6 + 
(0.19170714373047512945e-8 + (0.94840222377720494290e-10 + (-0.21650018351795353201e-11 - 0.37875211678024922689e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 33: { t = 2*y100 - 67; return 0.34351233557911753862e0 + (0.10488575435572745309e-1 + (0.89209444197248726614e-4 + (-0.43893459576483345364e-5 + (-0.11488595830450424419e-6 + (0.28599494117122464806e-8 + (0.61537542799857777779e-10 - 0.24935749227658002212e-11 * t) * t) * t) * t) * t) * t) * t; } case 34: { t = 2*y100 - 69; return 0.36480946642143669093e0 + (0.10789304203431861366e-1 + (0.60357993745283076834e-4 + (-0.51855862174130669389e-5 + (-0.83291664087289801313e-7 + (0.33898011178582671546e-8 + (0.27082948188277716482e-10 + (-0.23603379397408694974e-11 + 0.19328087692252869842e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 35: { t = 2*y100 - 71; return 0.38658679935694939199e0 + (0.10966119158288804999e-1 + (0.27521612041849561426e-4 + (-0.57132774537670953638e-5 + (-0.48404772799207914899e-7 + (0.35268354132474570493e-8 + (-0.32383477652514618094e-11 + (-0.19334202915190442501e-11 + 0.32333189861286460270e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 36: { t = 2*y100 - 73; return 0.40858275583808707870e0 + (0.11006378016848466550e-1 + (-0.76396376685213286033e-5 + (-0.59609835484245791439e-5 + (-0.13834610033859313213e-7 + (0.33406952974861448790e-8 + (-0.26474915974296612559e-10 + (-0.13750229270354351983e-11 + 0.36169366979417390637e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 37: { t = 2*y100 - 75; return 0.43051714914006682977e0 + (0.10904106549500816155e-1 + (-0.43477527256787216909e-4 + (-0.59429739547798343948e-5 + (0.17639200194091885949e-7 + (0.29235991689639918688e-8 + (-0.41718791216277812879e-10 + (-0.81023337739508049606e-12 + 0.33618915934461994428e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 38: { t = 2*y100 - 77; return 0.45210428135559607406e0 + (0.10659670756384400554e-1 + (-0.78488639913256978087e-4 + (-0.56919860886214735936e-5 + (0.44181850467477733407e-7 + (0.23694306174312688151e-8 + (-0.49492621596685443247e-10 + (-0.31827275712126287222e-12 + 0.27494438742721623654e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 39: { t = 2*y100 - 79; return 0.47306491195005224077e0 + (0.10279006119745977570e-1 + (-0.11140268171830478306e-3 + (-0.52518035247451432069e-5 + (0.64846898158889479518e-7 + (0.17603624837787337662e-8 + (-0.51129481592926104316e-10 + (0.62674584974141049511e-13 + 0.20055478560829935356e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 40: { t = 2*y100 - 81; return 0.49313638965719857647e0 + (0.97725799114772017662e-2 + (-0.14122854267291533334e-3 + (-0.46707252568834951907e-5 + (0.79421347979319449524e-7 + (0.11603027184324708643e-8 + (-0.48269605844397175946e-10 + (0.32477251431748571219e-12 + 0.12831052634143527985e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 41: { t = 2*y100 - 83; return 0.51208057433416004042e0 + (0.91542422354009224951e-2 + (-0.16726530230228647275e-3 + (-0.39964621752527649409e-5 + (0.88232252903213171454e-7 + (0.61343113364949928501e-9 + (-0.42516755603130443051e-10 + (0.47910437172240209262e-12 + 0.66784341874437478953e-14 * t) * t) * t) * t) * t) * t) * t) * t; } case 42: { t = 2*y100 - 85; return 0.52968945458607484524e0 + (0.84400880445116786088e-2 + (-0.18908729783854258774e-3 + (-0.32725905467782951931e-5 + (0.91956190588652090659e-7 + (0.14593989152420122909e-9 + (-0.35239490687644444445e-10 + 0.54613829888448694898e-12 * t) * t) * t) * t) * t) * t) * t; } case 43: { t = 2*y100 - 87; return 0.54578857454330070965e0 + 
(0.76474155195880295311e-2 + (-0.20651230590808213884e-3 + (-0.25364339140543131706e-5 + (0.91455367999510681979e-7 + (-0.23061359005297528898e-9 + (-0.27512928625244444444e-10 + 0.54895806008493285579e-12 * t) * t) * t) * t) * t) * t) * t; } case 44: { t = 2*y100 - 89; return 0.56023851910298493910e0 + (0.67938321739997196804e-2 + (-0.21956066613331411760e-3 + (-0.18181127670443266395e-5 + (0.87650335075416845987e-7 + (-0.51548062050366615977e-9 + (-0.20068462174044444444e-10 + 0.50912654909758187264e-12 * t) * t) * t) * t) * t) * t) * t; } case 45: { t = 2*y100 - 91; return 0.57293478057455721150e0 + (0.58965321010394044087e-2 + (-0.22841145229276575597e-3 + (-0.11404605562013443659e-5 + (0.81430290992322326296e-7 + (-0.71512447242755357629e-9 + (-0.13372664928000000000e-10 + 0.44461498336689298148e-12 * t) * t) * t) * t) * t) * t) * t; } case 46: { t = 2*y100 - 93; return 0.58380635448407827360e0 + (0.49717469530842831182e-2 + (-0.23336001540009645365e-3 + (-0.51952064448608850822e-6 + (0.73596577815411080511e-7 + (-0.84020916763091566035e-9 + (-0.76700972702222222221e-11 + 0.36914462807972467044e-12 * t) * t) * t) * t) * t) * t) * t; } case 47: { t = 2*y100 - 95; return 0.59281340237769489597e0 + (0.40343592069379730568e-2 + (-0.23477963738658326185e-3 + (0.34615944987790224234e-7 + (0.64832803248395814574e-7 + (-0.90329163587627007971e-9 + (-0.30421940400000000000e-11 + 0.29237386653743536669e-12 * t) * t) * t) * t) * t) * t) * t; } case 48: { t = 2*y100 - 97; return 0.59994428743114271918e0 + (0.30976579788271744329e-2 + (-0.23308875765700082835e-3 + (0.51681681023846925160e-6 + (0.55694594264948268169e-7 + (-0.91719117313243464652e-9 + (0.53982743680000000000e-12 + 0.22050829296187771142e-12 * t) * t) * t) * t) * t) * t) * t; } case 49: { t = 2*y100 - 99; return 0.60521224471819875444e0 + (0.21732138012345456060e-2 + (-0.22872428969625997456e-3 + (0.92588959922653404233e-6 + (0.46612665806531930684e-7 + (-0.89393722514414153351e-9 + (0.31718550353777777778e-11 + 0.15705458816080549117e-12 * t) * t) * t) * t) * t) * t) * t; } case 50: { t = 2*y100 - 101; return 0.60865189969791123620e0 + (0.12708480848877451719e-2 + (-0.22212090111534847166e-3 + (0.12636236031532793467e-5 + (0.37904037100232937574e-7 + (-0.84417089968101223519e-9 + (0.49843180828444444445e-11 + 0.10355439441049048273e-12 * t) * t) * t) * t) * t) * t) * t; } case 51: { t = 2*y100 - 103; return 0.61031580103499200191e0 + (0.39867436055861038223e-3 + (-0.21369573439579869291e-3 + (0.15339402129026183670e-5 + (0.29787479206646594442e-7 + (-0.77687792914228632974e-9 + (0.61192452741333333334e-11 + 0.60216691829459295780e-13 * t) * t) * t) * t) * t) * t) * t; } case 52: { t = 2*y100 - 105; return 0.61027109047879835868e0 + (-0.43680904508059878254e-3 + (-0.20383783788303894442e-3 + (0.17421743090883439959e-5 + (0.22400425572175715576e-7 + (-0.69934719320045128997e-9 + (0.67152759655111111110e-11 + 0.26419960042578359995e-13 * t) * t) * t) * t) * t) * t) * t; } case 53: { t = 2*y100 - 107; return 0.60859639489217430521e0 + (-0.12305921390962936873e-2 + (-0.19290150253894682629e-3 + (0.18944904654478310128e-5 + (0.15815530398618149110e-7 + (-0.61726850580964876070e-9 + 0.68987888999111111110e-11 * t) * t) * t) * t) * t) * t; } case 54: { t = 2*y100 - 109; return 0.60537899426486075181e0 + (-0.19790062241395705751e-2 + (-0.18120271393047062253e-3 + (0.19974264162313241405e-5 + (0.10055795094298172492e-7 + (-0.53491997919318263593e-9 + (0.67794550295111111110e-11 - 0.17059208095741511603e-13 * t) * t) * t) * t) * t) * t) * t; 
} case 55: { t = 2*y100 - 111; return 0.60071229457904110537e0 + (-0.26795676776166354354e-2 + (-0.16901799553627508781e-3 + (0.20575498324332621581e-5 + (0.51077165074461745053e-8 + (-0.45536079828057221858e-9 + (0.64488005516444444445e-11 - 0.29311677573152766338e-13 * t) * t) * t) * t) * t) * t) * t; } case 56: { t = 2*y100 - 113; return 0.59469361520112714738e0 + (-0.33308208190600993470e-2 + (-0.15658501295912405679e-3 + (0.20812116912895417272e-5 + (0.93227468760614182021e-9 + (-0.38066673740116080415e-9 + (0.59806790359111111110e-11 - 0.36887077278950440597e-13 * t) * t) * t) * t) * t) * t) * t; } case 57: { t = 2*y100 - 115; return 0.58742228631775388268e0 + (-0.39321858196059227251e-2 + (-0.14410441141450122535e-3 + (0.20743790018404020716e-5 + (-0.25261903811221913762e-8 + (-0.31212416519526924318e-9 + (0.54328422462222222221e-11 - 0.40864152484979815972e-13 * t) * t) * t) * t) * t) * t) * t; } case 58: { t = 2*y100 - 117; return 0.57899804200033018447e0 + (-0.44838157005618913447e-2 + (-0.13174245966501437965e-3 + (0.20425306888294362674e-5 + (-0.53330296023875447782e-8 + (-0.25041289435539821014e-9 + (0.48490437205333333334e-11 - 0.42162206939169045177e-13 * t) * t) * t) * t) * t) * t) * t; } case 59: { t = 2*y100 - 119; return 0.56951968796931245974e0 + (-0.49864649488074868952e-2 + (-0.11963416583477567125e-3 + (0.19906021780991036425e-5 + (-0.75580140299436494248e-8 + (-0.19576060961919820491e-9 + (0.42613011928888888890e-11 - 0.41539443304115604377e-13 * t) * t) * t) * t) * t) * t) * t; } case 60: { t = 2*y100 - 121; return 0.55908401930063918964e0 + (-0.54413711036826877753e-2 + (-0.10788661102511914628e-3 + (0.19229663322982839331e-5 + (-0.92714731195118129616e-8 + (-0.14807038677197394186e-9 + (0.36920870298666666666e-11 - 0.39603726688419162617e-13 * t) * t) * t) * t) * t) * t) * t; } case 61: { t = 2*y100 - 123; return 0.54778496152925675315e0 + (-0.58501497933213396670e-2 + (-0.96582314317855227421e-4 + (0.18434405235069270228e-5 + (-0.10541580254317078711e-7 + (-0.10702303407788943498e-9 + (0.31563175582222222222e-11 - 0.36829748079110481422e-13 * t) * t) * t) * t) * t) * t) * t; } case 62: { t = 2*y100 - 125; return 0.53571290831682823999e0 + (-0.62147030670760791791e-2 + (-0.85782497917111760790e-4 + (0.17553116363443470478e-5 + (-0.11432547349815541084e-7 + (-0.72157091369041330520e-10 + (0.26630811607111111111e-11 - 0.33578660425893164084e-13 * t) * t) * t) * t) * t) * t) * t; } case 63: { t = 2*y100 - 127; return 0.52295422962048434978e0 + (-0.65371404367776320720e-2 + (-0.75530164941473343780e-4 + (0.16613725797181276790e-5 + (-0.12003521296598910761e-7 + (-0.42929753689181106171e-10 + (0.22170894940444444444e-11 - 0.30117697501065110505e-13 * t) * t) * t) * t) * t) * t) * t; } case 64: { t = 2*y100 - 129; return 0.50959092577577886140e0 + (-0.68197117603118591766e-2 + (-0.65852936198953623307e-4 + (0.15639654113906716939e-5 + (-0.12308007991056524902e-7 + (-0.18761997536910939570e-10 + (0.18198628922666666667e-11 - 0.26638355362285200932e-13 * t) * t) * t) * t) * t) * t) * t; } case 65: { t = 2*y100 - 131; return 0.49570040481823167970e0 + (-0.70647509397614398066e-2 + (-0.56765617728962588218e-4 + (0.14650274449141448497e-5 + (-0.12393681471984051132e-7 + (0.92904351801168955424e-12 + (0.14706755960177777778e-11 - 0.23272455351266325318e-13 * t) * t) * t) * t) * t) * t) * t; } case 66: { t = 2*y100 - 133; return 0.48135536250935238066e0 + (-0.72746293327402359783e-2 + (-0.48272489495730030780e-4 + (0.13661377309113939689e-5 + (-0.12302464447599382189e-7 + 
(0.16707760028737074907e-10 + (0.11672928324444444444e-11 - 0.20105801424709924499e-13 * t) * t) * t) * t) * t) * t) * t; } case 67: { t = 2*y100 - 135; return 0.46662374675511439448e0 + (-0.74517177649528487002e-2 + (-0.40369318744279128718e-4 + (0.12685621118898535407e-5 + (-0.12070791463315156250e-7 + (0.29105507892605823871e-10 + (0.90653314645333333334e-12 - 0.17189503312102982646e-13 * t) * t) * t) * t) * t) * t) * t; } case 68: { t = 2*y100 - 137; return 0.45156879030168268778e0 + (-0.75983560650033817497e-2 + (-0.33045110380705139759e-4 + (0.11732956732035040896e-5 + (-0.11729986947158201869e-7 + (0.38611905704166441308e-10 + (0.68468768305777777779e-12 - 0.14549134330396754575e-13 * t) * t) * t) * t) * t) * t) * t; } case 69: { t = 2*y100 - 139; return 0.43624909769330896904e0 + (-0.77168291040309554679e-2 + (-0.26283612321339907756e-4 + (0.10811018836893550820e-5 + (-0.11306707563739851552e-7 + (0.45670446788529607380e-10 + (0.49782492549333333334e-12 - 0.12191983967561779442e-13 * t) * t) * t) * t) * t) * t) * t; } case 70: { t = 2*y100 - 141; return 0.42071877443548481181e0 + (-0.78093484015052730097e-2 + (-0.20064596897224934705e-4 + (0.99254806680671890766e-6 + (-0.10823412088884741451e-7 + (0.50677203326904716247e-10 + (0.34200547594666666666e-12 - 0.10112698698356194618e-13 * t) * t) * t) * t) * t) * t) * t; } case 71: { t = 2*y100 - 143; return 0.40502758809710844280e0 + (-0.78780384460872937555e-2 + (-0.14364940764532853112e-4 + (0.90803709228265217384e-6 + (-0.10298832847014466907e-7 + (0.53981671221969478551e-10 + (0.21342751381333333333e-12 - 0.82975901848387729274e-14 * t) * t) * t) * t) * t) * t) * t; } case 72: { t = 2*y100 - 145; return 0.38922115269731446690e0 + (-0.79249269708242064120e-2 + (-0.91595258799106970453e-5 + (0.82783535102217576495e-6 + (-0.97484311059617744437e-8 + (0.55889029041660225629e-10 + (0.10851981336888888889e-12 - 0.67278553237853459757e-14 * t) * t) * t) * t) * t) * t) * t; } case 73: { t = 2*y100 - 147; return 0.37334112915460307335e0 + (-0.79519385109223148791e-2 + (-0.44219833548840469752e-5 + (0.75209719038240314732e-6 + (-0.91848251458553190451e-8 + (0.56663266668051433844e-10 + (0.23995894257777777778e-13 - 0.53819475285389344313e-14 * t) * t) * t) * t) * t) * t) * t; } case 74: { t = 2*y100 - 149; return 0.35742543583374223085e0 + (-0.79608906571527956177e-2 + (-0.12530071050975781198e-6 + (0.68088605744900552505e-6 + (-0.86181844090844164075e-8 + (0.56530784203816176153e-10 + (-0.43120012248888888890e-13 - 0.42372603392496813810e-14 * t) * t) * t) * t) * t) * t) * t; } case 75: { t = 2*y100 - 151; return 0.34150846431979618536e0 + (-0.79534924968773806029e-2 + (0.37576885610891515813e-5 + (0.61419263633090524326e-6 + (-0.80565865409945960125e-8 + (0.55684175248749269411e-10 + (-0.95486860764444444445e-13 - 0.32712946432984510595e-14 * t) * t) * t) * t) * t) * t) * t; } case 76: { t = 2*y100 - 153; return 0.32562129649136346824e0 + (-0.79313448067948884309e-2 + (0.72539159933545300034e-5 + (0.55195028297415503083e-6 + (-0.75063365335570475258e-8 + (0.54281686749699595941e-10 - 0.13545424295111111111e-12 * t) * t) * t) * t) * t) * t; } case 77: { t = 2*y100 - 155; return 0.30979191977078391864e0 + (-0.78959416264207333695e-2 + (0.10389774377677210794e-4 + (0.49404804463196316464e-6 + (-0.69722488229411164685e-8 + (0.52469254655951393842e-10 - 0.16507860650666666667e-12 * t) * t) * t) * t) * t) * t; } case 78: { t = 2*y100 - 157; return 0.29404543811214459904e0 + (-0.78486728990364155356e-2 + (0.13190885683106990459e-4 + 
(0.44034158861387909694e-6 + (-0.64578942561562616481e-8 + (0.50354306498006928984e-10 - 0.18614473550222222222e-12 * t) * t) * t) * t) * t) * t; } case 79: { t = 2*y100 - 159; return 0.27840427686253660515e0 + (-0.77908279176252742013e-2 + (0.15681928798708548349e-4 + (0.39066226205099807573e-6 + (-0.59658144820660420814e-8 + (0.48030086420373141763e-10 - 0.20018995173333333333e-12 * t) * t) * t) * t) * t) * t; } case 80: { t = 2*y100 - 161; return 0.26288838011163800908e0 + (-0.77235993576119469018e-2 + (0.17886516796198660969e-4 + (0.34482457073472497720e-6 + (-0.54977066551955420066e-8 + (0.45572749379147269213e-10 - 0.20852924954666666667e-12 * t) * t) * t) * t) * t) * t; } case 81: { t = 2*y100 - 163; return 0.24751539954181029717e0 + (-0.76480877165290370975e-2 + (0.19827114835033977049e-4 + (0.30263228619976332110e-6 + (-0.50545814570120129947e-8 + (0.43043879374212005966e-10 - 0.21228012028444444444e-12 * t) * t) * t) * t) * t) * t; } case 82: { t = 2*y100 - 165; return 0.23230087411688914593e0 + (-0.75653060136384041587e-2 + (0.21524991113020016415e-4 + (0.26388338542539382413e-6 + (-0.46368974069671446622e-8 + (0.40492715758206515307e-10 - 0.21238627815111111111e-12 * t) * t) * t) * t) * t) * t; } case 83: { t = 2*y100 - 167; return 0.21725840021297341931e0 + (-0.74761846305979730439e-2 + (0.23000194404129495243e-4 + (0.22837400135642906796e-6 + (-0.42446743058417541277e-8 + (0.37958104071765923728e-10 - 0.20963978568888888889e-12 * t) * t) * t) * t) * t) * t; } case 84: { t = 2*y100 - 169; return 0.20239979200788191491e0 + (-0.73815761980493466516e-2 + (0.24271552727631854013e-4 + (0.19590154043390012843e-6 + (-0.38775884642456551753e-8 + (0.35470192372162901168e-10 - 0.20470131678222222222e-12 * t) * t) * t) * t) * t) * t; } case 85: { t = 2*y100 - 171; return 0.18773523211558098962e0 + (-0.72822604530339834448e-2 + (0.25356688567841293697e-4 + (0.16626710297744290016e-6 + (-0.35350521468015310830e-8 + (0.33051896213898864306e-10 - 0.19811844544000000000e-12 * t) * t) * t) * t) * t) * t; } case 86: { t = 2*y100 - 173; return 0.17327341258479649442e0 + (-0.71789490089142761950e-2 + (0.26272046822383820476e-4 + (0.13927732375657362345e-6 + (-0.32162794266956859603e-8 + (0.30720156036105652035e-10 - 0.19034196304000000000e-12 * t) * t) * t) * t) * t) * t; } case 87: { t = 2*y100 - 175; return 0.15902166648328672043e0 + (-0.70722899934245504034e-2 + (0.27032932310132226025e-4 + (0.11474573347816568279e-6 + (-0.29203404091754665063e-8 + (0.28487010262547971859e-10 - 0.18174029063111111111e-12 * t) * t) * t) * t) * t) * t; } case 88: { t = 2*y100 - 177; return 0.14498609036610283865e0 + (-0.69628725220045029273e-2 + (0.27653554229160596221e-4 + (0.92493727167393036470e-7 + (-0.26462055548683583849e-8 + (0.26360506250989943739e-10 - 0.17261211260444444444e-12 * t) * t) * t) * t) * t) * t; } case 89: { t = 2*y100 - 179; return 0.13117165798208050667e0 + (-0.68512309830281084723e-2 + (0.28147075431133863774e-4 + (0.72351212437979583441e-7 + (-0.23927816200314358570e-8 + (0.24345469651209833155e-10 - 0.16319736960000000000e-12 * t) * t) * t) * t) * t) * t; } case 90: { t = 2*y100 - 181; return 0.11758232561160626306e0 + (-0.67378491192463392927e-2 + (0.28525664781722907847e-4 + (0.54156999310046790024e-7 + (-0.21589405340123827823e-8 + (0.22444150951727334619e-10 - 0.15368675584000000000e-12 * t) * t) * t) * t) * t) * t; } case 91: { t = 2*y100 - 183; return 0.10422112945361673560e0 + (-0.66231638959845581564e-2 + (0.28800551216363918088e-4 + (0.37758983397952149613e-7 + 
(-0.19435423557038933431e-8 + (0.20656766125421362458e-10 - 0.14422990012444444444e-12 * t) * t) * t) * t) * t) * t; } case 92: { t = 2*y100 - 185; return 0.91090275493541084785e-1 + (-0.65075691516115160062e-2 + (0.28982078385527224867e-4 + (0.23014165807643012781e-7 + (-0.17454532910249875958e-8 + (0.18981946442680092373e-10 - 0.13494234691555555556e-12 * t) * t) * t) * t) * t) * t; } case 93: { t = 2*y100 - 187; return 0.78191222288771379358e-1 + (-0.63914190297303976434e-2 + (0.29079759021299682675e-4 + (0.97885458059415717014e-8 + (-0.15635596116134296819e-8 + (0.17417110744051331974e-10 - 0.12591151763555555556e-12 * t) * t) * t) * t) * t) * t; } case 94: { t = 2*y100 - 189; return 0.65524757106147402224e-1 + (-0.62750311956082444159e-2 + (0.29102328354323449795e-4 + (-0.20430838882727954582e-8 + (-0.13967781903855367270e-8 + (0.15958771833747057569e-10 - 0.11720175765333333333e-12 * t) * t) * t) * t) * t) * t; } case 95: { t = 2*y100 - 191; return 0.53091065838453612773e-1 + (-0.61586898417077043662e-2 + (0.29057796072960100710e-4 + (-0.12597414620517987536e-7 + (-0.12440642607426861943e-8 + (0.14602787128447932137e-10 - 0.10885859114666666667e-12 * t) * t) * t) * t) * t) * t; } case 96: { t = 2*y100 - 193; return 0.40889797115352738582e-1 + (-0.60426484889413678200e-2 + (0.28953496450191694606e-4 + (-0.21982952021823718400e-7 + (-0.11044169117553026211e-8 + (0.13344562332430552171e-10 - 0.10091231402844444444e-12 * t) * t) * t) * t) * t) * t; } case 97: case 98: case 99: case 100: { // use Taylor expansion for small x (|x| <= 0.0309...) // (2/sqrt(pi)) * (x - 2/3 x^3 + 4/15 x^5 - 8/105 x^7 + 16/945 x^9) t = x*x; return x * (1.1283791670955125739 - t * (0.75225277806367504925 - t * (0.30090111122547001970 - t * (0.085971746064420005629 - t * 0.016931216931216931217)))); } } /* Since 0 <= y100 < 101, this is only reached if x is NaN, in which case we should return NaN. */ return NAN; } // w_im_y100 #undef M_SQRTPI_2 #undef M_1_SQRTPI #undef M_SQRTPI #endif // CERF_IM_W_OF_X_CL PKHiOmot/data/opencl/cerf/erfi.cl#ifndef CERF_ERFI_CL #define CERF_ERFI_CL /** * Author = robbert * Date = 2014-05-17 * License = LGPL v3 * Maintainer = Robbert Harms * Email = robbert.harms@maastrichtuniversity.nl */ /** * Calculate the imaginary error function for a real argument (special case) */ double erfi(double x){ // Compute erfi(x) = -i erf(ix), // the imaginary error function. return x*x > 720 ? (x > 0 ? INFINITY : -INFINITY) : exp(x*x) * im_w_of_x(x); } float ferfi(float x){ // Compute erfi(x) = -i erf(ix), // the imaginary error function. return x*x > 720 ? (x > 0 ? INFINITY : -INFINITY) : exp(x*x) * im_w_of_x(x); } #endif //CERF_ERFI_CL PKxJ6`Z__'mot/model_building/evaluation_models.pyfrom mot.model_building.cl_functions.parameters import FreeParameter from mot.model_building.cl_functions.base import ModelFunction from mot.model_building.cl_functions.library_functions import Bessel from mot.model_building.parameter_functions.transformations import ClampTransform from mot.cl_data_type import SimpleCLDataType __author__ = 'Robbert Harms' __date__ = "2014-08-05" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class EvaluationModel(ModelFunction): def __init__(self, name, cl_function_name, parameter_list, dependency_list=()): """The evaluation model is the model under which you evaluate the estimated results against the data. This normally embed the noise model assumptions of your data. 
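As a rough usage sketch (hedged: the model tree and problem data variables below are illustrative placeholders, not part of this module), an evaluation model is normally handed to a model builder:

.. code-block:: python

    from mot.model_building.evaluation_models import GaussianEvaluationModel
    from mot.model_building.model_builders import OptimizeModelBuilder

    # my_model_tree and my_problem_data are assumed to exist elsewhere
    builder = OptimizeModelBuilder('MyModel', my_model_tree,
                                   GaussianEvaluationModel(),
                                   problem_data=my_problem_data)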
""" super(EvaluationModel, self).__init__(name, cl_function_name, parameter_list, dependency_list=dependency_list) def get_objective_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing): """Get the cl code for the objective function under the given noise model. Args: fname (str): the name of the resulting function inst_per_problem (int): the number of instances per problem eval_fname (str): the name of the function that can be called to get the evaluation, its signature is: .. code-block:: c double (const void* data, const mot_float_type* x, const uint observation_index); obs_fname (str): the name of the function that can be called for the observed data, its signature is: .. code-block:: c double (const void* data, const uint observation_index); param_listing (str): the parameter listings for the parameters of the noise model Returns: str: The objective function under this noise model, its signature is: .. code-block:: c double (const void* const data, mot_float_type* const x); That is, it always returns a double since the summations may get large. """ def get_objective_per_observation_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing): """Get the cl code for the objective function for a given instance under the given noise model. This function is used by some evaluation routines (like for example LevenbergMarquardt) that need a list of objective values (one per instance point), instead of a single objective function scalar. This function provides the information to build that list. Args: fname (str): the name of the resulting function inst_per_problem (int): the number of instances per problem eval_fname (str): the name of the function that can be called to get the evaluation, its signature is: .. code-block:: c double (const void* data, const mot_float_type* x, const uint observation_index); obs_fname (str): the name of the function that can be called for the observed data, its signature is: .. code-block:: c double (const void* data, const uint observation_index); param_listing (str): the parameter listings for the parameters of the noise model Returns: str: The objective function for the given observation index under this noise model, its signature is: .. code-block:: c double (const void* const data, mot_float_type* const x, const uint observation_index); """ def get_log_likelihood_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing, full_likelihood=True): """Get the cl code for the log likelihood function under the given noise model. Args: fname (str): the name of the resulting function inst_per_problem (int): the number of instances per problem eval_fname (str): the name of the function that can be called to get the evaluation, its signature is: .. code-block:: c double (const void* data, const mot_float_type* x, const uint observation_index); obs_fname (str): the name of the function that can be called for the observed data, its signature is: .. code-block:: c double (const void* data, const uint observation_index); param_listing (str): the parameter listings for the parameters of the noise model full_likelihood (boolean): if we want the complete likelihood, or if we can drop the constant terms. The default is the complete likelihood. Disable for speed. Returns: str: the objective function under this noise model, its signature is: .. code-block:: c double (const void* const data, mot_float_type* const x); That is, it always returns a double since the summations may get large. 
""" def get_log_likelihood_per_observation_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing, full_likelihood=True): """Get the cl code for the log likelihood function under the given noise model for the given observation index. Args: fname (str): the name of the resulting function inst_per_problem (int): the number of instances per problem eval_fname (str): the name of the function that can be called to get the evaluation, its signature is: .. code-block:: c double (const void* data, const mot_float_type* x, const uint observation_index); obs_fname (str): the name of the function that can be called for the observed data, its signature is: .. code-block:: c double (const void* data, const uint observation_index); param_listing (str): the parameter listings for the parameters of the noise model full_likelihood (boolean): if we want the complete likelihood, or if we can drop the constant terms. The default is the complete likelihood. Disable for speed. Returns: str: the objective function under this noise model, its signature is: .. code-block:: c double (const void* const data, mot_float_type* const x, const uint observation_index); """ def get_noise_std_param_name(self): """Get the name of the parameter that is associated with the noise standard deviation in the problem data. Returns: str: the name of the parameter that is associated with the noise_std in the problem data. """ return 'sigma' class SumOfSquaresEvaluationModel(EvaluationModel): def __init__(self): """Evaluates the distance between the estimated signal and the data using the sum of squared differences. This is implemented as:: sum((observation - evaluation)^2) """ super(EvaluationModel, self).__init__('SumOfSquaresNoise', 'sumOfSquaresNoise', (), ()) def get_objective_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing): return ''' double ''' + fname + '''(const void* const data, mot_float_type* const x){ ''' + param_listing + ''' double sum = 0.0; for(uint i = 0; i < ''' + str(inst_per_problem) + '''; i++){ sum += pown(''' + obs_fname + '''(data, i) - ''' + eval_fname + '''(data, x, i), 2); } return sum; } ''' def get_objective_per_observation_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing): return ''' double ''' + fname + '''(const void* const data, mot_float_type* const x, const uint observation_index){ ''' + param_listing + ''' return ''' + obs_fname + '''(data, observation_index) - ''' + eval_fname + '''(data, x, observation_index); } ''' def get_log_likelihood_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing, full_likelihood=True): return ''' double ''' + fname + '''(const void* const data, const mot_float_type* const x){ ''' + param_listing + ''' double sum = 0.0; for(uint i = 0; i < ''' + str(inst_per_problem) + '''; i++){ sum += pown(''' + obs_fname + '''(data, i) - ''' + eval_fname + '''(data, x, i), 2); } return - sum; } ''' def get_log_likelihood_per_observation_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing, full_likelihood=True): return ''' double ''' + fname + '''(const void* const data, const mot_float_type* const x, const uint observation_index){ ''' + param_listing + ''' return - (pown(''' + obs_fname + '''(data, observation_index) - ''' + eval_fname + '''(data, x, observation_index), 2)); } ''' class GaussianEvaluationModel(EvaluationModel): def __init__(self): """This uses the log of the Gaussian PDF for the maximum likelihood estimator and for the log likelihood. 
The PDF is defined as: .. code-block:: c PDF = 1/(sigma * sqrt(2*pi)) * exp(-(observation - evaluation)^2 / (2 * sigma^2)) To have the joined probability over all instances one would normally have to take the product over all ``n`` instances: .. code-block:: c product(PDF) Instead of taking the product of this PDF we take the sum of the log of the PDF: .. code-block:: c sum(log(PDF)) Where the log of the PDF is given by: .. code-block:: c log(PDF) = - ((observation - evaluation)^2 / (2 * sigma^2)) - log(sigma * sqrt(2*pi)) For the maximum likelihood estimator we then need to use the negative of this sum: .. code-block:: c - sum(log(PDF)). """ super(GaussianEvaluationModel, self).__init__( 'GaussianNoise', 'gaussianNoiseModel', (FreeParameter(SimpleCLDataType.from_string('mot_float_type'), 'sigma', True, 1, 0, 'INFINITY', parameter_transform=ClampTransform()),), ()) def get_objective_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing): """Get the Gaussian objective function. This omits the constant terms for speed reasons. Omitted terms are: .. code-block:: c + log(GaussianNoise_sigma * sqrt(2 * M_PI)) """ return ''' double ''' + fname + '''(const void* const data, mot_float_type* const x){ ''' + param_listing + ''' double sum = 0.0; for(uint i = 0; i < ''' + str(inst_per_problem) + '''; i++){ sum += pown(''' + obs_fname + '''(data, i) - ''' + eval_fname + '''(data, x, i), 2); } return sum / (2 * GaussianNoise_sigma * GaussianNoise_sigma); } ''' def get_objective_per_observation_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing): return ''' double ''' + fname + '''(const void* const data, mot_float_type* const x, const uint observation_index){ ''' + param_listing + ''' return ''' + obs_fname + '''(data, observation_index) - ''' + eval_fname + '''(data, x, observation_index); } ''' def get_log_likelihood_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing, full_likelihood=True): return ''' double ''' + fname + '''(const void* const data, const mot_float_type* const x){ ''' + param_listing + ''' double sum = 0.0; for(uint i = 0; i < ''' + str(inst_per_problem) + '''; i++){ sum += pown(''' + obs_fname + '''(data, i) - ''' + eval_fname + '''(data, x, i), 2); } return - sum / (2 * GaussianNoise_sigma * GaussianNoise_sigma) ''' + ('-' + str(inst_per_problem) + ' * log(GaussianNoise_sigma * sqrt(2 * M_PI))' if full_likelihood else '') + '''; } ''' def get_log_likelihood_per_observation_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing, full_likelihood=True): return ''' double ''' + fname + '''(const void* const data, const mot_float_type* const x, const uint observation_index){ ''' + param_listing + ''' return - pown(''' + obs_fname + '''(data, observation_index) - ''' + eval_fname + '''(data, x, observation_index), 2) / (2 * GaussianNoise_sigma * GaussianNoise_sigma) ''' + ('-' + str(inst_per_problem) + ' * log(GaussianNoise_sigma * sqrt(2 * M_PI))' if full_likelihood else '') + '''; } ''' class OffsetGaussianEvaluationModel(EvaluationModel): def __init__(self): """This uses the log of the Gaussian PDF for the maximum likelihood estimator and for the log likelihood. The PDF is defined as: .. code-block:: c PDF = 1/(sigma * sqrt(2*pi)) * exp(-(observation - sqrt(evaluation^2 + sigma^2))^2 / (2 * sigma^2)) To have the joined probability over all instances one would have to take the product over all n instances: .. 
code-block:: c product(PDF) Instead of taking the product of this PDF we take the sum of the log of the PDF: .. code-block:: c sum(log(PDF)) Where the log of the PDF is given by: .. code-block:: c log(PDF) = - ((observation - sqrt(evaluation^2 + sigma^2))^2 / (2 * sigma^2)) - log(sigma * sqrt(2*pi)) For the maximum likelihood estimator we use the negative of this sum: .. code-block:: c -sum_n(log(PDF)). """ super(OffsetGaussianEvaluationModel, self).__init__( 'OffsetGaussianNoise', 'offsetGaussianNoiseModel', (FreeParameter(SimpleCLDataType.from_string('mot_float_type'), 'sigma', True, 1, 0, 'INFINITY', parameter_transform=ClampTransform()),), ()) def get_objective_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing): """Get the Offset Gaussian objective function. This omits the constant terms for speed reasons. Omitted terms are: .. code-block:: c (+ log(OffsetGaussianNoise_sigma * sqrt(2 * M_PI))) """ return ''' double ''' + fname + '''(const void* const data, mot_float_type* const x){ ''' + param_listing + ''' double sum = 0.0; for(uint i = 0; i < ''' + str(inst_per_problem) + '''; i++){ sum += pown(''' + obs_fname + '''(data, i) - sqrt(pown(''' + eval_fname + '''(data, x, i), 2) + (OffsetGaussianNoise_sigma * OffsetGaussianNoise_sigma)), 2); } return sum / (2 * pown(OffsetGaussianNoise_sigma, 2)); } ''' def get_objective_per_observation_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing): return ''' double ''' + fname + '''(const void* const data, mot_float_type* const x, const uint observation_index){ ''' + param_listing + ''' return ''' + obs_fname + '''(data, observation_index) - sqrt(pown(''' + eval_fname + '''(data, x, observation_index), 2) + (OffsetGaussianNoise_sigma * OffsetGaussianNoise_sigma)); } ''' def get_log_likelihood_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing, full_likelihood=True): return ''' double ''' + fname + '''(const void* const data, mot_float_type* const x){ ''' + param_listing + ''' double sum = 0.0; for(uint i = 0; i < ''' + str(inst_per_problem) + '''; i++){ sum += pown(''' + obs_fname + '''(data, i) - sqrt(pown(''' + eval_fname + '''(data, x, i), 2) + (OffsetGaussianNoise_sigma * OffsetGaussianNoise_sigma)), 2); } return - sum / (2 * pown(OffsetGaussianNoise_sigma, 2)) ''' + ('-' + str(inst_per_problem) + ' * log(OffsetGaussianNoise_sigma * sqrt(2 * M_PI))' if full_likelihood else '') + '''; } ''' def get_log_likelihood_per_observation_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing, full_likelihood=True): return ''' double ''' + fname + '''(const void* const data, mot_float_type* const x, const uint observation_index){ ''' + param_listing + ''' return - (pown(''' + obs_fname + '''(data, observation_index) - sqrt(pown(''' + eval_fname + '''(data, x, observation_index), 2) + (OffsetGaussianNoise_sigma * OffsetGaussianNoise_sigma)), 2)) / (2 * pown(OffsetGaussianNoise_sigma, 2)) ''' + ('-' + str(inst_per_problem) + ' * log(OffsetGaussianNoise_sigma * sqrt(2 * M_PI))' if full_likelihood else '') + '''; } ''' class RicianEvaluationModel(EvaluationModel): def __init__(self): """This uses the log of the Rice PDF for the maximum likelihood estimator and for the log likelihood. The PDF is defined as: .. code-block:: c PDF = (observation/sigma^2) * exp(-(observation^2 + evaluation^2) / (2 * sigma^2)) * bessel_i0((observation * evaluation) / sigma^2) Where where ``bessel_i0(z)`` is the modified Bessel function of the first kind with order zero. 
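As a host-side sanity check of this log-PDF (a minimal sketch, assuming NumPy and SciPy are available; the values are illustrative):

.. code-block:: python

    import numpy as np
    from scipy.special import i0e
    from scipy.stats import rice

    observation, evaluation, sigma = 4.0, 3.0, 1.2   # illustrative values
    z = observation * evaluation / sigma ** 2
    log_bessel_i0 = np.log(i0e(z)) + z               # numerically stable log(I0(z))
    log_pdf = (np.log(observation / sigma ** 2)
               - (observation ** 2 + evaluation ** 2) / (2 * sigma ** 2)
               + log_bessel_i0)
    assert np.isclose(log_pdf, rice.logpdf(observation, b=evaluation / sigma, scale=sigma))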
To have the joined probability over all instances one would have to take the product over all n instances: .. code-block:: c product(PDF) Instead of taking the product of this PDF over all instances we take the sum of the log of the PDF: .. code-block:: c sum(log(PDF)) Where the log of the PDF is given by: .. code-block:: c log(PDF) = log(observation/sigma^2) - (observation^2 + evaluation^2) / (2 * sigma^2) + log(bessel_i0((observation * evaluation) / sigma^2)) For the maximum likelihood estimator we use the negative of this sum: .. code-block:: c -sum(log(PDF)). """ super(RicianEvaluationModel, self).__init__( 'RicianNoise', 'ricianNoiseModel', (FreeParameter(SimpleCLDataType.from_string('mot_float_type'), 'sigma', True, 1, 0, 'INFINITY', parameter_transform=ClampTransform()),), (Bessel(),)) def get_objective_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing): """Get the Rician objective function. This omits the constant terms for speed reasons. Omitted terms are: .. code-block:: c + log(observation / (RicianNoise_sigma * RicianNoise_sigma)) - ((observation * observation) / (2 * (RicianNoise_sigma * RicianNoise_sigma))) """ return ''' double ''' + fname + '''(const void* const data, mot_float_type* const x){ ''' + param_listing + ''' double sum = 0.0; double observation; double evaluation; for(uint i = 0; i < ''' + str(inst_per_problem) + '''; i++){ observation = (double)''' + obs_fname + '''(data, i); evaluation = (double)''' + eval_fname + '''(data, x, i); sum += - ((evaluation * evaluation) / (2 * RicianNoise_sigma * RicianNoise_sigma)) + log_bessel_i0((observation * evaluation) / (RicianNoise_sigma * RicianNoise_sigma)); } return -sum; } ''' def get_objective_per_observation_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing): return ''' double ''' + fname + '''(const void* const data, mot_float_type* const x, const uint observation_index){ ''' + param_listing + ''' double observation = (double)''' + obs_fname + '''(data, observation_index); double evaluation = (double)''' + eval_fname + '''(data, x, observation_index); return - ((evaluation * evaluation) / (2 * RicianNoise_sigma * RicianNoise_sigma)) + log_bessel_i0((observation * evaluation) / (RicianNoise_sigma * RicianNoise_sigma)); } ''' def get_log_likelihood_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing, full_likelihood=True): return ''' double ''' + fname + '''(const void* const data, const mot_float_type* const x){ ''' + param_listing + ''' double sum = 0.0; double observation; double evaluation; for(uint i = 0; i < ''' + str(inst_per_problem) + '''; i++){ observation = (double)''' + obs_fname + '''(data, i); evaluation = (double)''' + eval_fname + '''(data, x, i); sum += log(observation / (RicianNoise_sigma * RicianNoise_sigma)) - ((observation * observation) / (2 * RicianNoise_sigma * RicianNoise_sigma)) - ((evaluation * evaluation) / (2 * RicianNoise_sigma * RicianNoise_sigma)) + log_bessel_i0((observation * evaluation) / (RicianNoise_sigma * RicianNoise_sigma)); } return sum; } ''' def get_log_likelihood_per_observation_function(self, fname, inst_per_problem, eval_fname, obs_fname, param_listing, full_likelihood=True): return ''' double ''' + fname + '''(const void* const data, const mot_float_type* const x, const uint observation_index){ ''' + param_listing + ''' double observation = (double)''' + obs_fname + '''(data, observation_index); double evaluation = (double)''' + eval_fname + '''(data, x, observation_index); return 
log(observation / (RicianNoise_sigma * RicianNoise_sigma)) - ((observation * observation) / (2 * RicianNoise_sigma * RicianNoise_sigma)) - ((evaluation * evaluation) / (2 * RicianNoise_sigma * RicianNoise_sigma)) + log_bessel_i0((observation * evaluation) / (RicianNoise_sigma * RicianNoise_sigma)); } ''' PK]J<#/^/^$mot/model_building/model_builders.pyimport numpy as np import copy from six import string_types from mot.cl_data_type import SimpleCLDataType from mot.cl_routines.mapping.calc_dependent_params import CalculateDependentParameters from mot.cl_routines.sampling.metropolis_hastings import DefaultMHState from mot.model_building.cl_functions.model_functions import Weight from mot.model_building.cl_functions.parameters import CurrentObservationParam, StaticMapParameter, ProtocolParameter, \ ModelDataParameter, FreeParameter from mot.model_building.data_adapter import SimpleDataAdapter from mot.model_building.parameter_functions.dependencies import SimpleAssignment, AbstractParameterDependency from mot.model_interfaces import OptimizeModelInterface, SampleModelInterface from mot.utils import is_scalar, all_elements_equal, get_single_value, results_to_dict, topological_sort __author__ = 'Robbert Harms' __date__ = "2014-03-14" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class OptimizeModelBuilder(OptimizeModelInterface): def __init__(self, name, model_tree, evaluation_model, signal_noise_model=None, problem_data=None, enforce_weights_sum_to_one=True): """Create a new model builder that can construct an optimization model using parts. Args: name (str): the name of the model model_tree (mot.model_building.trees.CompartmentModelTree): the model tree object evaluation_model (mot.model_building.evaluation_models.EvaluationModel): the evaluation model to use for the resulting complete model signal_noise_model (mot.model_building.signal_noise_models.SignalNoiseModel): the optional signal noise model to use to add noise to the model prediction problem_data (ProblemData): the problem data object enforce_weights_sum_to_one (boolean): if we want to enforce that weights sum to one. This does the following things; it fixes the first weight to the sum of the others and it adds a transformation that ensures that those other weights sum to at most one. Attributes: problems_to_analyze (list): the list with problems we want to analyze. Suppose we have a few thousands problems defined in this model, but we want to run the optimization only on a few problems. By setting this attribute to a list of problem indices, only those problems will be analyzed. use_parameter_transformations (boolean): set to False to disable the parameter transformations. This basically sets the encode and decode functions to the identity function. """ super(OptimizeModelBuilder, self).__init__() self._name = name self._model_tree = model_tree self._evaluation_model = evaluation_model self._signal_noise_model = signal_noise_model self._enforce_weights_sum_to_one = enforce_weights_sum_to_one self.use_parameter_transformations = True self._double_precision = False self._dependency_store = _DependencyStore() self._model_functions_info = self._init_model_information_container( self._dependency_store, model_tree, evaluation_model, signal_noise_model) self._post_optimization_modifiers = [] self.problems_to_analyze = None # The values to use for the parameters, this is the place subclasses should use for their information. 
# At init this is filled with the values as defined in the parameters. self._parameter_values = {'{}.{}'.format(m.name, p.name): p.value for m, p in self._model_functions_info.get_free_parameters_list() + self._model_functions_info.get_static_parameters_list()} self._lower_bounds = {'{}.{}'.format(m.name, p.name): p.lower_bound for m, p in self._model_functions_info.get_free_parameters_list()} self._upper_bounds = {'{}.{}'.format(m.name, p.name): p.upper_bound for m, p in self._model_functions_info.get_free_parameters_list()} self._problem_data = None if problem_data: self.set_problem_data(problem_data) self._set_default_dependencies() def _init_model_information_container(self, dependency_store, model_tree, evaluation_model, signal_noise_model): """Get the model information container object. This is called in the __init__ to provide the new model with the correct subclass function information object. The rationale is that some subclasses may have additional parameters not present in optimization. For example, in sampling one can have priors with parameters. These parameters must be added to the model and the best point to do that is in the ModelFunctionsInformation object. Returns: ModelFunctionsInformation: the model function information object """ return ModelFunctionsInformation(dependency_store, model_tree, evaluation_model, signal_noise_model) @property def name(self): """See super class OptimizeModelInterface for details""" return self._name @property def double_precision(self): """See super class OptimizeModelInterface for details""" return self._double_precision @double_precision.setter def double_precision(self, value): """Set the value for double_precision. Args: value (boolean): if we would like to do the computations in double of single floating point type. """ self._double_precision = value def fix(self, model_param_name, value): """Fix the given model.param to the given value. Args: model_param_name (string): A model.param name like 'Ball.d' value (scalar or vector or string or AbstractParameterDependency): The value to fix the given parameter to Returns: Returns self for chainability """ if isinstance(value, (string_types, AbstractParameterDependency)): self._add_parameter_dependency(model_param_name, value) else: self._dependency_store.remove_dependency(model_param_name) self._parameter_values[model_param_name] = value self._model_functions_info.set_fixed_to_value(model_param_name, True) return self def init(self, model_param_name, value): """Init the given model.param to the given value. Args: model_param_name (string): A model.param name like 'Ball.d' value (scalar or vector): The value to initialize the given parameter to Returns: Returns self for chainability """ self._parameter_values[model_param_name] = value return self def set_initial_parameters(self, initial_params): """Update the initial parameters for this model by the given values. This only affects free parameters. Args: initial_params (dict): a dictionary containing as keys full parameter names (.) and as values numbers or arrays to be used as starting point """ for m, p in self._model_functions_info.get_free_parameters_list(): param_name = '{}.{}'.format(m.name, p.name) if param_name in initial_params: self.init(param_name, initial_params[param_name]) return self def set_lower_bound(self, model_param_name, value): """Set the lower bound for the given parameter to the given value. 
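For example, bounds can be set in a chained fashion since these setters return ``self`` (a sketch; the parameter name and values are illustrative):

.. code-block:: python

    model.set_lower_bound('Ball.d', 0).set_upper_bound('Ball.d', 1e-8)  # illustrative bounds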
Args: model_param_name (string): A model.param name like 'Ball.d' value (scalar or vector): The value to set the lower bounds to Returns: Returns self for chainability """ self._lower_bounds[model_param_name] = value return self def set_lower_bounds(self, lower_bounds): """Apply multiple lower bounds from a dictionary. Args: lower_bounds (dict): per parameter a lower bound Returns: Returns self for chainability """ for param, value in lower_bounds.items(): self.set_lower_bound(param, value) return self def set_upper_bound(self, model_param_name, value): """Set the upper bound for the given parameter to the given value. Args: model_param_name (string): A model.param name like 'Ball.d' value (scalar or vector): The value to set the upper bounds to Returns: Returns self for chainability """ self._upper_bounds[model_param_name] = value return self def set_upper_bounds(self, upper_bounds): """Apply multiple upper bounds from a dictionary. Args: upper_bounds (dict): per parameter a upper bound Returns: Returns self for chainability """ for param, value in upper_bounds.items(): self.set_upper_bound(param, value) return self def unfix(self, model_param_name): """Unfix the given model.param Args: model_param_name (string): A model.param name like 'Ball.d' Returns: Returns self for chainability """ self._dependency_store.remove_dependency(model_param_name) self._model_functions_info.set_fixed_to_value(model_param_name, False) return self def has_parameter(self, model_param_name): """Check to see if the given parameter is defined in this model. Args: model_param_name (string): A model.param name like 'Ball.d' Returns: boolean: true if the parameter is defined in this model, false otherwise. """ return self._model_functions_info.has_parameter(model_param_name) def set_problem_data(self, problem_data): """Set the problem data this model will deal with. This will also call the function set_noise_level_std() with the noise_std from the new problem data. Args: problem_data (ProblemData): The container for the problem data we will use for this model. Returns: Returns self for chainability """ self._problem_data = problem_data if self._problem_data.noise_std is not None: self._parameter_values['{}.{}'.format( self._evaluation_model.name, self._evaluation_model.get_noise_std_param_name())] = self._problem_data.noise_std return self def add_post_optimization_modifier(self, model_param_name, mod_routine): """Add a modification function that can update the results of model optimization. The mod routine should be a function accepting a dictionary as input and should return a single map of the same dimension as the maps in the dictionary. The idea is that the mod_routine function gets the result dictionary from the optimization routine and calculates a new map. This map is returned and the dictionary is updated with the returned map as value and the here given model_param_name as key. It is possible to add more than one modifier function. In that case, they are called in the order they were appended to this model. Args: model_param_name (str): the parameter to which to add the modification routine mod_routine (python function): the callback function to apply on the results of the referenced parameter. """ self._post_optimization_modifiers.append((model_param_name, mod_routine)) return self def add_post_optimization_modifiers(self, modifiers): """Add a list of modifier functions. The same as add_post_optimization_modifier() except that it accepts a list of lists. 
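For example (a sketch; the derived map name is hypothetical):

.. code-block:: python

    model.add_post_optimization_modifiers([
        ('Ball.d_squared', lambda results: results['Ball.d'] ** 2),
    ])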
Every element in the list should be a tuple like (model_param_name, mod_routine) Args: modifiers (tuple or list): The list of modifiers to add (in order). """ self._post_optimization_modifiers.extend(modifiers) def get_required_protocol_names(self): """Get a list with the constant data names that are needed for this model to work. For example, an implementing diffusion MRI model might require the presence of the protocol parameter 'g' and 'b'. This function should then return ('g', 'b'). Returns: list: A list of columns names that need to be present in the protocol """ return list(set([p.name for m, p in self._model_functions_info.get_model_parameter_list() if isinstance(p, ProtocolParameter)])) def get_optimization_output_param_names(self): """See super class for details""" items = ['{}.{}'.format(m.name, p.name) for m, p in self._model_functions_info.get_free_parameters_list()] items.extend(name for name, _ in self._post_optimization_modifiers) return items def get_free_param_names(self): """See super class for details""" return ['{}.{}'.format(m.name, p.name) for m, p in self._model_functions_info.get_estimable_parameters_list()] def get_nmr_problems(self): """See super class for details""" if self.problems_to_analyze is None: if self._problem_data: return self._problem_data.get_nmr_problems() return 0 return len(self.problems_to_analyze) def get_nmr_inst_per_problem(self): """See super class for details""" return self._problem_data.get_nmr_inst_per_problem() def get_nmr_estimable_parameters(self): """See super class for details""" return len(self._model_functions_info.get_estimable_parameters_list()) def get_data(self): """See super class for details""" data = [] for data_dict in [self._get_variable_data(), self._get_protocol_data(), self._get_static_data()]: for el in data_dict.values(): data.append(el.get_opencl_data()) return data def get_kernel_data_struct(self, device): """See super class for details""" return self._get_all_kernel_source_items(device)['data_struct'] def get_kernel_param_names(self, device): """See super class for details""" return self._get_all_kernel_source_items(device)['kernel_param_names'] def get_kernel_data_struct_initialization(self, device, variable_name, problem_id_name='gid'): """See super class for details""" data_struct_init = self._get_all_kernel_source_items(device, problem_id_name)['data_struct_init'] struct_code = '0' if data_struct_init: struct_code = ', '.join(data_struct_init) return self.get_kernel_data_struct_type() + ' ' + variable_name + ' = {' + struct_code + '};' def get_kernel_data_struct_type(self): """Get the CL type of the kernel datastruct. 
Returns: str: the CL type of the data struct """ return '_model_data' def get_parameter_decode_function(self, fname='decodeParameters'): func = ''' void ''' + fname + '''(const void* data_void, mot_float_type* x){ ''' if self.use_parameter_transformations: func += self.get_kernel_data_struct_type() + \ '* data = (' + self.get_kernel_data_struct_type() + '*)data_void;' for d in self._get_parameter_transformations()[1]: func += "\n" + "\t" * 4 + d.format('x') if self._enforce_weights_sum_to_one: func += self._get_weight_sum_to_one_transformation() return func + ''' } ''' def get_parameter_encode_function(self, fname='encodeParameters'): func = ''' void ''' + fname + '''(const void* data_void, mot_float_type* x){ ''' if self._enforce_weights_sum_to_one: func += self._get_weight_sum_to_one_transformation() if self.use_parameter_transformations: func += self.get_kernel_data_struct_type() + \ '* data = (' + self.get_kernel_data_struct_type() + '*)data_void;' for d in self._get_parameter_transformations()[0]: func += "\n" + "\t" * 4 + d.format('x') return func + ''' } ''' def get_initial_parameters(self, previous_results=None): """When overriding this function, please note that it should adhere to the attribute problems_to_analyze. Args: previous_results (dict or ndarray): the initialization settings for the specific parameters. The number of items per dictionary item should match the number of problems to analyze, or, if an ndarray is given then the length in the first dimension should match the number of problems to analyze. """ np_dtype = np.float32 if self.double_precision: np_dtype = np.float64 if isinstance(previous_results, np.ndarray): previous_results = results_to_dict(previous_results, self.get_free_param_names()) starting_points = [] for m, p in self._model_functions_info.get_estimable_parameters_list(): param_name = '{}.{}'.format(m.name, p.name) value = self._parameter_values[param_name] if previous_results and param_name in previous_results: starting_points.append(previous_results['{}.{}'.format(m.name, p.name)]) elif is_scalar(value): if self.get_nmr_problems() == 0: starting_points.append(np.full((1, 1), value, dtype=np_dtype)) else: starting_points.append(np.full((self.get_nmr_problems(), 1), value, dtype=np_dtype)) else: if len(value.shape) < 2: value = np.transpose(np.asarray([value])) elif value.shape[1] > value.shape[0]: value = np.transpose(value) else: value = value if self.problems_to_analyze is None: starting_points.append(value) else: starting_points.append(value[self.problems_to_analyze, ...]) starting_points = np.concatenate([np.transpose(np.array([s])) if len(s.shape) < 2 else s for s in starting_points], axis=1) data_adapter = SimpleDataAdapter(starting_points, SimpleCLDataType.from_string('mot_float_type'), self._get_mot_float_type()) return data_adapter.get_opencl_data() def get_lower_bounds(self): """See super class for details""" return list(self._lower_bounds['{}.{}'.format(m.name, p.name)] for m, p in self._model_functions_info.get_estimable_parameters_list()) def get_upper_bounds(self): """See super class for details""" return list(self._upper_bounds['{}.{}'.format(m.name, p.name)] for m, p in self._model_functions_info.get_estimable_parameters_list()) def get_observation_return_function(self, func_name='getObservation'): if self._problem_data.observations.shape[1] < 2: return ''' double ''' + func_name + '''(const void* const data, const uint observation_index){ return ((''' + self.get_kernel_data_struct_type() + '''*)data)->var_data_observations; } ''' 
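# More than one observation per problem: the observations live in an array in the kernel data struct, so index them with observation_index.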
return ''' double ''' + func_name + '''(const void* const data, const uint observation_index){ return ((''' + \ self.get_kernel_data_struct_type() + '''*)data)->var_data_observations[observation_index]; } ''' def get_model_eval_function(self, func_name='evaluateModel'): noise_func_name = func_name + '_signalNoiseModel' func = self._get_model_functions_cl_code(noise_func_name) pre_model_function = self._get_pre_model_expression_eval_function() if pre_model_function: func += pre_model_function func += ''' double ''' + func_name + \ '(const void* const void_data, const mot_float_type* const x, const uint observation_index){' + "\n" func += self.get_kernel_data_struct_type() + '* data = (' + self.get_kernel_data_struct_type() + '*)void_data;' func += self._get_parameters_listing( exclude_list=['{}.{}'.format(m.name, p.name).replace('.', '_') for (m, p) in self._model_functions_info.get_non_model_tree_param_listing()]) if self._signal_noise_model: noise_params_listing = '' for p in self._signal_noise_model.get_free_parameters(): noise_params_listing += "\t" * 4 + self._get_param_listing_for_param(self._signal_noise_model, p) func += "\n" func += noise_params_listing pre_model_code = self._get_pre_model_expression_eval_code() if pre_model_code: func += self._get_pre_model_expression_eval_code() func += "\n" + "\t"*4 + 'return ' + str(self._construct_model_expression(noise_func_name)) func += "\n\t\t\t}" return func def get_objective_function(self, func_name="calculateObjective"): inst_per_problem = self.get_nmr_inst_per_problem() eval_func_name = func_name + '_evaluateModel' obs_func_name = func_name + '_getObservation' param_listing = '' for p in self._evaluation_model.get_free_parameters(): param_listing += self._get_param_listing_for_param(self._evaluation_model, p) func = '' func += self._evaluation_model.get_cl_dependency_code() func += self.get_model_eval_function(eval_func_name) func += self.get_observation_return_function(obs_func_name) func += str(self._evaluation_model.get_objective_function(func_name, inst_per_problem, eval_func_name, obs_func_name, param_listing)) return str(func) def get_objective_per_observation_function(self, func_name="getObjectiveInstanceValue"): inst_per_problem = self.get_nmr_inst_per_problem() eval_func_name = func_name + '_evaluateModel' obs_func_name = func_name + '_getObservation' param_listing = '' for p in self._evaluation_model.get_free_parameters(): param_listing += self._get_param_listing_for_param(self._evaluation_model, p) func = '' func += self._evaluation_model.get_cl_dependency_code() func += self.get_model_eval_function(eval_func_name) func += self.get_observation_return_function(obs_func_name) func += str(self._evaluation_model.get_objective_per_observation_function( func_name, inst_per_problem, eval_func_name, obs_func_name, param_listing)) return str(func) def add_extra_result_maps(self, results_dict): """This adds some extra optimization maps to the results dictionary. This function behaves as a procedure and as a function. The input dict can be updated in place, but it should also return a dict but that is merely for the purpose of chaining. Steps in finalizing the results dict: 1) It first adds the maps for the dependent and fixed parameters 2) Second it adds the extra maps defined in the models itself. 3) Third it loops through the post_optimization_modifiers callback functions for the final updates. 4) Finally it adds additional maps defined in this model subclass For more documentation see the base method. 
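A rough usage sketch (hedged: ``optimizer_output`` is an illustrative placeholder for the parameter matrix returned by an optimization routine):

.. code-block:: python

    from mot.utils import results_to_dict

    results = results_to_dict(optimizer_output, model.get_free_param_names())
    results = model.add_extra_result_maps(results)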
Args: results_dict (dict): A dictionary with as keys the names of the parameters and as values the 1d maps with for each voxel the optimized parameter value. The given dictionary can be altered by this function. Returns: dict: The same result dictionary but with updated values or with additional maps. It should at least return the results_dict. """ self._add_dependent_parameter_maps(results_dict) self._add_fixed_parameter_maps(results_dict) for model in self._model_functions_info.get_model_list(): results_dict.update(model.get_extra_results_maps(results_dict)) for name, routine in self._post_optimization_modifiers: results_dict[name] = routine(results_dict) self._add_finalizing_result_maps(results_dict) return results_dict def _add_fixed_parameter_maps(self, results_dict): """In place add complete maps for the fixed parameters.""" param_lists = self._get_parameter_type_lists() fixed_params = param_lists['fixed'] for (m, p) in fixed_params: if not self._model_functions_info.is_parameter_fixed_to_dependency(m, p): name = '{}.{}'.format(m.name, p.name) value = self._parameter_values['{}.{}'.format(m.name, p.name)] if is_scalar(value): results_dict.update({name: np.tile(np.array([value]), (self.get_nmr_problems(),))}) else: if self.problems_to_analyze is not None: value = value[self.problems_to_analyze, ...] results_dict.update({name: value}) def _add_dependent_parameter_maps(self, results_dict): """In place add complete maps for the dependent parameters.""" param_lists = self._get_parameter_type_lists() if len(param_lists['dependent']): func = '' func += self._get_fixed_parameters_listing(param_lists['fixed']) func += self._get_estimable_parameters_listing(param_lists['estimable']) func += self._get_dependent_parameters_listing(param_lists['dependent']) estimable_params = ['{}.{}'.format(m.name, p.name) for m, p in param_lists['estimable']] estimated_parameters = [results_dict[k] for k in estimable_params] dependent_parameter_names = [('{}.{}'.format(m.name, p.name).replace('.', '_'), '{}.{}'.format(m.name, p.name)) for m, p in param_lists['dependent']] cpd = CalculateDependentParameters(double_precision=self.double_precision) dependent_parameters = cpd.calculate(self, estimated_parameters, func, dependent_parameter_names) results_dict.update(dependent_parameters) def _get_parameter_transformations(self): dep_list = {} for m, p in self._model_functions_info.get_estimable_parameters_list(): dep_list.update({(m, p): (tuple(dep) for dep in p.parameter_transform.dependencies)}) dep_list = topological_sort(dep_list) dec_func_list = [] enc_func_list = [] for m, p in dep_list: name = '{}.{}'.format(m.name, p.name) parameter = p ind = self._model_functions_info.get_parameter_estimable_index(m, p) transform = parameter.parameter_transform dependency_names = [] for dep in transform.dependencies: dep_ind = self._model_functions_info.get_parameter_estimable_index(dep[0], dep[1]) dependency_names.append('{0}[' + str(dep_ind) + ']') if all_elements_equal(self._lower_bounds[name]): lower_bound = str(get_single_value(self._lower_bounds[name])) else: lower_bound = 'data->var_data_lb_' + name.replace('.', '_') if all_elements_equal(self._upper_bounds[name]): upper_bound = str(get_single_value(self._upper_bounds[name])) else: upper_bound = 'data->var_data_ub_' + name.replace('.', '_') s = '{0}[' + str(ind) + '] = ' + transform.get_cl_decode().create_assignment( '{0}[' + str(ind) + ']', lower_bound, upper_bound, dependency_names) + ';' dec_func_list.append(s) s = '{0}[' + str(ind) + '] = ' + 
transform.get_cl_encode().create_assignment( '{0}[' + str(ind) + ']', lower_bound, upper_bound, dependency_names) + ';' enc_func_list.append(s) return tuple(reversed(enc_func_list)), dec_func_list def _add_finalizing_result_maps(self, results_dict): """Add some final results maps to the results dictionary. This called by the function add_extra_result_maps() as last call to add more maps. Args: results_dict (args): the results from model optmization. We are to modify this in-place. """ def _transform_observations(self, observations): """Apply a transformation on the observations before fitting. This function is called by get_problems_var_data() just before the observations are handed over to the CL routine, and just after the the list has been (optionally) limited with self.problems_to_analyze. To implement any behaviour here, you can override this function and add behaviour that changes the observations. Args: observations (ndarray): the 2d matrix with the observations. This is the list of observations *after* the list has been (optionally) limited with self.problems_to_analyze. Returns: observations (ndarray): a 2d matrix of the same shape as the input. This should hold the transformed data. """ return observations def _construct_model_expression(self, noise_func_name): """Construct the model signel expression. This is supposed to be used in get_model_eval_function. Args: noise_func_name (str): the name of the noise function. """ func = '' if self._signal_noise_model: noise_params_func = '' for p in self._signal_noise_model.get_free_parameters(): noise_params_func += ', ' + '{}.{}'.format(self._signal_noise_model.name, p.name).replace('.', '_') func += noise_func_name + '((' + self._build_model_from_tree(self._model_tree, 0) + ')' + \ noise_params_func + ');' else: func += '(' + self._build_model_from_tree(self._model_tree, 0) + ');' return func def _build_model_from_tree(self, node, depth): if not node.children: return self._model_to_string(node.data) else: subfuncs = [] for child in node.children: if child.children: subfuncs.append(self._build_model_from_tree(child, depth+1)) else: subfuncs.append(self._model_to_string(child.data)) operator = node.data func = (' ' + operator + ' ').join(subfuncs) if func[0] == '(': return '(' + func + ')' return '(' + "\n" + ("\t" * int((depth/2)+5)) + func + "\n" + ("\t" * int((depth/2)+4)) + ')' def _model_to_string(self, model): """Convert a model to CL string.""" param_list = [] for param in model.parameter_list: if isinstance(param, ProtocolParameter): param_list.append(param.name) elif isinstance(param, ModelDataParameter): value = self._parameter_values['{}.{}'.format(model.name, param.name)] if all_elements_equal(value): param_list.append(str(get_single_value(value))) else: param_list.append('data->model_data_' + param.name) elif isinstance(param, StaticMapParameter): static_map_value = self._get_static_map_value(model, param) if all_elements_equal(static_map_value): param_list.append(str(get_single_value(static_map_value))) else: if len(static_map_value.shape) > 1 \ and static_map_value.shape[1] == self._problem_data.observations.shape[1]: param_list.append('data->var_data_' + '{}.{}'.format(model.name, param.name).replace('.', '_') + '[observation_index]') else: param_list.append('data->var_data_' + '{}.{}'.format(model.name, param.name).replace('.', '_')) elif isinstance(param, CurrentObservationParam): param_list.append('data->var_data_observations[observation_index]') else: param_list.append('{}.{}'.format(model.name, 
param.name).replace('.', '_')) return model.cl_function_name + '(' + ', '.join(param_list) + ')' def _get_model_functions_cl_code(self, noise_func_name): """Get the model functions CL. This is used in get_model_eval_function().""" cl_code = '' for compartment_model in self._model_tree.get_compartment_models(): cl_code += compartment_model.get_cl_code() + "\n" if self._signal_noise_model: cl_code += self._signal_noise_model.get_signal_function(noise_func_name) return cl_code def _get_parameters_listing(self, exclude_list=()): """Get the CL code for the parameter listing, this goes on top of the evaluate function. Args: exclude_list: an optional list containing parameters to exclude from the listing. This should contain full parameter names like: _ Returns: An CL string that contains all the parameters as primitive data types. """ func = '' param_lists = self._get_parameter_type_lists() func += self._get_protocol_parameters_listing(param_lists['protocol'], exclude_list=exclude_list) func += self._get_fixed_parameters_listing(param_lists['fixed'], exclude_list=exclude_list) func += self._get_estimable_parameters_listing(param_lists['estimable'], exclude_list=exclude_list) func += self._get_dependent_parameters_listing(param_lists['dependent'], exclude_list=exclude_list) return str(func) def _get_estimable_parameters_listing(self, param_list=None, exclude_list=()): """Get the parameter listing for the free parameters. For performance reasons, the parameter list should already be given. If not given it is calculated using: self._get_parameter_type_lists()['estimable'] Args: param_list: the list with the estimable parameters exclude_list: a list of parameters to exclude from this listing """ if param_list is None: param_list = self._get_parameter_type_lists()['estimable'] func = '' estimable_param_counter = 0 for m, p in param_list: name = '{}.{}'.format(m.name, p.name).replace('.', '_') if name not in exclude_list: data_type = p.data_type.cl_type assignment = 'x[' + str(estimable_param_counter) + ']' func += "\t"*4 + data_type + ' ' + name + ' = ' + assignment + ';' + "\n" estimable_param_counter += 1 return func def _get_protocol_parameters_listing(self, param_list=None, exclude_list=()): """Get the parameter listing for the protocol parameters. For performance reasons, the parameter list should already be given. If not given it is calculated using: self._get_parameter_type_lists()['protocol'] Args: param_list: the list with the protocol parameters exclude_list: a list of parameters to exclude from this listing """ protocol_info = self._problem_data.protocol if param_list is None: param_list = self._get_parameter_type_lists()['protocol'] const_params_seen = [] func = '' for m, p in param_list: if ('{}.{}'.format(m.name, p.name).replace('.', '_')) not in exclude_list: data_type = p.data_type.cl_type if p.name not in const_params_seen: if all_elements_equal(protocol_info[p.name]): if p.data_type.is_vector_type: vector_length = p.data_type.vector_length values = [str(val) for val in protocol_info[p.name][0]] if len(values) < vector_length: values.append(str(0)) assignment = '(' + data_type + ')(' + ', '.join(values) + ')' else: assignment = str(float(protocol_info[p.name][0])) else: if p.data_type.is_pointer_type: # this requires generic address spaces available in OpenCL >= 2.0. 
assignment = '&data->protocol_data_' + p.name + '[observation_index]' else: assignment = 'data->protocol_data_' + p.name + '[observation_index]' func += "\t"*4 + data_type + ' ' + p.name + ' = ' + assignment + ';' + "\n" const_params_seen.append(p.name) return func def _get_fixed_parameters_listing(self, param_list=None, exclude_list=()): """Get the parameter listing for the fixed parameters. For performance reasons, the fixed parameter list should already be given. If not given it is calculated using: self._get_parameter_type_lists()['fixed'] Args: dependent_param_list: the list list of fixed params exclude_list: a list of parameters to exclude from this listing """ if param_list is None: param_list = self._get_parameter_type_lists()['fixed'] func = '' for m, p in param_list: name = '{}.{}'.format(m.name, p.name).replace('.', '_') if name not in exclude_list: data_type = p.data_type.raw_data_type value = self._parameter_values['{}.{}'.format(m.name, p.name)] if all_elements_equal(value): assignment = '(' + data_type + ')' + str(float(get_single_value(value))) else: assignment = '(' + data_type + ') data->var_data_' + \ '{}.{}'.format(m.name, p.name).replace('.', '_') func += "\t"*4 + data_type + ' ' + name + ' = ' + assignment + ';' + "\n" return func def _get_dependent_parameters_listing(self, dependent_param_list=None, exclude_list=()): """Get the parameter listing for the dependent parameters. For performance reasons, the dependent parameter list should already be given. If not given it is calculated using: self._get_parameter_type_lists()['dependent'] Args: dependent_param_list: the list list of dependent params exclude_list: a list of parameters to exclude from this listing, note that this will only exclude the definition of the parameter, not the dependency code. """ if dependent_param_list is None: dependent_param_list = self._get_parameter_type_lists()['dependent'] func = '' for m, p in dependent_param_list: pd = self._dependency_store.get_dependency('{}.{}'.format(m.name, p.name)) if pd.pre_transform_code: func += "\t"*4 + self._convert_parameters_dot_to_bar(pd.pre_transform_code) assignment = self._convert_parameters_dot_to_bar(pd.assignment_code) name = '{}.{}'.format(m.name, p.name).replace('.', '_') data_type = p.data_type.raw_data_type if self._model_functions_info.is_parameter_fixed_to_dependency(m, p): if ('{}.{}'.format(m.name, p.name).replace('.', '_')) not in exclude_list: func += "\t"*4 + data_type + ' ' + name + ' = ' + assignment + ';' + "\n" else: func += "\t"*4 + name + ' = ' + assignment + ';' + "\n" return func def _get_fixed_parameters_as_var_data(self): var_data_dict = {} for m, p in self._model_functions_info.get_free_parameters_list(): value = self._parameter_values['{}.{}'.format(m.name, p.name)] if self._model_functions_info.is_fixed_to_value('{}.{}'.format(m.name, p.name)) \ and not all_elements_equal(value) \ and not self._model_functions_info.is_parameter_fixed_to_dependency(m, p): if self.problems_to_analyze is not None: value = value[self.problems_to_analyze, ...] 
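# Non-scalar fixed values are handed to the kernel as variable data through a
# SimpleDataAdapter; uniform/scalar fixed values are instead inlined as
# literals in the generated CL code (see _get_fixed_parameters_listing).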
var_data_dict.update({'{}.{}'.format(m.name, p.name).replace('.', '_'): SimpleDataAdapter(value, p.data_type, self._get_mot_float_type())}) return var_data_dict def _get_static_parameters_as_var_data(self): static_data_dict = {} for m, p in self._model_functions_info.get_static_parameters_list(): static_map_value = self._get_static_map_value(m, p) if not all_elements_equal(static_map_value): data_adapter = SimpleDataAdapter(static_map_value, p.data_type, self._get_mot_float_type()) static_data_dict.update({'{}.{}'.format(m.name, p.name).replace('.', '_'): data_adapter}) return static_data_dict def _get_bounds_as_var_data(self): bounds_dict = {} for m, p in self._model_functions_info.get_free_parameters_list(): lower_bound = self._lower_bounds['{}.{}'.format(m.name, p.name)] upper_bound = self._upper_bounds['{}.{}'.format(m.name, p.name)] if not all_elements_equal(lower_bound): data_adapter = SimpleDataAdapter(lower_bound, p.data_type, self._get_mot_float_type()) bounds_dict.update({'lb_' + '{}.{}'.format(m.name, p.name).replace('.', '_'): data_adapter}) if not all_elements_equal(upper_bound): data_adapter = SimpleDataAdapter(upper_bound, p.data_type, self._get_mot_float_type()) bounds_dict.update({'ub_' + '{}.{}'.format(m.name, p.name).replace('.', '_'): data_adapter}) return bounds_dict def _get_static_map_value(self, model, parameter): """Get the map value for the given parameter of the given model. This first checks if the parameter is defined in the static maps data in the problem data. If not, we try to get it from the value stored in the parameter itself. If that fails as well we raise an error. Also, this only returns the problems for which problems_to_analyze is set. Args: model (ModelFunction): the model function parameter (CLParameter): the parameter for which we want to get the value Returns: ndarray or number: the value for the given parameter. """ data = None value = self._parameter_values.get('{}.{}'.format(model.name, parameter.name), None) if parameter.name in self._problem_data.static_maps: data = self._problem_data.static_maps[parameter.name] elif value is not None: data = value if data is None: raise ValueError('No suitable data could be found for the static parameter {}.'.format(parameter.name)) if is_scalar(data): return data if self.problems_to_analyze is not None: return data[self.problems_to_analyze, ...] return data def _is_non_model_tree_model(self, model): return model is self._evaluation_model or (self._signal_noise_model is not None and model is self._signal_noise_model) def _get_param_listing_for_param(self, m, p): """Get the param listing for one specific parameter. This can be used for example for the noise model params. Please note, that on the moment this function does not support the complete dependency graph for the dependent parameters. 
""" data_type = p.data_type.raw_data_type name = '{}.{}'.format(m.name, p.name).replace('.', '_') assignment = '' if isinstance(p, ProtocolParameter): assignment = 'data->protocol_data_' + p.name + '[observation_index]' elif isinstance(p, FreeParameter): value = self._parameter_values['{}.{}'.format(m.name, p.name)] if self._model_functions_info.is_fixed_to_value('{}.{}'.format(m.name, p.name)) \ and not self._model_functions_info.parameter_has_dependency(m, p): if all_elements_equal(value): assignment = '(' + data_type + ')' + str(float(get_single_value(value))) else: assignment = '(' + data_type + ') data->var_data_' + \ '{}.{}'.format(m.name, p.name).replace('.', '_') elif not self._model_functions_info.parameter_has_dependency(m, p) \ or (self._model_functions_info.parameter_has_dependency(m, p) and not self._model_functions_info.is_parameter_fixed_to_dependency(m, p)): ind = self._model_functions_info.get_parameter_estimable_index(m, p) assignment += 'x[' + str(ind) + ']' if self._model_functions_info.parameter_has_dependency(m, p): return self._get_dependent_parameters_listing(((m, p),)) return data_type + ' ' + name + ' = ' + assignment + ';' + "\n" def _get_parameter_type_lists(self): """Returns a dictionary with the parameters sorted in the types protocol, fixed, estimable and dependent. Parameters may occur in different lists (estimable and dependent for example). """ protocol_parameters = self._model_functions_info.get_protocol_parameters_list() fixed_parameters = [] estimable_parameters = [] depended_parameters = [] for m, p in self._model_functions_info.get_free_parameters_list(exclude_priors=True): if self._model_functions_info.is_fixed_to_value('{}.{}'.format(m.name, p.name)) \ and not self._model_functions_info.parameter_has_dependency(m, p): fixed_parameters.append((m, p)) elif self._model_functions_info.is_parameter_estimable(m, p): estimable_parameters.append((m, p)) if self._model_functions_info.parameter_has_dependency(m, p): ind = self._dependency_store.get_index('{}.{}'.format(m.name, p.name)) depended_parameters.insert(ind, (m, p)) return {'protocol': protocol_parameters, 'fixed': fixed_parameters, 'estimable': estimable_parameters, 'dependent': depended_parameters} def _convert_parameters_dot_to_bar(self, string): """Convert a string containing parameters with . to parameter names with _""" for m, p in self._model_functions_info.get_model_parameter_list(): dname = '{}.{}'.format(m.name, p.name) bname = '{}.{}'.format(m.name, p.name).replace('.', '_') string = string.replace(dname, bname) return string def _init_fixed_duplicates_dependencies(self): """Find duplicate fixed parameters, and make dependencies of them. 
This saves data transfer in CL.""" var_data_dict = {} for m, p in self._model_functions_info.get_free_parameters_list(): value = self._parameter_values['{}.{}'.format(m.name, p.name)] if self._model_functions_info.is_fixed_to_value('{}.{}'.format(m.name, p.name)) \ and not is_scalar(value) \ and not self._model_functions_info.is_parameter_fixed_to_dependency(m, p): duplicate_found = False duplicate_key = None for key, data in var_data_dict.items(): if np.array_equal(data, value): duplicate_found = True duplicate_key = key break if duplicate_found: self._add_parameter_dependency('{}.{}'.format(m.name, p.name), SimpleAssignment(duplicate_key)) else: var_data_dict.update({'{}.{}'.format(m.name, p.name): value}) def _get_variable_data(self): """See super class OptimizeModelInterface for details When overriding this function, please note that it should adhere to the attribute problems_to_analyze. """ var_data_dict = {} observations = self._problem_data.observations if observations is not None: if self.problems_to_analyze is not None: observations = observations[self.problems_to_analyze, ...] observations = self._transform_observations(observations) data_adapter = SimpleDataAdapter(observations, SimpleCLDataType.from_string('mot_float_type*'), self._get_mot_float_type()) var_data_dict.update({'observations': data_adapter}) var_data_dict.update(self._get_fixed_parameters_as_var_data()) var_data_dict.update(self._get_static_parameters_as_var_data()) var_data_dict.update(self._get_bounds_as_var_data()) return var_data_dict def _get_protocol_data(self): protocol_info = self._problem_data.protocol return_data = {} for m, p in self._model_functions_info.get_model_parameter_list(): if isinstance(p, ProtocolParameter): if p.name in protocol_info: if not all_elements_equal(protocol_info[p.name]): const_d = {p.name: SimpleDataAdapter(protocol_info[p.name], p.data_type, self._get_mot_float_type())} return_data.update(const_d) else: exception = 'Protocol parameter "{}" could not be resolved'.format('{}.{}'.format(m.name, p.name)) raise ParameterResolutionException(exception) return return_data def _get_static_data(self): model_data_dict = {} for m, p in self._model_functions_info.get_model_parameter_list(): if isinstance(p, ModelDataParameter): value = self._parameter_values['{}.{}'.format(m.name, p.name)] if not all_elements_equal(value): model_data_dict.update({p.name: SimpleDataAdapter(value, p.data_type, self._get_mot_float_type())}) return model_data_dict def _get_all_kernel_source_items(self, device, problem_id_name='gid'): """Get the CL strings for the kernel source items for most common CL kernels in this library.""" import pyopencl as cl max_constant_buffer_size = device.get_info(cl.device_info.MAX_CONSTANT_BUFFER_SIZE) max_constant_args = device.get_info(cl.device_info.MAX_CONSTANT_ARGS) def _check_array_fits_constant_buffer(array, dtype): """Check if the given array when casted to the given type can be fit into the given max_size Args: array (ndarray): the array we want to fit dtype (np data type): the numpy data type we want to use Returns: boolean: if it fits in the constant memory buffer or not """ return np.product(array.shape) * np.dtype(dtype).itemsize < max_constant_buffer_size constant_args_counter = 0 kernel_param_names = [] data_struct_init = [] data_struct_names = [] for key, data_adapter in self._get_variable_data().items(): clmemtype = 'global' cl_data = data_adapter.get_opencl_data() if data_adapter.allow_local_pointer(): if _check_array_fits_constant_buffer(cl_data, 
data_adapter.get_opencl_numpy_type()): if constant_args_counter < max_constant_args: clmemtype = 'constant' constant_args_counter += 1 param_name = 'var_data_' + str(key) data_type = data_adapter.get_data_type().raw_data_type if data_adapter.get_data_type().is_vector_type: data_type += data_adapter.get_data_type().vector_length kernel_param_names.append(clmemtype + ' ' + data_type + '* ' + param_name) mult = cl_data.shape[1] if len(cl_data.shape) > 1 else 1 if len(cl_data.shape) == 1 or cl_data.shape[1] == 1: data_struct_names.append(data_type + ' ' + param_name) data_struct_init.append(param_name + '[{} * {}]'.format(problem_id_name, mult)) else: data_struct_names.append(clmemtype + ' ' + data_type + '* ' + param_name) data_struct_init.append(param_name + ' + {} * {}'.format(problem_id_name, mult)) for key, data_adapter in self._get_protocol_data().items(): clmemtype = 'global' cl_data = data_adapter.get_opencl_data() if data_adapter.allow_local_pointer(): if _check_array_fits_constant_buffer(cl_data, data_adapter.get_opencl_numpy_type()): if constant_args_counter < max_constant_args: clmemtype = 'constant' constant_args_counter += 1 param_name = 'protocol_data_' + str(key) data_type = data_adapter.get_data_type().raw_data_type if data_adapter.get_data_type().is_vector_type: data_type += str(data_adapter.get_data_type().vector_length) kernel_param_names.append(clmemtype + ' ' + data_type + '* ' + param_name) data_struct_init.append(param_name) data_struct_names.append(clmemtype + ' ' + data_type + '* ' + param_name) for key, data_adapter in self._get_static_data().items(): clmemtype = 'global' param_name = 'model_data_' + str(key) data_type = data_adapter.get_data_type().raw_data_type if data_adapter.get_data_type().is_vector_type: data_type += data_adapter.get_data_type().vector_length data_struct_init.append(param_name) if isinstance(data_adapter.get_opencl_data(), np.ndarray): kernel_param_names.append(clmemtype + ' ' + data_type + '* ' + param_name) data_struct_names.append(clmemtype + ' ' + data_type + '* ' + param_name) else: kernel_param_names.append(data_type + ' ' + param_name) data_struct_names.append(data_type + ' ' + param_name) data_struct = ''' typedef struct{ ''' + ('' if data_struct_names else 'constant void* place_holder;') + ''' ''' + " ".join((name + ";\n" for name in data_struct_names)) + ''' } ''' + self.get_kernel_data_struct_type() + '''; ''' return {'kernel_param_names': kernel_param_names, 'data_struct_names': data_struct_names, 'data_struct_init': data_struct_init, 'data_struct': data_struct} def _get_pre_model_expression_eval_code(self): """The code called in the evaluation function. This is called after the parameters are initialized and before the model signal expression. It can call functions defined in _get_pre_model_expression_eval_function() Returns: str: cl code containing evaluation changes, """ return '' def _get_pre_model_expression_eval_function(self): """Function used in the model evaluation generation function. The idea is that some implementing models may need to change some of the protocol or fixed parameters before they are handed over to the signal expression function. This function is called by the get_model_eval_function function during model evaluation function construction. Returns: str: cl function to be used in conjunction with the output of the function _get_pre_model_expression_eval_model() """ def _set_default_dependencies(self): """Initialize the default dependencies. 
By default this adds dependencies for the fixed data that is used in multiple parameters. Additionally, if enforce weights sum to one is set, this adds the dependency on the first weight. """ self._init_fixed_duplicates_dependencies() if self._enforce_weights_sum_to_one: names = ['{}.{}'.format(m.name, p.name) for (m, p) in self._model_functions_info.get_weights()] if len(names): self._add_parameter_dependency(names[0], SimpleAssignment('1 - ({})'.format(' + '.join(names[1:])))) def _get_mot_float_type(self): """Get the data type for the mot_float_type""" if self.double_precision: return SimpleCLDataType.from_string('double') return SimpleCLDataType.from_string('float') def _get_weight_sum_to_one_transformation(self): """Returns a snippit of CL for the encode and decode functions to force the sum of the weights to 1""" weight_indices = [] for (m, p) in self._model_functions_info.get_estimable_weights(): weight_indices.append(self._model_functions_info.get_parameter_estimable_index(m, p)) if weight_indices: return ''' mot_float_type _weight_sum = ''' + ' + '.join('x[{}]'.format(index) for index in weight_indices) + '''; if(_weight_sum > 1.0){ ''' + '\n'.join('x[{}] /= _weight_sum;'.format(index) for index in weight_indices) + ''' } ''' return '' def _add_parameter_dependency(self, parameter_name, dependency): """Adds a dependency rule to this model. The dependency is supposed to be a ParameterDependency object. The dependencies are executed in the same order as they were added to this model. Args: parameter_name (String): The parameter on which the dependency is applied dependency (ParameterDependency): The dependency rule, an ParameterDependency object """ if parameter_name not in ['{}.{}'.format(m.name, p.name) for m, p in self._model_functions_info.get_model_parameter_list()]: raise ParameterNameException("The parameter name \"{}\" can not be " "found in the model listing.".format(parameter_name)) if isinstance(dependency, string_types): dependency = SimpleAssignment(dependency) self._dependency_store.set_dependency(parameter_name, dependency) return self class SampleModelBuilder(OptimizeModelBuilder, SampleModelInterface): def __init__(self, model_name, model_tree, evaluation_model, signal_noise_model=None, problem_data=None, enforce_weights_sum_to_one=True): super(SampleModelBuilder, self).__init__(model_name, model_tree, evaluation_model, signal_noise_model, problem_data, enforce_weights_sum_to_one=enforce_weights_sum_to_one) def _init_model_information_container(self, dependency_store, model_tree, evaluation_model, signal_noise_model): """Get the model information container object. This is called in the __init__ to provide the new model with the correct subclass function information object. The rationale is that some subclasses may have additional parameters not present in optimization. For example, in sampling one can have priors with parameters. These parameters must be added to the model and the best point to do that is in the ModelFunctionsInformation object. 
Returns: ModelFunctionsInformation: the model function information object """ return ModelFunctionsInformation(dependency_store, model_tree, evaluation_model, signal_noise_model, enable_prior_parameters=True) def get_log_prior_function(self, func_name='getLogPrior', address_space_parameter_vector='private'): prior = '' for i, (m, p) in enumerate(self._model_functions_info.get_estimable_parameters_list()): prior += p.sampling_prior.get_prior_function() prior += ''' mot_float_type {func_name}(const void* data_void, {address_space_parameter_vector} const mot_float_type* const x){{ {kernel_data_struct_type}* data = ({kernel_data_struct_type}*)data_void; mot_float_type prior = 1.0; '''.format(func_name=func_name, address_space_parameter_vector=address_space_parameter_vector, kernel_data_struct_type=self.get_kernel_data_struct_type()) for i, (m, p) in enumerate(self._model_functions_info.get_estimable_parameters_list()): name = '{}.{}'.format(m.name, p.name) if all_elements_equal(self._lower_bounds[name]): lower_bound = str(get_single_value(self._lower_bounds[name])) if lower_bound == '-inf': lower_bound = '-INFINITY' else: lower_bound = 'data->var_data_lb_' + name.replace('.', '_') if all_elements_equal(self._upper_bounds[name]): upper_bound = str(get_single_value(self._upper_bounds[name])) if upper_bound == 'inf': upper_bound = 'INFINITY' else: upper_bound = 'data->var_data_ub_' + name.replace('.', '_') function_name = p.sampling_prior.get_prior_function_name() if m.get_prior_parameters(p): prior_params = [] for prior_param in m.get_prior_parameters(p): if self._model_functions_info.is_parameter_estimable(m, prior_param): estimable_index = self._model_functions_info.get_parameter_estimable_index(m, prior_param) prior_params.append('x[{}]'.format(estimable_index)) else: value = self._parameter_values['{}.{}'.format(m.name, prior_param.name)] if all_elements_equal(value): prior_params.append(str(get_single_value(value))) else: prior_params.append('data->var_data_' + '{}.{}'.format(m.name, prior_param.name).replace('.', '_')) prior += '\tprior *= {}(x[{}], {}, {}, {});\n'.format(function_name, i, lower_bound, upper_bound, ', '.join(prior_params)) else: prior += '\tprior *= {}(x[{}], {}, {});\n'.format(function_name, i, lower_bound, upper_bound) weight_indices = [] for (m, p) in self._model_functions_info.get_estimable_weights(): weight_indices.append(self._model_functions_info.get_parameter_estimable_index(m, p)) if weight_indices: prior += ''' mot_float_type _weight_sum = ''' + ' + '.join('x[{}]'.format(index) for index in weight_indices) + '''; if(_weight_sum > 1.0){ prior *= 0; } ''' prior += '\n\treturn log(prior);\n}' return prior def get_proposal_state(self): np_dtype = np.float32 if self.double_precision: np_dtype = np.float64 proposal_state = [] for m, p in self._model_functions_info.get_estimable_parameters_list(): for param in p.sampling_proposal.get_parameters(): if param.adaptable: value = param.default_value if is_scalar(value): if self.get_nmr_problems() == 0: proposal_state.append(np.full((1, 1), value, dtype=np_dtype)) else: proposal_state.append(np.full((self.get_nmr_problems(), 1), value, dtype=np_dtype)) else: if len(value.shape) < 2: value = np.transpose(np.asarray([value])) elif value.shape[1] > value.shape[0]: value = np.transpose(value) else: value = value if self.problems_to_analyze is None: proposal_state.append(value) else: proposal_state.append(value[self.problems_to_analyze, ...]) proposal_state_matrix = np.concatenate([np.transpose(np.array([s])) if len(s.shape) < 2 
else s for s in proposal_state], axis=1) return proposal_state_matrix def is_proposal_symmetric(self): return all(p.sampling_proposal.is_symmetric() for m, p in self._model_functions_info.get_estimable_parameters_list()) def get_proposal_logpdf(self, func_name='getProposalLogPDF', address_space_proposal_state='private'): return_str = '' for _, p in self._model_functions_info.get_estimable_parameters_list(): return_str += p.sampling_proposal.get_proposal_logpdf_function() return_str += ''' double {func_name}( const uint param_ind, const mot_float_type proposal, const mot_float_type current, {address_space_proposal_state} mot_float_type* const proposal_state){{ switch(param_ind){{ '''.format(func_name=func_name, address_space_proposal_state=address_space_proposal_state) adaptable_parameter_count = 0 for i, (m, p) in enumerate(self._model_functions_info.get_estimable_parameters_list()): return_str += 'case ' + str(i) + ':' + "\n\t\t\t" param_proposal = p.sampling_proposal logpdf_call = 'return ' + param_proposal.get_proposal_logpdf_function_name() + '(proposal, current' for param in param_proposal.get_parameters(): if param.adaptable: logpdf_call += ', proposal_state[' + str(adaptable_parameter_count) + ']' adaptable_parameter_count += 1 else: logpdf_call += ', ' + str(param.default_value) logpdf_call += ');' return_str += logpdf_call + "\n" return_str += "\n\t\t" + '}' + "\n" + 'return 0;' + "\n" return_str += '}' return return_str def get_proposal_function(self, func_name='getProposal', address_space_proposal_state='private'): return_str = '' for _, p in self._model_functions_info.get_estimable_parameters_list(): return_str += p.sampling_proposal.get_proposal_function() return_str += ''' mot_float_type {func_name}( const uint param_ind, const mot_float_type current, void* rng_data, {address_space_proposal_state} mot_float_type* const proposal_state){{ switch(param_ind){{ '''.format(func_name=func_name, address_space_proposal_state=address_space_proposal_state) adaptable_parameter_count = 0 for i, (m, p) in enumerate(self._model_functions_info.get_estimable_parameters_list()): return_str += 'case ' + str(i) + ':' + "\n\t\t\t" param_proposal = p.sampling_proposal proposal_call = 'return ' + param_proposal.get_proposal_function_name() + '(current, rng_data' for param in param_proposal.get_parameters(): if param.adaptable: proposal_call += ', proposal_state[' + str(adaptable_parameter_count) + ']' adaptable_parameter_count += 1 else: proposal_call += ', ' + str(param.default_value) proposal_call += ');' return_str += proposal_call + "\n" return_str += "\n\t\t" + '}' + "\n" + 'return 0;' + "\n" return_str += '}' return return_str def get_proposal_state_update_function(self, func_name='updateProposalState', address_space='private'): return_str = '' for _, p in self._model_functions_info.get_estimable_parameters_list(): if p.sampling_proposal.is_adaptable(): return_str += p.sampling_proposal.get_proposal_update_function().get_update_function( p.sampling_proposal.get_parameters(), address_space=address_space) if self.proposal_state_update_uses_variance(): return_str += ''' void {func_name}({address_space} mot_float_type* const proposal_state, {address_space} ulong* const sampling_counter, {address_space} ulong* const acceptance_counter, {address_space} mot_float_type* const parameter_variance){{ '''.format(func_name=func_name, address_space=address_space) else: return_str += ''' void {func_name}({address_space} mot_float_type* const proposal_state, {address_space} ulong* const sampling_counter, 
{address_space} ulong* const acceptance_counter){{ '''.format(func_name=func_name, address_space=address_space) adaptable_parameter_count = 0 for i, (m, p) in enumerate(self._model_functions_info.get_estimable_parameters_list()): param_proposal = p.sampling_proposal proposal_update_function = param_proposal.get_proposal_update_function() state_params = [] for param in param_proposal.get_parameters(): if param.adaptable: state_params.append('proposal_state + {}'.format(adaptable_parameter_count)) adaptable_parameter_count += 1 if state_params: if proposal_update_function.uses_jump_counters(): state_params.extend(['sampling_counter + {}'.format(i), 'acceptance_counter + {}'.format(i)]) if proposal_update_function.uses_parameter_variance(): state_params.append('parameter_variance[{}]'.format(i)) return_str += ''' // {param_name} {update_func_name}({params}); '''.format(update_func_name=proposal_update_function.get_function_name(param_proposal.get_parameters()), params=', '.join(state_params), param_name='{}.{}'.format(m.name, p.name)) return_str += '}' return return_str def proposal_state_update_uses_variance(self): for i, (m, p) in enumerate(self._model_functions_info.get_estimable_parameters_list()): param_proposal = p.sampling_proposal proposal_update_function = param_proposal.get_proposal_update_function() if any(param.adaptable for param in param_proposal.get_parameters()): if proposal_update_function.uses_parameter_variance(): return True return False def get_log_likelihood_function(self, func_name='getLogLikelihood', evaluation_model=None, full_likelihood=True): evaluation_model = evaluation_model or self._evaluation_model inst_per_problem = self.get_nmr_inst_per_problem() eval_func_name = func_name + '_evaluateModel' obs_func_name = func_name + '_getObservation' param_listing = '' for p in evaluation_model.get_free_parameters(): param_listing += self._get_param_listing_for_param(evaluation_model, p) func = '' func += evaluation_model.get_cl_dependency_code() func += self.get_model_eval_function(eval_func_name) func += self.get_observation_return_function(obs_func_name) func += evaluation_model.get_log_likelihood_function(func_name, inst_per_problem, eval_func_name, obs_func_name, param_listing, full_likelihood=full_likelihood) return func def get_log_likelihood_per_observation_function(self, func_name="getLogLikelihoodPerObservation", evaluation_model=None, full_likelihood=True): evaluation_model = evaluation_model or self._evaluation_model inst_per_problem = self.get_nmr_inst_per_problem() eval_func_name = func_name + '_evaluateModel' obs_func_name = func_name + '_getObservation' param_listing = '' for p in evaluation_model.get_free_parameters(): param_listing += self._get_param_listing_for_param(evaluation_model, p) func = '' func += evaluation_model.get_cl_dependency_code() func += self.get_model_eval_function(eval_func_name) func += self.get_observation_return_function(obs_func_name) func += evaluation_model.get_log_likelihood_per_observation_function( func_name, inst_per_problem, eval_func_name, obs_func_name, param_listing, full_likelihood=full_likelihood) return func def get_metropolis_hastings_state(self): return DefaultMHState(self.get_nmr_problems(), self.get_nmr_estimable_parameters(), self.double_precision) def samples_to_statistics(self, samples_dict): results = {} for key, value in samples_dict.items(): _, param = self._model_functions_info.get_model_parameter_by_name(key) stat_mod = param.sampling_statistics results[key] = stat_mod.get_mean(value) results[key + '.std'] 
= stat_mod.get_std(value) return results def get_proposal_state_names(self): """Get a list of names for the adaptable proposal parameters. Returns: list: list of str with the name for each of the adaptable proposal parameters. This is used by the sampler to create a dictionary of final proposal states. """ return_list = [] for m, p in self._model_functions_info.get_estimable_parameters_list(): for param in p.sampling_proposal.get_parameters(): if param.adaptable: return_list.append('{}.{}.proposal.{}'.format(m.name, p.name, param.name)) return return_list class _DependencyStore(object): def __init__(self): self.names_in_order = [] self.dependencies = {} def set_dependency(self, param_name, dependency): if param_name not in self.names_in_order: self.names_in_order.append(param_name) self.dependencies.update({param_name: dependency}) def get_dependency(self, param_name): return self.dependencies[param_name] def has_dependency(self, param_name): return param_name in self.dependencies def has_dependencies(self): return self.names_in_order def get_index(self, param_name): return self.names_in_order.index(param_name) def remove_dependency(self, param_name): if param_name in self.dependencies: del self.dependencies[param_name] self.names_in_order.remove(param_name) class ModelFunctionsInformation(object): def __init__(self, dependency_store, model_tree, evaluation_model, signal_noise_model=None, enable_prior_parameters=False): """Contains centralized information about the model functions in the model builder parent. Args: dependency_store (_DependencyStore): the shared dependency store containing information about the parameters that depend on each other model_tree (mot.model_building.trees.CompartmentModelTree): the model tree object evaluation_model (mot.model_building.evaluation_models.EvaluationModel): the evaluation model to use for the resulting complete model signal_noise_model (mot.model_building.signal_noise_models.SignalNoiseModel): the signal noise model to use to add noise to the model prediction enable_prior_parameters (boolean): adds possible prior parameters to the list of parameters in the model """ self._dependency_store = dependency_store self._model_tree = model_tree self._evaluation_model = evaluation_model self._signal_noise_model = signal_noise_model self._enable_prior_parameters = enable_prior_parameters self._model_list = self._get_model_list() self._model_parameter_list = self._get_model_parameter_list() self._prior_parameters_info = self._get_prior_parameters_info() self._check_for_double_model_names() self._fixed_parameters = {'{}.{}'.format(m.name, p.name): p.fixed for m, p in self.get_model_parameter_list() if isinstance(p, FreeParameter)} def set_fixed_to_value(self, parameter_name, fix_state): """Set the given parameter fixed. This only works with free parameters. Args: parameter_name (str): the name of the parameter to fix or unfix fix_state (boolean): if the parameter is fixed or not """ self._fixed_parameters[parameter_name] = fix_state def is_fixed_to_value(self, parameter_name): """Check if the given (free) parameter is fixed to a value. Args: parameter_name (str): the name of the parameter to fix or unfix Returns: boolean: if the parameter is fixed to a value or not """ return self._fixed_parameters[parameter_name] def get_model_list(self): """Get the list of all the applicable model functions Returns: list of mot.model_building.cl_functions.base.ModelFunction: the list of model functions. 
""" return self._model_list def get_model_parameter_list(self): """Get a list of all model, parameter tuples. Returns: list of tuple: the list of tuples containing (model, parameters) """ param_list = copy.copy(self._model_parameter_list) if self._enable_prior_parameters: for prior_info in self._prior_parameters_info.values(): if prior_info: param_list.extend(prior_info) return param_list def get_free_parameters_list(self, exclude_priors=False): """Gets the free parameters as (model, parameter) tuples from the model listing. This does not incorporate checking for fixed parameters. Args: exclude_priors (boolean): if we want to exclude the priors or not Returns: list of tuple: the list of tuples containing (model, parameters) """ free_params = list((m, p) for m, p in self._model_parameter_list if isinstance(p, FreeParameter)) if not exclude_priors: if self._enable_prior_parameters: prior_params = [] for m, p in free_params: prior_params.extend((m, prior_p) for prior_p in m.get_prior_parameters(p) if self.is_parameter_estimable(m, p) and isinstance(prior_p, FreeParameter)) free_params.extend(prior_params) return free_params def get_static_parameters_list(self): """Gets the static parameters (as model, parameter tuples) from the model listing.""" static_params = list((m, p) for m, p in self.get_model_parameter_list() if isinstance(p, StaticMapParameter)) if self._enable_prior_parameters: prior_params = [] for m, p in self.get_estimable_parameters_list(): prior_params.extend((m, prior_p) for prior_p in m.get_prior_parameters(p) if isinstance(prior_p, FreeParameter)) static_params.extend(prior_params) return static_params def get_protocol_parameters_list(self): """Gets the static parameters (as model, parameter tuples) from the model listing.""" return list((m, p) for m, p in self.get_model_parameter_list() if isinstance(p, ProtocolParameter)) def get_model_parameter_by_name(self, parameter_name): """Get the parameter object of the given full parameter name in dot format. Args: parameter_name (string): the parameter name in dot format: . Returns: tuple: containing the (model, parameter) pair for the given parameter name """ for m, p in self.get_model_parameter_list(): if '{}.{}'.format(m.name, p.name) == parameter_name: return m, p raise ValueError('The parameter with the name "{}" could not be found in this model.'.format(parameter_name)) def get_non_model_tree_param_listing(self): """Get the model, parameter tuples for all parameters not in the model tree. Basically this returns the parameters of the evaluation and signal noise model. Returns: tuple: the (model, parameter) tuple for all non model tree parameters """ listing = [] for p in self._evaluation_model.parameter_list: listing.append((self._evaluation_model, p)) if self._signal_noise_model: for p in self._signal_noise_model.parameter_list: listing.append((self._signal_noise_model, p)) return listing def parameter_has_dependency(self, model, param): """Check if the given model and parameter name combo has a dependency. Args: model (mot.model_building.cl_functions.base.ModelFunction): the model function param (mot.model_building.cl_functions.parameters.CLFunctionParameter): the parameter Returns: boolean: if the given parameter has a dependency """ return self._dependency_store.has_dependency('{}.{}'.format(model.name, param.name)) def is_parameter_fixed_to_dependency(self, model, param): """Check if the given model and parameter name combo has a dependency. 
Args: model (mot.model_building.cl_functions.base.ModelFunction): the model function param (mot.model_building.cl_functions.parameters.CLFunctionParameter): the parameter Returns: boolean: if the given parameter is fixed to a dependency. Returns False if either this parameter has no dependency or if it is not fixed to it. """ return self.parameter_has_dependency(model, param) def is_parameter_estimable(self, model, param): """Check if the given model parameter is estimable. A parameter is estimable if it is free, not fixed to dependencies and not fixed to any static value. Args: model (mot.model_building.cl_functions.base.ModelFunction): the model function param (mot.model_building.cl_functions.parameters.CLFunctionParameter): the parameter Returns: boolean: true if the parameter is estimable, false otherwise """ return isinstance(param, FreeParameter) and \ not self._fixed_parameters.get('{}.{}'.format(model.name, param.name), False) and \ not self.is_parameter_fixed_to_dependency(model, param) def get_estimable_parameters_list(self): """Gets a list (as model, parameter tuples) of all parameters that are estimable. Returns: list of tuple: the list of estimable parameters """ estimable_parameters = [(m, p) for m, p in self._model_parameter_list if self.is_parameter_estimable(m, p)] if self._enable_prior_parameters: prior_params = [] for m, p in estimable_parameters: prior_params.extend((m, prior_p) for prior_p in m.get_prior_parameters(p) if not prior_p.fixed) estimable_parameters.extend(prior_params) return estimable_parameters def get_weights(self): """Get all the model functions/parameter tuples of the models that are a subclass of Weight Returns: list: the list of compartment models that are a subclass of Weight as (model, parameter) tuples. """ weight_models = [m for m in self._model_tree.get_compartment_models() if isinstance(m, Weight)] weights = [] for m in weight_models: for p in m.get_free_parameters(): weights.append((m, p)) return weights def get_estimable_weights(self): """Get all the estimable weights. Returns: list of tuples: the list of compartment models/parameter pairs for models that are a subclass of Weight """ return [(m, p) for m, p in self.get_weights() if self.is_parameter_estimable(m, p)] def _get_model_parameter_list(self): """Get a list of all model, parameter tuples. Returns: list of tuple: the list of tuples containing (model, parameters) """ return list((m, p) for m in self._model_list for p in m.parameter_list) def _get_prior_parameters_info(self): """Get a dictionary with the prior parameters for each of the model parameters. Returns: dict: lookup dictionary matching model names to parameter lists """ prior_lookup_dict = {} for model in self._model_list: for param in model.get_free_parameters(): prior_lookup_dict.update({ '{}.{}'.format(model.name, param.name): list((model, p) for p in model.get_prior_parameters(param)) }) return prior_lookup_dict def get_parameter_estimable_index(self, model, param): """Get the index of this parameter in the parameters list This returns the position of this parameter in the 'x', parameter vector in the CL kernels. Args: model (mot.model_building.cl_functions.base.ModelFunction): the model function param (mot.model_building.cl_functions.parameters.CLFunctionParameter): the parameter Returns: int: the index of the requested parameter in the list of optimized parameters Raises: ValueError: if the given parameter could not be found as an estimable parameter. 
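Example (sketch; ``info`` denotes this ModelFunctionsInformation instance and
``model``/``param`` an estimable model/parameter pair, all assumptions):

    ind = info.get_parameter_estimable_index(model, param)
    # the parameter value is then addressed as x[ind] in the CL kernels
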
""" ind = 0 for m, p in self.get_estimable_parameters_list(): if m.name == model.name and p.name == param.name: return ind ind += 1 raise ValueError('The given estimable parameter "{}" could not be found in this model'.format( '{}.{}'.format(model.name, param.name))) def has_parameter(self, model_param_name): """Check to see if the given parameter is defined in this model. Args: model_param_name (string): A model.param name like 'Ball.d' Returns: boolean: true if the parameter is defined in this model, false otherwise. """ for m, p in self.get_model_parameter_list(): if '{}.{}'.format(m.name, p.name) == model_param_name: return True return False def _get_model_list(self): """Get the list of all the applicable model functions""" models = list(self._model_tree.get_compartment_models()) models.append(self._evaluation_model) if self._signal_noise_model: models.append(self._signal_noise_model) return models def _check_for_double_model_names(self): models = self._model_list model_names = [] for m in models: if m.name in model_names: raise DoubleModelNameException("Double model name detected in the model tree.", m.name) model_names.append(m.name) class ParameterNameException(Exception): """Thrown when the a parameter of an given name could not be found.""" pass class ParameterResolutionException(Exception): """Thrown when a fixed parameter could not be resolved.""" pass class DoubleModelNameException(Exception): """Thrown when there are two models with the same name.""" pass PKmIWn7mot/model_building/trees.pyfrom six import string_types __author__ = 'Robbert Harms' __date__ = "2015-03-26" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class Tree(object): def __init__(self, data=None, tag=None, children=None, parent=None): """Create a new Tree. In this tree, every node is a tree object as well. The tree is implemented as a linked list. Each node has a reference to its children and to its parent node. Args: data (object): The data object tag (str): The tag used for displaying this node children (list of Tree): The list of children to this node parent (Tree): The parent tree node. Attributes: data (object): The data object tag (str): The tag used for displaying this node children (list of Tree): The list of children to this node parent (Tree): The parent tree node. """ self.data = data self.tag = tag or "" self.children = children or [] self.parent = parent @property def leaves(self): """Get all the leaves under this tree. Returns: list: A list of all leaves under this tree. """ leaves = [] if not self.children: leaves.append(self) else: for child in self.children: leaves.extend(child.leaves) return leaves @property def internal_nodes(self): """Get all the non-leaves under this tree (the internal nodes). Returns: list: A list of all non-leaves under this tree. """ internal_nodes = [] if self.children: internal_nodes.append(self) for child in self.children: internal_nodes.extend(child.internal_nodes) return internal_nodes def __str__(self, level=0): ret = "\t"*level + self.tag + "\n" for child in self.children: ret += child.__str__(level+1) return ret class CompartmentModelTree(Tree): def __init__(self, model_lists): """Builds a multi modal multi compartment model from the given model tree. 
Valid model trees abides this grammar: tree ::= model | '(' tree ')' | '(' tree ',' operator ')' model ::= ModelFunction operator ::= '*' | '/' | '+' | '-' This means that one can build complex models consisting of "Model Functions" (for example, compartment models) that are combined using basic math operators. Args: model_lists (model tree list): The model tree list """ super(CompartmentModelTree, self).__init__() self._init_tree(model_lists) def get_compartment_models(self): """Get the compartment models that are part of this tree. This basically just returns the leaves of the tree. Returns: list of ModelFunction: the compartments in this tree """ return [n.data for n in self.leaves] def _init_tree(self, listing): if isinstance(listing, (list, tuple)): if len(listing) == 1: self.data = listing[0] self.tag = listing[0].name else: operator = None for node in listing: if isinstance(node, string_types): if operator is not None: raise ValueError('Double operator in model listing.') operator = node else: nn = CompartmentModelTree(node) nn.parent = self self.children.append(nn) if operator is None: raise ValueError('No operator in model listing.') self.data = operator self.tag = operator else: self.data = listing self.tag = listing.name def __str__(self, level=0): if isinstance(self.data, string_types): operator = ' ' + self.data + ' ' return '(' + "\n" + "\t" * (level + 1) + \ operator.join([child.__str__(level + 1) for child in self.children]) + \ "\n" + "\t" * level + ')' else: return self.data.name + '(' + ', '.join([p.name for p in self.data.parameter_list]) + ')' PKH+J"mot/model_building/problem_data.pyimport numpy as np __author__ = 'Robbert Harms' __date__ = "2016-10-03" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class AbstractProblemData(object): """A simple data container for the data for optimization/sampling models.""" @property def protocol(self): """Return the protocol data stored in this problem data container. The protocol data contains information about the experimental setup. In MRI this is the scanner protocol. Returns: collections.Mapping: The protocol data information mapping. """ return {} def get_nmr_inst_per_problem(self): """Get the number of instances/data points per problem. The minimum is one instance per problem. This number represents the number of data points Returns: int: the number of instances per problem. """ return np.array(self.protocol[list(self.protocol.keys())[0]]).shape[0] def get_nmr_problems(self): """Get the number of problems present in this problem data. Returns: int: the number of problem instances """ return self.observations.shape[0] @property def observations(self): """Return the observations stored in this problem data container. Returns: ndarray: The list of observed instances per problem. Should be a 2d matrix with as columns the observations and as rows the problems. """ return np.array([[]]) @property def static_maps(self): """Get a dictionary with the static maps. These maps will be loaded by the model builder as the values for the static parameters. Returns: Dict[str, value]: per static map the value for the static map. This can either be an one or two dimensional matrix containing the values for each problem instance or it can be a single value we will use for all p """ return {} @property def noise_std(self): """The noise standard deviation we will use during model evaluation. During optimization or sampling the model will be evaluated against the observations using an evaluation model. 
Most of these evaluation models need to have a standard deviation. Returns: number of ndarray: either a scalar or a 2d matrix with one value per problem instance. """ return 1 class SimpleProblemData(AbstractProblemData): def __init__(self, protocol, observations, static_maps=None, noise_std=None): """A simple data container for the data for optimization/sampling models. Args: protocol (dict): The protocol data dictionary observations (ndarray): The 2d array with the observations static_maps (dict): The dictionary with the static maps. These are 2d/3d ndarrays with one or more values per problem instance. noise_std (number or ndarray): either a scalar or a 2d matrix with one value per problem instance. """ self._protocol = protocol self._observation = observations self._static_maps = static_maps or {} self._noise_std = noise_std @property def protocol(self): return self._protocol @property def observations(self): return self._observation @property def static_maps(self): return self._static_maps @property def noise_std(self): return self._noise_std PKqJFȮ| | )mot/model_building/signal_noise_models.pyfrom mot.model_building.cl_functions.parameters import FreeParameter from mot.model_building.cl_functions.base import ModelFunction from mot.model_building.parameter_functions.transformations import CosSqrClampTransform from mot.cl_data_type import SimpleCLDataType __author__ = 'Robbert Harms' __date__ = "2014-08-05" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class SignalNoiseModel(ModelFunction): def __init__(self, name, cl_function_name, parameter_list, dependency_list=()): """Signal noise models can add noise to the signal resulting from the model. They require the signal resulting from the model and zero or more parameters and they return a new signal with noise added. """ super(SignalNoiseModel, self).__init__(name, cl_function_name, parameter_list, dependency_list=dependency_list) def get_signal_function(self, fname='signalNoiseModel'): """Get the signal function that adds the noise to the signal function. Args: fname (str, optional): The function name of the function in OpenCL. Returns: str: A function with signature: .. code-block:: c double fname(const double signal, ); For example, if the noise model has only one parameter 'sigma' the function should look like: .. code-block:: c double fname(const double signal, const double sigma); The CL function should return a single double that represents the signal with the signal noise added to it. """ class JohnsonSignalNoise(SignalNoiseModel): def __init__(self): """Johnson noise adds noise to the signal using the formula: .. code-block:: c sqrt(signal^2 + eta^2) """ super(JohnsonSignalNoise, self).__init__( 'JohnsonNoise', 'johnsonNoiseModel', (FreeParameter(SimpleCLDataType.from_string('double'), 'eta', False, 0.1, 0, 100, parameter_transform=CosSqrClampTransform()),), ()) def get_signal_function(self, fname='signalNoiseModel'): return ''' double ''' + fname + '''(const double signal, const double eta){ return sqrt((signal * signal) + (eta * eta)); } ''' PKxJ8"mot/model_building/data_adapter.pyimport numbers import numpy as np from pyopencl import array as cl_array __author__ = 'Robbert Harms' __date__ = "2016-12-06" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class DataAdapter(object): """Create a data adapter for the given data and type. This data adapter is the bridge between the raw data and the data used in the kernels. 
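A minimal usage sketch (mirroring how the example models prepare their input data):

.. code-block:: python

    import numpy as np
    from mot.cl_data_type import SimpleCLDataType
    from mot.model_building.data_adapter import SimpleDataAdapter

    data = np.arange(6, dtype=np.float64).reshape((3, 2))
    adapter = SimpleDataAdapter(data,
                                SimpleCLDataType.from_string('double'),
                                SimpleCLDataType.from_string('double'))
    kernel_data = adapter.get_opencl_data()  # C-contiguous float64 array for the kernel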
""" def get_data_type(self): """Get the data type for the data in this adapter. Returns: mot.cl_data_type.CLDataType: the datatype """ raise NotImplementedError() def get_opencl_data(self): """Adapt and return the data for use in OpenCL kernels. Returns: np.ndarray: the data to be used in compute kernels. """ raise NotImplementedError() def get_opencl_numpy_type(self): """Get the numpy type for the data in this class for OpenCL use. Returns: np.dtype: the numpy type for the data """ raise NotImplementedError() def allow_local_pointer(self): """If this data can be put in a local storage pointer if there is enough memory for it. Returns: boolean: if we allow this memory to be referenced by a local pointer or not. """ raise NotImplementedError() class SimpleDataAdapter(DataAdapter): def __init__(self, data, data_type, mot_float_type, allow_local_pointer=True): """Create a data adapter for the given data and type. This data adapter is the bridge between the raw data and the data used in the kernels. If in the future we want to add computation types like CUDA or plain C, this adapter knows how to format the data for those targets. Args: value (ndarray): The value to adapt to different run environments data_type (mot.cl_data_type.CLDataType): the data type we need to convert it to mot_float_type (mot.cl_data_type.CLDataType): the data type of the mot_float_type allow_local_pointer (boolean): if this data can be referenced by a local pointer in the kernel (if there is enough memory for it). """ self._data = data self._data_type = data_type self._mot_float_type = mot_float_type self._allow_local_pointer = allow_local_pointer def get_opencl_data(self): if self._data_type.is_vector_type: return self._array_to_cl_vector() else: return self._get_cl_array() def get_opencl_numpy_type(self): if self._data_type.is_vector_type: return self._get_opencl_vector_data_type() else: return self._get_cl_numpy_type(self._data_type) def get_data_type(self): return self._data_type def allow_local_pointer(self): return self._allow_local_pointer def _get_cl_array(self): """Convert the data to a numpy array of the current data type. Returns: np.ndarray: the converted data as a numpy type """ numpy_type = self._get_cl_numpy_type(self._data_type) if isinstance(self._data, numbers.Number): return numpy_type(self._data) return np.require(self._data, numpy_type, ['C', 'A', 'O']) def _get_cl_numpy_type(self, data_type): """Get the numpy data type for non-vector types in CL. This function is not part of the CLDataType class since the numpy datatype may differ depending on the final use case. Args: data_type (mot.cl_data_type.CLDataType): the data type to convert to an numpy type """ raw_type = data_type.raw_data_type if raw_type == 'int': return np.int32 if raw_type == 'uint': return np.uint32 if raw_type == 'long': return np.int64 if raw_type == 'ulong': return np.uint64 if raw_type == 'float': return np.float32 if raw_type == 'double': return np.float64 if raw_type == 'mot_float_type': return self._get_cl_numpy_type(self._mot_float_type) def _array_to_cl_vector(self): """Create a CL vector type of the given array. Returns: ndarray: An array of the same length as the given array, but with only one column per row, the opencl vector. 
""" s = self._data.shape if len(s) > 1: width = s[1] else: width = 1 dtype = self._get_opencl_vector_data_type() ve = np.zeros((s[0], 1), dtype=dtype, order='C') for i in range(s[0]): for j in range(width): ve[i, 0][j] = self._data[i, j] return ve def _get_opencl_vector_data_type(self): """Get the data type for a vector of the given length and given type. Returns: The vector type given the given vector length and data type """ s = self._data.shape if len(s) > 1: vector_length = s[1] else: vector_length = 1 if self._data_type.raw_data_type == 'double': data_type = 'double' elif self._data_type.raw_data_type == 'mot_float_type': data_type = self._mot_float_type.raw_data_type else: data_type = 'float' if vector_length not in (2, 3, 4, 8, 16): raise ValueError('The given vector length is not one of (2, 3, 4, 8, 16)') if data_type not in ('char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong', 'float', 'double', 'half'): raise ValueError('The given data type ({}) is not supported.'.format(data_type)) return getattr(cl_array.vec, data_type + str(vector_length)) PKH7mot/model_building/__init__.py__author__ = 'Robbert Harms' __date__ = "2015-11-29" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" PK]JF!!%mot/model_building/models_examples.pyimport numpy as np from mot.utils import results_to_dict from mot.cl_data_type import SimpleCLDataType from mot.model_building.data_adapter import SimpleDataAdapter from mot.model_interfaces import OptimizeModelInterface __author__ = 'Robbert Harms' __date__ = "2015-04-02" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class Rosenbrock(OptimizeModelInterface): def __init__(self, n=5): """When optimized the parameters should all be equal to 1.""" super(Rosenbrock, self).__init__() self.n = n def double_precision(self): return True @property def name(self): return 'rosenbrock' def get_data(self): return {} def get_kernel_data_struct(self, device): return ''' typedef struct{ constant void* place_holder; } ''' + self.get_kernel_data_struct_type() + '''; ''' def get_kernel_data_struct_type(self): return 'optimize_data' def get_kernel_param_names(self, device): return [] def get_kernel_data_struct_initialization(self, device, variable_name): return self.get_kernel_data_struct_type() + ' ' + variable_name + ' = {0};' def get_nmr_problems(self): return 1 def get_model_eval_function(self, fname='evaluateModel'): return ''' double ''' + fname + '''(const void* const data, const double* const x, const uint observation_index){ double sum = 0; for(uint i = 0; i < ''' + str(self.n) + ''' - 1; i++){ sum += 100 * pown((x[i + 1] - pown(x[i], 2)), 2) + pown((x[i] - 1), 2); } return -sum; } ''' def get_observation_return_function(self, fname='getObservation'): return ''' double ''' + fname + '''(const void* const data, const uint observation_index){ return 0; } ''' def get_objective_function(self, fname="calculateObjective"): eval_fname = fname + '_evaluateModel' obs_fname = fname + '_getObservation' func = self.get_model_eval_function(eval_fname) func += self.get_observation_return_function(obs_fname) return func + ''' double ''' + fname + '''(const void* const data, double* const x){ return ''' + obs_fname + '''(data, 1) -''' + eval_fname + '''(data, x, 1); } ''' def get_objective_per_observation_function(self, func_name="getObjectiveInstanceValue"): eval_fname = func_name + '_evaluateModel' obs_fname = func_name + '_getObservation' func = 
self.get_model_eval_function(eval_fname) func += self.get_observation_return_function(obs_fname) return func + ''' mot_float_type ''' + func_name + '''(const void* const data, mot_float_type* const x, uint observation_index){ return ''' + obs_fname + '''(data, observation_index) - ''' + eval_fname + '''(data, x, observation_index); } ''' def get_initial_parameters(self, previous_results=None): params = np.ones((1, self.n)) * 3 if isinstance(previous_results, np.ndarray): previous_results = results_to_dict(previous_results, self.get_free_param_names()) if previous_results: for i in range(self.n): if i in previous_results: params[0, i] = previous_results[i] return SimpleDataAdapter(params, SimpleCLDataType.from_string('double'), SimpleCLDataType.from_string('double')).get_opencl_data() def get_lower_bounds(self): return ['-inf'] * self.n def get_upper_bounds(self): return ['inf'] * self.n def get_free_param_names(self): return range(self.n) def get_nmr_inst_per_problem(self): return 1 def get_nmr_estimable_parameters(self): return self.n def get_parameter_decode_function(self, fname='decodeParameters'): return '''void ''' + fname + '''(const void* data, mot_float_type* x){}''' def get_parameter_encode_function(self, fname='encodeParameters'): return '''void ''' + fname + '''(const void* data, mot_float_type* x){}''' class MatlabLSQNonlinExample(OptimizeModelInterface): def __init__(self): """When optimized the parameters should be close to [0.2578, 0.2578] or something with a similar 2 norm. See the matlab manual page at http://nl.mathworks.com/help/optim/ug/lsqnonlin.html for more information. (viewed at 2015-04-02). """ super(MatlabLSQNonlinExample, self).__init__() def double_precision(self): return True @property def name(self): return 'matlab_lsqnonlin_example' def get_data(self): return {} def get_kernel_data_struct(self, device): return ''' typedef struct{ constant void* place_holder; } ''' + self.get_kernel_data_struct_type() + '''; ''' def get_kernel_data_struct_type(self): return 'optimize_data' def get_kernel_param_names(self, device): return [] def get_kernel_data_struct_initialization(self, device, variable_name): return self.get_kernel_data_struct_type() + ' ' + variable_name + ' = {0};' def get_nmr_problems(self): return 1 def get_model_eval_function(self, fname='evaluateModel'): return ''' double ''' + fname + '''(const void* const data, const double* const x, const uint k){ return -(2 + 2 * (k+1) - exp((k+1) * x[0]) - exp((k+1) * x[1])); } ''' def get_observation_return_function(self, fname='getObservation'): return ''' double ''' + fname + '''(const void* const data, const uint observation_index){ return 0; } ''' def get_objective_function(self, fname="calculateObjective"): eval_fname = fname + '_evaluateModel' obs_fname = fname + '_getObservation' func = self.get_model_eval_function(eval_fname) func += self.get_observation_return_function(obs_fname) return func + ''' double ''' + fname + '''(const void* const data, double* const x){ double sum = 0; for(uint i = 0; i < 10; i++){ sum += ''' + obs_fname + '''(data, i) - ''' + eval_fname + '''(data, x, i); } return sum; } ''' def get_objective_per_observation_function(self, func_name="getObjectiveInstanceValue"): eval_fname = func_name + '_evaluateModel' obs_fname = func_name + '_getObservation' func = self.get_model_eval_function(eval_fname) func += self.get_observation_return_function(obs_fname) return func + ''' mot_float_type ''' + func_name + '''(const void* const data, mot_float_type* const x, uint observation_index){ 
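        // per-observation objective: the observation value minus the model prediction at this index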
return ''' + obs_fname + '''(data, observation_index) - ''' + eval_fname + '''(data, x, observation_index); } ''' def get_initial_parameters(self, previous_results=None): params = np.array([[0.3, 0.4]]) if isinstance(previous_results, np.ndarray): previous_results = results_to_dict(previous_results, self.get_free_param_names()) if previous_results: for i in range(2): if i in previous_results: params[0, i] = previous_results[i] return SimpleDataAdapter(params, SimpleCLDataType.from_string('double'), SimpleCLDataType.from_string('double')).get_opencl_data() def get_lower_bounds(self): return [0, 0] def get_upper_bounds(self): return ['inf', 'inf'] def get_free_param_names(self): return [0, 1] def get_nmr_inst_per_problem(self): return 10 def get_nmr_estimable_parameters(self): return 2 def get_parameter_decode_function(self, fname='decodeParameters'): return '''void ''' + fname + '''(const void* data, mot_float_type* x){}''' def get_parameter_encode_function(self, fname='encodeParameters'): return '''void ''' + fname + '''(const void* data, mot_float_type* x){}''' PK̢qJ.;瞽$$9mot/model_building/parameter_functions/transformations.py__author__ = 'Robbert Harms' __date__ = "2014-06-20" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class AbstractTransformation(object): def __init__(self, dependencies=()): """The transformations define the encode and decode operations needed to build a codec. These objects define the basic transformation from and to model and optimization space. The state of the parameters is extrinsic. Calling the encode and decode functions needs a reference to a parameter, these should be handled by the client code. The transformation function may depend on other parameters. (Client code should handle the correct order of handling the transformations given all the dependencies.) Args: dependencies (list of (CLFunction, CLFunctionParameter) pairs): A list of (models, parameters) which are necessary for the operation of the transformation. """ super(AbstractTransformation, self).__init__() self._dependencies = dependencies def get_cl_encode(self): """Get the CL encode assignment constructor Returns AssignmentConstructor: The cl code assignment constructor for encoding the parameter. """ raise NotImplementedError() def get_cl_decode(self): """Get the CL decode assignment constructor Returns: AssignmentConstructor: The cl code assignment constructor for decoding the parameter. """ raise NotImplementedError() @property def dependencies(self): """Get a list of (CLFunction, CLFunctionParameter) pairs where this transformation depends on Returns: list of tuple: A list of (CLFunction, CLFunctionParameter) tuples. """ return self._dependencies class AssignmentConstructor(object): def create_assignment(self, parameter_variable, lower_bound, upper_bound): """Create the assignment string. Args: parameter_variable (str): the name of the parameter variable holding the current value in the kernel lower_bound (str): the value or the name of the variable holding the value for the lower bound upper_bound (str): the value or the name of the variable holding the value for the upper bound Returns: str: the transformation assignment """ raise NotImplementedError() class FormatAssignmentConstructor(AssignmentConstructor): def __init__(self, assignment): """Assignment constructor that formats the given assignment template. 
This expects that the assignment string has elements like: * ``{parameter_variable}``: for the parameter variable * ``{lower_bound}``: for the lower bound * ``{upper_bound}``: for the upper bound * ``{dependency_variable_}``: for the dependency variable names Args: assignment (str): the string containing the assignment template. """ self._assignment = assignment def create_assignment(self, parameter_variable, lower_bound, upper_bound, dependency_variables=()): assignment = self._assignment.replace('{parameter_variable}', parameter_variable) assignment = assignment.replace('{lower_bound}', lower_bound) assignment = assignment.replace('{upper_bound}', upper_bound) for ind, var_name in enumerate(dependency_variables): assignment = assignment.replace('{dependency_variable_' + str(ind) + '}', var_name) return assignment class IdentityTransform(AbstractTransformation): def __init__(self, *args, **kwargs): """The identity transform does no transformation and returns the input given.""" super(IdentityTransform, self).__init__(*args, **kwargs) def get_cl_encode(self): return FormatAssignmentConstructor('{parameter_variable}') def get_cl_decode(self): return FormatAssignmentConstructor('{parameter_variable}') class ClampTransform(AbstractTransformation): """The clamp transformation limits the parameter between its lower and upper bound using the clamp function.""" def get_cl_encode(self): return FormatAssignmentConstructor('clamp((mot_float_type){parameter_variable}, ' '(mot_float_type){lower_bound}, ' '(mot_float_type){upper_bound})') def get_cl_decode(self): return FormatAssignmentConstructor('clamp((mot_float_type){parameter_variable}, ' '(mot_float_type){lower_bound}, ' '(mot_float_type){upper_bound})') class ScaleClampTransform(AbstractTransformation): def __init__(self, scale): """Clamps the value to the given bounds and applies a scaling to bring the parameters in sensible ranges. The given scaling factor should be without the scaling factor. To encode, the parameter value is multiplied by the scaling factor. To decode, it is divided by the scaling factor. 
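For example (a sketch, assuming a hypothetical parameter ``d`` with bounds ``0`` and ``1``):

.. code-block:: python

    transform = ScaleClampTransform(1e9)
    transform.get_cl_encode().create_assignment('d', '0', '1')
    # -> 'clamp((mot_float_type)d, (mot_float_type)0, (mot_float_type)1) * 1000000000.0'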
Args: scale (float): the scaling factor by which to scale the parameter """ super(ScaleClampTransform, self).__init__() self._scale = scale def get_cl_encode(self): return FormatAssignmentConstructor('clamp((mot_float_type){parameter_variable}, ' '(mot_float_type){lower_bound}, ' '(mot_float_type){upper_bound}) * ' + str(self._scale)) def get_cl_decode(self): return FormatAssignmentConstructor('clamp((mot_float_type){parameter_variable} / ' + str(self._scale) + ', ' '(mot_float_type){lower_bound}, ' '(mot_float_type){upper_bound})') class CosSqrClampTransform(AbstractTransformation): """The clamp transformation limits the parameter between its lower and upper bound using a cos(sqr()) transform.""" def get_cl_encode(self): return FormatAssignmentConstructor( 'acos(clamp((mot_float_type)sqrt(fabs( ({parameter_variable} - {lower_bound}) / ' ' ({upper_bound} - {lower_bound}) )), ' ' (mot_float_type)0, (mot_float_type)1))') def get_cl_decode(self): return FormatAssignmentConstructor('pown(cos({parameter_variable}), 2) * ' + '({upper_bound} - {lower_bound}) + {lower_bound}') class SinSqrClampTransform(AbstractTransformation): """The clamp transformation limits the parameter between its lower and upper bound using a sin(sqr()) transform.""" def get_cl_encode(self): return FormatAssignmentConstructor( 'asin(clamp((mot_float_type)sqrt(fabs( ({parameter_variable} - {lower_bound}) / ' ' ({upper_bound} - {lower_bound}) )), ' ' (mot_float_type)0, (mot_float_type)1))') def get_cl_decode(self): return FormatAssignmentConstructor('pown(sin({parameter_variable}), 2) * ' + '({upper_bound} - {lower_bound}) + {lower_bound}') class SqrClampTransform(AbstractTransformation): """The clamp transformation limits the parameter between its lower and upper bound using a sqr() transform.""" def get_cl_encode(self): return FormatAssignmentConstructor('sqrt({parameter_variable})') def get_cl_decode(self): return FormatAssignmentConstructor('clamp((mot_float_type)({parameter_variable} * {parameter_variable}), ' ' (mot_float_type){lower_bound}, ' ' (mot_float_type){upper_bound})') class SinSqrClampDependentTransform(AbstractTransformation): """The clamp transformation limits the parameter between 0 and the given parameter with the sin(sqr()) transform.""" def get_cl_encode(self): return FormatAssignmentConstructor('asin(sqrt(fabs(({parameter_variable} - {lower_bound}) / ' ' ({dependency_variable_0} - {lower_bound}))))') def get_cl_decode(self): return FormatAssignmentConstructor('pown(sin({parameter_variable}), 2) * {dependency_variable_0} ' ' + {lower_bound}') class AbsModXTransform(AbstractTransformation): def __init__(self, x, dependencies=()): """Create an transformation that returns the absolute modulo x value of the input.""" super(AbsModXTransform, self).__init__(dependencies) self._x = x def get_cl_encode(self): return FormatAssignmentConstructor('fmod((mot_float_type)fabs({parameter_variable}), ' '(mot_float_type)' + str(self._x) + ')') def get_cl_decode(self): return FormatAssignmentConstructor('fmod((mot_float_type)fabs({parameter_variable}), ' '(mot_float_type)' + str(self._x) + ')') class AbsModPiTransform(AbsModXTransform): def __init__(self): super(AbsModPiTransform, self).__init__('M_PI') PKxJ()B)B):mot/model_building/parameter_functions/proposal_updates.py__author__ = 'Robbert Harms' __date__ = "2017-03-02" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class ProposalUpdate(object): def get_update_function(self, proposal_parameters, address_space='private'): """Get 
the update function to update the proposal parameters of one of the proposals. Args: proposal_parameters (list of ProposalParameter): the list of proposal parameters to (possibly use) in the update function. It will only use the parameters that have ``adaptable`` set to True. address_space (str): the address space to use for the function parameters """ raise NotImplementedError() def get_function_name(self, proposal_parameters): """Get the name of the proposal update function. Args: proposal_parameters (list of ProposalParameter): the list of proposal parameters to (possibly use) in the update function. It will only use the parameters that have ``adaptable`` set to True. Returns: str: the name of the function returned by :meth:`get_update_function` """ raise NotImplementedError() def uses_parameter_variance(self): """Check if this proposal update function uses the parameter variance. If not, we will not provide it, this saves memory in the kernel. Returns: boolean: if this proposal update function uses the parameter variance """ raise NotImplementedError() def uses_jump_counters(self): """Check if this proposal update function uses the jump counters (jump counter and acceptance counter). If not, we will not provide it, this saves memory in the kernel. Returns: boolean: if this proposal update function uses the jump counters """ raise NotImplementedError() class SimpleProposalUpdate(ProposalUpdate): def __init__(self, function_name, uses_parameter_variance=False, uses_jump_counters=True): """A simple proposal update function template. Args: function_name (str): the name of this proposal update function, try to choose an unique name. uses_parameter_variance (boolean): if this proposal requires the parameter variance. Enable if you need it in your update function. uses_jump_counters (boolean): if this proposal uses jump counters for its workings. This is enabled by default. You can disable it for speed. 
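A minimal sketch (``my_update`` is a hypothetical name):

.. code-block:: python

    updater = SimpleProposalUpdate('my_update')
    updater.get_function_name([])        # 'proposal_update_my_update_0'
    cl_code = updater.get_update_function([])  # CL update function with an empty body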
""" self._function_name = function_name self._uses_parameter_variance = uses_parameter_variance self._uses_jump_counters = uses_jump_counters def get_update_function(self, proposal_parameters, address_space='private'): return self._update_function_template('', proposal_parameters, address_space) def _update_function_template(self, function_body, proposal_parameters, address_space): params = ['{address_space} mot_float_type* const {name}'.format(address_space=address_space, name=p.name) for p in proposal_parameters if p.adaptable] if self.uses_jump_counters(): params.extend(['{address_space} ulong* const sampling_counter'.format(address_space=address_space), '{address_space} ulong* const acceptance_counter'.format(address_space=address_space)]) if self.uses_parameter_variance(): params.append('mot_float_type parameter_variance'.format(address_space=address_space)) return ''' #ifndef {include_guard_name} #define {include_guard_name} void {function_name}({params}){{ {function_body} }} #endif //{include_guard_name} '''.format(include_guard_name='PROPOSAL_UPDATE_{}'.format(self._function_name.upper()), function_name=self.get_function_name(proposal_parameters), params=', '.join(params), function_body=function_body) def get_function_name(self, proposal_parameters): return 'proposal_update_{}_{}'.format(self._function_name, len([True for p in proposal_parameters if p.adaptable])) def uses_parameter_variance(self): return self._uses_parameter_variance def uses_jump_counters(self): return self._uses_jump_counters class NoOperationUpdateFunction(SimpleProposalUpdate): def __init__(self): """This is the no-operation update function. It does not update the proposal parameters.""" super(NoOperationUpdateFunction, self).__init__('no_opt') def get_update_function(self, proposal_parameters, address_space='private'): return self._update_function_template('', proposal_parameters, address_space) class AcceptanceRateScaling(SimpleProposalUpdate): def __init__(self, target_acceptance_rate=0.44, batch_size=50, damping_factor=1): """Scales the proposal parameter (typically the std) such that it oscillates towards the chosen acceptance rate. This uses an scaling similar to the one in: "Examples of Adaptive MCMC", Gareth O. Roberts & Jeffrey S. Rosenthal (2009) This class implements the delta function as: :math:`\delta(n) = \sqrt{1 / (d*n)}`. Where n is the current batch index and d is the damping factor. As an example, with a damping factor of 500, delta reaches a scaling of 0.01 in 20 batches. At a batch size of 50 that would amount to 1000 samples. Args: target_acceptance_rate (float): the target acceptance rate between 0 and 1. 
batch_size (int): the size of the batches inbetween which we update the parameters """ super(AcceptanceRateScaling, self).__init__('acceptance_rate_scaling') self._target_acceptance_rate = target_acceptance_rate self._batch_size = batch_size self._damping_factor = damping_factor if target_acceptance_rate > 1 or target_acceptance_rate < 0: raise ValueError('The target acceptance rate should be ' 'within [0, 1], {} given.'.format(target_acceptance_rate)) def get_update_function(self, proposal_parameters, address_space='private'): body = ''' if(*sampling_counter % {batch_size} == 0){{ mot_float_type delta = sqrt(1.0/({damping_factor} * (*sampling_counter / {batch_size}))); if(*acceptance_counter / (mot_float_type){batch_size} > {target_ar}){{ *std *= exp(delta); }} else{{ *std /= exp(delta); }} *std = clamp(*std, (mot_float_type)1e-13, (mot_float_type)1e3); *acceptance_counter = 0; }} '''.format(batch_size=self._batch_size, target_ar=self._target_acceptance_rate, damping_factor=self._damping_factor) return self._update_function_template(body, proposal_parameters, address_space) class FSLAcceptanceRateScaling(SimpleProposalUpdate): def __init__(self, batch_size=50, min_val=1e-13, max_val=1e3): """An acceptance rate scaling algorithm found in a Neuroscience package called FSL. This scaling algorithm scales the std. by :math:`\sqrt(a/(n - a))` where a is the number of accepted samples in the last batch and n is the batch size. Its goal is to balance the acceptance rate at 0.5. So far, the author of this function in MOT has not been able to find theoretical support for this scaling algorithm. Please use this heuristic with caution. To prevent runaway proposal values we clamp the updated parameter value between a minimum and maximum value specified in the constructor. Args: batch_size (int): the size of the batches in between which we update the parameters min_val (float): the minimum value the parameter can take max_val (float): the maximum value the parameter can take """ super(FSLAcceptanceRateScaling, self).__init__('fsl_acceptance_rate_scaling') self._batch_size = batch_size self._min_val = min_val self._max_val = max_val def get_update_function(self, proposal_parameters, address_space='private'): body = ''' if(*sampling_counter == {batch_size}){{ *std = clamp(*std * sqrt((mot_float_type)(*acceptance_counter + 1) / ({batch_size} - *acceptance_counter + 1)), (mot_float_type){min_val}, (mot_float_type){max_val}); *sampling_counter = 0; *acceptance_counter = 0; }} '''.format(batch_size=self._batch_size, min_val=self._min_val, max_val=self._max_val) return self._update_function_template(body, proposal_parameters, address_space) class SingleComponentAdaptiveMetropolis(SimpleProposalUpdate): def __init__(self, waiting_period=100, scaling_factor=2.4, epsilon=1e-20): """Uses the Single Component Adaptive Metropolis (SCAM) scheme to update the proposals. This uses an scaling described in: "Componentwise adaptation for high dimensional MCMC", Heikki Haario, Eero Saksman and Johanna Tamminen (2005). That is, it updates the proposal standard deviation using the variance of the chain's history. Args: waiting_period (int): only start updating the proposal std. after this many draws. scaling_factor (float): the scaling factor to use (the parameter ``s`` in the paper referenced). epsilon (float): small number to prevent the std. from collapsing to zero. 
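The applied update, as a sketch of the generated CL in math form, is:

.. math::

    \sigma_{new} = s \sqrt{\operatorname{Var}(\theta_{1..n})} + \epsilon

where :math:`s` is the scaling factor and :math:`\operatorname{Var}(\theta_{1..n})` the variance of the
chain's history; it is applied once the sample counter exceeds the waiting period.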
""" super(SingleComponentAdaptiveMetropolis, self).__init__('scam', uses_parameter_variance=True) self._waiting_period = waiting_period self._scaling_factor = scaling_factor self._epsilon = epsilon def get_update_function(self, proposal_parameters, address_space='private'): body = ''' if(*sampling_counter > {waiting_period}){{ *std = {scaling_factor} * sqrt(parameter_variance) + {epsilon}; }} '''.format(waiting_period=self._waiting_period, scaling_factor=self._scaling_factor, epsilon=self._epsilon) return self._update_function_template(body, proposal_parameters, address_space) PKqJ(mot_float_type current, void* rng_data, ) That is, it can have more than two parameter, but the first two are obligatory. The additional parameters are given by :meth:`get_parameters`. Returns: str: The cl function """ raise NotImplementedError() def get_proposal_function_name(self): """Get the name of the proposal function call. This is used by the model builder to construct the call to the proposal function. Returns: str: name of the function """ raise NotImplementedError() def get_proposal_logpdf_function(self): """Get the proposal pdf function as a CL string. This function is used if the proposal is not symmetric. The implementation should include include guards. This should follow the signature: .. code-block: c mot_float_type (mot_float_type proposal, mot_float_type current, ) Returns: str: The proposal log pdf function as a CL string """ raise NotImplementedError() def get_proposal_logpdf_function_name(self): """Get the name of the proposal logpdf function call. This is used by the model builder to construct the call to the proposal logpdf function. Returns: str: name of the function """ raise NotImplementedError() def get_proposal_update_function(self): """Get the proposal update function to use for updating the adaptable parameters. Returns: mot.model_building.parameter_functions.proposal_updates.ProposalUpdate: the proposal update function defining the update mechanism """ raise NotImplementedError() class ProposalParameter(object): def __init__(self, name, default_value, adaptable): """Container class for parameters of a proposal function. Args: default_value (double): the parameter value adaptable (boolean): if this parameter is adaptable during sampling Attributes: default_value (double): the parameter value adaptable (boolean): if this parameter is adaptable """ self.name = name self.default_value = default_value self.adaptable = adaptable class SimpleProposal(ParameterProposal): def __init__(self, proposal_body, proposal_name, parameters, is_symmetric=True, logpdf_body='return 0;', proposal_update_function=None): """Simple proposal template class. By default this assumes that the proposal you are generating is symmetric. If so, the proposal logpdf function can be reduced to a scalar since calculating it is unnecessary. Args: proposal_body (str): the body of the proposal code. Environment variables are ``current`` for the current position of this parameter and ``rng_data`` that can be used to generate random numbers. proposal_name (str): the name of this proposal parameters (list): the list of :class:`ProposalParameters` used by this proposal is_symmetric (boolean): if this proposal is symmetric, that is, if ``q(x|y) == q(y|x)``. logpdf_body (str): if the proposal is not symmetric we need a PDF function to calculate the probability of ``q(x|y)`` and ``q(y|x)``. It should return the log of the probability. 
If the proposal is symmetric this parameter need not be specified and defaults to returning a scalar. proposal_update_function (mot.model_building.parameter_functions.proposal_updates.ProposalUpdate): the proposal update function to use. For the default check the mot configuration. """ self._proposal_body = proposal_body self._proposal_name = proposal_name self._parameters = parameters self._is_symmetric = is_symmetric self._logpdf_body = logpdf_body self._proposal_update_function = proposal_update_function or get_default_proposal_update() def is_symmetric(self): return self._is_symmetric def is_adaptable(self): return any(p.adaptable for p in self._parameters) def get_parameters(self): return self._parameters def get_proposal_function(self): params = ['mot_float_type current', 'void* rng_data'] params.extend('mot_float_type {}'.format(p.name) for p in self._parameters) return ''' #ifndef {include_guard_name} #define {include_guard_name} mot_float_type {function_name}({params}){{ {function_body} }} #endif //{include_guard_name} '''.format(include_guard_name='PROPOSAL_{}'.format(self._proposal_name.upper()), function_name=self.get_proposal_function_name(), params=', '.join(params), function_body=self._proposal_body) def get_proposal_function_name(self): return 'proposal_{}'.format(self._proposal_name) def get_proposal_logpdf_function(self): params = ['mot_float_type current', 'mot_float_type other'] params.extend('mot_float_type {}'.format(p.name) for p in self._parameters) return ''' #ifndef {include_guard_name} #define {include_guard_name} mot_float_type {function_name}({params}){{ {function_body} }} #endif //{include_guard_name} '''.format(include_guard_name='PROPOSAL_LOGPDF_{}'.format(self._proposal_name.upper()), function_name=self.get_proposal_logpdf_function_name(), params=', '.join(params), function_body=self._logpdf_body) def get_proposal_logpdf_function_name(self): return 'proposal_logpdf_{}'.format(self._proposal_name) def get_proposal_update_function(self): return self._proposal_update_function class GaussianProposal(SimpleProposal): def __init__(self, std=1.0, adaptable=True, proposal_update_function=None): """Create a new proposal function using a Gaussian distribution with the given scale. Args: std (float): The scale of the Gaussian distribution. adaptable (boolean): If this proposal is adaptable during sampling proposal_update_function (mot.model_building.parameter_functions.proposal_updates.ProposalUpdate): the proposal update function to use. Defaults to the one in the current mot configuration. """ parameters = [ProposalParameter('std', std, adaptable)] super(GaussianProposal, self).__init__( 'return fma(std, (mot_float_type)frandn(rng_data), current);', 'gaussian', parameters, proposal_update_function=proposal_update_function ) class CircularGaussianProposal(SimpleProposal): def __init__(self, modulus, std=1.0, adaptable=True, proposal_update_function=None): """A Gaussian distribution which loops around the given modulus. Args: modulus (float): at which point we loop around std (float): The scale of the Gaussian distribution. adaptable (boolean): If this proposal is adaptable during sampling proposal_update_function (mot.model_building.parameter_functions.proposal_updates.ProposalUpdate): the proposal update function to use. Defaults to the one in the current mot configuration. 
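A NumPy sketch of the step this proposal generates (illustrative only; the actual
sampling happens in the generated CL, and the function name below is hypothetical):

.. code-block:: python

    import numpy as np

    def circular_gaussian_step(current, std, modulus):
        x1 = current + std * np.random.standard_normal()
        return x1 - np.floor(x1 / modulus) * modulus  # wrapped into [0, modulus)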
""" parameters = [ProposalParameter('std', std, adaptable)] super(CircularGaussianProposal, self).__init__( ''' double x1 = fma(std, (mot_float_type)frandn(rng_data), current); double x2 = {}; return (mot_float_type) (x1 - floor(x1 / x2) * x2); '''.format(modulus), 'circular_gaussian', parameters, proposal_update_function=proposal_update_function ) PKP%J5Z6mot/model_building/parameter_functions/dependencies.py__author__ = 'Robbert Harms' __date__ = "2014-06-19" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class AbstractParameterDependency(object): @property def pre_transform_code(self): """Some code that may be prefixed to this parameter dependency. Here one can put more elaborate CL code. Please make sure that additional variables are unique. Returns: str: The pre transformation code. This is prepended to the dependency function. """ return '' @property def assignment_code(self): """Get the assignment code (including a ;). Returns: str: The assignment code. """ return '' class SimpleAssignment(AbstractParameterDependency): def __init__(self, assignment_code): """Adds a simple parameter dependency rule for the given parameter. This is for one parameter, a simple one-line transformation dependency. Args: assignment_code (str): the assignment code (in CL) for this parameter """ self._assignment = assignment_code @property def assignment_code(self): return self._assignment PKqJEE;mot/model_building/parameter_functions/sample_statistics.pyimport numpy as np __author__ = 'Robbert Harms' __date__ = "2014-10-23" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class ParameterSampleStatistics(object): def get_mean(self, samples): """Given the distribution represented by this statistic, get the mean of the samples. Args: samples (ndarray): The 2d array with the samples per voxel. Returns: A 1d ndarray with the mean per voxel. """ raise NotImplementedError() def get_std(self, samples): """Given the distribution represented by this statistic, get the standard deviation of the samples. Args: samples (ndarray): The 2d array with the samples per voxel. Returns: A 1d array with the variance per voxel. """ raise NotImplementedError() class GaussianPSS(ParameterSampleStatistics): def get_mean(self, samples): return np.mean(samples, axis=1) def get_std(self, samples): return np.std(samples, axis=1) class CircularGaussianPSS(ParameterSampleStatistics): def __init__(self, max_angle=np.pi): """Compute the circular mean for samples in a range The minimum angle is set to 0, the maximum angle can be given. Args: max_angle (number): The maximum angle used in the calculations """ super(CircularGaussianPSS, self).__init__() self.max_angle = max_angle def get_mean(self, samples): return CircularGaussianPSS.circmean(np.mod(samples, self.max_angle), high=self.max_angle, low=0, axis=1) def get_std(self, samples): return CircularGaussianPSS.circstd(np.mod(samples, self.max_angle), high=self.max_angle, low=0, axis=1) @staticmethod def circmean(samples, high=2 * np.pi, low=0, axis=None): """Compute the circular mean for samples in a range. Taken from scipy.stats Args: samples (array_like): Input array. high (float or int): High boundary for circular mean range. Default is ``2*pi``. low (float or int): Low boundary for circular mean range. Default is 0. axis (int, optional): Axis along which means are computed. The default is to compute the mean of the flattened array. Returns: float: Circular mean. 
""" ang = (samples - low) * 2 * np.pi / (high - low) res = np.angle(np.mean(np.exp(1j * ang), axis=axis)) mask = res < 0 if mask.ndim > 0: res[mask] += 2 * np.pi elif mask: res += 2 * np.pi return res * (high - low) / 2.0 / np.pi + low @staticmethod def circstd(samples, high=2 * np.pi, low=0, axis=None): """Compute the circular standard deviation for samples assumed to be in the range [low to high]. Taken from scipy.stats, with a small change on the 4th line. This uses a definition of circular standard deviation that in the limit of small angles returns a number close to the 'linear' standard deviation. Args: samples (array_like): Input array. low (float or int): Low boundary for circular standard deviation range. Default is 0. high (float or int): High boundary for circular standard deviation range. Default is ``2*pi``. axis (int): Axis along which standard deviations are computed. The default is to compute the standard deviation of the flattened array. Returns: float: Circular standard deviation. """ ang = (samples - low) * 2 * np.pi / (high - low) res = np.mean(np.exp(1j * ang), axis=axis) R = abs(res) R[R >= 1] = 1 - np.finfo(np.float).eps return ((high - low) / 2.0 / np.pi) * np.sqrt(-2 * np.log(R)) PKH4"2mot/model_building/parameter_functions/__init__.py__author__ = 'Robbert Harms' __date__ = "2014-11-27" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" PK(J\έ#,#,0mot/model_building/parameter_functions/priors.pyimport numpy as np from mot.cl_data_type import SimpleCLDataType from mot.model_building.parameter_functions.proposals import GaussianProposal __author__ = 'Robbert Harms' __date__ = "2014-06-19" __license__ = "LGPL v3" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class ParameterPrior(object): """The priors are used during model sampling. These priors are not in log space, we take the log in the model builder. They indicate the a priori information one has about a parameter. """ def get_prior_function(self): """Get the prior function as a CL string. This should include include guards (#ifdef's). This should follow the signature: .. code-block: c mot_float_type (mot_float_type parent_parameter, mot_float_type lower_bound, mot_float_type upper_bound, ) That is, the parent parameter and it lower and upper bound is given next to the optional parameters defined in this prior. Returns: str: The cl function """ raise NotImplementedError() def get_prior_function_name(self): """Get the name of the prior function call. This is used by the model builder to construct the call to the prior function. Returns: str: name of the function """ raise NotImplementedError() def get_parameters(self): """Get the additional parameters featured in this prior. This can return a list of additional parameters to be used in the model function. Returns: list of CLFunctionParameter: the list of function parameters to be added to the list of parameters of the enclosing model. """ raise NotImplementedError() class SimplePrior(ParameterPrior): def __init__(self, prior_body, prior_name, prior_params=None, cl_preamble=None): """A prior template function. Args: prior_body (str): the body of the prior prior_name (str): the name of this prior function prior_params (list): additional parameters for this prior preamble (str): optional C code loaded before the function definition. 
""" self._prior_body = prior_body self._prior_name = prior_name self._prior_params = prior_params or [] self._cl_preamble = cl_preamble def get_parameters(self): return self._prior_params def get_prior_function(self): params = ['value', 'lower_bound', 'upper_bound'] params.extend(p.name for p in self._prior_params) params = ['const mot_float_type {}'.format(v) for v in params] return ''' {cl_preamble} #ifndef {include_guard_name} #define {include_guard_name} mot_float_type {function_name}({params}){{ {prior_body} }} #endif //{include_guard_name} '''.format(include_guard_name='PRIOR_{}'.format(self._prior_name.upper()), function_name=self._prior_name, prior_body=self._prior_body, params=', '.join(params), cl_preamble=self._cl_preamble or '') def get_prior_function_name(self): return self._prior_name class AlwaysOne(SimplePrior): def __init__(self): """The uniform prior is always 1. :math:`P(v) = 1` """ super(AlwaysOne, self).__init__('return 1;', 'uniform') class ReciprocalPrior(SimplePrior): def __init__(self): """The reciprocal of the current value. :math:`P(v) = 1/v` """ body = ''' if(value <= 0){ return 0; } return 1.0/value; ''' super(ReciprocalPrior, self).__init__(body, 'reciprocal') class UniformWithinBoundsPrior(SimplePrior): def __init__(self): """This prior is 1 within the upper and lower bound of the parameter, 0 outside.""" super(UniformWithinBoundsPrior, self).__init__( 'return (value < lower_bound || value > upper_bound) ? 0.0 : 1.0;', 'uniform_within_bounds') class AbsSinPrior(SimplePrior): def __init__(self): """Angular prior: :math:`P(v) = |\\sin(v)|`""" super(AbsSinPrior, self).__init__('return fabs(sin(value));', 'abs_sin') class AbsSinHalfPrior(SimplePrior): def __init__(self): """Angular prior: :math:`P(v) = |\\sin(x)/2.0|`""" super(AbsSinHalfPrior, self).__init__('return fabs(sin(value)/2.0);', 'abs_sin_half') class VagueGammaPrior(SimplePrior): def __init__(self): """The vague gamma prior is meant as a proper uniform prior. Lee & Wagenmakers: The practice of assigning Gamma(0.001, 0.001) priors on precision parameters is theoretically motivated by scale invariance arguments, meaning that priors are chosen so that changing the measurement scale of the data does not affect inference. The invariant prior on precision λ corresponds to a uniform distribution on log σ, that is, p (σ2) ∝ 1/σ2, or a Gamma(a → 0, b → 0) distribution. This invariant prior distribution, however, is improper (i.e., the area under the curve is unbounded), which means it is not really a distribution, but the limit of a sequence of distributions (see Jaynes, 2003). WinBUGS requires the use of proper distributions, and the Gamma(0.001, 0.001) prior is intended as a proper approximation to the theoretically motivated improper prior. This raises the issue of whether inference is sensitive to the essentially arbitrary value 0.001, and it is sometimes the case that using other small values such as 0.01 or 0.1 leads to more stable sampling in WinBUGS. -- Lee & Wagenmakers, Bayesian Cognitive Modeling, 2014, Chapter 4, Box 4.1 While this is not WinBUGS and improper priors are allowed in MOT, it is still useful to have this prior in case people desire proper priors. 
""" body = ''' float kappa = 0.001; float theta = 1/0.001; return (1.0 / (tgamma(kappa) * pow(theta, kappa))) * pow(value, kappa - 1) * exp(- value / theta); ''' super(VagueGammaPrior, self).__init__(body, 'vague_gamma_prior', []) class NormalPDF(SimplePrior): def __init__(self): r"""Normal PDF on the given value: :math:`P(v) = N(v; \mu, \sigma)`""" from mot.model_building.cl_functions.parameters import FreeParameter params = [FreeParameter(SimpleCLDataType.from_string('mot_float_type'), 'mu', True, 0, -np.inf, np.inf, sampling_prior=AlwaysOne()), FreeParameter(SimpleCLDataType.from_string('mot_float_type'), 'sigma', True, 1, -np.inf, np.inf, sampling_prior=AlwaysOne())] super(NormalPDF, self).__init__( 'return exp(-pown(value - mu, 2) / (2 * pown(sigma, 2))) / (sigma * sqrt(2 * M_PI));', 'normal_pdf', params) class AxialNormalPDF(SimplePrior): def __init__(self): r"""The axial normal PDF is a Normal distribution wrapped around 0 and :math:`\pi`. It's PDF is given by: .. math:: f(\theta; a, b) = \frac{\cosh(a\sin \theta + b\cos \theta)}{\pi I_{0}(\sqrt{a^{2} + b^{2}})} where in this implementation :math:`a` and :math:`b` are parameterized with the input variables :math:`\mu` and :math:`\sigma` using: .. math:: \begin{align*} \kappa &= \frac{1}{\sigma^{2}} \\ a &= \kappa * \sin \mu \\ b &= \kappa * \cos \mu \end{align*} References: Barry C. Arnold, Ashis SenGupta (2006). Probability distributions and statistical inference for axial data. Environmental and Ecological Statistics, volume 13, issue 3, pages 271-285. """ from mot.model_building.cl_functions.parameters import FreeParameter from mot.model_building.cl_functions.library_functions import Bessel, Trigonometrics params = [FreeParameter(SimpleCLDataType.from_string('mot_float_type'), 'mu', True, 0, -np.inf, np.inf, sampling_prior=AlwaysOne()), FreeParameter(SimpleCLDataType.from_string('mot_float_type'), 'sigma', True, 1, -np.inf, np.inf, sampling_prior=AlwaysOne())] super(AxialNormalPDF, self).__init__( ''' float kappa = 1.0 / pown(sigma, 2); float a = kappa * sin(mu); float b = kappa * cos(mu); return exp(log_cosh(a * sin(value) + b * cos(value)) - log_bessel_i0(sqrt(pown(a, 2) + pown(b, 2))) - log(M_PI) ); ''', 'axial_normal_pdf', params, cl_preamble=Bessel().get_cl_code() + '\n' + Trigonometrics().get_cl_code()) class ARDBeta(SimplePrior): def __init__(self): r"""This is a collapsed form of the Beta PDF meant for use in Automatic Relevance Detection sampling. In this prior the ``alpha`` parameter of the Beta prior is set to 1 which simplifies the equation. The parameter ``beta`` is still free and can be changed as desired. The implemented prior is: .. math:: B(x; 1, \beta) = \beta * (1 - x)^{\beta - 1} """ from mot.model_building.cl_functions.parameters import FreeParameter params = [FreeParameter(SimpleCLDataType.from_string('mot_float_type'), 'beta', False, 1, 1e-4, 1000, sampling_prior=ReciprocalPrior(), sampling_proposal=GaussianProposal(0.01))] body = ''' if(value < 0 || value > 1){ return 0; } return beta * pow(1 - value, beta - 1); ''' super(ARDBeta, self).__init__(body, 'ard_beta_pdf', params) class ARDGaussian(SimplePrior): def __init__(self): """This is a Gaussian prior meant for use in Automatic Relevance Detection sampling. This uses a Gaussian prior with mean at zero and a standard deviation determined by the ``alpha`` parameter with the relationship :math:`\sigma = 1/\\sqrt(\\alpha)`. 
""" from mot.model_building.cl_functions.parameters import FreeParameter params = [FreeParameter(SimpleCLDataType.from_string('mot_float_type'), 'alpha', False, 8, 1e-5, 1e4, sampling_prior=UniformWithinBoundsPrior(), sampling_proposal=GaussianProposal(20))] body = ''' if(value < 0 || value > 1){ return 0; } mot_float_type sigma = 1.0/sqrt(alpha); return exp(-pown(value, 2) / (2 * pown(sigma, 2))) / (sigma * sqrt(2 * M_PI)); ''' super(ARDGaussian, self).__init__(body, 'ard_beta_pdf', params) PKD}J#n(('mot/model_building/cl_functions/base.pyimport os from .parameters import FreeParameter, CLFunctionParameter __author__ = 'Robbert Harms' __date__ = "2016-10-03" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class CLLibrary(object): """The basic interface for all CL libraries. This provides an interface to getting the CL code of a library.""" def get_cl_code(self): """Get the function code for this library and for all its dependencies. Returns: str: The CL code for inclusion in a kernel. """ raise NotImplementedError() class CLFunction(CLLibrary): def __init__(self, return_type, function_name, parameter_list): """The header to a CL function. CL functions wraps some meta-information about a single OpenCL function in a Python object. Args: return_type (str): Return type of the CL function. function_name (string): The name of the CL function parameter_list (list of mot.model_building.cl_functions.parameters.CLFunctionParameter): The list of parameters required for this function """ super(CLFunction, self).__init__() self._return_type = return_type self._function_name = function_name self._parameter_list = parameter_list @property def return_type(self): """Get the type (in CL naming) of the returned value from this function. Returns: str: The return type of this CL function. (Examples: double, int, double4, ...) """ return self._return_type @property def cl_function_name(self): """Return the name of the implemented CL function Returns: str: The name of this CL function """ return self._function_name @property def parameter_list(self): """Return the list of parameters from this CL function. Returns: A list containing instances of CLFunctionParameter.""" return self._parameter_list def get_cl_code(self): """Get the function code for this function and all its dependencies. Returns: str: The CL code for inclusion in a kernel. """ return '' def __hash__(self): return hash(self.__repr__()) def __eq__(self, other): return type(self) == type(other) def __ne__(self, other): return type(self) != type(other) class DependentCLFunction(CLFunction): def __init__(self, return_type, function_name, parameter_list, dependency_list): """A CL function with dependencies on multiple other CLFunctions. Args: return_type (str): Return type of the CL function. function_name (string): The name of the CL function parameter_list (list of mot.model_building.cl_functions.parameters.CLFunctionParameter): The list of parameters required for this function dependency_list (list of CLLibrary): The list of CL libraries this function depends on """ super(DependentCLFunction, self).__init__(return_type, function_name, parameter_list) self._dependency_list = dependency_list def _get_cl_dependency_code(self): """Get the CL code for all the CL code for all the dependencies. Returns: str: The CL code with the actual code. 
""" code = '' for d in self._dependency_list: code += d.get_cl_code() + "\n" return code class ModelFunction(DependentCLFunction): def __init__(self, name, cl_function_name, parameter_list, dependency_list=()): """This CL function is for all estimable models Args: name (str): The name of the model cl_function_name (string): The name of the CL function parameter_list (list or tuple of CLFunctionParameter): The list of parameters required for this function dependency_list (list or tuple of CLLibrary): The list of CL libraries this function depends on """ super(ModelFunction, self).__init__('mot_float_type', cl_function_name, parameter_list, dependency_list) self._name = name @property def name(self): """Get the name of this model function. Returns: str: The name of this model function. """ return self._name def get_free_parameters(self): """Get all the free parameters in this model Returns: list: the list of free parameters in this model """ return self.get_parameters_of_type(FreeParameter) def get_prior_parameters(self, parameter): """Get the parameters referred to by the priors of the free parameters. This returns a list of all the parameters referenced by the prior parameters, recursively. Returns: list of parameters: the list of additional parameters in the prior for the given parameter """ def get_prior_parameters(params): return_params = [] for param in params: prior_params = param.sampling_prior.get_parameters() proxy_prior_params = [prior_param.get_renamed('{}.prior.{}'.format(param.name, prior_param.name)) for prior_param in prior_params] return_params.extend(proxy_prior_params) free_prior_params = [p for p in proxy_prior_params if isinstance(p, FreeParameter)] return_params.extend(get_prior_parameters(free_prior_params)) return return_params return get_prior_parameters([parameter]) def get_parameters_of_type(self, instance_types): """Get all parameters whose state instance is one of the given types. Args: instance_types (list of DataType class names, or single DataType classname); The instance type we want to get all the parameters of. Returns: A list of parameters whose type matches one or more of the given types. """ return list([p for p in self.parameter_list if isinstance(p, instance_types)]) def get_parameter_by_name(self, param_name): """Get a parameter by name. Args: param_name (str): The name of the parameter to return Returns: ClFunctionParameter: the parameter of the given name Raises: KeyError: if the parameter could not be found. """ for e in self.parameter_list: if e.name == param_name: return e raise KeyError('The parameter with the name "{}" could not be found.'.format(param_name)) def get_extra_results_maps(self, results_dict): """Get extra results maps with extra output from this model function. This is used by the function add_extra_result_maps() from the ModelBuilder to add extra maps to the resulting dictionary. Suppose a model has a parameter that can be viewed in multiple ways. It would be nice to be able to output maps for that parameter in multiple ways such that the amount of post-processing is as least as possible. For example, suppose a model calculates an angle (theta) and a radius (r). Perhaps we would like to return the cartesian coordinate of that point alongside the polar coordinates. This function allows you (indirectly) to add the additional maps. Do not modify the dictionary in place. Args: results_dict (dict): The result dictionary with all the maps you need and perhaps other maps from other models as well. 
The maps are 1 dimensional, a long list of values for all the voxels in the ROI. Returns: dict: A new dictionary with the additional maps to add. """ return {} def get_cl_dependency_code(self): """Get the CL code for all the CL code for all the dependencies. Returns: str: The CL code with the actual code. """ return self._get_cl_dependency_code() class SimpleCLLibrary(CLLibrary): def __init__(self, name, cl_code, dependencies=None): """Create a CL function for a library function. These functions are not meant to be optimized, but can be used a helper functions for the models. Args: name (str): the name of this library, used to create the inclusion guards cl_code (str): the CL code for this library dependencies (list or tuple of CLLibrary): The list of CL libraries this function depends on """ super(SimpleCLLibrary, self).__init__() self._name = name self._cl_code = cl_code self._dependencies = dependencies or {} def get_cl_code(self): return ''' {dependencies} #ifndef {inclusion_guard_name} #define {inclusion_guard_name} {code} #endif // {inclusion_guard_name} '''.format(dependencies=self._get_cl_dependency_code(), inclusion_guard_name='LIBRARY_FUNCTION_{}_CL'.format(self._name), code=self._cl_code) def _get_cl_dependency_code(self): """Get the CL code for all the CL code for all the dependencies. Returns: str: The CL code with the actual code. """ code = '' for d in self._dependencies: code += d.get_cl_code() + "\n" return code class SimpleCLLibraryFromFile(SimpleCLLibrary): def __init__(self, name, cl_code_file, var_replace_dict=None, dependencies=None): """Create a CL function for a library function. These functions are not meant to be optimized, but can be used a helper functions in models. Args: name (str): The name of the CL function cl_code_file (str): The location of the code file var_replace_dict (dict): In the cl_code file these replacements will be made (using the % format function of Python) dependencies (list or tuple of CLLibrary): The list of cl libraries this function depends on """ code = open(os.path.abspath(cl_code_file), 'r').read() if var_replace_dict is not None: code = code % var_replace_dict super(SimpleCLLibraryFromFile, self).__init__(name, code, dependencies) PKqJٹ!!-mot/model_building/cl_functions/parameters.pyfrom copy import copy from mot.cl_data_type import SimpleCLDataType from mot.model_building.parameter_functions.priors import UniformWithinBoundsPrior from mot.model_building.parameter_functions.proposals import GaussianProposal from mot.model_building.parameter_functions.sample_statistics import GaussianPSS from mot.model_building.parameter_functions.transformations import IdentityTransform __author__ = 'Robbert Harms' __date__ = "2016-10-03" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class CLFunctionParameter(object): def __init__(self, data_type, name): """Creates a new function parameter for the CL functions. Args: data_type (mot.cl_data_type.SimpleCLDataType): the data type expected by this parameter name (str): The name of this parameter Attributes: name (str): The name of this parameter """ self._data_type = data_type self.name = name @property def data_type(self): """Get the CL data type of this parameter Returns: str: The CL data type. """ return self._data_type @property def is_cl_vector_type(self): """Parse the data_type to see if this parameter holds a vector type (in CL) Returns: bool: True if the type of this function parameter is a CL vector type. 
CL vector types are recognized by an integer after the data type. For example: double4 is a CL vector type with 4 doubles. """ return self._data_type.is_vector_type def get_renamed(self, name): """Get a copy of the current parameter but then with a new name. Args: name (str): the new name for this parameter Returns: cls: a copy of the current type but with a new name """ new_param = copy(self) new_param.name = name return new_param class CurrentObservationParam(CLFunctionParameter): def __init__(self, name='_observation'): """This parameter indicates that the model should inject the current observation value in the model. Sometimes during model linearization or other mathematical operations the current observation appears on both sides of the optimization equation. That is, it sometimes happens you want to use the current observation to model that same observation. This parameter is a signal to the model builder to inject the current observation. You can use this parameter by adding it to your model and then use the current name in your model equation. """ super(CurrentObservationParam, self).__init__(SimpleCLDataType.from_string('mot_float_type'), name) class StaticMapParameter(CLFunctionParameter): def __init__(self, data_type, name, value): """This parameter is meant for static data that is different per problem. These parameters are in usage similar to fixed free parameters. They are defined as static data parameters to make clear that they are meant to provide additional observational data. They differ from the model data parameters in that those are meant for data that define a model, irrespective of the data that is trying to be optimized. The static data parameters are supportive data about the problems and differ per problem instance. This makes them differ slightly in semantics. Args: data_type (mot.cl_data_type.SimpleCLDataType): the data type expected by this parameter name (str): The name of this parameter value (double or ndarray): A single value for all voxels or a list of values for each voxel Attributes: value (double or ndarray): A single value for all voxels or a list of values for each voxel """ super(StaticMapParameter, self).__init__(data_type, name) self.value = value class ProtocolParameter(CLFunctionParameter): """A protocol data parameter indicates that this parameter is supposed to be fixed using the Protocol data. This class of parameters is used for parameters that are constant per problem instance, but differ for the different measurement points (in diffusion MRI these are called the Protocol parameters). """ class ModelDataParameter(CLFunctionParameter): def __init__(self, data_type, name, value): """This parameter is meant for data that changes the way a model function behaves. These parameters are fixed and remain constant for every problem instance (voxels in DMRI) and for every measurement point (protocol in DMRI). They can consist of vector and array types. 
Args: data_type (mot.cl_data_type.SimpleCLDataType): the data type expected by this parameter name (str): The name of this parameter value (double or ndarray): A single value for all voxels or a list of values for each voxel Attributes: value (double or ndarray): A single value for all voxels or a list of values for each voxel """ super(ModelDataParameter, self).__init__(data_type, name) self.value = value class FreeParameter(CLFunctionParameter): def __init__(self, data_type, name, fixed, value, lower_bound, upper_bound, parameter_transform=None, sampling_proposal=None, sampling_prior=None, sampling_statistics=None): """This are the kind of parameters that are generally meant to be optimized. These parameters may optionally be fixed to a value or list of values for all voxels. Args: data_type (mot.cl_data_type.SimpleCLDataType): the data type expected by this parameter name (str): The name of this parameter fixed (boolean): If this parameter is fixed to the value given value (double or ndarray): A single value for all voxels or a list of values for each voxel lower_bound (double): The lower bound of this parameter upper_bound (double): The upper bound of this parameter parameter_transform (AbstractTransformation): The parameter transformation function sampling_proposal (ParameterProposal): The proposal function for use in model sampling sampling_prior (ParameterPrior): The prior function for use in model sampling sampling_statistics (ParameterSampleStatistics): The statistic functions used to get statistics out of the samples Attributes: value (number or ndarray): The value of this state lower_bound (number or ndarray): The lower bound upper_bound (number or ndarray): The upper bound fixed (boolean): If this free parameter is fixed to its value. parameter_transform (AbstractTransformation): The parameter transformation (codec information) sampling_proposal (ParameterProposal): The proposal function for use in model sampling sampling_prior (ParameterPrior): The prior function for use in model sampling sampling_statistics (ParameterSampleStatistics): The statistic functions used to get statistics out of the samples """ super(FreeParameter, self).__init__(data_type, name) self._value = value self._lower_bound = lower_bound self._upper_bound = upper_bound self._fixed = fixed self._parameter_transform = parameter_transform or IdentityTransform() self._sampling_proposal = sampling_proposal or GaussianProposal(1.0) self._sampling_prior = sampling_prior or UniformWithinBoundsPrior() self._sampling_statistics = sampling_statistics or GaussianPSS() @property def value(self): return self._value @property def lower_bound(self): return self._lower_bound @property def upper_bound(self): return self._upper_bound @property def fixed(self): return self._fixed @property def parameter_transform(self): return self._parameter_transform @property def sampling_proposal(self): return self._sampling_proposal @property def sampling_prior(self): return self._sampling_prior @property def sampling_statistics(self): return self._sampling_statistics class LibraryParameter(CLFunctionParameter): """Parameters of this type are used inside library functions. They are not meant to be used in Model functions. 
""" PKK}J__4mot/model_building/cl_functions/library_functions.pyimport os from pkg_resources import resource_filename from mot.model_building.cl_functions.base import SimpleCLLibraryFromFile, SimpleCLLibrary __author__ = 'Robbert Harms' __date__ = "2016-10-03" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class FirstLegendreTerm(SimpleCLLibraryFromFile): def __init__(self): """A function for finding the first legendre term. (see the CL code for more details)""" super(FirstLegendreTerm, self).__init__( self.__class__.__name__, resource_filename('mot', 'data/opencl/firstLegendreTerm.cl')) class Bessel(SimpleCLLibraryFromFile): def __init__(self): """Function library for the bessel functions.""" super(Bessel, self).__init__(self.__class__.__name__, resource_filename('mot', 'data/opencl/bessel.cl')) class Trigonometrics(SimpleCLLibraryFromFile): def __init__(self): """Estimate various trigonometric functions additional to the OpenCL offerings.""" super(Trigonometrics, self).__init__( self.__class__.__name__, resource_filename('mot', 'data/opencl/trigonometrics.cl')) class Rand123(SimpleCLLibrary): def __init__(self): """Estimate various trigonometric functions additional to the OpenCL offerings.""" super(Rand123, self).__init__(self.__class__.__name__, Rand123._get_random123_cl_code()) @staticmethod def _get_random123_cl_code(): """Get the source code needed for working with the Rand123 RNG. Returns: str: the CL code for the Rand123 RNG """ generator = 'threefry' src = open(os.path.abspath(resource_filename('mot', 'data/opencl/random123/openclfeatures.h'), ), 'r').read() src += open(os.path.abspath(resource_filename('mot', 'data/opencl/random123/array.h'), ), 'r').read() src += open(os.path.abspath(resource_filename('mot', 'data/opencl/random123/{}.h'.format(generator)), ), 'r').read() src += (open(os.path.abspath(resource_filename('mot', 'data/opencl/random123/rand123.h'), ), 'r').read() % { 'GENERATOR_NAME': (generator) }) return src class CerfImWOfX(SimpleCLLibraryFromFile): def __init__(self): """Calculate the cerf.""" super(CerfImWOfX, self).__init__( self.__class__.__name__, resource_filename('mot', 'data/opencl/cerf/im_w_of_x.cl')) class CerfDawson(SimpleCLLibraryFromFile): def __init__(self): """Evaluate dawson integral.""" super(CerfDawson, self).__init__( self.__class__.__name__, resource_filename('mot', 'data/opencl/cerf/dawson.cl'), dependencies=(CerfImWOfX(),)) class CerfErfi(SimpleCLLibraryFromFile): def __init__(self): """Calculate erfi.""" super(CerfErfi, self).__init__( self.__class__.__name__, resource_filename('mot', 'data/opencl/cerf/erfi.cl'), dependencies=(CerfImWOfX(),)) class EuclidianNormFunction(SimpleCLLibraryFromFile): def __init__(self, memspace='private', memtype='mot_float_type'): """A CL functions for calculating the Euclidian distance between n values. Args: memspace (str): The memory space of the memtyped array (private, constant, global). memtype (str): the memory type to use, double, float, mot_float_type, ... 
""" super(EuclidianNormFunction, self).__init__( self.__class__.__name__ + '_' + memspace + '_' + memtype, resource_filename('mot', 'data/opencl/euclidian_norm.cl'), var_replace_dict={'MEMSPACE': memspace, 'MEMTYPE': memtype}) PK$ZI%6kϊ+mot/model_building/cl_functions/__init__.py__author__ = 'Robbert Harms' __date__ = "2016-10-03" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" PK>}J܈2mot/model_building/cl_functions/model_functions.pyimport os from pkg_resources import resource_filename from mot.cl_data_type import SimpleCLDataType from mot.model_building.cl_functions.base import ModelFunction from mot.model_building.cl_functions.parameters import FreeParameter from mot.model_building.parameter_functions.priors import ARDGaussian, UniformWithinBoundsPrior from mot.model_building.parameter_functions.proposals import GaussianProposal from mot.model_building.parameter_functions.transformations import ClampTransform, CosSqrClampTransform __author__ = 'Robbert Harms' __date__ = "2016-10-03" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" class Scalar(ModelFunction): def __init__(self, name='Scalar', param_name='s', value=0.0, lower_bound=0.0, upper_bound=float('inf'), parameter_kwargs=None): """A Scalar model function to be used during optimization. Args: name (str): The name of the model value (number or ndarray): The initial value for the single free parameter of this function. lower_bound (number or ndarray): The initial lower bound for the single free parameter of this function. upper_bound (number or ndarray): The initial upper bound for the single free parameter of this function. parameter_kwargs (dict): additional settings for the parameter initialization """ parameter_settings = dict(parameter_transform=ClampTransform(), sampling_proposal=GaussianProposal(1.0)) parameter_settings.update(parameter_kwargs or {}) super(Scalar, self).__init__( name, 'cmScalar', (FreeParameter(SimpleCLDataType.from_string('mot_float_type'), param_name, False, value, lower_bound, upper_bound, **parameter_settings),)) def get_cl_code(self): """See base class for details""" path = resource_filename('mot', 'data/opencl/model_functions/Scalar.cl') return self._get_cl_dependency_code() + "\n" + open(os.path.abspath(path), 'r').read() class Weight(Scalar): def __init__(self, name='Weight', value=0.5, lower_bound=0.0, upper_bound=1.0, parameter_kwargs=None): """Implements Scalar model function to add the semantics of representing a Weight. Some of the code checks for type Weight, be sure to use this model function if you want to represent a Weight. A weight is meant to be a model volume fraction. Args: name (str): The name of the model value (number or ndarray): The initial value for the single free parameter of this function. lower_bound (number or ndarray): The initial lower bound for the single free parameter of this function. upper_bound (number or ndarray): The initial upper bound for the single free parameter of this function. 
""" parameter_settings = dict(parameter_transform=CosSqrClampTransform(), sampling_proposal=GaussianProposal(0.01), sampling_prior=UniformWithinBoundsPrior()) parameter_settings.update(parameter_kwargs or {}) super(Weight, self).__init__(name=name, param_name='w', value=value, lower_bound=lower_bound, upper_bound=upper_bound, parameter_kwargs=parameter_settings) class ARD_Beta_Weight(Weight): def __init__(self, name='ARD_Beta_Weight', value=0.5, lower_bound=0.0, upper_bound=1.0): """A compartment weight with a Beta prior, to be used in Automatic Relevance Detection It is exactly the same as a weight, except that it has a different prior, a Beta distribution prior between [0, 1]. Args: name (str): The name of the model value (number or ndarray): The initial value for the single free parameter of this function. lower_bound (number or ndarray): The initial lower bound for the single free parameter of this function. upper_bound (number or ndarray): The initial upper bound for the single free parameter of this function. """ parameter_settings = dict(sampling_prior=ARDGaussian()) super(ARD_Beta_Weight, self).__init__(name=name, value=value, lower_bound=lower_bound, upper_bound=upper_bound, parameter_kwargs=parameter_settings) class ARD_Gaussian_Weight(Weight): def __init__(self, name='ARD_Gaussian_Weight', value=0.5, lower_bound=0.0, upper_bound=1.0): """A compartment weight with a Gaussian prior, to be used in Automatic Relevance Detection It is exactly the same as a weight, except that it has a different prior, a Gaussian prior with mean at zero and std given by a hyperparameter. Args: name (str): The name of the model value (number or ndarray): The initial value for the single free parameter of this function. lower_bound (number or ndarray): The initial lower bound for the single free parameter of this function. upper_bound (number or ndarray): The initial upper bound for the single free parameter of this function. 
""" parameter_settings = dict(sampling_prior=ARDGaussian()) super(ARD_Gaussian_Weight, self).__init__(name=name, value=value, lower_bound=lower_bound, upper_bound=upper_bound, parameter_kwargs=parameter_settings) PKH4mot/parsers/__init__.py__author__ = 'Robbert Harms' __date__ = "2016-01-05" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" PKqJʐ& & "mot/parsers/cl/CLDataTypeParser.pyimport re from mot.cl_data_type import SimpleCLDataType from mot.parsers.cl.CLDataType import CLDataTypeParser, CLDataTypeSemantics class Semantics(CLDataTypeSemantics): def __init__(self): super(Semantics, self).__init__() self._raw_data_type = None self._is_pointer_type = False self._vector_length = None self._address_space_qualifier = None self._pre_data_type_type_qualifiers = None self._post_data_type_type_qualifier = None def result(self, ast): return SimpleCLDataType( self._raw_data_type, is_pointer_type=self._is_pointer_type, vector_length=self._vector_length, address_space_qualifier=self._address_space_qualifier, pre_data_type_type_qualifiers=self._pre_data_type_type_qualifiers, post_data_type_type_qualifier=self._post_data_type_type_qualifier) def expr(self, ast): return ast def data_type(self, ast): return ast def is_pointer(self, ast): self._is_pointer_type = True return ast def scalar_data_type(self, ast): self._raw_data_type = ast return ast def vector_data_type(self, ast): match = re.match(r'(char|uchar|short|ushort|int|uint|long|ulong|float|double|half|mot_float_type)(\d+)', ast) self._raw_data_type = match.group(1) self._vector_length = match.group(2) return ast def user_data_type(self, ast): self._raw_data_type = ast return ast def address_space_qualifier(self, ast): self._address_space_qualifier = ast return ast def pre_data_type_type_qualifiers(self, ast): self._pre_data_type_type_qualifiers = ast return ast def post_data_type_type_qualifier(self, ast): self._post_data_type_type_qualifier = ast return ast def parse(parameter_declaration): """Parse the parameter declaration into a CLDataType Args: parameter_declaration (str): the CL parameter declaration. Example: const float4* const test Returns: mot.cl_data_type.SimpleCLDataType: the CL data type for this parameter declaration """ parser = CLDataTypeParser(parseinfo=False) return parser.parse(parameter_declaration, rule_name='result', semantics=Semantics()) PKqJ&ϔnnmot/parsers/cl/CLDataType.py#!/usr/bin/env python # -*- coding: utf-8 -*- # CAVEAT UTILITOR # # This file was automatically generated by Grako. # # https://pypi.python.org/pypi/grako/ # # Any changes you make to it will be overwritten the next time # the file is generated. 
from __future__ import print_function, division, absolute_import, unicode_literals from grako.parsing import graken, Parser from grako.util import re, RE_FLAGS __version__ = (2016, 1, 5, 17, 39, 0, 1) __all__ = [ 'CLDataTypeParser', 'CLDataTypeSemantics', 'main' ] class CLDataTypeParser(Parser): def __init__(self, whitespace=None, nameguard=None, comments_re=None, eol_comments_re=None, ignorecase=None, left_recursion=True, **kwargs): super(CLDataTypeParser, self).__init__( whitespace=whitespace, nameguard=nameguard, comments_re=comments_re, eol_comments_re=eol_comments_re, ignorecase=ignorecase, left_recursion=left_recursion, **kwargs ) @graken() def _result_(self): self._expr_() @graken() def _expr_(self): with self._optional(): self._address_space_qualifier_() def block0(): self._pre_data_type_type_qualifiers_() self._closure(block0) self._data_type_() with self._optional(): self._is_pointer_() with self._optional(): self._post_data_type_type_qualifier_() @graken() def _data_type_(self): with self._choice(): with self._option(): self._scalar_data_type_() with self._option(): self._vector_data_type_() with self._option(): self._user_data_type_() self._error('no available options') @graken() def _is_pointer_(self): self._token('*') @graken() def _scalar_data_type_(self): with self._choice(): with self._option(): self._token('bool') with self._option(): self._token('char') with self._option(): self._token('cl_char') with self._option(): self._token('unsigned char') with self._option(): self._token('uchar') with self._option(): self._token('cl_uchar') with self._option(): self._token('short') with self._option(): self._token('cl_short') with self._option(): self._token('unsigned short') with self._option(): self._token('ushort') with self._option(): self._token('int') with self._option(): self._token('unsigned int') with self._option(): self._token('uint') with self._option(): self._token('long') with self._option(): self._token('unsigned long') with self._option(): self._token('ulong') with self._option(): self._token('float') with self._option(): self._token('half') with self._option(): self._token('size_t') with self._option(): self._token('ptrdiff_t') with self._option(): self._token('intptr_t') with self._option(): self._token('uintptr_t') with self._option(): self._token('void') with self._option(): self._token('double') with self._option(): self._token('half') self._error('expecting one of: bool char cl_char cl_short cl_uchar double float half int intptr_t long ptrdiff_t short size_t uchar uint uintptr_t ulong unsigned char unsigned int unsigned long unsigned short ushort void') @graken() def _vector_data_type_(self): self._pattern(r'(char|uchar|short|ushort|int|uint|long|ulong|float|double|half|mot_float_type)\d+') @graken() def _user_data_type_(self): self._pattern(r'\w+') @graken() def _address_space_qualifier_(self): with self._choice(): with self._option(): self._token('__local') with self._option(): self._token('local') with self._option(): self._token('__global') with self._option(): self._token('global') with self._option(): self._token('__constant') with self._option(): self._token('constant') with self._option(): self._token('__private') with self._option(): self._token('private') self._error('expecting one of: __constant __global __local __private constant global local private') @graken() def _pre_data_type_type_qualifiers_(self): with self._choice(): with self._option(): self._token('const') with self._option(): self._token('restrict') with self._option(): 
self._token('volatile') self._error('expecting one of: const restrict volatile') @graken() def _post_data_type_type_qualifier_(self): self._token('const') class CLDataTypeSemantics(object): def result(self, ast): return ast def expr(self, ast): return ast def data_type(self, ast): return ast def is_pointer(self, ast): return ast def scalar_data_type(self, ast): return ast def vector_data_type(self, ast): return ast def user_data_type(self, ast): return ast def address_space_qualifier(self, ast): return ast def pre_data_type_type_qualifiers(self, ast): return ast def post_data_type_type_qualifier(self, ast): return ast def main(filename, startrule, trace=False, whitespace=None, nameguard=None): import json with open(filename) as f: text = f.read() parser = CLDataTypeParser(parseinfo=False) ast = parser.parse( text, startrule, filename=filename, trace=trace, whitespace=whitespace, nameguard=nameguard) print('AST:') print(ast) print() print('JSON:') print(json.dumps(ast, indent=2)) print() if __name__ == '__main__': import argparse import string import sys class ListRules(argparse.Action): def __call__(self, parser, namespace, values, option_string): print('Rules:') for r in CLDataTypeParser.rule_list(): print(r) print() sys.exit(0) parser = argparse.ArgumentParser(description="Simple parser for SimpleCLDataType.") parser.add_argument('-l', '--list', action=ListRules, nargs=0, help="list all rules and exit") parser.add_argument('-n', '--no-nameguard', action='store_true', dest='no_nameguard', help="disable the 'nameguard' feature") parser.add_argument('-t', '--trace', action='store_true', help="output trace information") parser.add_argument('-w', '--whitespace', type=str, default=string.whitespace, help="whitespace specification") parser.add_argument('file', metavar="FILE", help="the input file to parse") parser.add_argument('startrule', metavar="STARTRULE", help="the start rule for parsing") args = parser.parse_args() main( args.file, args.startrule, trace=args.trace, whitespace=args.whitespace, nameguard=not args.no_nameguard ) PKH4mot/parsers/cl/__init__.py__author__ = 'Robbert Harms' __date__ = "2016-01-05" __maintainer__ = "Robbert Harms" __email__ = "robbert.harms@maastrichtuniversity.nl" PKҬJ"!R--$mot-0.2.41.dist-info/DESCRIPTION.rst############################### Maastricht Optimization Toolbox ############################### The Maastricht Optimization Toolbox, MOT, is a library for parallel optimization and sampling using the graphics card for the computations. It is meant to optimize, in parallel, a large number of smaller problems, in contrast to optimizing one big problem with parallelized parts. For example, in diffusion MRI the brain is scanned in a 3D grid where each grid element, a *voxel*, represents its own optimization problem. The number of data points per voxel is generally small, ranging from 30 to 500 datapoints, and the models fitted to that data have generally somewhere between 6 and 20 parameters. Since each of these voxels can be analyzed independently of the others, the computations can be massively parallelized and hence programming for the graphics card can allow for a large speed gain. This software toolbox was originally built for exactly this use case, yet the algorithms and data structures are generalized such that any scientific field may take advantage of this toolbox. For the diffusion MRI package *MDT* to which is referred in this example, please see https://github.com/cbclab/MDT. **************** Can MOT help me? 
****************

To see whether MOT can help with your use case, check whether your computations can be parallelized in
some way. If you have just one big optimization problem with 10,000 variables, MOT unfortunately cannot
help you. On the other hand, if you can split your analysis into many (>10,000) smaller sub-problems of
roughly 30 parameters or fewer each, MOT may well be of help.

*******
Summary
*******

* Free software: LGPL v3 license
* Interface in Python, computations in OpenCL
* Full documentation: https://mot.readthedocs.org
* Project home: https://github.com/cbclab/MOT
* PyPi package: `PyPi `_
* Uses the `GitLab workflow `_
* Tags: optimization, parallel, opencl, python

************************
Quick installation guide
************************

The basic requirements for MOT are:

* Python 3.x (recommended) or Python 2.7
* OpenCL 1.2 (or higher) support in GPU driver or CPU runtime

**Linux**

For Ubuntu >= 16 you can use:

* ``sudo add-apt-repository ppa:robbert-harms/cbclab``
* ``sudo apt update``
* ``sudo apt install python3-mot``

For Debian users and Ubuntu < 16 users, install MOT with:

* ``sudo apt install python3 python3-pip python3-pyopencl python3-devel``
* ``sudo pip3 install mot``

**Mac**

* Install Python Anaconda 3.* 64bit from https://www.continuum.io/downloads
* Open a terminal and type ``pip install mot``

**Windows**

For Windows the short guide is:

* Install Python Anaconda 3.* 64bit from https://www.continuum.io/downloads
* Install or upgrade your GPU drivers
* Install PyOpenCL using one of the following methods:

  1. Use a binary, for example from http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyopencl, or
  2. Compile PyOpenCL with ``pip install pyopencl``; this requires:

     * Visual Studio 2015 (Community edition or higher) with the Python and Common Tools for Visual C++
       options enabled
     * an OpenCL development kit (`NVidia CUDA `_ or `Intel OpenCL SDK `_ or the `AMD APP SDK `_)

* Open an Anaconda shell and type ``pip install mot``

For more information and more elaborate installation instructions, please see: https://mot.readthedocs.org

*******
Caveats
*******

There are a few caveats and known issues, primarily related to OpenCL:

* Windows support is experimental due to the difficulty of installing PyOpenCL; hopefully installing
  PyOpenCL will get easier on Windows soon.
* For AMD users with Ubuntu >= 16, the new AMD GPU-Pro driver is still in beta and may not work with all
  the optimization routines in MOT. Our recommendation at the moment (October 2016) is to use Ubuntu
  version 14 when using AMD hardware.
* GPU acceleration is not possible in most virtual machines due to the lack of GPU or PCI-E pass-through;
  this will change whenever virtual machine vendors add this feature. Our recommendation is to install
  Linux on your machine directly.
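
To verify an installation (a minimal sketch, not part of the original instructions; it only assumes that
the ``mot`` and ``pyopencl`` packages import correctly), you can run the following from Python::

    import pyopencl
    import mot

    # Lists the OpenCL platforms visible to PyOpenCL; an empty list points to missing drivers or runtimes.
    print(pyopencl.get_platforms())
    print('MOT imported successfully')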
mot-0.2.41.dist-info/metadata.json
{"classifiers": ["Environment :: Console", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", "Development Status :: 5 - Production/Stable", "Natural Language :: English", "Operating System :: POSIX :: Linux", "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Topic :: Scientific/Engineering"], "extensions": {"python.details": {"contacts": [{"email": "robbert.harms@maastrichtuniversity.nl", "name": "Robbert Harms", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "https://github.com/cbclab/MOT"}}}, "extras": [], "generator": "bdist_wheel (0.26.0)", "keywords": ["mot", "optimization", "sampling", "opencl", "gpu", "parallel", "computing"], "license": "LGPL v3", "metadata_version": "2.0", "name": "mot", "run_requires": [{"requires": ["grako", "numpy (>=1.9.0)", "pyopencl (>=2013.1)", "scipy"]}], "summary": "Maastricht Optimization Toolbox", "test_requires": [{"requires": ["coverage"]}], "version": "0.2.41"}

mot-0.2.41.dist-info/top_level.txt
mot
tests

mot-0.2.41.dist-info/WHEEL
Wheel-Version: 1.0
Generator: bdist_wheel (0.26.0)
Root-Is-Purelib: true
Tag: py2-none-any
Tag: py3-none-any

mot-0.2.41.dist-info/METADATA
Metadata-Version: 2.0
Name: mot
Version: 0.2.41
Summary: Maastricht Optimization Toolbox
Home-page: https://github.com/cbclab/MOT
Author: Robbert Harms
Author-email: robbert.harms@maastrichtuniversity.nl
License: LGPL v3
Keywords: mot,optimization,sampling,opencl,gpu,parallel,computing
Platform: UNKNOWN
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)
Classifier: Development Status :: 5 - Production/Stable
Classifier: Natural Language :: English
Classifier: Operating System :: POSIX :: Linux
Classifier: Operating System :: MacOS :: MacOS X
Classifier: Operating System :: Microsoft :: Windows
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Topic :: Scientific/Engineering
Requires-Dist: grako
Requires-Dist: numpy (>=1.9.0)
Requires-Dist: pyopencl (>=2013.1)
Requires-Dist: scipy
PKҬJ[[mot-0.2.41.dist-info/RECORDmot/__init__.py,sha256=CH4bLHePZs0616xDIkUZ6rnk0GS0qSNSKgUKEn6Svwc,756 mot/__version__.py,sha256=_Bbx8XH39BifwZzxA7UutQXvimZCM0hXkkwLjZ8gTJI,218 mot/cl_data_type.py,sha256=c4CslxMGVGXpF4wZ2HG_ha6h1lxYuyf7gkvLZOBipfA,4498 mot/cl_environments.py,sha256=FsGwq06b5VMIk9ZAPnmPq1fZD9LCsWrT_gCzQvOi2tc,9104 mot/configuration.py,sha256=KdHPuKTh0DbR5HP_hviSfi4mTN3odYvawal1zITGZpY,7810 mot/factory.py,sha256=dbsqKw3XFP-mVEdBvhSt9mBkT5xcwdHI8bdsiN8lw2Y,3535 mot/load_balance_strategies.py,sha256=fFErnN-NCHxJdDohM3dvbGyHZmcj98K-bcU7rwapzpc,20236 mot/mcmc_diagnostics.py,sha256=17RnXhCyO0q4NXYkZkKazy_krl3u3wxs3c4SIbTVZPc,25359 mot/model_interfaces.py,sha256=DsfaKgOGEakEUZKcAFKepjhaImn77GfCnJfL2PLS2uk,23118 mot/utils.py,sha256=u2yaFS0YlmVF_1pUxl7AgOQoPnChdq_vu9uvtTdcmOo,8458 mot/cl_routines/__init__.py,sha256=IdOAwaxVUNgXhaYH0xuLRZIycRf1PM8QA0zcbcDF6VA,161 mot/cl_routines/base.py,sha256=_cRfef29hyC5qqx1-FHrU-IqxCseuJhBp0jGo34LbP8,3789 mot/cl_routines/generate_random.py,sha256=bTooiwJugJXCSmBfi4ZtLfGhpSe43wyPotD-3VVYA7k,9901 mot/cl_routines/filters/__init__.py,sha256=L6tWNlt2qgq86ObGoih3lUQkBoPXD5Z8n4j4t1ZwOp4,161 mot/cl_routines/filters/base.py,sha256=UC-iO9bFCbs0wub6K-1OPc2oXQII1FucPuOBUxGf1qw,10066 mot/cl_routines/filters/gaussian.py,sha256=yma_s0LE2z6V6xGw5SAp-EgM-kD2LTUKKamKEIcR8J8,7721 mot/cl_routines/filters/mean.py,sha256=69iUTW2xOEBg6l2mLiHpaH-CalMcVBQ5jSqOp3Nq9ZI,2327 mot/cl_routines/filters/median.py,sha256=rjH-3FPKd9DlmDkBkjE0tQBaJHVNjidEiCTd9ZLbWlQ,5176 mot/cl_routines/mapping/__init__.py,sha256=IdOAwaxVUNgXhaYH0xuLRZIycRf1PM8QA0zcbcDF6VA,161 mot/cl_routines/mapping/calc_dependent_params.py,sha256=-SGUUGLhJaPfs8dWLgkkRAR15ISrgHu3kmCGxHCqDsE,6867 mot/cl_routines/mapping/calculate_model_estimates.py,sha256=IBPDHhFlzewBhZ33yk5czrAvBf_br_pFrl7ftu3pRMg,5238 mot/cl_routines/mapping/codec_runner.py,sha256=i7zXHJlJp5oMpPz6nI7qrhc2ZUR7S_Dxkr9PlSgrabs,5501 mot/cl_routines/mapping/error_measures.py,sha256=dAClJW-kQRVGnqpd5DP_gE2kHhlrB_NKpHUd3Oo3-IM,1206 mot/cl_routines/mapping/loglikelihood_calculator.py,sha256=qvbmrwOtnmbv8m6eZHv95Pj3M38nm1MfIw3xRM-DJEg,5691 mot/cl_routines/mapping/objective_calculator.py,sha256=2HWW4aYp3jh9vz37quvuaYf-4M_svqq3LM6dTZkvc-s,5512 mot/cl_routines/mapping/objective_list_calculator.py,sha256=2zBqJrKZ7VNuZXUwUsnvMbs_sWLorbcGBfK5vFvNo_w,5899 mot/cl_routines/mapping/residual_calculator.py,sha256=aKLMxxkUPxUHUWxhFKoveD3PKdjrZWzObSBJZBW1DXU,6997 mot/cl_routines/optimizing/__init__.py,sha256=IdOAwaxVUNgXhaYH0xuLRZIycRf1PM8QA0zcbcDF6VA,161 mot/cl_routines/optimizing/base.py,sha256=40tBVPFypVBreJDRmpbOb7f0YlqPOBefdINl_2BK2uQ,18271 mot/cl_routines/optimizing/levenberg_marquardt.py,sha256=xDjcCneHpnlgMAZVaZ8ab5IzC98iPah8rn0NotWlxXI,7659 mot/cl_routines/optimizing/multi_step_optimizer.py,sha256=298AQFeNaF07RRMOIILJ7jQfZLKgIDk_9IAgp51DYk4,1045 mot/cl_routines/optimizing/nmsimplex.py,sha256=KeZT8IDI2f7a5BpVkRNUZ4vXPmVbS2E6JKL3uXn14tk,4573 mot/cl_routines/optimizing/powell.py,sha256=5qd5mE10ToEMCRioJ-WhO4oJIot4iVI6G3wNWXt4wlk,2690 mot/cl_routines/optimizing/random_restart.py,sha256=fVLG8dVtMQzjkJVit2g0g_8rCZLj6AijRG3UUxAdz-8,8083 mot/cl_routines/sampling/__init__.py,sha256=IdOAwaxVUNgXhaYH0xuLRZIycRf1PM8QA0zcbcDF6VA,161 mot/cl_routines/sampling/base.py,sha256=kUY6SVctednIyO0cD_Nkq9lhxTIKajTaaFrb3lpLN94,1479 mot/cl_routines/sampling/metropolis_hastings.py,sha256=SVUNn61L84AkwG0yCPESC6hjUYnmJmCmoxQ70rznfic,43832 mot/data/opencl/bessel.cl,sha256=O1PDPmiMwdGiNqCIRzQ65Zks9SN2gucSKUosykTgayM,2804 
mot/data/opencl/euclidian_norm.cl,sha256=uIxtPhR32BgAEuW3V8u6zvHFAUKGydeeNVK1dh8hPAA,2995 mot/data/opencl/firstLegendreTerm.cl,sha256=Ji52CmzMRvND8lqfF19B8pecK5TRRfr8_EmyEf6f5hQ,3806 mot/data/opencl/lmmin.cl,sha256=nI1E0-7H_wpXkjHpaEPwn_RxFjN1G49N6iGzP8ljKD0,40195 mot/data/opencl/nmsimplex.cl,sha256=EdPr5bM9VyciNWXhIW4_vxe4kNSMDo0ExOaG3Oap2a8,8223 mot/data/opencl/powell.cl,sha256=zLi8piGfZ9Pj7tSu4cwoCOiOnTlJZu-N2Oa2c9cFDlw,17283 mot/data/opencl/trigonometrics.cl,sha256=QxFAyv030Gf7bT-qUjdb2K3ljBYlKcY5cDRuvm8g1oM,582 mot/data/opencl/cerf/dawson.cl,sha256=Du02OaMAPuBmte1mws9-p5dqpRgwnNg5w8ZX0xd4Krc,424 mot/data/opencl/cerf/erfi.cl,sha256=6CmyQwMhA5SeHY1AAW7hJ2oepb-rd4kBgSTINrZWl7I,661 mot/data/opencl/cerf/im_w_of_x.cl,sha256=iys5NLHP6g-sRgJwZfVZ6QSiAwU4XtU2BeyDKjzUK5s,34506 mot/data/opencl/model_functions/Scalar.cl,sha256=cfNLrq_dPwnlzswOFqILgmCSr3qpK9f2J08xTAH1O-g,357 mot/data/opencl/random123/array.h,sha256=Y3cs0wOLbBIzjtkyoXe_P2kDBWpKuRUaNAd8ZqdVo7c,16903 mot/data/opencl/random123/openclfeatures.h,sha256=iqkqrGrqBsaYR0-Qi-bN40qqgr1gukGB2Wy-e8umodc,2794 mot/data/opencl/random123/philox.h,sha256=Ls6QzPEoa4olaV0SXiEcIlAIx4TUgYCqsq7mjZULgqU,21666 mot/data/opencl/random123/rand123.h,sha256=YTtEsAhDy34iq0ACkz8tuTYq8Kn4mEYeN5I5QiukI-k,5053 mot/data/opencl/random123/threefry.h,sha256=5vCkQTxMD4CzteusT7RkgBOzeprN5_NHLvvJ_LK2dho,54631 mot/model_building/__init__.py,sha256=SFy-F5MI97N-vD1ujtf3lsDiszL48tlu4jUUhEbA3WA,138 mot/model_building/data_adapter.py,sha256=c0pp5-Hf6L6eThwY21FBTJwC5VCMN23Uc_8oTue1feU,5864 mot/model_building/evaluation_models.py,sha256=2ys5lgmJezvaS4YDD2BO35P4hrB4PhQSLC4ERGK4Uog,24511 mot/model_building/model_builders.py,sha256=jasrIIkIOwRouRPCS5Y_RQCRAXxRH19OkX8kkDEzG7c,89647 mot/model_building/models_examples.py,sha256=uO2070f_nDDRVNJ4ryvfmvBVi2lFRTSRq9SuNk3xN2Y,8674 mot/model_building/problem_data.py,sha256=p1qfTTPFFgkmdZMKu54jPV72k-A10-Dsh_33rVCOXnA,3599 mot/model_building/signal_noise_models.py,sha256=RJCIoUXpaUj80hWg7cRHnRAZcG37G_-hY3HwGi-zrRI,2428 mot/model_building/trees.py,sha256=ZJeS5MbDQS_5gWIS2bpmQgHcQe7eWxT2zxufnKe7pNQ,4484 mot/model_building/cl_functions/__init__.py,sha256=A83Z2ssyL4g6oRBUewp__0HmgjjMxkh-0hubZVpVjEg,138 mot/model_building/cl_functions/base.py,sha256=gWy5Oxnl2TcSKz2Q_zjf4aGW6KM7xoGXZpsyMCevusA,10485 mot/model_building/cl_functions/library_functions.py,sha256=hh7M0FLwxlambaNdg4EpYowPaZ_8HFNobDZvdI643YY,3679 mot/model_building/cl_functions/model_functions.py,sha256=mnbTGY-gSTO1rE_8zTqQJh565ui3P3aLQBQfaPsUQS8,5531 mot/model_building/cl_functions/parameters.py,sha256=W6Y3Fn56LJHMlnrLMHsqyJ_AWsI0KUIg656OeX3n-2M,8633 mot/model_building/parameter_functions/__init__.py,sha256=IpkDVH6qmxaJGkbdnOhXXgffLtBYgCdPgyWPNEdhJI8,162 mot/model_building/parameter_functions/dependencies.py,sha256=2c40MHAkZT0Lmw3V4M0XsxWKJwLJYFdDeAeDzck5diM,1241 mot/model_building/parameter_functions/priors.py,sha256=fXaURPg5ignq8n3VcVKQjJGiBEFdsqjEfhbakyfvL14,11299 mot/model_building/parameter_functions/proposal_updates.py,sha256=B2b7Jn1L6ZYQLYc8EnPMjbnh5tweWSkZc-kRHpqTFkU,10562 mot/model_building/parameter_functions/proposals.py,sha256=1U3uABYi-XlSNtapyH8SmZ5GmXMveRJ13rfxJfxP_8c,9753 mot/model_building/parameter_functions/sample_statistics.py,sha256=UvDWImqISkXO4TJO5aRtPaMahkl3B609aNjNpz0jwdM,3909 mot/model_building/parameter_functions/transformations.py,sha256=7rgZnVWkVjtHWFwgC_AFYUA_eLvcI4NGkajYLLCrIaM,9405 mot/parsers/__init__.py,sha256=pBSPog4nRYZnyIbhk2THeeOIxK6U1b-lhNv1I8E0UKk,138 mot/parsers/cl/CLDataType.py,sha256=qt_gBH8UC6ufwnKxLpdNGQZg-5fMWn--NAOEGgxg7Fs,8046 
mot/parsers/cl/CLDataTypeParser.py,sha256=CuzZUHMaiYM1lKKUngKtMd97BIdYSHS9Gf0aacKeNiU,2342 mot/parsers/cl/__init__.py,sha256=pBSPog4nRYZnyIbhk2THeeOIxK6U1b-lhNv1I8E0UKk,138 mot-0.2.41.dist-info/DESCRIPTION.rst,sha256=L5FVuRAaEAJRYQ5uxt2TNeuy1kTtmmlWUX5Zf7bWlgk,4397 mot-0.2.41.dist-info/METADATA,sha256=NbWo8OFuoUJaiLIElPIejU_cXLLtb9yn8oLMHo5P6Ck,5620 mot-0.2.41.dist-info/RECORD,, mot-0.2.41.dist-info/WHEEL,sha256=GrqQvamwgBV4nLoJe0vhYRSWzWsx7xjlt74FT0SWYfE,110 mot-0.2.41.dist-info/metadata.json,sha256=60IX_lyrkc5Tlf862P0gJioCUUxkhxI9v7uKG1c26f0,1354 mot-0.2.41.dist-info/top_level.txt,sha256=4kv4g0CsKFVHvInak2nIu30yDtdrEYtLCaLiJJqfGac,10 tests/__init__.py,sha256=iwhKnzeBJLKxpRVjvzwiRE63_zNpIBfaKLITauVph-0,24 tests/model_interfaces.py,sha256=n1Z0WcSHzln0ZLTe3C35y353yQaE3r7rRP3YsaH6ocE,1341 tests/test_cl_routines.py,sha256=NnqSkeyqqzEa4HfYxIY6QCBdmzVtxvXfV0twPC_r6-M,4023 tests/utils.py,sha256=8XBiGCHp-u85efcEb_8Bj-qKVbEKgehECSPvFskP0Ko,6606