Source code for MCEq.solvers

import numpy as np

from MCEq import config
from MCEq.misc import info


[docs] def solv_numpy(nsteps, dX, rho_inv, int_m, dec_m, phi, grid_idcs): """:mod:`numpy` implementation of forward-euler integration. Args: nsteps (int): number of integration steps dX (:func:`numpy.array` [nsteps]): vector of step-sizes :math:`\\Delta X_i` in g/cm**2 rho_inv (:func:`numpy.array` [nsteps]): vector of density values :math:`\\frac{1}{\\rho(X_i)}` int_m (:func:`numpy.array`): interaction matrix :eq:`int_matrix` in dense or sparse representation dec_m (:func:`numpy.array`): decay matrix :eq:`dec_matrix` in dense or sparse representation phi (:func:`numpy.array`): initial state vector :math:`\\Phi(X_0)` Returns: :func:`numpy.array`: state vector :math:`\\Phi(X_{nsteps})` after integration """ grid_sol = [] grid_step = 0 imc = int_m dmc = dec_m dxc = dX ric = rho_inv phc = phi.copy() # Fix: create a copy to avoid modifying the input dXaccum = 0.0 from time import time start = time() for step in range(nsteps): phc += (imc.dot(phc) + dmc.dot(ric[step] * phc)) * dxc[step] dXaccum += dxc[step] if grid_idcs and grid_step < len(grid_idcs) and grid_idcs[grid_step] == step: grid_sol.append(np.copy(phc)) grid_step += 1 info( 2, f"Performance: {1e3 * (time() - start) / float(nsteps):6.2f}ms/iteration", ) return phc, np.array(grid_sol)
[docs] class CUDASparseContext: """This class handles the transfer between CPU and GPU memory, and the calling of GPU kernels. Initialized by :class:`MCEq.core.MCEqRun` and used by :func:`solv_CUDA_sparse`. """ def __init__(self, int_m, dec_m, device_id=config.cuda_gpu_id): if config.cuda_fp_precision == 32: self.fl_pr = np.float32 elif config.cuda_fp_precision == 64: self.fl_pr = np.float64 else: raise Exception("CUDASparseContext(): Unknown precision specified.") # Setup GPU stuff and upload data to it try: import cupy as cp import cupyx.scipy as cpx self.cp = cp self.cpx = cpx except ImportError: raise Exception( "solv_CUDA_sparse(): CuPy not installed or not available.\n" + "Install with: pip install cupy-cuda12x>=12.0.0\n" + "CuPy 12.0+ is required for modern sparse matrix interface compatibility." ) cp.cuda.Device(device_id).use() self.set_matrices(int_m, dec_m)
[docs] def set_matrices(self, int_m, dec_m): """Upload sparce matrices to GPU memory""" self.cu_int_m = self.cpx.sparse.csr_matrix(int_m, dtype=self.fl_pr) self.cu_dec_m = self.cpx.sparse.csr_matrix(dec_m, dtype=self.fl_pr)
[docs] def alloc_grid_sol(self, dim, nsols): """Allocates memory for intermediate if grid solution requested.""" self.curr_sol_idx = 0 self.grid_sol = self.cp.zeros((nsols, dim))
[docs] def dump_sol(self): """Saves current solution to a new index in grid solution memory.""" self.cp.copyto(self.grid_sol[self.curr_sol_idx, :], self.cu_curr_phi) self.curr_sol_idx += 1
# self.grid_sol[self.curr_sol, :] = self.cu_curr_phi
[docs] def get_gridsol(self): """Downloads grid solution to main memory.""" return self.cp.asnumpy(self.grid_sol)
[docs] def set_phi(self, phi): """Uploads initial condition to GPU memory.""" self.cu_curr_phi = self.cp.asarray(phi, dtype=self.fl_pr)
[docs] def get_phi(self): """Downloads current solution from GPU memory.""" return self.cp.asnumpy(self.cu_curr_phi)
[docs] def solve_step(self, rho_inv, dX): """Makes one solver step on GPU using cuSparse (BLAS)""" # Mimic the exact NumPy implementation: # phc += (imc.dot(phc) + dmc.dot(ric[step] * phc)) * dxc[step] # Calculate: int_m @ curr_phi + dec_m @ (rho_inv * curr_phi) int_result = self.cu_int_m @ self.cu_curr_phi dec_result = self.cu_dec_m @ (rho_inv * self.cu_curr_phi) delta = int_result + dec_result # Apply: curr_phi += delta * dX self.cu_curr_phi += delta * dX
[docs] def solv_CUDA_sparse(nsteps, dX, rho_inv, context, phi, grid_idcs): """`NVIDIA CUDA cuSPARSE <https://developer.nvidia.com/cusparse>`_ implementation of forward-euler integration. Function requires a working :mod:`accelerate` installation. Args: nsteps (int): number of integration steps dX (:func:`numpy.array` [nsteps]): vector of step-sizes :math:`\\Delta X_i` in g/cm**2 rho_inv (:func:`numpy.array` [nsteps]): vector of density values :math:`\\frac{1}{\\rho(X_i)}` int_m (:func:`numpy.array`): interaction matrix :eq:`int_matrix` in dense or sparse representation dec_m (:func:`numpy.array`): decay matrix :eq:`dec_matrix` in dense or sparse representation phi (:func:`numpy.array`): initial state vector :math:`\\Phi(X_0)` mu_loss_handler (object): object of type :class:`SemiLagrangianEnergyLosses` Returns: :func:`numpy.array`: state vector :math:`\\Phi(X_{nsteps})` after integration """ c = context c.set_phi(phi) grid_step = 0 from time import time start = time() if len(grid_idcs) > 0: c.alloc_grid_sol(phi.shape[0], len(grid_idcs)) for step in range(nsteps): c.solve_step(rho_inv[step], dX[step]) if grid_idcs and grid_step < len(grid_idcs) and grid_idcs[grid_step] == step: c.dump_sol() grid_step += 1 info( 2, f"Performance: {1e3 * (time() - start) / float(nsteps):6.2f}ms/iteration", ) return c.get_phi(), c.get_gridsol() if len(grid_idcs) > 0 else []
[docs] def solv_MKL_sparse(nsteps, dX, rho_inv, int_m, dec_m, phi, grid_idcs): # mu_loss_handler): """`Intel MKL sparse BLAS <https://software.intel.com/en-us/articles/ intel-mkl-sparse-blas-overview?language=en>`_ implementation of forward-euler integration. Function requires that the path to the MKL runtime library ``libmkl_rt.[so/dylib]`` defined in the config file. Args: nsteps (int): number of integration steps dX (:func:`numpy.array` [nsteps]): vector of step-sizes :math:`\\Delta X_i` in g/cm**2 rho_inv (:func:`numpy.array` [nsteps]): vector of density values :math:`\\frac{1}{\\rho(X_i)}` int_m (:func:`numpy.array`): interaction matrix :eq:`int_matrix` in dense or sparse representation dec_m (:func:`numpy.array`): decay matrix :eq:`dec_matrix` in dense or sparse representation phi (:func:`numpy.array`): initial state vector :math:`\\Phi(X_0)` grid_idcs (list): indices at which longitudinal solutions have to be saved. Returns: :func:`numpy.array`: state vector :math:`\\Phi(X_{nsteps})` after integration """ from ctypes import POINTER, Structure, byref, c_int, c_void_p from ctypes import c_double as fl_pr from MCEq.config import mkl # sparse CSR-matrix x dense vector create_csr = mkl.mkl_sparse_d_create_csr gemv = mkl.mkl_sparse_d_mv gemv_hint = mkl.mkl_sparse_set_mv_hint optimize = mkl.mkl_sparse_optimize # dense vector + dense vector axpy = mkl.cblas_daxpy np_fl = config.floatlen # Prepare CTYPES pointers for MKL sparse CSR BLAS int_m_data = int_m.data.ctypes.data_as(POINTER(fl_pr)) int_m_ci = int_m.indices.ctypes.data_as(POINTER(c_int)) int_m_pb = int_m.indptr[:-1].ctypes.data_as(POINTER(c_int)) int_m_pe = int_m.indptr[1:].ctypes.data_as(POINTER(c_int)) dec_m_data = dec_m.data.ctypes.data_as(POINTER(fl_pr)) dec_m_ci = dec_m.indices.ctypes.data_as(POINTER(c_int)) dec_m_pb = dec_m.indptr[:-1].ctypes.data_as(POINTER(c_int)) dec_m_pe = dec_m.indptr[1:].ctypes.data_as(POINTER(c_int)) npphi = np.copy(phi).astype(np_fl) phi = npphi.ctypes.data_as(POINTER(fl_pr)) npdelta_phi = np.zeros_like(npphi) delta_phi = npdelta_phi.ctypes.data_as(POINTER(fl_pr)) m = c_int(int_m.shape[0]) cdzero = fl_pr(0.0) cdone = fl_pr(1.0) cizero = c_int(0) cione = c_int(1) grid_step = 0 grid_sol = [] int_m_handle = c_void_p() mst = create_csr( byref(int_m_handle), cizero, m, m, int_m_pb, int_m_pe, int_m_ci, int_m_data ) assert mst == 0, f"MKL create_csr failed with status {mst} on interaction matrix" dec_m_handle = c_void_p() mst = create_csr( byref(dec_m_handle), cizero, m, m, dec_m_pb, dec_m_pe, dec_m_ci, dec_m_data ) assert mst == 0, f"MKL create_csr failed with status {mst} on decay matrix" # hints operation = int(10) # SPARSE_OPERATION_NON_TRANSPOSE class MatrixDescr(Structure): _fields_ = [ ("type", c_int), ("mode", c_int), ("diag", c_int), ] descr = MatrixDescr() descr.type = int(20) # General matrix descr.mode = int(121) # set but dont care since general matrix descr.diag = int(131) # set but dont care since general matrix hint_status = gemv_hint( int_m_handle, operation, descr, nsteps, ) assert hint_status == 0, ( f"MKL gemv_hint failed with status {hint_status} on interaction matrix" ) hint_status = gemv_hint( dec_m_handle, operation, descr, nsteps, ) assert hint_status == 0, ( f"MKL gemv_hint failed with status {hint_status} on decay matrix" ) # add mkl_sparse_set_memory_hint??? # o = optimize(int_m_handle) assert o == 0, ( f"MKL mkl_sparse_optimize failed with status {o} on interaction matrix" ) o = optimize(dec_m_handle) assert o == 0, f"MKL mkl_sparse_optimize failed with status {o} on decay matrix" from time import time start = time() for step in range(nsteps): # delta_phi = int_m.dot(phi) gemv( operation, cdone, int_m_handle, descr, phi, cdzero, delta_phi, ) # delta_phi = rho_inv * dec_m.dot(phi) + delta_phi gemv( operation, fl_pr(rho_inv[step]), dec_m_handle, descr, phi, cdone, delta_phi, ) # phi = delta_phi * dX + phi axpy(m, fl_pr(dX[step]), delta_phi, cione, phi, cione) if grid_idcs and grid_step < len(grid_idcs) and grid_idcs[grid_step] == step: grid_sol.append(np.copy(npphi)) grid_step += 1 info( 2, f"Performance: {1e3 * (time() - start) / float(nsteps):6.2f}ms/iteration", ) return npphi, np.asarray(grid_sol)
[docs] def solv_spacc_sparse(nsteps, dX, rho_inv, spacc_int_m, spacc_dec_m, phi, grid_idcs): # mu_loss_handler): """Apple Accelerate (vecLib) implementation. Args: nsteps (int): number of integration steps dX (numpy.array[nsteps]): vector of step-sizes :math:`\\Delta X_i` in g/cm**2 rho_inv (numpy.array[nsteps]): vector of density values :math:`\\frac{1}{\\rho(X_i)}` int_m (numpy.array): interaction matrix :eq:`int_matrix` in dense or sparse representation dec_m (numpy.array): decay matrix :eq:`dec_matrix` in dense or sparse representation phi (numpy.array): initial state vector :math:`\\Phi(X_0)` grid_idcs (list): indices at which longitudinal solutions have to be saved. Returns: numpy.array: state vector :math:`\\Phi(X_{nsteps})` after integration """ from ctypes import POINTER, c_double import MCEq.spacc as spacc dim_phi = int(phi.shape[0]) npphi = np.copy(phi) phi = npphi.ctypes.data_as(POINTER(c_double)) npdelta_phi = np.zeros_like(npphi) delta_phi = npdelta_phi.ctypes.data_as(POINTER(c_double)) grid_step = 0 grid_sol = [] from time import time start = time() for step in range(nsteps): # delta_phi = int_m.dot(phi) npdelta_phi *= 0 spacc_int_m.gemv_ctargs(1.0, phi, delta_phi) # delta_phi = rho_inv * dec_m.dot(phi) + delta_phi spacc_dec_m.gemv_ctargs(rho_inv[step], phi, delta_phi) # phi = delta_phi * dX + phi spacc.daxpy(dim_phi, dX[step], delta_phi, phi) # axpy(m, fl_pr(dX[step]), delta_phi, cione, phi, cione) if grid_idcs and grid_step < len(grid_idcs) and grid_idcs[grid_step] == step: grid_sol.append(np.copy(npphi)) grid_step += 1 info( 2, "Performance: {0:6.2f}ms/iteration".format( 1e3 * (time() - start) / float(nsteps) ), ) return npphi, np.asarray(grid_sol)
# # TODO: Debug this and transition to BDF # def _odepack(dXstep=.1, # initial_depth=0.0, # int_grid=None, # grid_var='X', # *args, # **kwargs): # """Solves the transport equations with solvers from ODEPACK. # Args: # dXstep (float): external step size (adaptive sovlers make more # steps internally) # initial_depth (float): starting depth in g/cm**2 # int_grid (list): list of depths at which results are recorded # grid_var (str): Can be depth `X` or something else (currently only `X` # supported) # """ # from scipy.integrate import ode # ri = self.density_model.r_X2rho # if config.enable_muon_energy_loss: # raise NotImplementedError( # 'Energy loss not imlemented for this solver.') # # Functional to solve # def dPhi_dX(X, phi, *args): # return self.int_m.dot(phi) + self.dec_m.dot(ri(X) * phi) # # Jacobian doesn't work with sparse matrices, and any precision # # or speed advantage disappear if used with dense algebra # def jac(X, phi, *args): # # print 'jac', X, phi # return (self.int_m + self.dec_m * ri(X)).todense() # # Initial condition # phi0 = np.copy(self.phi0) # # Initialize variables # grid_sol = [] # # Setup solver # r = ode(dPhi_dX).set_integrator( # with_jacobian=False, **config.ode_params) # if int_grid is not None: # initial_depth = int_grid[0] # int_grid = int_grid[1:] # max_X = int_grid[-1] # grid_sol.append(phi0) # else: # max_X = self.density_model.max_X # info( # 1, # 'your X-grid is shorter then the material', # condition=max_X < self.density_model.max_X) # info( # 1, # 'your X-grid exceeds the dimentsions of the material', # condition=max_X > self.density_model.max_X) # # Initial value # r.set_initial_value(phi0, initial_depth) # info( # 2, 'initial depth: {0:3.2e}, maximal depth {1:}'.format( # initial_depth, max_X)) # start = time() # if int_grid is None: # i = 0 # while r.successful() and (r.t + dXstep) < max_X - 1: # info(5, "Solving at depth X =", r.t, condition=(i % 5000) == 0) # r.integrate(r.t + dXstep) # i += 1 # if r.t < max_X: # r.integrate(max_X) # # Do last step to make sure the rational number max_X is reached # r.integrate(max_X) # else: # for i, Xi in enumerate(int_grid): # info(5, 'integrating at X =', Xi, condition=i % 10 == 0) # while r.successful() and (r.t + dXstep) < Xi: # r.integrate(r.t + dXstep) # # Make sure the integrator arrives at requested step # r.integrate(Xi) # # Store the solution on grid # grid_sol.append(r.y) # info(2, # 'time elapsed during integration: {1} sec'.format(time() - start)) # self.solution = r.y # self.grid_sol = grid_sol