How to use the fbpic.utils.cuda.cuda_tpb_bpg_1d function in fbpic

To help you get started, we’ve selected a few fbpic examples, based on popular ways it is used in public projects.
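All of the examples below follow the same pattern: pass the number of elements to cuda_tpb_bpg_1d, get back a pair of 1D launch dimensions (blocks per grid, threads per block), and use that pair in brackets to launch a Numba CUDA kernel. Here is a minimal, self-contained sketch of that pattern; tpb_bpg_1d and scale_array are illustrative stand-ins (not fbpic's code), written from how the helper is called in the snippets, and a CUDA-capable GPU with Numba installed is assumed.

import numpy as np
from numba import cuda

def tpb_bpg_1d(N, TPB=256):
    # Stand-in for cuda_tpb_bpg_1d: enough blocks of TPB threads to cover N elements
    blocks_per_grid = (N + TPB - 1) // TPB
    return blocks_per_grid, TPB

@cuda.jit
def scale_array(x, a, N):
    # One thread per element; the guard skips the surplus threads of the last block
    i = cuda.grid(1)
    if i < N:
        x[i] = a * x[i]

N = 100000
d_x = cuda.to_device(np.ones(N))
dim_grid_1d, dim_block_1d = tpb_bpg_1d(N)             # e.g. (391, 256)
scale_array[dim_grid_1d, dim_block_1d](d_x, 2.0, N)   # launch config: [blocks, threads]

The optional TPB keyword seen in some of the snippets (e.g. TPB=64, or TPB=self.gather_tpb) simply selects a different block size for that particular kernel.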


github fbpic / fbpic / fbpic / particles / particles.py View on Github external
Sort the particles by performing the following steps:
        1. Get field cell index
        2. Sort field cell index
        3. Parallel prefix sum
        4. Rearrange particle arrays

        Parameters
        ----------
        fld : a Field object
             Contains the list of InterpolationGrid objects with
             the field values as well as the prefix sum.
        """
        # Shortcut for interpolation grids
        grid = fld.interp
        # Get the threads per block and the blocks per grid
        dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( self.Ntot )
        dim_grid_2d_flat, dim_block_2d_flat = \
                cuda_tpb_bpg_1d( self.prefix_sum.shape[0] )

        # ------------------------
        # Sorting of the particles
        # ------------------------
        # Get the cell index of each particle
        # (defined by iz_lower and ir_lower)
        get_cell_idx_per_particle[dim_grid_1d, dim_block_1d](
            self.cell_idx,
            self.sorted_idx,
            self.x, self.y, self.z,
            grid[0].invdz, grid[0].zmin, grid[0].Nz,
            grid[0].invdr, grid[0].rmin, grid[0].Nr)
        # Sort the cell index array and modify the sorted_idx array
        # accordingly. The value of the sorted_idx array corresponds
        # to the index of the sorted particle in the other particle
        # arrays.
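Kernels like get_cell_idx_per_particle consume dim_grid_1d and dim_block_1d by mapping one thread to one particle through cuda.grid(1). The following is a simplified, hypothetical version of such a kernel, shown only to illustrate that indexing pattern; fbpic's real kernel also clips indices at the grid edges, fills the initial sorted_idx, and may use a different flattening convention.

import math
from numba import cuda

@cuda.jit
def cell_idx_demo(cell_idx, x, y, z, invdz, zmin, Nz, invdr, rmin, Nr):
    # One thread per particle, indexed with cuda.grid(1) and guarded by the array length
    i = cuda.grid(1)
    if i < cell_idx.shape[0]:
        r = math.sqrt(x[i]**2 + y[i]**2)         # cylindrical radius of the particle
        iz_lower = int((z[i] - zmin) * invdz)    # longitudinal cell index
        ir_lower = int((r - rmin) * invdr)       # radial cell index
        # Flatten (iz_lower, ir_lower) into one sortable integer per particle
        cell_idx[i] = iz_lower + ir_lower * Nz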
github fbpic / fbpic / fbpic / particles / elementary_process / compton / compton.py View on Github external
use_cuda = self.use_cuda

        # Create temporary arrays (on CPU or GPU, depending on `use_cuda`)
        nscatter_per_batch = allocate_empty(N_batch, use_cuda, dtype=np.int64)
        nscatter_per_elec = allocate_empty(elec.Ntot, use_cuda, dtype=np.int64)
        photon_n = allocate_empty(elec.Ntot, use_cuda, dtype=np.float64)
        # Prepare random numbers
        if self.use_cuda:
            seed = np.random.randint( 256 )
            random_states = create_xoroshiro128p_states( N_batch, seed )


        # For each electron, calculate the local density of photons
        # *in the frame of the simulation*
        if use_cuda:
            bpg, tpg = cuda_tpb_bpg_1d( elec.Ntot )
            get_photon_density_gaussian_cuda[ bpg, tpg ]( photon_n, elec.Ntot,
                elec.x, elec.y, elec.z, c*t, self.photon_n_lab_peak,
                self.inv_laser_waist2, self.inv_laser_ctau2,
                self.laser_initial_z0, self.gamma_boost, self.beta_boost  )
        else:
            get_photon_density_gaussian_numba( photon_n, elec.Ntot,
                elec.x, elec.y, elec.z, c*t, self.photon_n_lab_peak,
                self.inv_laser_waist2, self.inv_laser_ctau2,
                self.laser_initial_z0, self.gamma_boost, self.beta_boost  )

        # Determine the electrons that scatter, and count them in each batch
        # (one thread per batch on GPU; parallel loop over batches on CPU)
        if use_cuda:
            batch_grid_1d, batch_block_1d = cuda_tpb_bpg_1d( N_batch )
            determine_scatterings_cuda[ batch_grid_1d, batch_block_1d ](
                N_batch, self.batch_size, elec.Ntot,
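Here cuda_tpb_bpg_1d is called with N_batch rather than the particle count, so each thread handles one batch of particles and draws its own random numbers from the xoroshiro128p states. The kernel and driver code below are a stripped-down, hypothetical version of that batch pattern (not fbpic's determine_scatterings_cuda); the names and sizes are illustrative only.

import numpy as np
from numba import cuda
from numba.cuda.random import (create_xoroshiro128p_states,
                               xoroshiro128p_uniform_float64)

@cuda.jit
def count_scatter_per_batch(nscatter_per_batch, probability, batch_size,
                            Ntot, random_states):
    # One thread per batch of particles
    i_batch = cuda.grid(1)
    if i_batch < nscatter_per_batch.shape[0]:
        count = 0
        i_start = i_batch * batch_size
        i_end = min(i_start + batch_size, Ntot)   # the last batch may be shorter
        for ip in range(i_start, i_end):
            # Each batch draws from its own RNG state, indexed by i_batch
            if xoroshiro128p_uniform_float64(random_states, i_batch) < probability[ip]:
                count += 1
        nscatter_per_batch[i_batch] = count

N_batch, batch_size, Ntot = 8, 10, 75
probability = cuda.to_device(np.full(Ntot, 0.1))
nscatter_per_batch = cuda.device_array(N_batch, dtype=np.int64)
random_states = create_xoroshiro128p_states(N_batch, seed=np.random.randint(256))
bpg, tpb = (N_batch + 127) // 128, 128     # plays the role of cuda_tpb_bpg_1d(N_batch)
count_scatter_per_batch[bpg, tpb](nscatter_per_batch, probability, batch_size,
                                  Ntot, random_states)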
github fbpic / fbpic / fbpic / boundaries / particle_buffer_handling.py View on Github external
float_recv_left, float_recv_right, uint_recv_left, uint_recv_right:
        arrays of shape (n_float, Nptcl) and (n_int, Nptcl), where Nptcl
        is the number of particles received from the left and right
        neighboring process respectively, and where n_float and n_int
        are the number of float and integer quantities respectively.
        These arrays are always on the CPU (since they were used for MPI).
    """
    # Get the new number of particles
    old_Ntot = species.Ntot
    n_left = float_recv_left.shape[1]
    n_right = float_recv_right.shape[1]
    new_Ntot = old_Ntot + n_left + n_right

    # Get the threads per block and the blocks per grid
    n_left_grid, n_left_block = cuda_tpb_bpg_1d( n_left )
    n_right_grid, n_right_block = cuda_tpb_bpg_1d( n_right )
    n_old_grid, n_old_block = cuda_tpb_bpg_1d( old_Ntot )

    # Iterate over particle attributes
    # Build list of float attributes to copy
    attr_list = [ (species,'x'), (species,'y'), (species,'z'), \
                  (species,'ux'), (species,'uy'), (species,'uz'), \
                  (species,'inv_gamma'), (species,'w') ]
    if species.ionizer is not None:
        attr_list += [ (species.ionizer, 'w_times_level') ]
    # Loop through the float quantities
    for i_attr in range( len(attr_list) ):
        # Copy the proper buffers to the GPU
        left_buffer = cuda.to_device( float_recv_left[i_attr] )
        right_buffer = cuda.to_device( float_recv_right[i_attr] )
        # Initialize the new particle array
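Three separate launch configurations are computed above because the new particle array is assembled from three pieces of different sizes: the particles received from the left, the pre-existing particles, and the particles received from the right. A hypothetical offset-copy kernel illustrating why each piece gets its own launch (fbpic's actual copy kernels may differ):

from numba import cuda

@cuda.jit
def copy_with_offset(dest, src, offset):
    # One thread per element of src, written at dest[offset + i]
    i = cuda.grid(1)
    if i < src.shape[0]:
        dest[offset + i] = src[i]

# How the three launch configurations would drive such a kernel,
# assuming the layout [ left-received | old particles | right-received ]:
#   copy_with_offset[n_left_grid,  n_left_block ]( particle_array, left_buffer,  0 )
#   copy_with_offset[n_old_grid,   n_old_block  ]( particle_array, old_array,    n_left )
#   copy_with_offset[n_right_grid, n_right_block]( particle_array, right_buffer, n_left + old_Ntot )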
github fbpic / fbpic / fbpic / particles / particles.py View on Github external
Parameters:
        -----------
        dt: float, seconds
            The timestep that should be used for the push
            (This can typically be half of the simulation timestep)

        x_push, y_push, z_push: float, dimensionless
            Multiplying coefficient for the momenta in x, y and z
            e.g. if x_push=1., the particles are pushed forward in x
                 if x_push=-1., the particles are pushed backward in x
        """
        # GPU (CUDA) version
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( self.Ntot )
            # Call the CUDA Kernel for push in x
            push_x_gpu[dim_grid_1d, dim_block_1d](
                self.x, self.y, self.z,
                self.ux, self.uy, self.uz,
                self.inv_gamma, dt, x_push, y_push, z_push )
            # The particle array is unsorted after the push in x
            self.sorted = False
        # CPU version
        else:
            push_x_numba( self.x, self.y, self.z,
                self.ux, self.uy, self.uz,
                self.inv_gamma, self.Ntot,
                dt, x_push, y_push, z_push )
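This dual-path structure appears throughout fbpic: the launch configuration is only computed on the CUDA branch, while the CPU branch calls an equivalent Numba-compiled loop with the same arguments. Below is a reduced, hypothetical sketch of that pattern for the x coordinate only, assuming the standard relativistic position update x += x_push * c * dt * ux / gamma (which is what the dimensionless momenta and inv_gamma in the snippet suggest); the demo names are not fbpic's.

from numba import cuda, njit, prange
from scipy.constants import c

@cuda.jit
def push_x_gpu_demo(x, ux, inv_gamma, dt, x_push):
    # GPU: one thread per particle
    i = cuda.grid(1)
    if i < x.shape[0]:
        x[i] += x_push * c * dt * ux[i] * inv_gamma[i]

@njit(parallel=True)
def push_x_cpu_demo(x, ux, inv_gamma, Ntot, dt, x_push):
    # CPU: parallel loop over particles
    for i in prange(Ntot):
        x[i] += x_push * c * dt * ux[i] * inv_gamma[i]

# Dispatch, mirroring the snippet above (use_cuda, x, ux, inv_gamma, Ntot assumed defined):
#   if use_cuda:
#       dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( Ntot )
#       push_x_gpu_demo[dim_grid_1d, dim_block_1d]( x, ux, inv_gamma, dt, 1. )
#   else:
#       push_x_cpu_demo( x, ux, inv_gamma, Ntot, dt, 1. )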
github fbpic / fbpic / fbpic / lpa_utils / laser / antenna_injection.py View on Github external
small-size array into the full-size one)

        grid: a list of InterpolationGrid objects
            Contains the full-size array rho
        """
        Nm = len(grid)
        if type(grid[0].rho) is np.ndarray:
            # The large-size array rho is on the CPU
            for m in range( Nm ):
                grid[m].rho[ iz_min:iz_min+2 ] += self.rho_buffer[m]
        else:
            # The large-size array rho is on the GPU
            # Copy the small-size buffer to the GPU
            cuda.to_device( self.rho_buffer, to=self.d_rho_buffer )
            # On the GPU: add the small-size buffers to the large-size array
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( grid[0].Nr, TPB=64 )
            for m in range( Nm ):
                add_rho_to_gpu_array[dim_grid_1d, dim_block_1d]( iz_min,
                            self.d_rho_buffer, grid[m].rho, m )
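Note the explicit TPB keyword: cuda_tpb_bpg_1d is called here with one thread per radial cell rather than one per particle, and the optional TPB argument overrides the block size used to cover those grid[0].Nr threads. With the stand-in helper from the first sketch this would read something like:

dim_grid_1d, dim_block_1d = tpb_bpg_1d(Nr, TPB=64)   # e.g. an illustrative Nr = 200 gives (4, 64)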
github fbpic / fbpic / fbpic / particles / particles.py View on Github external
grid : a list of InterpolationGrid objects
             (one InterpolationGrid object per azimuthal mode)
             Contains the field values on the interpolation grid
        """
        # Skip gathering for neutral particles (e.g. photons)
        if self.q == 0:
            return

        # Number of modes
        Nm = len(grid)

        # GPU (CUDA) version
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_1d, dim_block_1d = \
                cuda_tpb_bpg_1d( self.Ntot, TPB=self.gather_tpb )
            # Call the CUDA Kernel for the gathering of E and B Fields
            if self.particle_shape == 'linear':
                if Nm == 2:
                    # Optimized version for 2 modes
                    gather_field_gpu_linear[dim_grid_1d, dim_block_1d](
                         self.x, self.y, self.z,
                         grid[0].invdz, grid[0].zmin, grid[0].Nz,
                         grid[0].invdr, grid[0].rmin, grid[0].Nr,
                         grid[0].Er, grid[0].Et, grid[0].Ez,
                         grid[1].Er, grid[1].Et, grid[1].Ez,
                         grid[0].Br, grid[0].Bt, grid[0].Bz,
                         grid[1].Br, grid[1].Bt, grid[1].Bz,
                         self.Ex, self.Ey, self.Ez,
                         self.Bx, self.By, self.Bz)
                else:
                    # Generic version for arbitrary number of modes
github fbpic / fbpic / fbpic / particles / elementary_process / cuda_numba_utils.py View on Github external
Parameters
    ----------
    species: an fbpic Particles object
    use_cuda: bool
        If True, the new arrays are device arrays, and copying is done on GPU.
        If False, the arrays are on CPU, and copying is done on CPU.
    old_Ntot, new_Ntot: int
        Size of the old and new arrays (with old_Ntot < new_Ntot)
    """
    # Check if the data is on the GPU
    data_on_gpu = (type(species.w) is not np.ndarray)

    # On GPU, use one thread per particle
    if data_on_gpu:
        ptcl_grid_1d, ptcl_block_1d = cuda_tpb_bpg_1d( old_Ntot )

    # Iterate over particle attributes and copy the old particles
    for attr in ['x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma',
                    'Ex', 'Ey', 'Ez', 'Bx', 'By', 'Bz']:
        old_array = getattr(species, attr)
        new_array = allocate_empty( new_Ntot, data_on_gpu, dtype=np.float64 )
        if data_on_gpu:
            copy_particle_data_cuda[ ptcl_grid_1d, ptcl_block_1d ](
                old_Ntot, old_array, new_array )
        else:
            copy_particle_data_numba( old_Ntot, old_array, new_array )
        setattr( species, attr, new_array )
    # Copy the tracking id, if needed
    if species.tracker is not None:
        old_array = species.tracker.id
        new_array = allocate_empty( new_Ntot, use_cuda, dtype=np.uint64 )
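The launch configuration here is sized by old_Ntot because only the pre-existing particles need to be copied into the freshly allocated, larger arrays; the remaining entries are filled later by whatever process created the new particles. A hypothetical kernel matching that launch (fbpic's actual copy_particle_data_cuda may differ in details) is simply:

from numba import cuda

@cuda.jit
def copy_old_particles(old_Ntot, old_array, new_array):
    # One thread per pre-existing particle; entries beyond old_Ntot are left untouched
    i = cuda.grid(1)
    if i < old_Ntot:
        new_array[i] = old_array[i]

The CPU branch (copy_particle_data_numba) performs the same element-wise copy on the host.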
github fbpic / fbpic / fbpic / boundaries / particle_buffer_handling.py View on Github external
n_int = species.n_integer_quantities
    if left_proc is not None:
        float_send_left = np.empty((n_float, N_send_l), dtype=np.float64)
        uint_send_left = np.empty((n_int, N_send_l), dtype=np.uint64)
    else:
        float_send_left = np.empty((n_float, 0), dtype=np.float64)
        uint_send_left = np.empty((n_int, 0), dtype=np.uint64)
    if right_proc is not None:
        float_send_right = np.empty((n_float, N_send_r), dtype=np.float64)
        uint_send_right = np.empty((n_int, N_send_r), dtype=np.uint64)
    else:
        float_send_right = np.empty((n_float, 0), dtype=np.float64)
        uint_send_right = np.empty((n_int, 0), dtype=np.uint64)

    # Get the threads per block and the blocks per grid
    dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( species.Ntot )
    # Float quantities:
    # Build list of float attributes to copy
    attr_list = [ (species,'x'), (species,'y'), (species,'z'),
                    (species,'ux'), (species,'uy'), (species,'uz'),
                    (species,'inv_gamma'), (species,'w') ]
    if species.ionizer is not None:
        attr_list.append( (species.ionizer,'w_times_level') )
    # Loop through the float attributes
    for i_attr in range(n_float):
        # Initialize 3 buffer arrays on the GPU (need to be initialized
        # inside the loop, as `copy_to_host` invalidates these arrays)
        left_buffer = cuda.device_array((N_send_l,), dtype=np.float64)
        right_buffer = cuda.device_array((N_send_r,), dtype=np.float64)
        stay_buffer = cuda.device_array((new_Ntot,), dtype=np.float64)
        # Check that the buffers are still on GPU
        # (safeguard against automatic memory management)