From 0d599e8b825e0a7fe08f9ea130b85500b5123159 Mon Sep 17 00:00:00 2001
From: Frederic Perez
Date: Tue, 2 Apr 2024 14:46:06 +0200
Subject: [PATCH 01/54] try to improve particle exchange (CPU for now)

---
 doc/Sphinx/implementation.rst            |   2 +-
 src/ParticleBC/BoundaryConditionType.cpp |  30 +-
 src/Particles/Particles.cpp              | 118 ++++-
 src/Particles/Particles.h                |   4 +
 src/Patch/Patch.cpp                      | 534 ++++++++++-------------
 src/Patch/Patch.h                        |   4 +-
 src/Patch/SyncVectorPatch.cpp            |  60 ++-
 src/Patch/SyncVectorPatch.h              |   7 +-
 src/Patch/VectorPatch.cpp                |  22 +-
 src/Patch/VectorPatch.h                  |   4 +-
 src/Smilei.cpp                           |   2 +-
 src/SmileiMPI/AsyncMPIbuffers.cpp        |  22 +-
 src/SmileiMPI/AsyncMPIbuffers.h          |  12 +-
 src/Species/Species.cpp                  |  37 +-
 src/Species/SpeciesV.cpp                 |  48 +-
 src/Species/SpeciesVAdaptive.cpp         |  10 +-
 src/Tools/Timers.cpp                     |   2 +-
 17 files changed, 478 insertions(+), 440 deletions(-)

diff --git a/doc/Sphinx/implementation.rst b/doc/Sphinx/implementation.rst
index 46bf953e9..0d35165b2 100644
--- a/doc/Sphinx/implementation.rst
+++ b/doc/Sphinx/implementation.rst
@@ -547,7 +547,7 @@ file ``Smilei.cpp`` through calls to different ``vecPatches`` methods.

 .. code-block:: c++

-    vecPatches.finalizeAndSortParticles( params, &smpi, simWindow,
+    vecPatches.finalizeExchParticlesAndSort( params, &smpi, simWindow,
                                          time_dual, timers, itime );

 * **Particle merging**: merging process for particles (still experimental)
diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp
index 318b6b289..5a55d74b2 100755
--- a/src/ParticleBC/BoundaryConditionType.cpp
+++ b/src/ParticleBC/BoundaryConditionType.cpp
@@ -28,9 +28,9 @@ void internal_inf( Species *species, int imin, int imax, int direction, double l
                                cell_keys /* [imin:imax - imin] */ )
     #pragma omp teams distribute parallel for
 #endif
-    for (int ipart=imin ; ipart<imax ; ipart++ ) {
-        if ( position[ ipart ] < limit_inf ) {
-            cell_keys[ ipart ] = -1;
+    for( int ipart=imin ; ipart<imax ; ipart++ ) {
+        if( cell_keys[ ipart ] >= 0 && position[ ipart ] < limit_inf ) {
+            cell_keys[ ipart ] = -2 - 2 * direction;
         }
     }
 }
@@ -50,9 +50,9 @@ void internal_sup( Species *species, int imin, int imax, int direction, double l
                                cell_keys /* [imin:imax - imin] */ )
     #pragma omp teams distribute parallel for
 #endif
-    for (int ipart=imin ; ipart<imax ; ipart++ ) {
-        if ( position[ ipart ] >= limit_sup ) {
-            cell_keys[ ipart ] = -1;
+    for( int ipart=imin ; ipart<imax ; ipart++ ) {
+        if( cell_keys[ ipart ] >= 0 && position[ ipart ] >= limit_sup ) {
+            cell_keys[ ipart ] = -3 - 2 * direction;
         }
     }
 }
@@ -63,10 +63,11 @@ void internal_inf_AM( Species *species, int imin, int imax, int /*direction*/, d
     double* position_y = species->particles->getPtrPosition(1);
     double* position_z = species->particles->getPtrPosition(2);
     int* cell_keys = species->particles->getPtrCellKeys();
-    for (int ipart=imin ; ipart<imax ; ipart++ ) {
-        double distance2ToAxis = position_y[ipart]*position_y[ipart] + position_z[ipart]*position_z[ipart];
-        if ( distance2ToAxis < limit_inf*limit_inf ) {
-            cell_keys[ ipart ] = -1;
+    double limit_inf2 = limit_inf*limit_inf;
+    for( int ipart=imin ; ipart<imax ; ipart++ ) {
+        double distance2ToAxis = position_y[ipart]*position_y[ipart] + position_z[ipart]*position_z[ipart];
+        if( cell_keys[ ipart ] >= 0 && distance2ToAxis < limit_inf2 ) {
+            cell_keys[ ipart ] = -4;
         }
     }
 }
@@ -77,10 +78,11 @@ void internal_sup_AM( Species *species, int imin, int imax, int /*direction*/, d
     double* position_y = species->particles->getPtrPosition(1);
     double* position_z = species->particles->getPtrPosition(2);
     int* cell_keys = species->particles->getPtrCellKeys();
-    for (int ipart=imin ; ipart<imax ; ipart++ ) {
-        double distance2ToAxis = position_y[ipart]*position_y[ipart] + position_z[ipart]*position_z[ipart];
-        if ( distance2ToAxis >= limit_sup*limit_sup ) {
-            cell_keys[ ipart ] = -1;
+    double limit_sup2 = limit_sup*limit_sup;
+    for( int ipart=imin ; ipart<imax ; ipart++ ) {
+        double distance2ToAxis = position_y[ipart]*position_y[ipart] + position_z[ipart]*position_z[ipart];
+        if( cell_keys[ ipart ] >= 0 && distance2ToAxis >= limit_sup2 ) {
+            cell_keys[ ipart ] = -5;
         }
     }
 }
@@ -97,8 +99,8 @@ void reflect_particle_inf( Species *species, int imin, int imax, int direction,
 #pragma omp target is_device_ptr( position, momentum )
 #pragma omp teams distribute parallel for
 #endif
-    for (int ipart=imin ; ipart<imax ; ipart++ ) {
diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp
--- a/src/Particles/Particles.cpp
+++ b/src/Particles/Particles.cpp
+// ---------------------------------------------------------------------------------------------------------------------
+//! Copy particles indexed by array 'indices' to dest_id in dest_parts
+//! The array 'indices' must be sorted in increasing order
+//! cell keys not affected
+// ---------------------------------------------------------------------------------------------------------------------
+void Particles::copyParticles( vector<size_t> indices, Particles &dest_parts, int dest_id )
+{
+    const size_t transfer_size = indices.size();
+    const size_t dest_new_size = dest_parts.size() + transfer_size;
+
+    for( unsigned int iprop=0 ; iprop<double_prop_.size() ; iprop++ ) {
+        // Resize and make room at dest_id
+        dest_parts.double_prop_[iprop]->resize( dest_new_size );
+        auto loc = dest_parts.double_prop_[iprop]->begin() + dest_id;
+        move_backward( loc, loc + transfer_size, dest_parts.double_prop_[iprop]->end() );
+        // Copy data
+        for( size_t i = 0; i < transfer_size; i++ ) {
+            ( *dest_parts.double_prop_[iprop] )[dest_id+i] = ( *double_prop_[iprop] )[indices[i]];
+        }
+    }
+
+    for( unsigned int iprop=0 ; iprop<short_prop_.size() ; iprop++ ) {
+        // Resize and make room at dest_id
+        dest_parts.short_prop_[iprop]->resize( dest_new_size );
+        auto loc = dest_parts.short_prop_[iprop]->begin() + dest_id;
+        move_backward( loc, loc + transfer_size, dest_parts.short_prop_[iprop]->end() );
+        // Copy data
+        for( size_t i = 0; i < transfer_size; i++ ) {
+            ( *dest_parts.short_prop_[iprop] )[dest_id+i] = ( *short_prop_[iprop] )[indices[i]];
+        }
+    }
+
+    for( unsigned int iprop=0 ; iprop<uint64_prop_.size() ; iprop++ ) {
+        // Resize and make room at dest_id
+        dest_parts.uint64_prop_[iprop]->resize( dest_new_size );
+        auto loc = dest_parts.uint64_prop_[iprop]->begin() + dest_id;
+        move_backward( loc, loc + transfer_size, dest_parts.uint64_prop_[iprop]->end() );
+        // Copy data
+        for( size_t i = 0; i < transfer_size; i++ ) {
+            ( *dest_parts.uint64_prop_[iprop] )[dest_id+i] = ( *uint64_prop_[iprop] )[indices[i]];
+        }
+    }
+}
+
 // ---------------------------------------------------------------------------------------------------------------------
 //! Make a new particle at the position of another
 //! cell keys not affected
 // ---------------------------------------------------------------------------------------------------------------------
@@ -529,6 +573,70 @@ void Particles::eraseParticle( unsigned int ipart, unsigned int npart, bool comp
 }

+
+// ---------------------------------------------------------------------------------------------------------------------
+//! Erase particles indexed by array 'indices'
+//! The array 'indices' must be sorted in increasing order
+//! cell keys not affected
+// ---------------------------------------------------------------------------------------------------------------------
+void Particles::eraseParticles( vector<size_t> indices )
+{
+    const size_t indices_size = indices.size();
+    const size_t initial_size = size();
+
+    if( indices_size > 0 ) {
+
+        for( auto prop : double_prop_ ) {
+            // Relocate data to fill erased space
+            size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0];
+            for( size_t from = indices[0]+1; from < initial_size; from++ ) {
+                if( from < stop ) {
+                    ( *prop )[to] = ( *prop )[from];
+                    to++;
+                } else {
+                    j++;
+                    stop = ( j == indices_size ) ? initial_size : indices[j];
+                }
+            }
+            // Resize
+            prop->resize( initial_size - indices_size );
+        }
+
+        for( auto prop : short_prop_ ) {
+            // Relocate data to fill erased space
+            size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0];
+            for( size_t from = indices[0]+1; from < initial_size; from++ ) {
+                if( from < stop ) {
+                    ( *prop )[to] = ( *prop )[from];
+                    to++;
+                } else {
+                    j++;
+                    stop = ( j == indices_size ) ? initial_size : indices[j];
+                }
+            }
+            // Resize
+            prop->resize( initial_size - indices_size );
+        }
+
+        for( auto prop : uint64_prop_ ) {
+            // Relocate data to fill erased space
+            size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0];
+            for( size_t from = indices[0]+1; from < initial_size; from++ ) {
+                if( from < stop ) {
+                    ( *prop )[to] = ( *prop )[from];
+                    to++;
+                } else {
+                    j++;
+                    stop = ( j == indices_size ) ? initial_size : indices[j];
+                }
+            }
+            // Resize
+            prop->resize( initial_size - indices_size );
+        }
+
+    }
+}
+
 // ---------------------------------------------------------------------------------------------------------------------
 // Print parameters of particle iPart
 // ---------------------------------------------------------------------------------------------------------------------
@@ -1198,11 +1306,11 @@ void Particles::copyFromDeviceToHost()
 void Particles::extractParticles( Particles* particles_to_move )
 {
     particles_to_move->clear();
-    for ( int ipart=0 ; ipart<(int)size() ; ipart++ ) {
-        if ( cell_keys[ipart] == -1 ) {
+    for( int ipart=0 ; ipart<(int)size() ; ipart++ ) {
+        if ( cell_keys[ipart] < 0 ) {
             copyParticle( ipart, *particles_to_move );
         }
     }
 }
diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h
--- a/src/Particles/Particles.h
+++ b/src/Particles/Particles.h
+    //! Copy particles indexed by array 'indices' to dest_id in dest_parts
+    void copyParticles( std::vector<size_t> indices, Particles &dest_parts, int dest_id );
     //! Make a new particle at the position of another
     void makeParticleAt( Particles &source_particles, unsigned int ipart, double w, short q=0., double px=0., double py=0., double pz=0. );

@@ -151,6 +153,8 @@ class Particles
     void eraseParticle( unsigned int iPart, bool compute_cell_keys = false );
     //! Suppress nPart particles from iPart
     void eraseParticle( unsigned int iPart, unsigned int nPart, bool compute_cell_keys = false );
+    //! Suppress indexed particles
+    void eraseParticles( std::vector<size_t> indices );
     //! Suppress all particles from iPart to the end of particle array
     void eraseParticleTrail( unsigned int iPart, bool compute_cell_keys = false );
diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp
index b8ed401d9..0bf353e67 100755
--- a/src/Patch/Patch.cpp
+++ b/src/Patch/Patch.cpp
@@ -517,220 +517,184 @@ void Patch::updateMPIenv( SmileiMPI *smpi )
 // ---------------------------------------------------------------------------------------------------------------------
 void Patch::cleanMPIBuffers( int ispec, Params &params )
 {
-    int ndim = params.nDim_field;
+    size_t ndim = params.nDim_field;
+    SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_;

-    for( int iDim=0 ; iDim < ndim ; iDim++ ) {
+    for( size_t iDim=0 ; iDim < ndim ; iDim++ ) {
         for( int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) {
-            vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][iNeighbor].clear();//resize(0,ndim);
-            vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor].clear();//resize(0,ndim);
-            vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].clear();
-            //vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].resize(0);
-            vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0;
+            buffer.partRecv[iDim][iNeighbor]->clear();
+            buffer.partSend[iDim][iNeighbor]->clear();
         }
     }
 } // cleanMPIBuffers

 // ---------------------------------------------------------------------------------------------------------------------
-// Split particles Id to send in per direction and per patch neighbor dedicated buffers
-// Apply periodicity if necessary
+// Copy particles to be exchanged to buffers
 // ---------------------------------------------------------------------------------------------------------------------
-void Patch::initExchParticles( int ispec, Params &params )
+void Patch::copyExchParticlesToBuffers( int ispec, Params &params )
 {
-    Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move );
-    int ndim = params.nDim_field;
-    int idim, check;
-//    double xmax[3];
-
-    for( int iDim=0 ; iDim < ndim ; iDim++ ) {
-        for( int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) {
-            vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][iNeighbor].clear();//resize(0,ndim);
-            vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor].clear();//resize(0,ndim);
-            vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].resize( 0 );
-            vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0;
-        }
-    }
-
-    int n_part_send = cuParticles.size();
-
-    int iPart;
-
-    // Define where particles are going
-    //Put particles in the send buffer it belongs to. Priority to lower dimensions.
-    if( params.geometry != "AMcylindrical" ) {
-        for( int i=0 ; i<n_part_send ; i++ ) {
-            iPart = i;
-            check = 0;
-            idim = 0;
-            while( check == 0 && idim<ndim ) {
-                if( cuParticles.position( idim, iPart ) < min_local_[idim] ) {
-                    if( neighbor_[idim][0]!=MPI_PROC_NULL ) {
-                        vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][0].push_back( iPart );
-                    }
-                    //If particle is outside of the global domain (has no neighbor), it will not be put in a send buffer and will simply be deleted.
-                    check = 1;
-                } else if( cuParticles.position( idim, iPart ) >= max_local_[idim] ) {
-                    if( neighbor_[idim][1]!=MPI_PROC_NULL ) {
-                        vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][1].push_back( iPart );
-                    }
-                    check = 1;
-                }
-                idim++;
-            }
-        }
-    } else { //if (geometry == "AMcylindrical")
-        double r_min2, r_max2;
-        r_max2 = max_local_[1] * max_local_[1] ;
-        r_min2 = min_local_[1] * min_local_[1] ;
-        for( int i=0 ; i<n_part_send ; i++ ) {
-            iPart = i;
-            if( cuParticles.position( 0, iPart ) < min_local_[0] ) {
-                if ( (Pcoordinates[0]==0) && ( vecSpecies[ispec]->boundary_conditions_[0][0]!="periodic" ) ) {
-                    continue;
-                }
-                vecSpecies[ispec]->MPI_buffer_.part_index_send[0][0].push_back( iPart );
-                //MESSAGE("Sending particle to the left x= " << cuParticles.position(0,iPart) << " xmin = " << min_local_[0] );
-                //If particle is outside of the global domain (has no neighbor), it will not be put in a send buffer and will simply be deleted.
-            } else if( cuParticles.position( 0, iPart ) >= max_local_[0] ) {
-                if ( (Pcoordinates[0]==params.number_of_patches[0]-1) && ( vecSpecies[ispec]->boundary_conditions_[0][1]!="periodic" ) ) {
-                    continue;
-                }
-                if( neighbor_[0][1]!=MPI_PROC_NULL ) {
-                    vecSpecies[ispec]->MPI_buffer_.part_index_send[0][1].push_back( iPart );
-                    // MESSAGE("Sending particle to the right x= " << cuParticles.position(0,iPart) << " xmax = " << max_local_[0] );
-                }
-            } else if( cuParticles.distance2ToAxis( iPart ) < r_min2 ) {
-                if( neighbor_[1][0]!=MPI_PROC_NULL ) {
-                    vecSpecies[ispec]->MPI_buffer_.part_index_send[1][0].push_back( iPart );
-                    //MESSAGE("Sending particle to the south r= " << cuParticles.distance2ToAxis(iPart) << " rmin2 = " << r_min2 );
-                }
-            } else if( cuParticles.distance2ToAxis( iPart ) >= r_max2 ) {
-                if( neighbor_[1][1]!=MPI_PROC_NULL ) {
-                    vecSpecies[ispec]->MPI_buffer_.part_index_send[1][1].push_back( iPart );
-                    //MESSAGE("Sending particle to the north r= " << cuParticles.distance2ToAxis(iPart) << " rmax2 = " << r_max2 << " rmin2= " << r_min2 );
-                }
-            }
-        }
-    }
-} // initExchParticles(... iDim)
+    SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_;
+    Particles &part = *vecSpecies[ispec]->particles;
+
+    cleanMPIBuffers( ispec, params );
+
+    vector<vector<bool>> copy( 3 );
+    copy[0] = { neighbor_[0][0] != MPI_PROC_NULL, neighbor_[0][1] != MPI_PROC_NULL };
+    copy[1] = { neighbor_[1][0] != MPI_PROC_NULL, neighbor_[1][1] != MPI_PROC_NULL };
+    if( params.nDim_field > 2 ) {
+        copy[2] = { neighbor_[2][0] != MPI_PROC_NULL, neighbor_[2][1] != MPI_PROC_NULL };
+    }
+    if( params.geometry == "AMcylindrical" ) {
+        copy[0][0] = copy[0][0] && ( Pcoordinates[0]!=0 || vecSpecies[ispec]->boundary_conditions_[0][0]=="periodic" );
+        copy[0][1] = copy[0][1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" );
+    }
+
+    // Loop all particles and copy the outgoing ones to the send buffers
+    for( size_t ipart = 0; ipart < part.size(); ipart++ ) {
+        if( part.cell_keys[ipart] < -1 ) {
+            if( part.cell_keys[ipart] == -2 ) {
+                if( copy[0][0] ) {
+                    part.copyParticle( ipart, *buffer.partSend[0][0] );
+                }
+            } else if( part.cell_keys[ipart] == -3 ) {
+                if( copy[0][1] ) {
+                    part.copyParticle( ipart, *buffer.partSend[0][1] );
+                }
+            } else if( part.cell_keys[ipart] == -4 ) {
+                if( copy[1][0] ) {
+                    part.copyParticle( ipart, *buffer.partSend[1][0] );
+                }
+            } else if( part.cell_keys[ipart] == -5 ) {
+                if( copy[1][1] ) {
+                    part.copyParticle( ipart, *buffer.partSend[1][1] );
+                }
+            } else if( part.cell_keys[ipart] == -6 ) {
+                if( copy[2][0] ) {
+                    part.copyParticle( ipart, *buffer.partSend[2][0] );
+                }
+            } else if( part.cell_keys[ipart] == -7 ) {
+                if( copy[2][1] ) {
+                    part.copyParticle( ipart, *buffer.partSend[2][1] );
+                }
+            }
+        }
+    }
+} // copyExchParticlesToBuffers(... iDim)
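Note on the new cell_keys convention used above: the boundary conditions now tag an outgoing particle with a negative key that encodes which boundary it crossed (-2-2*d for the lower side of dimension d, -3-2*d for the upper side; the AM radial checks use -4/-5), and copyExchParticlesToBuffers() dispatches on that value. A minimal standalone sketch of the decoding, with a hypothetical helper name that is not part of the commit:

    #include <cassert>

    // Decode a cell_keys value < -1 into (dimension, side), i.e. the indices
    // used to pick buffer.partSend[dim][side] in copyExchParticlesToBuffers().
    inline void decode_exchange_key( int key, int &dim, int &side )
    {
        assert( key < -1 );
        dim  = ( -key - 2 ) / 2;  // -2,-3 -> 0 ; -4,-5 -> 1 ; -6,-7 -> 2
        side = ( -key - 2 ) % 2;  // even -> lower neighbor, odd -> upper neighbor
    }

    int main()
    {
        int dim, side;
        decode_exchange_key( -5, dim, side );  // crossed the upper bound of dimension 1
        assert( dim == 1 && side == 1 );
        return 0;
    }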
 // ---------------------------------------------------------------------------------------------------------------------
-// For direction iDim, start exchange of number of particles
-// - vecPatch : used for intra-MPI process comm (direct copy using Particles::copyParticles)
-// - smpi : inherited from previous SmileiMPI::exchangeParticles()
+// Exchange number of particles to exchange to establish or not a communication
 // ---------------------------------------------------------------------------------------------------------------------
 void Patch::exchNbrOfParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, VectorPatch *vecPatch )
 {
-    int h0 = ( *vecPatch )( 0 )->hindex;
-    /********************************************************************************/
-    // Exchange number of particles to exchange to establish or not a communication
-    /********************************************************************************/
+    SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_;
+
     for( int iNeighbor=0 ; iNeighbor<nbNeighbors_ ; iNeighbor++ ) {
+
+        int iOppositeNeighbor = ( iNeighbor+1 )%2;
+        buffer.partSendSize[iDim][iNeighbor] = buffer.partSend[iDim][iNeighbor]->size();
+
+        // Send number of particles to neighbor
         if( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) {
-            vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor] = ( vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor] ).size();
             if( is_a_MPI_neighbor( iDim, iNeighbor ) ) {
-                //If neighbour is MPI ==> I send him the number of particles I'll send later.
                 int local_hindex = hindex - vecPatch->refHindex_;
                 int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 );
-                MPI_Isend( &( vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor] ), 1, MPI_INT, MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ) );
+                MPI_Isend( &buffer.partSendSize[iDim][iNeighbor], 1, MPI_INT, MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &buffer.srequest[iDim][iNeighbor] );
             } else {
-                //Else, I directly set the receive size to the correct value.
-                ( *vecPatch )( neighbor_[iDim][iNeighbor]- h0 )->vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2] = vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor];
+                // If the destination is on the same MPI process, directly set the number at the destination
+                int destination_hindex = neighbor_[iDim][iNeighbor] - vecPatch->refHindex_;
+                SpeciesMPIbuffers &destination_buffer = ( *vecPatch )( destination_hindex )->vecSpecies[ispec]->MPI_buffer_;
+                destination_buffer.partRecvSize[iDim][iOppositeNeighbor] = buffer.partSendSize[iDim][iNeighbor];
             }
-        } // END of Send
-
-        if( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) {
-            if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) {
-                //If other neighbour is MPI ==> I receive the number of particles I'll receive later.
-                int local_hindex = neighbor_[iDim][( iNeighbor+1 )%2] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][( iNeighbor+1 )%2] ];
+        }
+
+        // Receive number of particles from neighbor
+        if( neighbor_[iDim][iOppositeNeighbor]!=MPI_PROC_NULL ) {
+            if( is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) {
+                int local_hindex = neighbor_[iDim][iOppositeNeighbor] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][iOppositeNeighbor] ];
                 int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 );
-                MPI_Irecv( &( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2] ), 1, MPI_INT, MPI_neighbor_[iDim][( iNeighbor+1 )%2], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ) );
+                MPI_Irecv( &buffer.partRecvSize[iDim][iOppositeNeighbor], 1, MPI_INT, MPI_neighbor_[iDim][iOppositeNeighbor], tag, MPI_COMM_WORLD, &buffer.rrequest[iDim][iOppositeNeighbor] );
             }
         }
-    }//end loop on nb_neighbors.
-
+
+    }
+
 } // exchNbrOfParticles(... iDim)

+// ---------------------------------------------------------------------------------------------------------------------
+// Wait for end of communications over number of particles
+// ---------------------------------------------------------------------------------------------------------------------
 void Patch::endNbrOfParticles( int ispec, int iDim )
 {
-    Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move );
-
-    /********************************************************************************/
-    // Wait for end of communications over number of particles
-    /********************************************************************************/
+    SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_;
+
     for( int iNeighbor=0 ; iNeighbor<nbNeighbors_ ; iNeighbor++ ) {
-        MPI_Status sstat[2];
-        MPI_Status rstat[2];
-        if( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) {
-            if( is_a_MPI_neighbor( iDim, iNeighbor ) ) {
-                MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) );
-            }
+        int iOppositeNeighbor = ( iNeighbor+1 )%2;
+
+        MPI_Status sstat[2];
+        MPI_Status rstat[2];
+        if( is_a_MPI_neighbor( iDim, iNeighbor ) ) {
+            MPI_Wait( &( buffer.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) );
         }
-        if( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) {
-            if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) {
-                MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ), &( rstat[( iNeighbor+1 )%2] ) );
-                if( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]!=0 ) {
-                    //If I receive particles over MPI, I initialize my receive buffer with the appropriate size.
-                    vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2].initialize( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2], cuParticles );
-                }
-            }
+        if( is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) {
+            MPI_Wait( &( buffer.rrequest[iDim][iOppositeNeighbor] ), &( rstat[iOppositeNeighbor] ) );
         }
     }
-
 } // END endNbrOfParticles(... iDim)

 // ---------------------------------------------------------------------------------------------------------------------
-// For direction iDim, finalize receive of number of particles and really send particles
+// For direction iDim, prepare particles to be sent
 // - vecPatch : used for intra-MPI process comm (direct copy using Particles::copyParticles)
 // - smpi : used smpi->periods_
 // ---------------------------------------------------------------------------------------------------------------------
 void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params &params, int iDim, VectorPatch *vecPatch )
 {
-    Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move );
-
-    int n_part_send;
-    int h0 = ( *vecPatch )( 0 )->hindex;
     double x_max = params.cell_length[iDim]*( params.global_size_[iDim] );
-
+    SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_;
+
     for( int iNeighbor=0 ; iNeighbor<nbNeighbors_ ; iNeighbor++ ) {
-        n_part_send = ( vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor] ).size();
-        if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) {
-            // Enabled periodicity
+
+        Particles &partSend = *buffer.partSend[iDim][iNeighbor];
+
+        // Apply periodicity if enabled
+        if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL && partSend.size() != 0 ) {
             if( smpi->periods_[iDim]==1 ) {
-                for( int iPart=0 ; iPart<n_part_send ; iPart++ ) {
-                    if( ( iNeighbor==0 ) && ( Pcoordinates[iDim] == 0 ) && ( cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) < 0. ) ) {
-                        cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) += x_max;
-                    } else if( ( iNeighbor==1 ) && ( Pcoordinates[iDim] == params.number_of_patches[iDim]-1 ) && ( cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) >= x_max ) ) {
-                        cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) -= x_max;
+                if( iNeighbor == 0 && Pcoordinates[iDim] == 0 ) {
+                    for( size_t iPart=0; iPart < partSend.size(); iPart++ ) {
+                        if( partSend.position( iDim, iPart ) < 0. ) {
+                            partSend.position( iDim, iPart ) += x_max;
+                        }
+                    }
+                }
+                if( iNeighbor == 1 && Pcoordinates[iDim] == params.number_of_patches[iDim]-1 ) {
+                    for( size_t iPart=0; iPart < partSend.size(); iPart++ ) {
+                        if( partSend.position( iDim, iPart ) >= x_max ) {
+                            partSend.position( iDim, iPart ) -= x_max;
+                        }
                     }
                 }
             }
-            // Send particles
+        }
+
+        if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL ) {
+            // Initialize receive buffer with the appropriate size
             if( is_a_MPI_neighbor( iDim, iNeighbor ) ) {
-                // If MPI comm, first copy particles in the sendbuffer
-                for( int iPart=0 ; iPart<n_part_send ; iPart++ ) {
-                    cuParticles.copyParticle( vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart], vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] );
-                }
-            } else {
-                //If not MPI comm, copy particles directly in the receive buffer
-                for( int iPart=0 ; iPart<n_part_send ; iPart++ ) {
-                    cuParticles.copyParticle( vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart], ( ( *vecPatch )( neighbor_[iDim][iNeighbor]- h0 )->vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ) );
+                if( buffer.partRecvSize[iDim][iNeighbor]!=0 ) {
+                    buffer.partRecv[iDim][iNeighbor]->initialize( buffer.partRecvSize[iDim][iNeighbor], *vecSpecies[ispec]->particles );
                 }
             }
-        } // END of Send
-
+            // Swap particles to the other patch directly if it belongs to the same MPI process
+            else {
+                int iOppositeNeighbor = ( iNeighbor+1 )%2;
+                SpeciesMPIbuffers &neighbor_buffer = ( *vecPatch )( neighbor_[iDim][iNeighbor]- vecPatch->refHindex_ )->vecSpecies[ispec]->MPI_buffer_;
+                swap( buffer.partSend[iDim][iNeighbor], neighbor_buffer.partRecv[iDim][iOppositeNeighbor] );
+            }
+        }
+
     } // END for iNeighbor
 } // END prepareParticles(... iDim)

@@ -738,169 +702,135 @@
 void Patch::exchParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, VectorPatch *vecPatch )
 {
-    int n_part_send, n_part_recv;
-
-    for( int iNeighbor=0 ; iNeighbor<nbNeighbors_ ; iNeighbor++ ) {
-        n_part_send = ( vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor] ).size();
-        if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) {
-            // Send particles
-            if( is_a_MPI_neighbor( iDim, iNeighbor ) ) {
-                // Then send particles
-                int local_hindex = hindex - vecPatch->refHindex_;
-                int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 );
-                vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &( vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ) );
-                MPI_Isend( &( ( vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ).position( 0, 0 ) ), 1, vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ) );
-            }
-        } // END of Send
-
-        n_part_recv = vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2];
-        if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) {
-            if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) {
-                // If MPI comm, receive particles in the recv buffer previously initialized.
-                vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ) );
-                int local_hindex = neighbor_[iDim][( iNeighbor+1 )%2] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][( iNeighbor+1 )%2] ];
-                int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 );
-                MPI_Irecv( &( ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( 0, 0 ) ), 1, vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][( iNeighbor+1 )%2], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ) );
-            }
-
-        } // END of Recv
-
-    } // END for iNeighbor
-
+    SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_;
+
+    for( int iNeighbor=0; iNeighbor<nbNeighbors_; iNeighbor++ ) {
+        // MESSAGE("n_send "<<buffer.partSend[iDim][iNeighbor]->size()<<" n_recv "<<buffer.partRecv[iDim][iNeighbor]->size());
+        // Send
+        Particles &partSend = *buffer.partSend[iDim][iNeighbor];
+        if( partSend.size() != 0 && is_a_MPI_neighbor( iDim, iNeighbor ) ) {
+            int local_hindex = hindex - vecPatch->refHindex_;
+            int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 );
+            vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &partSend );
+            MPI_Isend( &partSend.position( 0, 0 ), 1, vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( buffer.srequest[iDim][iNeighbor] ) );
+        }
+
+        // Receive
+        int iOppositeNeighbor = ( iNeighbor+1 )%2;
+        Particles &partRecv = *buffer.partRecv[iDim][iOppositeNeighbor];
+        if( partRecv.size() != 0 && is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) {
+            // MESSAGE(" patch "<<hindex<<" n_recv "<<partRecv.size());
+            vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &partRecv );
+            int local_hindex = neighbor_[iDim][iOppositeNeighbor] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][iOppositeNeighbor] ];
+            int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 );
+            MPI_Irecv( &partRecv.position( 0, 0 ), 1, vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iOppositeNeighbor], tag, MPI_COMM_WORLD, &buffer.rrequest[iDim][iOppositeNeighbor] );
        }

+    }
+
 } // END exchParticles(... iDim)

 // ---------------------------------------------------------------------------------------------------------------------
-// For direction iDim, finalize receive of particles, temporary store particles if diagonalParticles
-// And store recv particles at their definitive place.
-// Call Patch::cleanupSentParticles
-// - vecPatch : used for intra-MPI process comm (direct copy using Particles::copyParticles)
-// - smpi : used smpi->periods_
+// For direction iDim, wait for the receive of particles
 // ---------------------------------------------------------------------------------------------------------------------
-void Patch::finalizeExchParticles( int ispec, int iDim )
+void Patch::waitExchParticles( int ispec, int iDim )
 {
-
-    int n_part_send, n_part_recv;
-
-    /********************************************************************************/
-    // Wait for end of communications over Particles
-    /********************************************************************************/
+    SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_;
+
     for( int iNeighbor=0 ; iNeighbor<nbNeighbors_ ; iNeighbor++ ) {
         MPI_Status sstat[2];
         MPI_Status rstat[2];
-        n_part_send = vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].size();
-        n_part_recv = vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2];
-
-        if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) {
-            if( is_a_MPI_neighbor( iDim, iNeighbor ) ) {
-                MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) );
-                MPI_Type_free( &( vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] ) );
-            }
+
+        int iOppositeNeighbor = ( iNeighbor+1 )%2;
+        Particles &partSend = *buffer.partSend[iDim][iNeighbor];
+        Particles &partRecv = *buffer.partRecv[iDim][iOppositeNeighbor];
+
+        if( partSend.size() != 0 && is_a_MPI_neighbor( iDim, iNeighbor ) ) {
+            MPI_Wait( &buffer.srequest[iDim][iNeighbor], &sstat[iNeighbor] );
+            MPI_Type_free( &vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] );
         }
-        if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) {
-            if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) {
-                MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ), &( rstat[( iNeighbor+1 )%2] ) );
-                MPI_Type_free( &( vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] ) );
-            }
+        if( partRecv.size() != 0 && is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) {
+            MPI_Wait( &buffer.rrequest[iDim][iOppositeNeighbor], &rstat[iOppositeNeighbor] );
+            MPI_Type_free( &vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] );
         }
     }
 }
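The main change in prepareParticles() is the intra-process path: instead of copying particles one by one into the neighbor's receive buffer, the whole send buffer pointer is swapped with the neighbor's partRecv pointer. A toy sketch of that zero-copy hand-off (illustration only; ToyBuffer is a hypothetical stand-in for SpeciesMPIbuffers):

    #include <cassert>
    #include <utility>
    #include <vector>

    struct ToyBuffer {                       // stands in for SpeciesMPIbuffers
        std::vector<double> *partSend;
        std::vector<double> *partRecv;
    };

    int main()
    {
        std::vector<double> mine = { 1., 2., 3. };  // outgoing particles
        std::vector<double> theirs;                 // neighbor's empty recv buffer
        ToyBuffer me  { &mine,   nullptr };
        ToyBuffer you { nullptr, &theirs };

        // O(1) pointer swap, no per-particle copy:
        std::swap( me.partSend, you.partRecv );
        assert( you.partRecv->size() == 3 );        // neighbor now owns the particles
        assert( me.partSend->empty() );             // sender keeps an empty buffer for reuse
        return 0;
    }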
 void Patch::cornersParticles( int ispec, Params &params, int iDim )
 {
     int ndim = params.nDim_field;
-    int idim, check;
-
-    Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move );
-
-    int n_part_recv;
-
-    /********************************************************************************/
-    // Wait for end of communications over Particles
-    /********************************************************************************/
+    SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_;
+
+    // No need to treat diag particles at last dimension
+    if( iDim == ndim-1 ) {
+        return;
+    }
+
     for( int iNeighbor=0 ; iNeighbor<nbNeighbors_ ; iNeighbor++ ) {
-        n_part_recv = vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2];
-
-        if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) {
-
-            // Treat diagonalParticles
-            if( iDim < ndim-1 ) { // No need to treat diag particles at last dimension.
-                if( params.geometry != "AMcylindrical" ) {
-                    for( int iPart=n_part_recv-1 ; iPart>=0; iPart-- ) {
-                        check = 0;
-                        idim = iDim+1;//We check next dimension
-                        while( check == 0 && idim<ndim ) {
-                            if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( idim, iPart ) < min_local_[idim] ) {
-                                if( neighbor_[idim][0]!=MPI_PROC_NULL ) { //if neighbour exists
-                                    //... copy it at the back of the local particle vector ...
-                                    ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles );
-                                    //...adjust particles->last_index or cell_keys ...
-                                    //vecSpecies[ispec]->addSpaceForOneParticle();
-                                    //... and add its index to the particles to be sent later...
-                                    vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][0].push_back( cuParticles.size()-1 );
-                                }
-                                //Remove it from receive buffer.
-                                ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).eraseParticle( iPart );
-                                vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]--;
-                                check = 1;
-                            }
-                            //Other side of idim
-                            else if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( idim, iPart ) >= max_local_[idim] ) {
-                                if( neighbor_[idim][1]!=MPI_PROC_NULL ) { //if neighbour exists
-                                    ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles );
-                                    //...adjust particles->last_index or cell_keys ...
-                                    //vecSpecies[ispec]->addSpaceForOneParticle();
-                                    vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][1].push_back( cuParticles.size()-1 );
-                                }
-                                ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).eraseParticle( iPart );
-                                vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]--;
-                                check = 1;
-                            }
-                            idim++;
-                        }
-                    }
-                } else { //In AM geometry
-                    //In this case, iDim = 0 and idim = iDim + 1 = 1. We only have to check potential comms along R.
-                    double r_min2, r_max2;
-                    r_min2 = min_local_[1]*min_local_[1];
-                    r_max2 = max_local_[1]*max_local_[1];
-                    for( int iPart=n_part_recv-1 ; iPart>=0; iPart-- ) {
-                        //MESSAGE("test particle diag r2 = " << (vecSpecies[ispec]->MPI_buffer_.partRecv[0][(iNeighbor+1)%2]).distance2ToAxis(iPart) << "rmin2 = " << r_min2 << " rmax2 = " << r_max2 );
-                        if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).distance2ToAxis( iPart ) < r_min2 ) {
-                            if( neighbor_[1][0]!=MPI_PROC_NULL ) { //if neighbour exists
-                                //... copy it at the back of the local particle vector ...
-                                ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles );
-                                //...adjust particles->last_index or cell_keys ...
-                                //vecSpecies[ispec]->addSpaceForOneParticle();
-                                //... and add its index to the particles to be sent later...
-                                vecSpecies[ispec]->MPI_buffer_.part_index_send[1][0].push_back( cuParticles.size()-1 );
-                                //..without forgetting to add it to the list of particles to clean.
-                            }
-                            //Remove it from receive buffer.
-                            ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).eraseParticle( iPart );
-                            vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[0][( iNeighbor+1 )%2]--;
-                        }
-                        //Other side of idim
-                        else if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).distance2ToAxis( iPart ) >= r_max2 ) {
-                            if( neighbor_[1][1]!=MPI_PROC_NULL ) { //if neighbour exists
-                                //MESSAGE("particle diag +R");
-                                ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles );
-                                //...adjust particles->last_index or cell_keys ...
-                                //vecSpecies[ispec]->addSpaceForOneParticle();
-                                vecSpecies[ispec]->MPI_buffer_.part_index_send[1][1].push_back( cuParticles.size()-1 );
-                            }
-                            ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).eraseParticle( iPart );
-                            vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[0][( iNeighbor+1 )%2]--;
-                        }
-                    }
-                }
-            }//If not last dim for diagonal particles.
+
+        Particles &partRecv = *buffer.partRecv[iDim][iNeighbor];
+
+        vector<vector<size_t>> indices_corner_min( ndim-iDim-1 );
+        vector<vector<size_t>> indices_corner_max( ndim-iDim-1 );
+        vector<size_t> indices_all_corners;
+
+        if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL && partRecv.size() != 0 ) {
+
+            // Find corner particles and store their indices
+            if( params.geometry != "AMcylindrical" ) {
+
+                for( size_t iPart = 0; iPart < partRecv.size(); iPart++ ) {
+                    for( size_t otherDim = iDim+1; otherDim < (size_t) ndim; otherDim++ ) {
+                        if( partRecv.position( otherDim, iPart ) < min_local_[otherDim] ) {
+                            indices_corner_min[otherDim-iDim-1].push_back( iPart );
+                            indices_all_corners.push_back( iPart );
+                            break;
+                        } else if( partRecv.position( otherDim, iPart ) >= max_local_[otherDim] ) {
+                            indices_corner_max[otherDim-iDim-1].push_back( iPart );
+                            indices_all_corners.push_back( iPart );
+                            break;
+                        }
+                    }
+                }
+
+            } else { //In AM geometry
+
+                //In this case, iDim = 0 and idim = iDim + 1 = 1. We only have to check potential comms along R.
+                double r_min2 = min_local_[1]*min_local_[1];
+                double r_max2 = max_local_[1]*max_local_[1];
+
+                for( size_t iPart = 0; iPart < partRecv.size(); iPart++ ) {
+                    if( partRecv.distance2ToAxis( iPart ) < r_min2 ) {
+                        indices_corner_min[0].push_back( iPart );
+                        indices_all_corners.push_back( iPart );
+                        break;
+                    } else if( partRecv.distance2ToAxis( iPart ) >= r_max2 ) {
+                        indices_corner_max[0].push_back( iPart );
+                        indices_all_corners.push_back( iPart );
+                        break;
+                    }
+                }
+            }
+
+            // Copy corner particles to the start or the end of the particles to be sent for the following dimension
+            for( size_t otherDim = iDim+1; otherDim < (size_t) ndim; otherDim++ ) {
+                if( indices_corner_min[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][0] != MPI_PROC_NULL ) {
+                    partRecv.copyParticles( indices_corner_min[otherDim-iDim-1], *buffer.partSend[otherDim][0], 0 );
+                }
+                if( indices_corner_max[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][1] != MPI_PROC_NULL ) {
+                    partRecv.copyParticles( indices_corner_max[otherDim-iDim-1], *buffer.partSend[otherDim][1], buffer.partSend[otherDim][1]->size() );
+                }
+            }
+
+            // Erase corner particles from the current recv array
+            if( indices_all_corners.size() > 0 ) {
+                partRecv.eraseParticles( indices_all_corners );
+            }
+
         } //If received something
     } //loop i Neighbor
 }

@@ -925,22 +855,20 @@ void Patch::importAndSortParticles( int ispec, Params &params )

 void Patch::cleanParticlesOverhead( Params &params )
 {
-    int ndim = params.nDim_field;
+
     for( unsigned int ispec=0 ; ispec<vecSpecies.size() ; ispec++ ) {
-        Particles &cuParticles = ( *vecSpecies[ispec]->particles );
-
-        for( int idim = 0; idim < ndim; idim++ ) {
+        SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_;
+
+        for( size_t idim = 0; idim < params.nDim_field; idim++ ) {
             for( int iNeighbor=0 ; iNeighbor<nbNeighbors_ ; iNeighbor++ ) {
-                vecSpecies[ispec]->MPI_buffer_.partRecv[idim][iNeighbor].clear();
-                vecSpecies[ispec]->MPI_buffer_.partRecv[idim][iNeighbor].shrinkToFit( );
-                vecSpecies[ispec]->MPI_buffer_.partSend[idim][iNeighbor].clear();
-                vecSpecies[ispec]->MPI_buffer_.partSend[idim][iNeighbor].shrinkToFit( );
-                vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor].clear();
-                vector<int>( vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor] ).swap( vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor] );
+                buffer.partRecv[idim][iNeighbor]->clear();
+                buffer.partRecv[idim][iNeighbor]->shrinkToFit( );
+                buffer.partSend[idim][iNeighbor]->clear();
+                buffer.partSend[idim][iNeighbor]->shrinkToFit( );
             }
         }
-
-        cuParticles.shrinkToFit( );
+
+        vecSpecies[ispec]->particles->shrinkToFit( );
     }
 }
diff --git a/src/Patch/Patch.h b/src/Patch/Patch.h
index 6fc3f7578..ff5a76a5c 100755
--- a/src/Patch/Patch.h
+++ b/src/Patch/Patch.h
@@ -174,7 +174,7 @@ class Patch
     //! Clean the MPI buffers for communications
     void cleanMPIBuffers( int ispec, Params &params );
     //! manage Idx of particles per direction,
-    void initExchParticles( int ispec, Params &params );
+    void copyExchParticlesToBuffers( int ispec, Params &params );
     //! init comm nbr of particles
     void exchNbrOfParticles( SmileiMPI *smpi, int ispec, Params &params, int iDim, VectorPatch *vecPatch );
     //! finalize comm / nbr of particles, init exch / particles
@@ -184,7 +184,7 @@
     //! effective exchange of particles
     void exchParticles( SmileiMPI *smpi, int ispec, Params &params, int iDim, VectorPatch *vecPatch );
     //! finalize exch / particles
-    void finalizeExchParticles( int ispec, int iDim );
+    void waitExchParticles( int ispec, int iDim );
     //! Treat diagonalParticles
     void cornersParticles( int ispec, Params &params, int iDim );
     //! inject particles received in main data structure and particles sorting
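cornersParticles() now relies on the new Particles::eraseParticles(), which removes a sorted list of indices from every property array in a single forward pass. A self-contained sketch of that compaction on one plain vector (illustration only; erase_sorted_indices is a hypothetical stand-in for the per-property loop):

    #include <cassert>
    #include <vector>

    // Remove the elements at the (sorted, non-empty) positions in 'indices',
    // shifting the survivors left in one pass, exactly like eraseParticles().
    static void erase_sorted_indices( std::vector<int> &v, const std::vector<size_t> &indices )
    {
        size_t j = 1, to = indices[0];
        size_t stop = ( indices.size() == 1 ) ? v.size() : indices[1];
        for( size_t from = indices[0] + 1; from < v.size(); from++ ) {
            if( from < stop ) {
                v[to++] = v[from];      // keep this element
            } else {
                j++;                    // 'from' is an erased index: skip it
                stop = ( j == indices.size() ) ? v.size() : indices[j];
            }
        }
        v.resize( v.size() - indices.size() );
    }

    int main()
    {
        std::vector<int> v = { 10, 11, 12, 13, 14, 15 };
        erase_sorted_indices( v, { 1, 4 } );               // remove 11 and 14
        assert( ( v == std::vector<int>{ 10, 12, 13, 15 } ) );
        return 0;
    }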
diff --git a/src/Patch/SyncVectorPatch.cpp b/src/Patch/SyncVectorPatch.cpp
index 09817b201..675529113 100755
--- a/src/Patch/SyncVectorPatch.cpp
+++ b/src/Patch/SyncVectorPatch.cpp
@@ -24,26 +24,15 @@ template void SyncVectorPatch::exchangeAlongAllDirections<double,Field>( std::vector<Field *> fields, VectorPatch &vecPatches, SmileiMPI *smpi );
 template void SyncVectorPatch::exchangeAlongAllDirections<std::complex<double>,cField>( std::vector<Field *> fields, VectorPatch &vecPatches, SmileiMPI *smpi );
 template void SyncVectorPatch::exchangeAlongAllDirectionsNoOMP<double,Field>( std::vector<Field *> fields, VectorPatch &vecPatches, SmileiMPI *smpi );
 template void SyncVectorPatch::exchangeAlongAllDirectionsNoOMP<std::complex<double>,cField>( std::vector<Field *> fields, VectorPatch &vecPatches, SmileiMPI *smpi );

-void SyncVectorPatch::exchangeParticles( VectorPatch &vecPatches, int ispec, Params &params, SmileiMPI *smpi )
+void SyncVectorPatch::initExchParticles( VectorPatch &vecPatches, int ispec, Params &params, SmileiMPI *smpi )
 {
     #pragma omp for schedule(runtime)
     for( unsigned int ipatch=0 ; ipatch<vecPatches.size() ; ipatch++ ) {
-        vecPatches( ipatch )->extractParticles();
-        vecPatches( ipatch )->initExchParticles( ispec, params );
-    }
-
-    // Init comm in direction 0
-#ifndef _NO_MPI_TM
-    #pragma omp for schedule(runtime)
-#else
-    #pragma omp single
-#endif
-    for( unsigned int ipatch=0 ; ipatch<vecPatches.size() ; ipatch++ ) {
-        vecPatches( ipatch )->exchNbrOfParticles( smpi, ispec, params, 0, &vecPatches );
+        vecPatches( ipatch )->copyExchParticlesToBuffers( ispec, params );
     }
+
+    // Start exchange along dimension 0 only
+    SyncVectorPatch::initExchParticlesAlongDimension( vecPatches, ispec, 0, params, smpi );
 }

 // ---------------------------------------------------------------------------------------------------------------------
@@ -52,24 +41,17 @@
 //! - the importation of the new particles in the particle property arrays
 //! - the sorting of particles
 // ---------------------------------------------------------------------------------------------------------------------
-void SyncVectorPatch::finalizeAndSortParticles( VectorPatch &vecPatches, int ispec, Params &params, SmileiMPI *smpi )
+void SyncVectorPatch::finalizeExchParticlesAndSort( VectorPatch &vecPatches, int ispec, Params &params, SmileiMPI *smpi )
 {
-    SyncVectorPatch::finalizeExchangeParticles( vecPatches, ispec, 0, params, smpi );
-
-    // Per direction
+    // finish exchange along dimension 0 only
+    SyncVectorPatch::finalizeExchParticlesAlongDimension( vecPatches, ispec, 0, params, smpi );
+
+    // Other directions
     for( unsigned int iDim=1 ; iDim<params.nDim_field ; iDim++ ) {
-#ifndef _NO_MPI_TM
-        #pragma omp for schedule(runtime)
-#else
-        #pragma omp single
-#endif
-        for( unsigned int ipatch=0 ; ipatch<vecPatches.size() ; ipatch++ ) {
-            vecPatches( ipatch )->exchNbrOfParticles( smpi, ispec, params, iDim, &vecPatches );
-        }
-
-        SyncVectorPatch::finalizeExchangeParticles( vecPatches, ispec, iDim, params, smpi );
+        SyncVectorPatch::initExchParticlesAlongDimension( vecPatches, ispec, iDim, params, smpi );
+        SyncVectorPatch::finalizeExchParticlesAlongDimension( vecPatches, ispec, iDim, params, smpi );
     }
-
+
     #pragma omp for schedule(runtime)
     for( unsigned int ipatch=0 ; ipatch<vecPatches.size() ; ipatch++ ) {
         vecPatches( ipatch )->importAndSortParticles( ispec, params );
@@ -108,8 +90,20 @@
 }

+void SyncVectorPatch::initExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params &params, SmileiMPI *smpi )
+{
+    // Exchange numbers of particles along dimension iDim
+#ifndef _NO_MPI_TM
+    #pragma omp for schedule(runtime)
+#else
+    #pragma omp single
+#endif
+    for( unsigned int ipatch=0 ; ipatch<vecPatches.size() ; ipatch++ ) {
+        vecPatches( ipatch )->exchNbrOfParticles( smpi, ispec, params, iDim, &vecPatches );
+    }
+}

-void SyncVectorPatch::finalizeExchangeParticles( VectorPatch &vecPatches, int ispec, int iDim, Params &params, SmileiMPI *smpi )
+void SyncVectorPatch::finalizeExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params &params, SmileiMPI *smpi )
 {
 #ifndef _NO_MPI_TM
     #pragma omp for schedule(runtime)
 #else
     #pragma omp single
 #endif
     for( unsigned int ipatch=0 ; ipatch<vecPatches.size() ; ipatch++ ) {
         vecPatches( ipatch )->endNbrOfParticles( ispec, iDim );
     }

@@ -140,7 +134,7 @@
 #ifndef _NO_MPI_TM
     #pragma omp for schedule(runtime)
 #else
     #pragma omp single
 #endif
     for( unsigned int ipatch=0 ; ipatch<vecPatches.size() ; ipatch++ ) {
-        vecPatches( ipatch )->finalizeExchParticles( ispec, iDim );
+        vecPatches( ipatch )->waitExchParticles( ispec, iDim );
     }

     #pragma omp for schedule(runtime)
diff --git a/src/Patch/SyncVectorPatch.h b/src/Patch/SyncVectorPatch.h
index 0ce868cae..0322c1283 100755
--- a/src/Patch/SyncVectorPatch.h
+++ b/src/Patch/SyncVectorPatch.h
@@ -17,9 +17,10 @@ class SyncVectorPatch
 public :

     //! Particles synchronization
-    static void exchangeParticles( VectorPatch &vecPatches, int ispec, Params &params, SmileiMPI *smpi );
-    static void finalizeAndSortParticles( VectorPatch &vecPatches, int ispec, Params &params, SmileiMPI *smpi );
-    static void finalizeExchangeParticles( VectorPatch &vecPatches, int ispec, int iDim, Params &params, SmileiMPI *smpi );
+    static void initExchParticles( VectorPatch &vecPatches, int ispec, Params &params, SmileiMPI *smpi );
+    static void finalizeExchParticlesAndSort( VectorPatch &vecPatches, int ispec, Params &params, SmileiMPI *smpi );
+    static void initExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params &params, SmileiMPI *smpi );
+    static void finalizeExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params &params, SmileiMPI *smpi );

     //! Densities synchronization
     static void sumRhoJ( Params &params, VectorPatch &vecPatches, SmileiMPI *smpi );
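For reference, the call order that results from the renamed SyncVectorPatch entry points: dimension 0 is started inside dynamics() and the remaining dimensions are chained during finalization, because cornersParticles() can feed the send buffers of dimension d+1 with particles received along d. A compilable sketch of the sequence (assumed from this diff, simplified; each step() stands for the method of the same name):

    #include <cstdio>

    static void step( const char *name, int iDim ) { std::printf( "%s along dim %d\n", name, iDim ); }

    int main()
    {
        const int nDim_field = 3;
        // during vecPatches.dynamics():
        step( "copyExchParticlesToBuffers (all dims' buffers filled once)", 0 );
        step( "initExchParticlesAlongDimension", 0 );
        // during vecPatches.finalizeExchParticlesAndSort():
        step( "finalizeExchParticlesAlongDimension", 0 );
        for( int iDim = 1; iDim < nDim_field; iDim++ ) {
            step( "initExchParticlesAlongDimension", iDim );
            step( "finalizeExchParticlesAlongDimension", iDim );
        }
        std::printf( "importAndSortParticles on every patch\n" );
        return 0;
    }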
diff --git a/src/Patch/VectorPatch.cpp b/src/Patch/VectorPatch.cpp
index 0c2fbb036..22d976ba2 100755
--- a/src/Patch/VectorPatch.cpp
+++ b/src/Patch/VectorPatch.cpp
@@ -322,7 +322,7 @@ void VectorPatch::initialParticleSorting( Params &params )
 }

 // ---------------------------------------------------------------------------------------------------------------------
-// For all patches, move particles (restartRhoJ(s), dynamics and exchangeParticles)
+// For all patches, move particles (restartRhoJ(s), dynamics and initExchParticles)
 // ---------------------------------------------------------------------------------------------------------------------
 void VectorPatch::dynamics( Params &params,
                             SmileiMPI *smpi,
@@ -402,7 +402,7 @@ void VectorPatch::dynamics( Params &params,
     for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) {
         Species *spec = species( 0, ispec );
         if ( (!params.Laser_Envelope_model) && (spec->isProj( time_dual, simWindow )) ){
-            SyncVectorPatch::exchangeParticles( ( *this ), ispec, params, smpi ); // Included sortParticles
+            SyncVectorPatch::initExchParticles( ( *this ), ispec, params, smpi ); // Included sortParticles
         } // end condition on Species and on envelope model
     } // end loop on species
     //MESSAGE("exchange particles");
@@ -460,7 +460,7 @@ void VectorPatch::projectionForDiags( Params &params,

 // ---------------------------------------------------------------------------------------------------------------------
 //! For all patches, exchange particles and sort them.
 // ---------------------------------------------------------------------------------------------------------------------
-void VectorPatch::finalizeAndSortParticles( Params &params, SmileiMPI *smpi, SimWindow *simWindow,
+void VectorPatch::finalizeExchParticlesAndSort( Params &params, SmileiMPI *smpi, SimWindow *simWindow,
         double time_dual, Timers &timers, int itime )
 {
     timers.syncPart.restart();
@@ -471,7 +471,7 @@

     for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) {
         if( ( *this )( 0 )->vecSpecies[ispec]->isProj( time_dual, simWindow ) ) {
-            SyncVectorPatch::finalizeAndSortParticles( ( *this ), ispec, params, smpi ); // Included sortParticles
+            SyncVectorPatch::finalizeExchParticlesAndSort( ( *this ), ispec, params, smpi ); // Included sortParticles
         }
     }

@@ -491,7 +491,7 @@

     timers.syncPart.update( params.printNow( itime ) );

-} // END finalizeAndSortParticles
+} // END finalizeExchParticlesAndSort


 //! Perform the particles merging on all patches
@@ -3030,7 +3030,7 @@ void VectorPatch::createPatches( Params &params, SmileiMPI *smpi, SimWindow *sim

     // Set Index of the 1st patch of the vector yet on current MPI rank
     // Is this really necessary ? It should be done already ...
-    refHindex_ = ( *this )( 0 )->Hindex();
+    setRefHindex();

     // Current number of patch
     int nPatches_now = this->size() ;
@@ -4645,7 +4645,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrents( Params &params,
     timers.syncPart.restart();
     for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) {
         if( ( *this )( 0 )->vecSpecies[ispec]->isProj( time_dual, simWindow ) ) {
-            SyncVectorPatch::exchangeParticles( ( *this ), ispec, params, smpi ); // Included sortParticles
+            SyncVectorPatch::initExchParticles( ( *this ), ispec, params, smpi ); // Included sortParticles
         } // end condition on species
     } // end loop on species
     timers.syncPart.update( params.printNow( itime ) );
@@ -5421,7 +5421,7 @@ void VectorPatch::dynamicsWithTasks( Params &params,
             Species *spec_task = species( ipatch, ispec );
             for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) {
                 for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPart<spec_task->particles->last_index[scell]; iPart++ ) {
-                    if ( spec_task->particles->cell_keys[iPart] != -1 ) {
+                    if ( spec_task->particles->cell_keys[iPart] >= 0 ) {
                         //First reduction of the count sort algorithm. Lost particles are not included.
                         spec_task->count[spec_task->particles->cell_keys[iPart]] ++;
                     }
@@ -5437,7 +5437,7 @@
             Species *spec_task = species( ipatch, ispec );
             for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) {
                 for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPart<spec_task->particles->last_index[scell]; iPart++ ) {
-                    if ( spec_task->particles->cell_keys[iPart] != -1 ) {
+                    if ( spec_task->particles->cell_keys[iPart] >= 0 ) {
                         //First reduction of the count sort algorithm. Lost particles are not included.
                         spec_task->count[spec_task->particles->cell_keys[iPart]] ++;
                     }
@@ -5657,7 +5657,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrentsWithTasks( Params &param
             Species *spec_task = species( ipatch, ispec );
             for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) {
                 for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPart<spec_task->particles->last_index[scell]; iPart++ ) {
-                    if ( spec_task->particles->cell_keys[iPart] != -1 ) {
+                    if ( spec_task->particles->cell_keys[iPart] >= 0 ) {
                         //First reduction of the count sort algorithm. Lost particles are not included.
                         spec_task->count[spec_task->particles->cell_keys[iPart]] ++;
                     }
@@ -5675,7 +5675,7 @@
             Species *spec_task = species( ipatch, ispec );
             for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) {
                 for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPart<spec_task->particles->last_index[scell]; iPart++ ) {
-                    if ( spec_task->particles->cell_keys[iPart] != -1 ) {
+                    if ( spec_task->particles->cell_keys[iPart] >= 0 ) {
                         //First reduction of the count sort algorithm. Lost particles are not included.
                         spec_task->count[spec_task->particles->cell_keys[iPart]] ++;
                     }
diff --git a/src/Patch/VectorPatch.h b/src/Patch/VectorPatch.h
index 35be9ee6b..ff1493813 100755
--- a/src/Patch/VectorPatch.h
+++ b/src/Patch/VectorPatch.h
@@ -138,7 +138,7 @@ public :
     //! Particle sorting for all patches. This is done at initialization time.
     void initialParticleSorting( Params &params );

-    //! For all patch, move particles (restartRhoJ(s), dynamics and exchangeParticles)
+    //! For all patches, move particles (restartRhoJ(s), dynamics and initExchParticles)
     void dynamics( Params &params,
                    SmileiMPI *smpi,
                    SimWindow *simWindow,
@@ -157,7 +157,7 @@
                    Timers &timers, int itime );

     //! For all patches, exchange particles and sort them.
-    void finalizeAndSortParticles( Params &params, SmileiMPI *smpi, SimWindow *simWindow,
+    void finalizeExchParticlesAndSort( Params &params, SmileiMPI *smpi, SimWindow *simWindow,
                                    double time_dual, Timers &timers, int itime );

     void finalizeSyncAndBCFields( Params &params, SmileiMPI *smpi, SimWindow *simWindow,
diff --git a/src/Smilei.cpp b/src/Smilei.cpp
index 15cd7b047..0ab0db1a2 100755
--- a/src/Smilei.cpp
+++ b/src/Smilei.cpp
@@ -629,7 +629,7 @@ int main( int argc, char *argv[] )
         #pragma omp parallel shared (time_dual,smpi,params, vecPatches, region, simWindow, checkpoint, itime)
         {
             // finalize particle exchanges and sort particles
-            vecPatches.finalizeAndSortParticles( params, &smpi, simWindow,
+            vecPatches.finalizeExchParticlesAndSort( params, &smpi, simWindow,
                                                  time_dual, timers, itime );

             // Particle merging
diff --git a/src/SmileiMPI/AsyncMPIbuffers.cpp b/src/SmileiMPI/AsyncMPIbuffers.cpp
index 0f7cebe9d..a5a53dbb0 100755
--- a/src/SmileiMPI/AsyncMPIbuffers.cpp
+++ b/src/SmileiMPI/AsyncMPIbuffers.cpp
@@ -66,6 +66,12 @@ SpeciesMPIbuffers::SpeciesMPIbuffers()

 SpeciesMPIbuffers::~SpeciesMPIbuffers()
 {
+    for( size_t i=0 ; i<partRecv.size() ; i++ ) {
+        for( size_t j=0 ; j<2 ; j++ ) {
+            delete partRecv[i][j];
+            delete partSend[i][j];
+        }
+    }
 }

diff --git a/src/SmileiMPI/AsyncMPIbuffers.h b/src/SmileiMPI/AsyncMPIbuffers.h
--- a/src/SmileiMPI/AsyncMPIbuffers.h
+++ b/src/SmileiMPI/AsyncMPIbuffers.h
-    std::vector< std::vector<Particles> > partRecv;
+    std::vector< std::vector<Particles*> > partRecv;
     //! ndim vectors of 2 received packets of particles (1 per direction)
-    std::vector< std::vector<Particles> > partSend;
+    std::vector< std::vector<Particles*> > partSend;

-    //! ndim vectors of 2 vectors of index particles to send (1 per direction)
-    //! - not sent
-    //  - used to sort Species::indexes_of_particles_to_exchange built in Species::dynamics
-    std::vector< std::vector< std::vector<int> > > part_index_send;
     //! ndim vectors of 2 numbers of particles to send (1 per direction)
-    std::vector< std::vector< unsigned int > > part_index_send_sz;
+    std::vector< std::vector< unsigned int > > partSendSize;
     //! ndim vectors of 2 numbers of particles to receive (1 per direction)
-    std::vector< std::vector< unsigned int > > part_index_recv_sz;
+    std::vector< std::vector< unsigned int > > partRecvSize;
 };
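With this change partSend/partRecv hold owning raw pointers (so that prepareParticles() can swap them between patches), which is why SpeciesMPIbuffers gains an explicit destructor. A toy version of the ownership pattern (assumption, simplified; the real allocation presumably happens in SpeciesMPIbuffers::allocate, which is not visible in this diff):

    #include <vector>

    struct ToyParticles { std::vector<double> position; };

    struct ToySpeciesMPIbuffers {
        std::vector< std::vector<ToyParticles*> > partSend, partRecv;
        std::vector< std::vector<unsigned int> >  partSendSize, partRecvSize;

        explicit ToySpeciesMPIbuffers( size_t ndim )
            : partSend( ndim ), partRecv( ndim ),
              partSendSize( ndim, std::vector<unsigned int>( 2, 0 ) ),
              partRecvSize( ndim, std::vector<unsigned int>( 2, 0 ) )
        {
            for( size_t d = 0; d < ndim; d++ ) {
                partSend[d] = { new ToyParticles, new ToyParticles };
                partRecv[d] = { new ToyParticles, new ToyParticles };
            }
        }
        // Buffers may have been swapped with another patch, but each patch
        // still owns exactly one object per slot, so deleting here is safe.
        ~ToySpeciesMPIbuffers()
        {
            for( size_t d = 0; d < partRecv.size(); d++ ) {
                for( size_t n = 0; n < 2; n++ ) {
                    delete partSend[d][n];
                    delete partRecv[d][n];
                }
            }
        }
    };

    int main() { ToySpeciesMPIbuffers b( 2 ); return 0; }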
diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp
index 37462566f..31ab4c1a5 100755
--- a/src/Species/Species.cpp
+++ b/src/Species/Species.cpp
@@ -378,11 +378,8 @@ void Species::initOperators( Params &params, Patch *patch )
     partBoundCond = new PartBoundCond( params, this, patch );
     for( unsigned int iDim=0 ; iDim < nDim_field ; iDim++ ) {
         for( unsigned int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) {
-            MPI_buffer_.partRecv[iDim][iNeighbor].initialize( 0, ( *particles ) );
-            MPI_buffer_.partSend[iDim][iNeighbor].initialize( 0, ( *particles ) );
-            MPI_buffer_.part_index_send[iDim][iNeighbor].resize( 0 );
-            MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0;
-            MPI_buffer_.part_index_send_sz[iDim][iNeighbor] = 0;
+            MPI_buffer_.partRecv[iDim][iNeighbor]->initialize( 0, ( *particles ) );
+            MPI_buffer_.partSend[iDim][iNeighbor]->initialize( 0, ( *particles ) );
         }
     }
     typePartSend.resize( nDim_field*2, MPI_DATATYPE_NULL );
@@ -1774,10 +1771,10 @@ void Species::sortParticles( Params &params )
     // Merge all MPI_buffer_.partRecv in particles_to_move
     for( int idim = 0; idim < params.nDim_field; idim++ ) {
         for( int iNeighbor = 0; iNeighbor < 2; iNeighbor++ ) {
-            int n_part_recv = MPI_buffer_.part_index_recv_sz[idim][iNeighbor];
-            if( ( n_part_recv != 0 ) ) {
+            int n_part_recv = MPI_buffer_.partRecv[idim][iNeighbor]->size();
+            if( n_part_recv != 0 ) {
                 // insert n_part_recv in particles_to_move from 0
-                MPI_buffer_.partRecv[idim][iNeighbor].copyParticles( 0,
+                MPI_buffer_.partRecv[idim][iNeighbor]->copyParticles( 0,
                                                                      n_part_recv,
                                                                      *particles_to_move,
                                                                      particles_to_move->size() );
@@ -1809,10 +1806,10 @@
     //Merge all MPI_buffer_.partRecv in particles_to_move
     //    for( int idim = 0; idim < ndim; idim++ ) {
     //        for( int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) {
-    //            int n_part_recv = MPI_buffer_.part_index_recv_sz[idim][iNeighbor];
+    //            int n_part_recv = MPI_buffer_.partRecv[idim][iNeighbor]->size();
     //            if( ( n_part_recv!=0 ) ) {
     //                // insert n_part_recv in particles_to_move from 0
-    //                //MPI_buffer_.partRecv[idim][iNeighbor].copyParticles( 0, n_part_recv, *particles_to_move, 0 );
+    //                //MPI_buffer_.partRecv[idim][iNeighbor]->copyParticles( 0, n_part_recv, *particles_to_move, 0 );
     //                total_number_part_recv += n_part_recv;
     //                //particles->last_index[particles->last_index.size()-1] += n_part_recv;
     //                //particles->cell_keys.resize(particles->cell_keys.size()+n_part_recv);
@@ -1825,7 +1822,7 @@
     // Sort to adapt to cell_keys usage
     std::vector<int> indexes_of_particles_to_exchange;
     for ( int ipart=0 ; ipart< (int)(getNbrOfParticles()) ; ipart++ ) {
-        if ( particles->cell_keys[ipart] == -1 ) {
+        if ( particles->cell_keys[ipart] < 0 ) {
             indexes_of_particles_to_exchange.push_back( ipart );
         }
     }
@@ -1900,15 +1897,15 @@
     //Evaluation of the necessary shift of all bins.
     //idim=0
-    shift[1] += MPI_buffer_.part_index_recv_sz[0][0];//Particles coming from xmin all go to bin 0 and shift all the other bins.
-    shift[particles->last_index.size()] += MPI_buffer_.part_index_recv_sz[0][1];//Used only to count the total number of particles arrived.
+    shift[1] += MPI_buffer_.partRecv[0][0]->size();//Particles coming from xmin all go to bin 0 and shift all the other bins.
+    shift[particles->last_index.size()] += MPI_buffer_.partRecv[0][1]->size();//Used only to count the total number of particles arrived.
     //idim>0
     for( idim = 1; idim < ndim; idim++ ) {
         for( int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) {
-            n_part_recv = MPI_buffer_.part_index_recv_sz[idim][iNeighbor];
+            n_part_recv = MPI_buffer_.partRecv[idim][iNeighbor]->size();
             for( unsigned int j=0; j<( unsigned int )n_part_recv ; j++ ) {
                 //We first evaluate how many particles arrive in each bin.
-                ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor].position( 0, j )-min_loc )/dbin ); //bin in which the particle goes.
+                ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor]->position( 0, j )-min_loc )/dbin ); //bin in which the particle goes.
                 shift[ii+1]++; // It makes the next bins shift.
             }
         }
     }
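The shift[] bookkeeping above works in two stages: first shift[i+1] counts how many received particles land in bin i, then a running sum (performed a few lines further down in the source) turns the counts into per-bin offsets. A small self-contained illustration:

    #include <cassert>
    #include <vector>

    int main()
    {
        // 4 bins; received particles fall into bins 0, 0 and 2
        std::vector<int> shift( 4 + 1, 0 );
        for( int bin : { 0, 0, 2 } ) {
            shift[bin + 1]++;                 // "it makes the next bins shift"
        }
        for( size_t i = 1; i < shift.size(); i++ ) {
            shift[i] += shift[i - 1];         // cumulative sum -> offsets
        }
        // bin 0 stays in place, bins 1-2 move by 2 slots, bin 3 by 3 slots
        assert( shift[1] == 2 && shift[2] == 2 && shift[3] == 3 && shift[4] == 3 );
        return 0;
    }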
+ MPI_buffer_.partRecv[idim][iNeighbor]->overwriteParticle( j, *particles, particles->last_index[ii] ); particles->last_index[ii] ++ ; } } diff --git a/src/Species/SpeciesV.cpp b/src/Species/SpeciesV.cpp index 98d5d9dbb..89d12b340 100755 --- a/src/Species/SpeciesV.cpp +++ b/src/Species/SpeciesV.cpp @@ -518,7 +518,7 @@ void SpeciesV::dynamics( double time_dual, unsigned int ispec, nrj_lost_per_thd[tid] += mass_ * energy_lost; // for( iPart=particles->first_index[ipack*packsize_+scell] ; iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= length_[i]; @@ -552,7 +552,7 @@ void SpeciesV::dynamics( double time_dual, unsigned int ispec, // if( mass_>0 ) { // for( iPart=particles->first_index[ipack*packsize_+scell] ; iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -564,7 +564,7 @@ void SpeciesV::dynamics( double time_dual, unsigned int ispec, // } // for( iPart=particles->first_index[ipack*packsize_+scell] ; iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -1053,7 +1053,7 @@ void SpeciesV::dynamicsTasks( double time_dual, unsigned int ispec, if( mass_>0 ) { for( int scell = first_cell_of_bin[ibin] ; scell <= last_cell_of_bin[ibin] ; scell++ ) { for( int iPart=particles->first_index[ipack*packsize_+scell] ; ( int )iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -1067,7 +1067,7 @@ void SpeciesV::dynamicsTasks( double time_dual, unsigned int ispec, } else if( mass_==0 ) { for( int scell = first_cell_of_bin[ibin] ; scell <= last_cell_of_bin[ibin] ; scell++ ) { for( int iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= length[i]; @@ -1366,27 +1366,27 @@ void SpeciesV::sortParticles( Params ¶ms ) //Loop over just arrived particles to compute their cell keys and contribution to count for( unsigned int idim=0; idim < nDim_field ; idim++ ) { for( unsigned int ineighbor=0 ; ineighbor < 2 ; ineighbor++ ) { - buf_cell_keys[idim][ineighbor].resize( MPI_buffer_.part_index_recv_sz[idim][ineighbor] ); + buf_cell_keys[idim][ineighbor].resize( MPI_buffer_.partRecv[idim][ineighbor]->size() ); // #pragma omp simd - // for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) { + // for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) { // for( unsigned int ipos=0; ipos < nDim_field ; ipos++ ) { - // double X = ((this)->*(distance[ipos]))(&MPI_buffer_.partRecv[idim][ineighbor], ipos, ip); + // double X = ((this)->*(distance[ipos]))(MPI_buffer_.partRecv[idim][ineighbor], ipos, ip); // int IX = round( X * dx_inv_[ipos] ); // 
buf_cell_keys[idim][ineighbor][ip] = buf_cell_keys[idim][ineighbor][ip] * length_[ipos] + IX; // } // } // // not vectorizable because random access to count - // for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) { + // for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) { // count[buf_cell_keys[idim][ineighbor][ip]] ++; // } computeParticleCellKeys( params, - &MPI_buffer_.partRecv[idim][ineighbor], + MPI_buffer_.partRecv[idim][ineighbor], &buf_cell_keys[idim][ineighbor][0], &count[0], 0, - MPI_buffer_.part_index_recv_sz[idim][ineighbor] ); + MPI_buffer_.partRecv[idim][ineighbor]->size() ); } } @@ -1403,8 +1403,8 @@ void SpeciesV::sortParticles( Params ¶ms ) //Now proceed to the cycle sort - if( MPI_buffer_.partRecv[0][0].size() == 0 ) { - MPI_buffer_.partRecv[0][0].initialize( 0, *particles ); //Is this correct ? + if( MPI_buffer_.partRecv[0][0]->size() == 0 ) { + MPI_buffer_.partRecv[0][0]->initialize( 0, *particles ); //Is this correct ? } // Resize the particle vector @@ -1418,7 +1418,7 @@ void SpeciesV::sortParticles( Params ¶ms ) //Copy all particles from MPI buffers back to the writable particles via cycle sort pass. for( unsigned int idim=0; idim < nDim_field ; idim++ ) { for( unsigned int ineighbor=0 ; ineighbor < 2 ; ineighbor++ ) { - for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) { + for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) { cycle.resize( 1 ); cell_target = buf_cell_keys[idim][ineighbor][ip]; ip_dest = particles->first_index[cell_target]; @@ -1429,7 +1429,7 @@ void SpeciesV::sortParticles( Params ¶ms ) cycle[0] = ip_dest; cell_target = particles->cell_keys[ip_dest]; //As long as the particle is not erased, we can build up the cycle. - while( cell_target != -1 ) { + while( cell_target >= 0 ) { ip_dest = particles->first_index[cell_target]; while( particles->cell_keys[ip_dest] == cell_target ) { ip_dest++; @@ -1441,7 +1441,7 @@ void SpeciesV::sortParticles( Params ¶ms ) //Last target_cell is -1, the particle must be erased: particles->translateParticles( cycle ); //Eventually copy particle from the MPI buffer into the particle vector. - MPI_buffer_.partRecv[idim][ineighbor].overwriteParticle( ip, *particles, cycle[0] ); + MPI_buffer_.partRecv[idim][ineighbor]->overwriteParticle( ip, *particles, cycle[0] ); } } } @@ -1450,14 +1450,14 @@ void SpeciesV::sortParticles( Params ¶ms ) for( unsigned int ip=( unsigned int )particles->last_index.back(); ip < npart; ip++ ) { cell_target = particles->cell_keys[ip]; - if( cell_target == -1 ) { + if( cell_target < 0 ) { continue; } cycle.resize( 0 ); cycle.push_back( ip ); //As long as the particle is not erased, we can build up the cycle. 
- while( cell_target != -1 ) { + while( cell_target >= 0 ) { ip_dest = particles->first_index[cell_target]; @@ -1533,7 +1533,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys particles cell_keys[iPart] = std::round( position_x[iPart] * dx_inv_[0]) - min_loc_l ; cell_keys[iPart] *= length_[1]; @@ -1553,7 +1553,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles cell_keys[iPart] = std::round(position_x[iPart] * dx_inv_[0] )- min_loc_x ; cell_keys[iPart] *= length_[1]; @@ -1573,7 +1573,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles cell_keys[iPart] = std::round(position_x[iPart] * dx_inv_[0] )- min_loc_x ; cell_keys[iPart] *= length_[1]; @@ -1589,7 +1589,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles cell_keys[iPart] = round(position_x[iPart] * dx_inv_[0] )- min_loc_x ; } @@ -1598,7 +1598,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, } for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { count[cell_keys[iPart]] ++; } } @@ -2526,7 +2526,7 @@ void SpeciesV::ponderomotiveUpdatePositionAndCurrentsTasks( double time_dual, un smpi->traceEventIfDiagTracing(diag_PartEventTracing, Tools::getOMPThreadNum(),0,11); for( int iPart=particles->first_index[scell] ; iPartlast_index[scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. 
for( int i = 0 ; i<( int )nDim_field; i++ ) { particles->cell_keys[iPart] *= length_[i]; diff --git a/src/Species/SpeciesVAdaptive.cpp b/src/Species/SpeciesVAdaptive.cpp index b24d86711..98813c71e 100755 --- a/src/Species/SpeciesVAdaptive.cpp +++ b/src/Species/SpeciesVAdaptive.cpp @@ -275,7 +275,7 @@ void SpeciesVAdaptive::scalarDynamics( double time_dual, unsigned int ispec, // if( mass_>0 ) { // // for( iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -289,7 +289,7 @@ void SpeciesVAdaptive::scalarDynamics( double time_dual, unsigned int ispec, // } else if( mass_==0 ) { // // for( iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -754,7 +754,7 @@ void SpeciesVAdaptive::scalarDynamicsTasks( double time_dual, unsigned int ispec if( mass_>0 ) { for( int iPart=particles->first_index[ipack*packsize_+scell] ; ( int )iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -768,7 +768,7 @@ void SpeciesVAdaptive::scalarDynamicsTasks( double time_dual, unsigned int ispec } else if( mass_==0 ) { for( int iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= length[i]; @@ -1662,7 +1662,7 @@ void SpeciesVAdaptive::scalarPonderomotiveUpdatePositionAndCurrentsTasks( double smpi->traceEventIfDiagTracing(diag_PartEventTracing, Tools::getOMPThreadNum(),0,11); for( int iPart=particles->first_index[first_cell_of_bin[ibin]] ; iPartlast_index[last_cell_of_bin[ibin]]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. 
for( int i = 0 ; i<( int )nDim_field; i++ ) { particles->cell_keys[iPart] *= length_[i]; diff --git a/src/Tools/Timers.cpp b/src/Tools/Timers.cpp index 0cd6dac0c..d3edda0e4 100755 --- a/src/Tools/Timers.cpp +++ b/src/Tools/Timers.cpp @@ -18,7 +18,7 @@ Timers::Timers( SmileiMPI *smpi ) : collisions( "Collisions" ), // Call to Collisions methods movWindow( "Mov window" ), // Moving Window loadBal( "Load balancing" ), // Load balancing - syncPart( "Sync Particles" ), // Call exchangeParticles (MPI & Patch sync) + syncPart( "Sync Particles" ), // Call initExchParticles (MPI & Patch sync) syncField( "Sync Fields" ), // Call sumRhoJ(s), exchangeB (MPI & Patch sync) syncDens( "Sync Densities" ), // If necessary the following timers can be reintroduced particleMerging( "Part Merging" ), // Particle merging From 50891ac16ef744f99d6d240606cfec7be65793d1 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Tue, 2 Apr 2024 17:00:55 +0200 Subject: [PATCH 02/54] repurpose extractParticles --- src/Particles/Particles.cpp | 17 ++++++++----- src/Particles/Particles.h | 2 +- src/Patch/Patch.cpp | 51 ++++++++----------------------------- src/Species/Species.cpp | 10 -------- src/Species/Species.h | 6 ----- 5 files changed, 22 insertions(+), 64 deletions(-) diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index aa9b8a02c..d628e24d2 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -1303,14 +1303,17 @@ void Particles::copyFromDeviceToHost() ERROR( "Device only feature, should not have come here!" ); } -void Particles::extractParticles( Particles* particles_to_move ) +// Loop all particles and copy the outgoing ones to buffers +void Particles::extractParticles( const bool copy[], Particles* buffer[] ) { - particles_to_move->clear(); - // for ( int ipart=0 ; ipart> copy( 3 ); - copy[0] = { neighbor_[0][0] != MPI_PROC_NULL, neighbor_[0][1] != MPI_PROC_NULL }; - copy[1] = { neighbor_[1][0] != MPI_PROC_NULL, neighbor_[1][1] != MPI_PROC_NULL }; - if( params.nDim_field > 2 ) { - copy[2] = { neighbor_[2][0] != MPI_PROC_NULL, neighbor_[2][1] != MPI_PROC_NULL }; + bool copy[params.nDim_field*2]; + Particles* sendBuffer[params.nDim_field*2]; + for( size_t iDim = 0; iDim < params.nDim_field; iDim++ ) { + copy[2*iDim+0] = neighbor_[iDim][0] != MPI_PROC_NULL; + copy[2*iDim+1] = neighbor_[iDim][1] != MPI_PROC_NULL; + sendBuffer[2*iDim+0] = buffer.partSend[iDim][0]; + sendBuffer[2*iDim+1] = buffer.partSend[iDim][1]; } if( params.geometry == "AMcylindrical" ) { - copy[0][0] = copy[0][0] && ( Pcoordinates[0]!=0 || vecSpecies[ispec]->boundary_conditions_[0][0]=="periodic" ); - copy[0][1] = copy[0][1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" ); + copy[0] = copy[0] && ( Pcoordinates[0]!=0 || vecSpecies[ispec]->boundary_conditions_[0][0]=="periodic" ); + copy[1] = copy[1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" ); } - // Loop all particles and count the outgoing ones - for( size_t ipart = 0; ipart < part.size(); ipart++ ) { - if( part.cell_keys[ipart] < -1 ) { - if( part.cell_keys[ipart] == -2 ) { - if( copy[0][0] ) { - part.copyParticle( ipart, *buffer.partSend[0][0] ); - } - } else if( part.cell_keys[ipart] == -3 ) { - if( copy[0][1] ) { - part.copyParticle( ipart, *buffer.partSend[0][1] ); - } - } else if( part.cell_keys[ipart] == -4 ) { - if( copy[1][0] ) { - part.copyParticle( ipart, *buffer.partSend[1][0] ); - } - } else if( 
part.cell_keys[ipart] == -5 ) { - if( copy[1][1] ) { - part.copyParticle( ipart, *buffer.partSend[1][1] ); - } - } else if( part.cell_keys[ipart] == -6 ) { - if( copy[2][0] ) { - part.copyParticle( ipart, *buffer.partSend[2][0] ); - } - } else if( part.cell_keys[ipart] == -7 ) { - if( copy[2][1] ) { - part.copyParticle( ipart, *buffer.partSend[2][1] ); - } - } - } - } - + part.extractParticles( copy, sendBuffer ); + } // copyExchParticlesToBuffers(... iDim) @@ -706,7 +679,6 @@ void Patch::exchParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, Vecto for( int iNeighbor=0; iNeighborsize()<<" n_recv "<size()); // Send Particles &partSend = *buffer.partSend[iDim][iNeighbor]; if( partSend.size() != 0 && is_a_MPI_neighbor( iDim, iNeighbor ) ) { @@ -720,7 +692,6 @@ void Patch::exchParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, Vecto int iOppositeNeighbor = ( iNeighbor+1 )%2; Particles &partRecv = *buffer.partRecv[iDim][iOppositeNeighbor]; if( partRecv.size() != 0 && is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { - // MESSAGE(" patch "<typePartRecv[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &partRecv ); int local_hindex = neighbor_[iDim][iOppositeNeighbor] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][iOppositeNeighbor] ]; int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index 31ab4c1a5..0fb38f673 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -1744,16 +1744,6 @@ void Species::computeCharge( ElectroMagn *EMfields, bool old /*=false*/ ) }//END computeCharge -void Species::extractParticles() -{ - particles->extractParticles( particles_to_move ); -} - -// void Species::injectParticles( Params ¶ms ) -// { -// } - - // --------------------------------------------------------------------------------------------------------------------- //! Sort particles // --------------------------------------------------------------------------------------------------------------------- diff --git a/src/Species/Species.h b/src/Species/Species.h index 56c693d65..b91c9521b 100755 --- a/src/Species/Species.h +++ b/src/Species/Species.h @@ -482,12 +482,6 @@ class Species //! Method calculating the Particle charge on the grid (projection) virtual void computeCharge( ElectroMagn *EMfields, bool old=false ); - //! Method used to select particles which will change of patches - virtual void extractParticles(); - - //! Method used to integrate particles which come from another patches - // virtual void injectParticles( Params ¶ms ); - //! 
Method used to inject and sort particles virtual void sortParticles( Params ¶m ); From da1b17248adcb1ccb7171f8f31ad7b4faf853f53 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Tue, 2 Apr 2024 18:30:02 +0200 Subject: [PATCH 03/54] CI on particle_exchange --- .gitlab-ci.yml | 8 ++++++++ src/Particles/Particles.cpp | 2 +- src/Particles/Particles.h | 7 +++---- src/Patch/Patch.cpp | 2 +- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6faa6ff17..f50bfd819 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,6 +15,7 @@ install: stage: install only: - develop + - particle_exchange script: # Force workdir cleaning in case of retried @@ -33,6 +34,7 @@ compile_default: stage: compile_default only: - develop + - particle_exchange script: # Move in test dir @@ -44,6 +46,7 @@ runQuick: stage: run_quick only: - develop + - particle_exchange script: # Move in test dir @@ -55,6 +58,7 @@ run1D: stage: run_default only: - develop + - particle_exchange script: # Move in test dir @@ -67,6 +71,7 @@ run2D: stage: run_default only: - develop + - particle_exchange script: # Move in test dir @@ -81,6 +86,7 @@ run3D: stage: run_default only: - develop + - particle_exchange script: # Move in test dir @@ -96,6 +102,7 @@ runAM: stage: run_default only: - develop + - particle_exchange script: # Move in test dir @@ -108,6 +115,7 @@ runCollisions: stage: run_default only: - develop + - particle_exchange script: # Move in test dir diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index d628e24d2..62f8b67af 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -1304,7 +1304,7 @@ void Particles::copyFromDeviceToHost() } // Loop all particles and copy the outgoing ones to buffers -void Particles::extractParticles( const bool copy[], Particles* buffer[] ) +void Particles::extractParticles( const size_t /* ndim */, const bool copy[], Particles* buffer[] ) { for( size_t ipart = 0; ipart < size(); ipart++ ) { if( cell_keys[ipart] < -1 ) { diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index aa7fbbe9f..a155baf7a 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -473,10 +473,9 @@ class Particles // Accelerator specific virtual functions // ----------------------------------------------------------------------------- - //! Extract particles from the Particles object and put - //! them in the Particles object `particles_to_move` + //! Extract particles escaping the box to buffers // ----------------------------------------------------------------------------- - virtual void extractParticles( const bool copy[], Particles* buffer[] ); + virtual void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device @@ -484,7 +483,7 @@ class Particles virtual int eraseLeavingParticles(); // ----------------------------------------------------------------------------- - //! Inject particles from particles_to_move object and put + //! Inject particles from particles_to_inject object and put //! them in the Particles object //! 
\param[in,out] particles_to_inject Particles object containing particles to inject virtual int injectParticles( Particles *particles_to_inject ); diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index c7e3ebd78..f0bb6a1fb 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -552,7 +552,7 @@ void Patch::copyExchParticlesToBuffers( int ispec, Params ¶ms ) copy[1] = copy[1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" ); } - part.extractParticles( copy, sendBuffer ); + part.extractParticles( params.nDim_field, copy, sendBuffer ); } // copyExchParticlesToBuffers(... iDim) From 593e96c545fa40d8811673639c087f3af3585e2a Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Wed, 3 Apr 2024 00:36:25 +0200 Subject: [PATCH 04/54] Fix in the new copyParticles --- src/Particles/Particles.cpp | 7 ++++--- src/Patch/Patch.cpp | 11 ++++------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index 62f8b67af..b675ac12f 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -422,12 +422,13 @@ void Particles::copyParticles( vector indices, Particles &dest_parts, in { const size_t transfer_size = indices.size(); const size_t dest_new_size = dest_parts.size() + transfer_size; + const size_t displaced_size = dest_parts.size() - dest_id; for( unsigned int iprop=0 ; ipropresize( dest_new_size ); auto loc = dest_parts.double_prop_[iprop]->begin() + dest_id; - move_backward( loc, loc + transfer_size, dest_parts.double_prop_[iprop]->end() ); + move_backward( loc, loc + displaced_size, dest_parts.double_prop_[iprop]->end() ); // Copy data for( size_t i = 0; i < transfer_size; i++ ) { ( *dest_parts.double_prop_[iprop] )[dest_id+i] = ( *double_prop_[iprop] )[indices[i]]; @@ -438,7 +439,7 @@ void Particles::copyParticles( vector indices, Particles &dest_parts, in // Make space in dest array dest_parts.short_prop_[iprop]->resize( dest_new_size ); auto loc = dest_parts.short_prop_[iprop]->begin() + dest_id; - move_backward( loc, loc + transfer_size, dest_parts.short_prop_[iprop]->end() ); + move_backward( loc, loc + displaced_size, dest_parts.short_prop_[iprop]->end() ); // Copy data for( size_t i = 0; i < transfer_size; i++ ) { ( *dest_parts.short_prop_[iprop] )[dest_id+i] = ( *short_prop_[iprop] )[indices[i]]; @@ -449,7 +450,7 @@ void Particles::copyParticles( vector indices, Particles &dest_parts, in // Make space in dest array dest_parts.uint64_prop_[iprop]->resize( dest_new_size ); auto loc = dest_parts.uint64_prop_[iprop]->begin() + dest_id; - move_backward( loc, loc + transfer_size, dest_parts.uint64_prop_[iprop]->end() ); + move_backward( loc, loc + displaced_size, dest_parts.uint64_prop_[iprop]->end() ); // Copy data for( size_t i = 0; i < transfer_size; i++ ) { ( *dest_parts.uint64_prop_[iprop] )[dest_id+i] = ( *uint64_prop_[iprop] )[indices[i]]; diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index f0bb6a1fb..546e0ca08 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -634,8 +634,8 @@ void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iD Particles &partSend = *buffer.partSend[iDim][iNeighbor]; // Enabled periodicity - if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL && partSend.size() != 0 ) { - if( smpi->periods_[iDim]==1 ) { + if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL ) { + if( partSend.size() > 0 && smpi->periods_[iDim]==1 ) { if( iNeighbor == 0 && Pcoordinates[iDim] == 0 ) { for( size_t 
iPart=0; iPart < partSend.size(); iPart++ ) { if( partSend.position( iDim, iPart ) < 0. ) { @@ -651,17 +651,14 @@ void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iD } } } - } - - if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL ) { + // Initialize receive buffer with the appropriate size if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { if( buffer.partRecvSize[iDim][iNeighbor]!=0 ) { buffer.partRecv[iDim][iNeighbor]->initialize( buffer.partRecvSize[iDim][iNeighbor], *vecSpecies[ispec]->particles ); } - } // Swap particles to other patch directly if it belongs to the same MPI - else { + } else { int iOppositeNeighbor = ( iNeighbor+1 )%2; SpeciesMPIbuffers &neighbor_buffer = ( *vecPatch )( neighbor_[iDim][iNeighbor]- vecPatch->refHindex_ )->vecSpecies[ispec]->MPI_buffer_; swap( buffer.partSend[iDim][iNeighbor], neighbor_buffer.partRecv[iDim][iOppositeNeighbor] ); From f5659256bf7b8e0cf6371611c1caa7c6958b9a2b Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Wed, 3 Apr 2024 11:11:15 +0200 Subject: [PATCH 05/54] new reference --- .../references/tst2d_04_laser_wake.py.txt | Bin 51895 -> 51895 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/validation/references/tst2d_04_laser_wake.py.txt b/validation/references/tst2d_04_laser_wake.py.txt index 48d9eaecad05f679f5c1af836be621ca3b01a1e9..094e7c366dca0e242cf0f955e47a7c88c3b3a09d 100755 GIT binary patch delta 23828 zcmWJscRZC16i%hAlAXO}ME2&`GbLL_#{TR3DrA(lD4&W# zWi>QZ-~H?U?)}}rp7)&doacGYd%bAW^`cD$t;CI9wYPf?O2PU*qq-wxA?Vgn=2hmW ze`n~?-gZb8OeP|3`fM_S#y-_f&D8zK$GPvBTp0;#zfZhhWRJwJ9>QVMDzW(X+SU}d z(PVV8wzs=moQ!XTsxA9JCSvD8^>0dAJU*U?lJ?__MLDfr`G~}56uEWpK2Amu=+3q9 z^7N@-Y-21K`pfH&qj6lPY->Dlxv0Q*_g{PLA3s+9NmvK{+LJfDAk<;|i{0`gLiRA_ z@Xfa&>LB>`?~q?A_XFLyo70y!1q1s`io>NN5#XY}zudz$8XP>5UF9Ffz^gZ@4=lXn zVdzA2>Gp|4n0Yv`Vh}?lgZpP`(J!x(;d(n;MY~QcY{71eX|YHsDQGP%sUShV^Iz9b zTJ~^ekd_l|Z-f#;m#pr~t74M@<7ov(DXdZ1KRkY15IrNc&%ga<4ezCs=6UqZAc@~| z55pr<_@63mD3p>jop%vv0Zd^~9?!jp=& z3!U;U^O=|uC_7FXq2YJk|E5g~oQWLiuXT%JF@#IP0)OPspePfw8{PTi-fy(%7jGu;htHBrE{Y>F&!o(eTS%#}Lo z>EJVBUFXA*36)>!7Vh@Yz|$(AdU!1p7ANC2hs&nI$v&r3o-FZ@@MZa%0mos8Q2*sn zQ0@tAO3SrOI_g;Mv6&Dv6*a|lf5C5WUYp|K$S7Yce>0rw9ppW*)dnQi3RvIOo58x$ z82jB)Gl;V|HtDl!3g<%q{`bt@0A9>xoOQqHf^nB6k8b@Dfqj!sKdryVVei>`DZZ9e zTw(U9+7LuT=Piu4IG)fjfiXd1gf|l}^n05o8Kq-sJ8_NeO#>A>C(9>EtrXmoky*!m zEgpRi%jIR=^gQ88hDRiKePHM6_Q0C4}WEf2Vu9{As1vL zL61wRw&Je~*ee)Hj_Md-j?$;6maC>{qM0G;Oqk(-ee$der#TMPRSTBAv4&y`ug|~L z%%SVdE}=pNbI^C$Fzs{83<%-te?LBbYYffD>&{ir+T+spTh0N);i&idTuI7o9P0Ek z{a_5HV*S-WtNUNl@b|NolP@P}Xv$+#Co7bR8@(FJH&~|Q^Ztys=58uRDdzoa9-&}J zSkw=dfp{DqPYa#6Oh%!5w+}cd(I1hc_|?!U66RA{-fW$ABk*NyX4O)c5k^JbJZd9o z4CXZt!>pFvK#VQK%4nPf(dVCi;}DAg?*F`uR%^&Gd0a+(%Sb%OpVSeSXA&iku(+ z@!v!^Fzopv{$1e!c)ILZJ=0>07l-ELemyZm^NFXIgFczz22x|XT)#QKiQ?}uylf4+ zL}1RLYvxe>uwb`tjydqa!?VsM=3tg^A#Q2d6!;u=GWQi*;E%v}4ApL7s4BZvR(v!L z;X{5}z7m1(>2={Boht`@t8|l${^rIC~dVh(@BS3mBze5u}p~Zk3RZ(oCZD#LVbq~vf#uD=iGn|0aI684kS#*!GA># zF{MYtz*mPP8KGneS4!OyPkb^($-Gvl-)-h-_Q}%sMzT4IKOcUdYHNWH)qSIP>RQ8r zkW&PY>%2LXRZ+B^_$^?|1$|c=3kz_RHuOv~HHW{iI*mW>(!qsGrgfjrgrEe^a?)%= z9Exp;zdZhtK(!2=hUu&J9aTEG#g8L;v|F$rdPL!MB@P z!x~EcyUXwXGKWU@EJ>D27VzmiPp9Qm3#hyuy}nJ-0{BX(UvJ&v$1_TvQmW}e$f`E5 z<8M|R62mvh>=kxt$RE}Z(QH~6@9FU%f$HB;e;+1kT_Gla0n$jmF&mTBH`;3H6k)?Rq#2uaZKZ`DHF~(P< zSU1iQWAL4|;CH6DL-3LY_pLu90#=IFE|NJTKw7RWdht3LvJVKD^i9XZ?*WAYVu1o5 z)|070qg3!~uV1-ml@4)9#b&Q|XTo*Wkpl7@4FVEBe)Q?f0?~b3Q|AKGpk#St!ljFG z5Pk6Drks00P$ehJQTagt>ZFWa?YJzkHS~ud=d1-%`FVD2`H!%`>3y3|eLru3EYb{} z*~?bod9;0MF2w?FJ`vph`?dv4D8xK5@v#JEIU3odzyfsNa1;kRNkLF&(!zVu03_Wy 
zlv-s=hw@#}t-G~pC~wDWHL{Y0Q#q9e^)oco&JdLu)5t`g23GB0I+95{p0+0rP_b$G z=;StHmV&N9OqO5A;<2gHx8pz=84K?{i@x$J9HHu=rd2-)#R?R2HI&^^J%Xlxw8aQV zY~ILlY%qbwJb_J~749$_ENe2!699QW%wN9miU3zm;Z~_`GW?t5@htfg5AW<~8y_xF zK=|~))klx1;7}z(HFZpf%^&@W2qVo*P!g&?IR23a7ul6L%U7~sXVQ4Z>OdN}`x&jh z3X6lr@{I=%ga$xdAm@cJ6-m?^c2pDzx4`+$hm{tCEb*K=Z{b#1OBDFcpzig~0<~g0 zVItBBHav8=*FR_h4|^UR&wFJ7lpYhls_T{@6aI3%^``|q-Z4m&%>L5^bseF~*l0gg zV}H9raeEvprFQAV3`I)4Oj3&oifacK&dK$Yt3_T0bTFDZp_QbT329eDA6wFqlzerpp`0}vycQG&H%X<# z1*ewUHHA1>rU+c-Z1#hu3a73vS54ecyvUgITeiUK>oq5`otcv!>B!tm z;;8sc#jY0vDu>r7=py~ay81^v#`-th4SGq&qOt=s$I>J4Rjf_c;_d(}Kd8x5GwY6L zL!|c4$e3W0WMPZ4ixJfONsDq9xItBEasG!Y63D#uYnIs<0eO2&?K(=xu-E_Omb0Vr zP%d2B8-JHV0Qcqs@r*7i+aZfk%v1IGw@rzOdO6!~VL6 zW%gMhvlhcw*F%~nMAom5`(4v< z`(phJ<69~&AG;IFGeg1t!!DuH@8WTxyWF9*pN!qLohBnz5y)upE~bh2OhTffSnUqI zzt0AkpAwih!VA?I1J9+6Ved*()XrHqh~WHr+hLLf)3lJjYU>DC9<_=O=p=)H7W;+U z3-RzKd5D?(f&xMti)h++sSqXWeR{zy9bPpjT&vrY35^5HsuNQ*5S6?3Z<}E@Y{;#W z>F7;|Au-Yo!jT*coHrw@mk=YDHOzt9A`UobibezC+g$ktVnw!*y1FKt`CTjJTD zrni=Y=;Xs@(Wk%367HW*xvHpb0R>UZzITc&;htLHfdvtI-xq$j1%$GLh5ccOdV9 zbS%CUop`vB~2PO8^Levg@!;iGZCO2|h(M41Q#Z zr!9CQcj{Q+MS{gH&)U@j*erG^IXYUvo4->#(!wp_xmAN^yoD7!%{@`!)Mp7f%egP6 zge{;y;ZKfKtPSq0TOW-3L`Ff~Kc5<8vGAPAaU+GnW`26R>OfOnA^y3 zpGjrL^D$j!|2jXOGDm@-nV7I;dQtv0e5`W9Ass@GxcGgL&4j4FzJwo-X#_+{PwrFR zn+MQ&I%o9j_(QCW-I*p zF0jmg$pTNMb&5$iSzvuh<9OLsGq}EHC$XKba14T;ZX`4FU16^Z#C}Afvw%=#%P%OK$-EO zXeORKd&8lXUX()nwTb@Ibfq&VCH*|}4F#9~tFb&Z8;?`=JMZ*eAmboQg6s*I2rQYN z;J06=FO^nySsDiJIA5OXA(do|Rc0xks#}cVM%k$YY&q_r^{+Hj=MxDc&3s-I{0WDS zN;6;cMlxN72ey6bi-()qi!|Mr6tKQ+@*#MD3dc%_ReN94bm*^Sz5al%bUH3Du@@l? z`14;s@iL?H_he@lKP3a~h5~PqxT3*$-uB`lRYhP~^&2XFXpV`^E&{_!R>-Dj+f(9W zg%Qv9W$(Odfw`aMYwmq9$0g?<3Ge-I=}v=JY>klSNthqnSWfFYl+l@?rG#iB~WU%D{(>UX>Z@*{G>kv8Dbt4WCUt z_k2ZnGE{PfmUT?iF)sSrb%Ehd%wb5Wi1^gL_a$NYbfotxE!`Y{x4O4DeY3=N zJua#AVN0x!o8El8(gL?fXZ{%nb5vs$+tg@b3bQ?jILw&LV091GzxJ2~v^O15J58fQ znbYz;*U1vrUvGZDcgze5t_K|YqAv$OALc5UZwg15+vl{8(2*2Rmbot~n~j%y{$@>H zCuo@W^~j}Bu1su5%s-)Ql8z_d%g&9Rr{YUVhjxkA6x_RakxTn!JdVFUVfCbfjLYx8 zi_M!x;J_f;nYLLHCS3c;|pmcoH`ML27`FzbXbAc%Ph>9 zP_cTxqw^+Re#^5hI&f!$Uq-Ldb2^lA0eoWTox`E}*3(p8E-4hK94h1sHpg^lhf@*B zmRRZCrnSS)5-J3Q4&S^i3koL3z{u z*S8`qfoGygMIzb)JcL9xuhpBuRtNsIS4UJ~BG85R{Iy`b`!Q0>=0FBs_R~F+#+8j< zereuOsimRCF`c9F+cME4n60~xj%4oTrVVz*R6H;Gqa$sSg8ypIdeUCUW1G}Jg}Od6 zom0AqyZa7CAYXODxxsHF{LL=)PW*y9b_jBqYCJT?&afk#(mw&x77|<@_`88&i?7OP zHVOE-%52&P!eOHCVR=dc87l4vix_vu!^bEG_g%%cq0AXjet& zY6iqlUC&ysqrpJslW4v#S)i0k7>IPnWB{cqYs`l`1nPC#`mD!Q@aWnlR`*piYz%mQ znkHe1{|Q?Tez;Al~r%z(#UhvO%^CbTWuHyqp(fMJ9$cdR{s1~!%q>+D;j zn?lOo_RV=Tq?W3jEZC5VP1Mr&uNBg9^6b^h!bB+vLABns9@eU8%C| zuSh_A2t1QKC=dagmhKuqDJR3(u!{j(@5TdZSEQ==B?=UHh&%{Nph9xQ+(`|3QQlJz zR99Tg01vf|Z#SK%fpv+=uZI7!pl4P*kJlgrbos8PnT!R%{d9(uoo_XfX*_?5pU(_A z1;Y)RUs&KEn~}zc59ZiyGE3c7WKQ6f%md+H-k4%w#_{`u-08TdD6qvUiHajVCY~P02?`#vzs+&&NIah6 ze(ZMR4Bhi}6yVZq3&$wGr#`dCNqD_QVxXhf4VjO52Hm-3h;0g1SHvzFgYtxKCI5eJ zuq7Zn<@yo{3{Fjme36O(snk{v)$?SC72}MS!g$CwbR7~Zqd=0~=3lPiRLC5zh%}%V z<=axB9~X()447At9VSv~@ZUN8;0uFUkoQ=1-t%KRT&+6p!gtCK3K;cSbAxrT(J9lK zP&38I8bijLjuv=~o29(a-yHA%cC*R-X@*DocSZSqG{L}mBaTQZBVaphb(~yA*Abfc zj&WTy1zr2z2eH)d?Ft*C#)H4&yQgO(DKL6PjJep53XQTJ zLqq)OFy8P`q6@$oN#3OtA^nBnLfxTA%?bX@*RUx~^D$$2G6> zzchTauR%aMJPZ5uFK`?C(r{DYS_AAgIbVlZFE*CXB(OyBoY zVBtbMK9wDmFwZ8V^3f!j7yaQ_bFWGD@hcKKU;glc$HN_|LuRkzNya$CKQp{~f!;k| zhr%E9yTQAKEt|<1B#`~H;eK>$I9!q%Dot}BgHS5#?G?Xx*zMl^HH3~Nv8UjqMSvI; zA_tqVo3p0FFQ&&H&#z~|_e80{1KKp$Oq>ekOUnYytjxhJpVNS&;7FaipBt3U3n*Mv z*TVlSyp23ho1nJE>W)}JGjz{>BV(apj>WeUoUJ)cG08d1MaJG3%M$LNGnmka%hJpt zaFmumj5X 
z?NzhmPeU$V)^Z-xEPQStu(QyBPT#^CcK#~Nz}ozlaqgdK7~gL5uThqsj-5Dq;VMvY zb%sYrBq|DTymX;E zWZO~s6+?I_-ub2_-593CetsJ~W(@mzC{$%OQ;76*_?*pd1m5ynOKeNkAyfS%sldPv z|8jd;e|4u~XD<*KBuA8;Cr9NjZ~CSpct*# zQqb}8+{N&Sczm;KW7q`~GL9wBtR)PEW9$b7uDVMk4EX5b*%0Z5)m#lvHXJub)+XiA zM>HeYX2Z(MWaAE_{zDSiyh(7+Lm#EHF!V(3m~u9^ORRl5AnTVuOxLxE2x zerj)1rozu%I+{D^M&wZWnO#w5=#z5W?ur|KGvSEEY8OQ$3r>p$x~x2*!rYq^Q8Olc zVbozLx$WH^>?l%wd`;UJhnFRDjP{w}?4Hm^JKaoCVd=F(t%xzEIRE&T;S5L;BIcM2 zy0k#1yJ2=g!vKhD*N7o8x{a2mKlE~!F*sLFt^CY3hDcUzyZR3X(8l)qbcnVpNc4QJ z9MHAI?OJTZ2IeWqrG1{(`Im;eGqICeGnx3{?K0C|at1m!W;w0W`@hr8Y)kA^Dn4Sb zcrEEjL5({B*L>(jX-HgA62It3=Wm&OOa1w9?9g-UdU%zD7g*waCyU+C&V63+AfE{) zsFxU7Y%zq+tb$u3MQ-rXtC>Tej|7}E7M?&X9&@d?j>en)AK^qEW`G*_<{a=oCzyB42U_s3FTDDj;l zcdj{XNK!txRJ|LSqgKp>PXX@y$@LaFOt5s7QAO#iG3vgZifZyS!o%B+TtB9$k0amp z{nrcC;iy1S_A?zl&<%Z8&dO~F>)yWea!-ul&;GipZRY_l+f+74*69NqURDv9-c10< zq|AEg6JvyD^?krZLGnx0;S;xL_{OkO`N7ppd|$gqYWQFVGW*G@bhM_Sjo3o6`SDcr z=*n-r}b;MlCb=}XpMY@D~jYDJj|zK zgl{1};Je-v3uK@|#dL!iaD&tQ+xqP!}QJnGr>2DKso>j#;UMLkHldX4e3ml2q7?x@&#NFSP$a{GlMJlp*gm``D-F$vzD2?} zGZjx-ifmKgN*|Rc%C4=_GhbaZVdeyJGR~g(kon?JIJQe2EDc}v$DaoxHc>aZp^)DX z9&vReBu!sRsFXRHe0OJ~U@i&9i(`UV<-?)pT*$vWs$>{q;?D6liHGFR z|4J2xl0a&+>(RD~RIvGLPTa1!k_NAwYm*|lGr+py>v-I?ObE}`rMB*+!AMugNG@w4 zaP}pP_7xjJ(a-PC*{kHx>#fs7RIN4&Tj;M*YUw^7Z(i0|vjL88iC~^<)kh__pd&Y% zv~aAwW$`st8T73sXeazNVU26sNnMIA2h z(slofsknc)N-4{eB)s8UvV8k(9BS)MN zKjA$TkaHpl*kl#SWaU(##&F#Y&P@Y_kcDSc^xZL^LFPk=cP5yteqLw%l?gxmJj|^V za99-T@?C#>lp2X}?; zz9lW>f@-O#CrYp}*#2d~yedfES6JcJ)B%1bj}n~MfwOM}tN0%3!thA9{ooD_NG46R z9p&W%)8};_zJGL(l6FXHuSN`Z@4Fkc!7mf_tiv%@WXzTy-FK4+!%qp+rB9#yQ9OKQQvx5y-q+4sodh}iy(e-RQ{frkACEqIkSHRc^#1sN>F_(ivr>RA!+Uu)KW{F|gpZtK zHeBj4aAIV;dqoVuCAO})E=)W6<9kdH}zxp$<#gbyh@HYd`AN+Ok!G&}jW($Y$ zJ!xq0VoqOLG!^UI4PKwznuMqCU01E?i9=3K$)!)T(HO+@L4inC2*Vq7PoiG!B%!ot z&2`HtH$1MgZA8z^03{wLXtUom2I2UAqnkV2AnVq!m$(E8I{pgWX`x?ERHZapsXmAX z?)CGf`q^=i;cwL_)tCeaUH!zXmr`IZrE0`XF%2Yd#;Z^sq{G~Vm!$BG40s&%A193| z6O0wcq&+mS?7`Mkf)+Z+Z!okg za$21pOkDp`NYlXM4m%_-GwcG*ch3{nzV3$P*r*f2WoqDU`LyrEv>Ghuz1`{eRU10| z`jQUvD#Fbq{w`vM3mm<>k$ahLYC95cKSH)g;AYVXoiMQs9H{hSTkXie>Ag(RI-%+K zYPQ21)Y9lka5jlPNx}37s=qC_C*jA5ql>qvp_;UUB>lf_C*t>hT|E3v3Xc?|*dQ;;Hc3b#~-oyUzjZj)&*wYyX_3gS5 z?w!%FO?rN24|^PBSqgBsxg>#*{^mG`rzv1zb$I#9Z33=sY_>5xlMYKJoBpY#W&qDC zin4`T21IKQAJ^F#34@9+6yk`|8Bhutfc4!OUhY zT$4(6D$7?#qBh`<_A_x+eA-%h^v(kbNUYsv&g>u$r7!%p?0KmKFW*uFkDgZp+uy}1 z?#I>OR9LZXW4a=|uDX9^$n1~dl#DI=7dcaOyER^6BM^dA1;=--p6ST{VaZ3_AOq$9 zjc~Hl4a)1&-Qz*q=|#!c_vMOa3T|P$yE?xuiNH05ypu^s<4|4o;fs2^Xgr_U)Gbd7 zL(@_FgE>|Hs8X`M*0b3K9crTf8>P{WN}iw(3Kd51es;%)M?c*_Hw%xaX8D6)z2Vhi zr!ZhuT$%c?8Vz0gO}~C<#z9BL)C$3p1Pmws-e`K60>2xU_{oU`2rxe@E_Wf)p;`Tp zlbdS>Y;<_H{>3;Q0?W6JrBFg)Jx1n1)q(-Euo#b*&o3MD6HNGc%oma)O z)2|&aKaPUsprB;IY}up$e#2JuaZ-EebwetvA|tD?pz44F7|FyP#%6veQXl z6{y-cEcv5f5tN0?2So^3MRuV!nS%%kR)MY`L`T zb?sg{GDf|aHIYxpi_`8+`2>M+5uqXhf0FS^|L}U1R1$t`Vet#%i^K4175<;)qA_7b z;_a{6Fx)g+bz_I7KZg0d;1QZ}#;EFDce{Cwk^fHU9PzHz5ONRmy^i#EhkwD}t%FPa z!K+ZoX|6I9NOci4p%0@W>W2RrWv4jE-_+ULT$l)yF;g*~+7xgVd#xg6ML@iyEyfcs`@0E0q%U`+7kw8z<1U_KYDWzWM}=0sZlcq>nllS-7ad6%$;>@O93D8KwZwMGUH^XA^*bLWRREov#Jge-h6P4B2J+65aD zJVJ(c?uPp&2VXwT|r+W#Ja|6ZznA8`x?Z7Z~fY`Gj}3*6Va!M^Yr7>k$q-Wcjf3% z-nt+7xgrg8MbwhxTj?VcZ9h5$P(hb-DQ9}rALK_%zUK^@f{Vqx(bHcWA!^m*XVL^O z+FwzRh@zLBn6yYI@K$0C4l@p+33%1G5FNuxEOyz6a~)ob0iS9|q{T0SRd{t?A1pc|4`K>V&vd>NZd@&mZ!&_fYe9?)66ggs|-Z~Lpl3JX(_ay^ibx-~p z8v#C|6lUwRG?;7ryv0JC?g2ggr=Am@0(oxUIozqfptt8?z?Cb6IRuraGH<@M6ZnIz z6AWAVaLsMvv^=u{@@>1`x%Js@^scu%?A9TLdOWAxyUvK9S1zqf#dtde47?1uqQ4Vl z8mF%3WlF>2p1%(4APkMOW23>RC1KO7Bh_a0tD(c5vs-%E1u<}EjaaX<365_KXkpIx 
z!4zw=H=#sU5{ms4Uj9CshVwr!3;hyK!)*Jy{W&jFQPip6#_ZW-yh+M`qn?_8uQHyN zDmccX*kV+kr(QHZ_Bw7Bc1 zhRRi^t#*}Ec>JRL;bKG@G(Mj?m3Ax*G#^)Fr|MH6aC9gt$ioMOBFsY0-7|+4%TF#- z>C(X1Y3n@<0w}q%G-)m?fz(Zlg|1Vwc=?jjr8{ZFm2^?pZua zVtwAS4Zd&7EmoKl2U^rFe|dI3kkEY|@H=0O_qicGSL2L@*2#UxMHFy7Uhms* zmKAWVzBG?Basy=@(_bIxn~>qxub!V5!l7oT`(=`0G>mZhSqrCs8*{5xI zNU7n-Pu(@cv`a^HFMQ&Fy$x_puu1^t7#j7bD%e1`EBWi(AUB4uS4aPpQ9#1(OxZcL z!CJ%Dg!4q2<(Dq)Tjdl@~BFCVR8(#3<}FcUP^%T zJsi=NpD7^kBfS4_YYGhT7VmEsBS7Z3PL`|(LSAUaK{shZj&F-Mr0(y!H^0dg z=30gGOXB6A*YJ-Fj}r^Z_VOlQN#MpZ@ADE4J_Hvs(Rj>fmf7(LkfL0oNn+G;o`A=+o{@SpZLSv9T!iEt}oV7>7HP9gV zvg-H|E8N(#BJ}u?2Z|=%ANOO5MWt^ClTZCg#Vr2sD%{rzwCAt?$Lm5xqb(PAWfPE$ z&!)M*sZ*0MfxT>G$TS|mx^sV$_9kQQb8D4n4To_!VjttywP2*gf8sXn_Q8zyx>-Jc zC%O_kcl0-l8X9fwV7Q({znUHIWc&NV706nwOcjg%&@6IHSJpNHSg%fOr(e2tKz!m7N9jR!v@V&b7tS5;nb;vI^7|bN`V04NunCy% z;m^qT_^rWsza6;H0UnfW}^PJkxnVUH8sG+`Tn?zolT-p^sKa7ZP!{xYA1LZ7fCyoz;40M#db0b3RtOQTTeyH-}k07`NzNh)_J^ zgN{W+OGA&1E1JA1Es%FH!li23G1EeG7`~ME{aK_NC^`E+*30q(?arcIvITU-QE zKadP!r)_PjR$@UcOS`_C-scfXN^*h&$-vrGck|fW6wnX3K;iOB0jc-bL*6$=L;Mc` zR=%l&z;n8Jp)k}EG=sLR|MgUYVIurWi`$qaoGGbPJ5~M7uo4{;M89nS<6QpJw((zk zT%xu}OYC`IsJnf~)CU;>+<7ROJ9~*6|NQ>EZF|sC&!<1h=X~dW8crq3h$~Py;QZs- z?_9Js!$j-ZJ2W>rRD8X1x?=*+X**vXql*j9|GS;G|6v&R1v0RSt0f}w#pvwyL(&wC z+svxqES!kvDz{v#*d34dLi<|$BFNZL%M%|^6oy+<=Y@wd1M$_zTLW8c4x#yh8!6M` zme|Ht*eQQshJM?5t~U>DV8Nr6ba2fBn0Fo+n2`&D1?hj6-a#asJ6Gv(!8sbpE5a!| zhGQWnJZn;VV-f)pD&=Lvl@y>k3!QaOr9(GU7Nkb^1#@^=sEs}ZSZEel<=#qxa%=aP zo_EpUCqO-_p5g^8qNnZ%EA0c0yDwHAe$)fYp~Xj$fy%%k#H7oBVvr$t@=t~^GgL=0 z)kU`cFzkrXys+WVyy3mPcYhxCNnoKaW0uodLIMMr4Gm{#e5m_Qw7TM%0OI2(V@J|9 zpy{h7%abdjc=P7AGn*LqVBcnH$@Mu?d}#9Pm9w)8>iM%?c*PNdVayyM*X-g^k<;;M zc~JtstY~5Wdpr(3hjW#0oF?N9QK^S%BB7Z5`OMQ}OuktAhhg`|u>;s7i`pBBty*BJ z_`Lh@?}M;dA(oUoK!Uq#p{;7E;qda$vlEBtp3gp}e+E@*aUe{r*1m{I1mUK=-&gih zKxr%MzEz225X#)`y|ID*x()jt+Y`nF9OAgqog+m+)zckH?h0w(#uV@4w@Lt~)dZ)6 zN-{Lwwt42`7zKZ;Y4-_f22a3CB^%3*+Jd~6_e+VJ`mnj`!_%>6GH~XK&UEA^LE!$o z&b5?3-;*}~c<=b*cZRb6?d?8%c-&BG^uYPHi_*A3t@T8;;4YjQ{!yqfAcrB!?Ry>t ztKf#w2gK}eRh+!G_Ys4#A!aISO?y8z$9eBs8KZb7RR8jYxb#lV7uz1#sm0qx;a}3# z*|B5_KDP2Ovu{jAb*nm+*1M^Ajd#ZT`2J)xT0S;$;8g-%?z+CG`(zxQ4TFvpN=4)Q zy^+r%w+3O}`_&$yghNOU_`c{qb&7(7<7zH_dJ_93RwAUg*wfWILEgm~~EXjMEpG^^l@_x>Cb#sB>QNGD-tb7>W zWgQXuf(L_+ZM+z%DTGtr3LhU_*o5Q6VdL`3$)ASBt_iP}WQDO{`xx_)KLU6k_^(oG zrLl4@P|M)F5>6GBJ@1^-LUBdj$~HX{WPD%XJ~w2CZ{uPeBk2>;V3biNw>S{(9y~sO zF)kJbV$Jw?+0*c*%oly}|1!}}&9zc>G7F{4YKHD~Wn-iwdW0kKPb1(xf z^rIQK8mHlgXVjh7A z0ZCC$>vDDnVZDg_EdA9C40QZ=(CKq3R^i#1)VVa&5mbHckdT3rUmrWHwbPL0zeBGC zd9!iFV8qXaKL-t%){aq^a#6u$%kjKTd3c3sfFyVN2>y|6eLohMi`AYbO!Y)U$1FL;ib_n1-pcN&}f?`W5Rzd(0`%-8nvEI zN}{?{-#d|@b7HWEm40xbxKXIv#w{8i8%E{Ed-XXEjQ6 zpB)IJ^}0@z1)ZEWBoN8-05QXbQ}?dvfyBn+`pwIlAh2Wh=7NVN_*{Ozjq+6n_uoCP zw)GYhwh6@*Jl`k=;v|w;jFkq|3A(-8E@=v37p-n&GuXr42}kpD0d%3UEx0-Mv=1C{ z_{=hR!5`cbendQK@P~+RzsjE7@q+EIym%wHz2UUX58{CSR0xDV{L?*uFb2$IDJIS} z6o`r#lqri&2c69B;y!u^eMKeWOb~lEg!)|-KGBm4QO-#*_w|oL@9)EnyE^hA#HV%F z!SLg-XivW~6DxqJZBL71vkSm%cKo&Ug931wS7=p=J_-MQ3n&#haS~FKCR>Q%0*Eyu zex%g#6o9R;$6hzZ)^RBXKZOu9A# z>bA(M%4GV3-nlLH&o&+eT{h#r8qM}_u-Z-1C65m6`NHn=X@=n5`9`taO&#`>J1DyC zH9((DWgfd;7@@7}+s0p+M)-=zKe$7~-w5&Qn|~%Jby1|ttUWSY5npLELKg2XD9=55 zwP&v_e07;Svzf&MKCWtWn2QI%rF9+K-VI@J-nf+6hcgZY^g?<)6Vic`!nBwjng{Bm zM-q}N3L%}FYLmu&4!*e@jXBV90qod3j%b#ag3rp=fXI90K!|1Ot4ix#f$#3LE9`%+ z09vR|Yz?Y}^>5p+w#QXMV~*LN$mkXNv6u;4d29tlx`=OUT`q$HUg5h%&ZTf%xHHl@ z>jE5=GWV0_ItOXLO_Ga6P6Lb4{=bTh1<>VGxX;NX2WG$c77BP#fzlKZ>b4f1l4+f}?Ib8d$T`=LqdY`L50anOuI{ zd{tffx1kZ*xiYov%eO(DSY5{AhmJ^{&@}kR?1gGJeY{RiB)rzLY};cHjt4e3v)rTK 
zk}8>I-af{fNPqL`!Z#KAI({?QZM#+$J?)P4KUXH#kd0qObIA*&BXlLShonmH)2Gk9 z?9AwWT$Ti|mEPyZwZ;cetW$7YUe96U!$|Bc996kYPfkL=Fy32M3WQq23q?nqVqp97 zzhTa*L>M!R;QjnA8Ep1!jpUq5ft_b2xu5)|fX!%2p?z~K_-V9CO%XyN;P#^`Y$Mx2 zXkwRk*5S~EIU~Nl_x<*$e_(7-SNjkKe>R+>Z45vI5!e4tu|}e`$B%RtKQbm)vx_t@ zkg?O#=;2%GXf)aXBI7%w|hQRopzNs9?H|{Pd?;?PrP-*9-XvAqnw6U zLBSfB7B-MTY>WQclUMb5q;En9LmNKb*lVqi`t`y5?K}ri=FQQuXOlii>QG?}bx5ohW%Vw-vDRro!?nPtkJy`0>1P!2@z#YF+%>EVtxsE#qV7r<|JR1W3)NGb z^4gKx`%e$|&33#vGjQ#7PCEva&Pon{Z9`@C=ccFMw_@>8#`V?I7JAKAXu8ie;lhvU z;OK1)sJphS<mN1+%Gm(=vFJDkQGNuuj|yTl0` zAuoE(Rb``3wv<+}DS^E{Tb5NvWAI!nwPkfY5HBApa(=zd1tUfyZvK+f!;9m-iKBB8 zAoTd;q5NunI4@Y2d?U{g7{{7gUR^!_J5P5VpOW;1_c5xZSbu-enNS%Mp9q2>(uG4l zr$ZrG#Q%4RRS0Y$t_55EUGs$<8>cH6G8AFE|Iu%{gQ2K>!P+`MEggf71Z$oYEl`^y^to934m;9qNsp^=ZSw!0V-5Uz$-9DPvm? zHQ7#Ow<`hJ!A+68081@fA?5*ZfkTPfNbO^cv`$W^OH_O(5 z!)UHE!B+>r^k`|b<3uA2zrNA&x3n3&9z1!^A=?VUUnh15A83QDlq)1J)^?Z-|M0@> z?Ef8HcRZEv8&;HvP%0TEN>b^IN~O9rm01cQTauKW-6=a{WRtx^l#zYgWFF(l$T}P| zJG=Ou&+o72^FGh>&-;w`z3%(It}7R~wlbWjx%1%6s)6D4H+c}F6aA|>BM&+-=khnGOi@Us zBdBIUIbEB7AVBu!}qX?W(^%_&CNp7%4phA9MfFi|T5)Y$Uq-*LFQ@ z-1cLvvG*y~^{L65p3uko@y@%1ef`R~%tBj7&X$4j^TDo`dKMs1Pdvo1z3hHTNlp>xY{y zg;uH{S=r(%D`y>OybC;kYp@obfQ=Gb+ka{ zMxsC?Zwu7IR99476Lc-^kT-B>fN-vXs+PezkmX4Ua2cqE8)a56`GP7S>#3HJw`D1) z21OK9?a2rAXD=G)PJe^PZl=bJhd#q8$J;h3*|Cu8V;UT691K{*{coD`9voDJl_%A0 z;k7TJGcvVY8<=9+^|IA2qIYS@#+Z{Tj+u!c=kU`+hn=J2>!xbBv{lK|N2iTf&KQuo zcumpx!Jcf^Ejx@Sj=f~GcEKkmN+ixPZ*+FxqSe(2z#A!C+aETD;+L1H0mk}K=ts|@ z+}Iq8#;=~=3>%Hd9uIlbW%fjDB?yN4-tJDo<;0q&35{_Wvn^CS&><2ams zLYBVIFT>E2B6M1v!x=^|E6cvhUgjAUbt?CIYrY+!^@=G&z#)o#LL zv8;lTI5q;j-Cg~;x?dIJhZkg(wk$BJ@%U7zlq(Ke%+Qyf4Z!+Uub1~_qLK4b;X~>A zWQ=|5rt*mUD{4#T?VWc0j&kaRi&t3?+14~wo>x}lzPwiRF@-v8_uT8txYme@jDn%0 zdLsUb9qyW0X~VJke@yNA}I*bM|B>>^gL)p*qq8$JK<2ycngha?#i_g;yHl9Cp0B zY-$YkeatIye{5mrMc%nr-BB7@$f_$GwS!oIKF+DnpBYu4 z!-{c5mwg?IzEZsLGp89vc2lvrN*mVMe26>DOhS(?+V)7fpSaA}wEy{N7k=R`)xGhp z8|A$%jvh+w#wM;K*~A)Z|I7im9L}HU;ygC1kmTz%k`Q%;LzaZIx{_4Z+p4Eo zWhS#Ke6phBIJ{vFwYvwX)HfHXImc;Ff8Zl<2$+^tyGFu}X{YsZtt9ZASdQ7XmI z2J})Y?H(8a)BR!olBzwxS9jZxfq?|(4sJ4Xf%QkNM&hmj zgAlhuY!mZtVfyeLk+=GaQGGW0Z4IV!cx0kkb%)Hy+GOO`qNtZqV(?-bQ72m`824sW zoZUU;i8JZ#wfomq;4fTcd`Fe*g&w$@;AvuuD=LI*@sEpRHy>Ccd~cS2mL@%rDJ4`bFIip5ZL6E=tSv=qR^Ari>8gPeG_$q z7!hz%)23_~2N;)^FQklO>aep1pY8-Ms+_7RZ<@mJoMPqqwrQ$SkxZV7{w(?tPQ6xh zl9xsWB@n>l0+f9do?Y!3BOVzh>(XYqh9ahFuqGLn#am~c?0WIyd#2~bbe;Hcv0K1#S1Vp> z^mMgW_<`axd7+7OE5pN~s~mPel!GjP*)VJmAg1_#nt7s{Xcq2S*0KWiMU z(ZftdEa~%Q%>O!ef$tnuWa<;an!`ZFhP95Wm!3vIhJDptt<+eE&`&G8lbis>W=0{l z!HE#n&zO+S6%WnE*K7_NhCtxso9tvNIVJm^WVUu`G64;@ZJefTFTxU+rwii`Yf+*% z9~a_VF-5JFbDv8WzUG~ld$B%%HiIuKkGBrvCHtT0%}!%DdB*e`ZRaE!NxNP&44B64 ztip^79<%7bYlUt6)*PBfhG+QPp2JG(A(x~#v&b=_TCVLrjTT!^wv|^*qAph~N&6XL z3^S#K_Hbtpa5V@JXMG`Q{yK` zb&_!0Ad3I*eH|>Uu=%=&CJekqcXu7jjR)qLv$Ag(lc4BNXSK;z0^HJ@xY;}z3qNLP zHWbPt;Khq+L)O~?&}^vkJ(tjL3nzmEcrP&cLbTu4zR%Yp;aD@5wrYF|SS)Ynwz!lH z)6HvqwxY!lu+Z%|@~IMhn>n_m80+D0^EZR*0Yo_Q>2ZFEdjGi66Z>{M z^+Gt4kV$@MKUB(`IZhiq2q|WR5BUOzKtd?zM5h!P#?NGQ5$dzakioyhK6{J|k9%)I z;UXEj9B4V!=gGj%$8qYw92q`X9SI{Yk->ei$9A8>mRt!PT%Z*6CvO%~Xd@sd%xS#qv?y?|5;;V|HT~v~b z5@9!wSqCUS*mYGTg9z#la)Lvm^`Q9S(zr-*B~?}*DK)553?uKt9i-K>LHs@Zo@q^i zp#@jTcDqP8_|c&;U%?mD7k9*41Uuv4i;$)9Tu)>WojR7b%Ns{G;|c;!I%9&g4NpP9 zQxLrO-FNq&FnHx}uhvwQ3EJZXMnN0j5?Bz54;b242V?5F=U;EOLg{pKtHJednCWP( z{nR@M`7&!>biov`ms9UrOPPe``sv%#1GB(dp?W%-umIev`mW~&|G>Z(h314m%OJZD zXnrSV6$F`sAL=x&!GX)sM}rO5A?wJW!-LP)p_Np|Hr=`grwKRqiZms!f;kH_k!Nce z%KF`m6{r8e^SC$t`X3hHaH7v8-G8$XX)71pTQLbP`jj%IFBB;DD~=7C8-$BV$IWP- zb_2w-sTOg!!MNPI$Mzd_ARlM?SfjH99$MV3q-V_nK5-#wxi{hPo33Hj#nccYFP`2; 
zq7oem(HVC7f;C>)L|>P)8SIJjlm|0X?7qlVKN9c;?rGH}83m3Kt9jLGDh6|_My#w7k;g<2# z%+I53u&u%V?a_`OFyb<}#+_UOt1&-P);(juO;k8Rz9kW*RS#8M%O(_Jr;bXVw(o&8@XZ!O9&N}SowUyWMg^XtvmDv)U*GlAG$f)`o**V6^Q zWB16A&eVDq(rlA0Wl&GXGvdTQB)v%Nn)B0GpuGscE>wCordwgAm0}6!KR-;Vc<#^b z9fwhg55DUYj($bL9^+&^+hPnCypa?&R*f`Ru6Q7?1z)rT{+1_^&`7~7FKH4T;&vQ6EIf;|1`TG*ne(WmL%OIRwTQ!O{uwEXOL%2! zPRX=r83!tN6UePA$oRu{y~Ar2Wpd+P;p2?1%PMY{a)9r{0r~<~`LY#KlDjT{A33o=tYq z-G{!S(sf%1bdJNNkZp2=RX?oDyRu|jV1=jKbt~`g{|JPJ{bd%Zk?>4K(Eq+?0(h4^ zOlY@Eh2j&-g#sq&V9@g_ecwnL>}}IlwwH`P$7*>xOwnEm`HXke`f#fsm%|^i< zaCmytd&R6D&P~#4K0P%AqW7eH+K6Ph5$Z>dIygcVBxt*CUmu}j#+PTu>?u(Djm8wx zDDcJFH|%;8g^K?<>#B>*D8TO_*-E%W0S$o*wWItLa9AKEFrT5o-9p`svAYzwY#mB9 zilo4hk)H1NEb4rT!rc)i6yU038|(6=fL~Pgu2N+<$U68Qun5Q945KuHCpt=WT$Z!J$_$+=p5b+PiIuIyz~& zN?{HE4)^7S1?nzWL51u%x0+51{Qmt|dE`qc=<0C4bYLEUwiv$yHHsr}Pl?;{Uibve z*$f?@6rP2BC-(;Yda(e6UHcE%oLmAs4o)@3%oVVocoubVa1BIX6_#C!-XOs0J8c_v z(qAB|6htK0{DYaw`<~;})}tcuc24(S@O&~Iv=P4nYg)DceW9$uJ{!-~SGg;|v5mJ+ zT6hUUubk8JGg$yh87?!+3$wsdwCbu6GXV;v^kG%X)D3H8c^Az-07(VQ^L9C%P#jX~ zzVmqtuynpBe*vy4p!t?0Y8w3=#zoYaHK-@(a9;;)n7pGJ2iAQ4JOWY26nqi4a^QmT>oYCpCh8>VF`k2W;+Ye}1~u2k)GGGnc;% zKv$sUID5u`B9QMY9w`0O2X@V}4TjW-7pp%7Py{-`$XGdJtCk4P#>e*TV5)&P{ZV*b zB^TcQv2*$>r31(QvDndWW?@c1!y8lmQmQzjW2bCXB{~Iot0WW1x-YLEdozxeO7qX;bRH+=J4&`!S3x_zfm^( zx9ZOqgnS~u_RU!`G>8}MVB7l(v|1y2aAylJADMq0N}{GeZko-9Rk%S5Y441Ib~s#( zxN{+CgBl>#rARI>l0fa@HQVCv{lGpb|KaxAA^7%QsLt`&5XeTVFl>hY0#6xJna^=; z@O1J{-BeH!+!rNY{ZGyc^q=KCD3hLS`K*~n)1VjB#sLa~A?(9W|&$1&8 zci)atGtM-m9BNQsBC!(i@#+-JBoR4j*Cs%ln7I8z-YC>%no=~=hG6X43tGMSPGIIJ zyK_jd5^}uVZ>R-Ff`&3fM?7sXz^VFT-b*pmqJqufFNs_jlal=`ntkfEh1!FW0p#5r5VAI?O;)tKAs zMTt?M)4dfRijg3eLcY!D84ky`t-Bqc3xU74BdpCdLqPmfNY_=yP#_8<{u9Uxh4f0o z4%(?uxPGWyy^;FNnV}J7A`=R?TUJB}2go6?@GK-+I64H(vV`u~xr9J`o7Z7&w-6wi zaE9%V4S}8i_0+SHsDWJ%7PE7O0u<~e67)i;OZ#W)L3AkG-xSS!QyU5@ieEma_JzW` z|J=$;$3vkhl*qC^7Yc!vfx6y|kIZ%SUOd(3Rj~~;q%)+Kp>Yi~WC*xcw~xRmPQRnh zeg@8O6Q!fun1poV>blAJ2$b}X1wL9L0qbF*A5P{5puNc>arW#FXuc=mM5Y~rrIkBf zb5UdPK~_q1^UM^O=~+*o3HlAB?KT#+Otji4nX*SI?BIg#=+aa-fbXt8i=*8kL+@s1MG62{QGPk zN_unqDBtHnU2SLNazK8a0 z4fJw+iWgk-LndqGTdS(IuHx4T2=gfI$n9%o z{%rAZ@@M~e-WPXRtDiQqu4)9acY9P7PWOT`S?HFe>k!#Hqp+G=Jqd_naIEub`F*$qUdVhF*vd~B5x#VAAN3gO- zg#+gm2G{*ilb6=nWP1?a*mGsevS8{+>n&D>EMmY%H*HyYnss?YDtmOpH zY9QW}I>Rtl4wuQL0~H(jpvkbJ7gw1MhMcso&6#^wbR!C5<(1b31^&b+#sF znj1vF(Do?&90c>;j|GjS&0~QjX@ob(D;W&W1=BrpPJ?&#=dO%or9;0}Bw6W28qj#6 zczH?+X#L{hnjj>C{e#Mt{dI9b(ht;%oR0yKt*Q3UdtzWkPFv~xNHmC_R*nB99|OfM P2aTV%#r*#aS_%ITYJ~t> delta 23828 zcmWJscR1C56iy|Hh8-e%Mz#p&+N&fKvbpw%YhT>!a_y2tLnpbs!&Uw%Kes1S)x}Co%r;)1EtNLO82?=mEB~)jS_JEm=60Z`! 
z1Sl>&II!JK1twkv-*?|cfO{P(cQq1@peB#iYuREV{`fxfac*Y_PC5w%PAErU`E8bX z_GfV@tZRL^s~`^7c2$@@{t|=fGZnwdiBZ`2I#kk=GXn2Qx61{`gkjmEM^A7xn2NdY zXW!zrk!6P_11>#=X$8t106O1=tdVbc_O!yBp^jHK<8d%DLO z9;N>Ds0}>hw0*@uCMU!1+%i*UI3CE>Y= zYZT<>)EDg-i2<>}?nT{jY8>qQBDwebyEu5z%wF286#;|?ttJy9A#gFbvFLIs5kAui;iZ{frg5LLzh*WD7eWW`x@LOpCB?`OTW?L^r8cJM0GN)YI;YaJLoM*?q6_wwfQD3DBW)O~P^44d=ENdlKBFyzcy zrlm#$(sT1_caCImU9A4x)kcR?Cf*f&E6HH>E^>2_OahQQ?an!GW0ZVp;ioRgN!X#e zW|LFm45y_RZfw<3!y@<1R1w3yhB)me`1AdkArgW@JxrTOVm_a#Hf3ts>4wY4q?&L&-Oynh_Mio~;6eg)%`|Lj)Hmm`sUxkiGo zApt+Kx|eV8rQ`Be<_8?F=vc}UE%uZ*8J&Av4Pyy3y!w!Oll^@y1&gOjMv0ANbd5`{ z=Drz)Phw`ZPvwyCfZoxw!K}gfh;!~rU^5Y`FIlY@6+2+6us^#M7Xcr#3&rHT(1(uQ z!Fwc}9HCq4=+(AAMDQt?vkK4*h8OCT8~8Y(DZ2FS8`^zmtZnf_;5^}eny8oH;Xz&0$KmwSEyzKk2H45r&9{Lx` zgg^?r;*HYv?6fALlg2z10Sf z2U~wJ`%!S`t-ni0-qNxE)#BN=qjbz;v#6HYlZ^KrYfCnm(lGu`Qd4~^1*iEl|J6Sw zWA(|(*jBONo>WW%<}CSyX%0p_tIG}O3!fjIDtf~t<@ zyNQcr6iLqJ8u}Q850g!J74MTU|Acsh^Y2N1D;pjCH;2s+pEJ#?mmfT*!(JA0Z0S%P)8RH55Z;Mb>iDd7co)4?fnPsd5!6qs4$oawfp!r}7cN25n0 z!SaGlcu`g$Ea?%&gB497xWpmm%vVF4&uFy!-DHfHzLNoYr|itsFkEuJCTxt{rX~^kH69}sCG`wOeq=7eJ@=Xwxi*70n_J`0~9>d z+LwQfDGqP9eRp|28-?$!>@BX`A)%%@Rb+C+BN&&+`Uh?Zd1Kqx7nyN&j!2gI!msdB zA1!9d35NOv2>iL~?12uT{;KuNn+hU0T~>W%IT8e$Yj5eV6_8-l^UB#n-BCb|Vc)*c zM~301F5X-QC~IW2RgdY=!16iWn^Qa)X6ygc5t*QalK%HE3P~yOi~8dCl+ory*d->S z^|U<_=mpi@WzYN}Y|qs`TBR1$VZc~_pfSR%P=4MWV zY*#!Sr<-u*Oq-pD~8cy_&^GW;FOnE--qxFBy)hJk24^(7`h1%NO^K6ev8xHGa`M5vU6r zqpx0x1Z&qXo6;WnLWr6SN7*Ly3U7+!D$yBk|Xi?}M%ziRfyAL@SB0;qW+L{vHLc zru2<3=E=aH)?GF5k^)R6!W2VW8Zgay=2Hn8$zaJ-b7J@l9V)jgah5EmfPc(z=~8zh zbPx$E?*bzsrDWrYqXFK)>(5yjs4R{*pV%r02btgp){}~JzNXlu&YR03V~YIiOlmHZ zCRh=17e+$Nz^UKn(c>Nlom&U8GvArO!v_X@<+n}2{p8!>##IyGV(p<`p8lr+8#VWo zMTB``JI99&3OgcEgL?OO6$=e3H`rge%$tKmw<4H7g7Y;3I@ ziGn$Fcjg#m@cX`v>cB$^bT@~uOdX{Gaq^62tZFheRtLN^WmM8&>~w7jTPj3;mhagl zK?AB?!;KaBNDz$@xW-xU36JjC-MeS6fgh=#l4d*?Ot9+r{nfjjrnnxW`cpmB6b0P5 z{qh;#CvM#6w^}s?-Kh5;G&@ZoW{VtqA(JT_ko|n^GUIz`QgX?{K~vD`X#Hi$O@N`! 
z#{yNHZg_>_^Ze1h2z(dQcIK4}4G%1L-kViQMWrtLxu|(M@~stiG#Di#RpqGG)LADQ z(p!ierQawR^sHO?kZ8e_$Zu}GQQ=Z#NGHFzqg z9kJz}*pVqI1N0d<+n{uu0Igb*d%1HQprf+j)TeSH>|OS(m)aN%18RncTP~BJ!R_pp z3(un9A5&3#R2P{FDr`BTN%trqH}Loh-I@ktC$_R@GWe~1dgoE{6djVh*h2P6rNS$H zqSkl>4cu(=9&wIDKzzYs8R4iKkebI0R-*tHK7CU^l4Odhk(L4j%w{-{wC!VVy(y|~ zvKLA2FhPPk(+_($Q=qvE0y)bBmR66DYrUzapcj8k(B`Wt{8#ku!NXgo@R*~j$9<5nqm77N(IsW>MnkpI^+Wa^3TrU-`ZK`GGVSutynKbcDI~ntL{&@M= zo`&JyYo?e#P%z_SX9Uj_8K=CC2S`pv;m(c{o5sf^9C~-x;HgMaE8J$vQ zXW_%oQQ%DLWhK2KgTSvm`hhMA42!$weRiP1k%s7-)%%m7r=3-0WSkBm(l`HY*Gq-_ zY2{Ka?KHS1O1wk0B}Kqc3)%FKpd(CMKbX?VHNg4T%(gz?O;OA^Ra;ro3|Wi6H*Hxq z#fsmC59WPM@Cv(0ht4KbSi2ZsrEtImxI-5_I`d88hMLdO&%$PKEq~dPquUf}C?k!Y z!p2Z{Rk@{X+6L1XxCpa#;rMi$Lv-?I8gBl!i~4klkcys}Iwn5<(J{xKsW((P84tSt z_TfEBLzb$r%tx;%XpH;m%!6dS?qGN7%mAa5*L_=@>PgsnMu{}a6^y@5@qfI3kB9+# z-qCM%IpFf|E)jNV0mocIM_Juo`|ylbsBQJzSS%_>^ryrRruPV>aVj!2F5%1yj7$cyPn52s13>epco; zV2bRPJR!TaOi-B0dN|YE-URj=>{7I~HGw~W$9E+LnL?3KtwxlI862jZDYfe`g*$WU zZ^ng8z`5XWnnZ*JidX*WiTp}JLG8a^>m-x#jAFdTas5;@X`C&%`ks!f;vQ@gipfa$ z?-sejhK7ea>?D~#QZQZMw}LNY<~-fKa7V|yo{d$i8Omb#lKd(DLt2j-a|2YxbuG}V3RD;2Pz~>BW z6A7%gC{H=RjDmX~E)B=ekRfy>JaB;l$^@gA%4ck7(3f%C^OH<6P&zuIf4!trfh;j< zrF1A2zMtB2gIg;JhHdk zzx(D`txWN^Wvh2L1C)nnyY1gZ7()#0iKx7=3Fs^?iyb&@3Zh|yM=ExiL5l|S(~FGX zXYYjB%`1E<$hzPXneld;yu+v!d2FBBCBgK)@zt7 z3lhAs!pz4;|DGexM7Om#o-javQmL`W8bEdP6P?f0*ACE9r1j6Kj0i8X0vz9u2f?F< zkJ5dvl3*>a9hR%3z~Wa@zePV8)c4F;7%-N~D!KZfcZ_M!o!7igN+=l;Hl#k9eN2a7 zq0xp-7O9}zcswSY2fbZ+N7>UH;UA$mS?enitkm7#XNmGAq|d|vE6>oP&%unTX*Io z9sWB#_R7U5748h*P2ndeL2-}I10q)#q@K3C;-;bid%k$~7Q8UV&$o^X^eLL5G+4G> zb~nSKF{{+wRVFAfFIV~KyD|Pg_A7ewKSS`7?VUDlHv;~OhKJA9OrcNJlw=fP3J%oi z^0o*SGuUvs1*48v)fQ-`4pAY?d8im2a2U6J6NXYY5A>irv zAap%R8X(p&9K;HIr#U9~O(0C{52D!P@%Yb`@@Kup2OW zB_bXScht5gvh2PmNMVyEUyhSHfFmAfJiXc*kJdQ zu>t9DNznRC@kQ*!=7(h_*c+DocNmOOid|$=orxj1w7PK^Z8d^?If~bf(9S{G?xcZDM88PnE86^W6w$IAU^k1T2sDe$i*ccfpdUIR{-ZJEO*BP@{r6kpY4*v~gj6Vcf92XBmrBQEE%{VpoiN8J~0@xR*$kT{>l_I8IOM8tWFhjIf%1aW9VW! 
zGWHx&gmC1-optI#nymZJ{|TSMMZqYXjTi92Tfv=Ll> zB^Fln%>@3t`BH9|hzW3*Ngi4@GlD%lS{$o8HDGksy7t6=Z{(r6b4OV7C!z7RJ}s-2 z6bw>!wXV;kqs=AdvpE}*u`{mdy&xN z4T1U4dH+-%6A+Sn)?@j_7~*@mbPLuDp@Z*||2&r_1b5uj>TUJF*_Zt_%Fk#h)$n5P zt-cg&{hhpXZxkIBLJIc2TS&sd8P*4!+%)vP=F?ynOF`>L2F^~|R5GS(KjgTX5rveU zFCFfjXRK5d0WOWEAlwi*=sulIM9Xop?v{23bjxz{?R=nz)_$hfMXu<>xk2qR{{I}{ z1xsrD?Rg?_R*VRLmk5S;ag9zYmq=iY7qu{!geea%PGThPFyk;Lnfv4T2A-W7u z>Xq*Kb%i>e1P3H#`ltzX5Ie8qSJ;yR>n~MioxjlFc2)LqzH^>n%A~`V?x%&uHp%8x zRYOd!(PO@EYl1br+e&i1jIm_R!6JRt2${R3Lp{G3pw$e4BSeA#SwUvmq+)&0)qHfC z>xv-++qXY|&}9Tu9QIz@8jZm;gq=jK(Km$JNuxZm2@NP*3o@)$a={hfW5QAXG@MhZ z`+o9j3jR!>zBw04NBvLHl6+%HSRfPgdx49Fea5CLT<0iw^(0?qSt1#$Bu(C0l|^BG z@ZWc)jU;R!He4@#AB2^jyFSE!CSqRng-ekPdS|Ofzg5!L$9LU2)H)6a0$j8|%W~bw z0nRtk&3I1}VO!H_@rd>y*x8|^SmH#2$!_Vueo7SNX-*Da2qA-Rg$Qeb9tF-yJN5SR z(_rY<3*qM8B=}7}9oBZ74oB^W1}e)_AhO12>|hKHe%5)2$#%E`j~1tGY@HUm9LN_n zd}Dx{%Bl51g5k!Pp5nRmgvS_%o0KM$9gHw&M|v&OAAOvve+CQ|v6OWT4@~Ik(MG^JU(08hDsgM9aS9^n= zKX!n$wJn=TNkoulXL=IW7zDH8y+w&OB)A>I_HfZN3LJG>fA}*hNtMb!YvL_Jfsm*5 zw~g6o;KKUSY3Ozm_{2;296dmXPU^V;zQhzbOHc0E@+}cI7i3hcc{(s!tAP9!HBH=g z(v{$xXMkg-OS>WjjqrBjdnprnW8BsreaxKG5M7T49+$G#$M-Q$F6xfxfRGHUe~B&u zm#)urp|E&wY6#xGt}fFYc)ia~a+>6=CWCtme5zel{52bx8Fv zeOu^mS0=si{>8LEWVfR{Y63U%x80KD0#N|5{|LSBYxH&7UunNf7)ybnJ z92$k!Qr;~;)FWZUU)`?9aY5+tn#p)Jj7r314PWW?gATYc=zX!GJ^|zYXvio3(}(9A z^@VP?9ih{^tA%in2$D3zg7MKHV6t9QI+98PA^M&{Zib;4z@w0vp2{#Hwh#>~)hXaE zZMk&kdm@a#G%}LAl?17@AD{2P>vuZ0y@LAK2AW@ZCW{8D z;@sj{MWID~y!V{)%!W}q>6C!Qz>`M!=GWtE{nPq*<5?ev$2b8!Z@2C&chLrJ-|bm) zi+bQMdG~z-O&=QeuKw&ftq*lvWQr2IA&fZMd`sQQz=RIVWy>NpXg4@Z%+WoJf;`UV zKO8BzalL4jief4F^lYEB7AGA`Rx6G+QIqgx(bKHg-xBfr!;kKB8!7na0GY68Nyen& znJYoTQK%%hF|g2pgnnUDE783{_-ja>tNJPt6Q!J-YeO86d!X*shHQQ8&sTmnKqr9u zK{np47LGuU?iIV~N(6h!^kjSAAaH0Dr5ES>@%;1N!*;n|K<*$^aYG0|s_GFx;`lG5H(9`dDcH>u1t2z&(7_8P=S8nqb*j zJN;Q*7Yc6Nr1pvsV1};K`&L>XSgOVsS5p~aT-7{W^GO$$N8aW6A5eiG#ox-hwN24^ zKYO39aXe;gU83Jzr{ks15u=(@$+$YYu=Nlr343c&?3Nh(&$2h#5;2~DoGhhd;Asdo!KDtu&}-NqKb$ek0L!r3mmX}-zUKndMu>Fq=Ia2oq8}d{SIUpi%qq!bY_ykZ$hlw^@#bOR=R_ zW42LXV4tRrT{#1k=>f0YgOb4ALDGpfl?>F%A62N^{&=vaPAYJx8^g+B zrHk_w`*6pdMI)hefQrAlJ|KqyrY$flEB??&>WA^rTh0V5X34mHT0sY&|I+dLldA@z zqWP(>wGINoeX@j&TMvAkJ!WNJ5n$>>_4xKn0C&vGYQ?K{;J$Q`vcSYXDmeZp^(WvJ z!?hZ%>42?d-10`H@5}=_#%Y!*J+Df}=QsCD^qok;=wKP;mc~TXm;W4RoSlHdZLROB z_mlAyweAm5E(+a3GXBdJCLy79qg$eH5H5&J=-gl-qA06KrCg~!PX9Y`l240(VrTq9 z8?F<8+4=})HwV=bK5nTu`gDp2vHGz|`}Q$BowMcD9^NDfdKF{y%{B`9UUA=&pNR#- z-Z2aR`2;x1Iqh|@G7+4r-lqi{CV|_`V`1;XWRTWX)OnaghcTDAl|8u(H|>bQ$rD}r z5a)gkaj-vo+i2KWPNNF>a&9o9l!dB2ZW z#gLV!5lU8TF-bl{HNC{fVwAawYT?V-+#12Loa zJI6j9d?P;JwB=qj${E*|nRwIj?qxwbH#He4cedY)H%P)9_8oouQxcID(J@E$NKU{f z$Aq`5v5@iG)#95=4A0k4QiwHLl!V{tpOW9W1tEp+L{ZR^7c##L+Co{F&ud_8M4x}DUFBdAdCJ_)7fIcq#*iP zM*#swwN{39mdm2ob-R(!8wb!(U1x=SgMccsnJNACy4b@N%sSJkgVdwG8TW5#BGxy| zjZu`~x3L)gjF$$y;n{vxo2(5Rby8Bc>gWP_AN87shS)*Kyv#X!q4WT-t)0-67Z+e$ zv6Mc%bk)M|H+BE)3X8eu8v6H!B@=#STx z1k@E)F531g7B_lcUU>K+5_|WqC5C#Cu9>`1Y$(&`3t#s{TP-l6+fxG z3DQTolFnmgJ_I!iH zmq~?0=qp9=_P2hUGphm(_vIJ4HMD?(%jq)CYQd9v!E(MA+EDUet98#Vb?`km(v-!^ z2VS~0P9A@?&^*me;*fecCR%j)Zt!G)(dhisNzG&`-Vi#ih+3kl?qF z8IgeL#-i!#wz2p^q;hNEgGhX(L3#2;nS{?ppIP0f1~SGqW&Z0|FRbrh+*Hf$h+)T{ z_?&;Ihgm06f0hXuK)lS}hcl!0u-;rw^*>Am9*VAZYIqQMcc}|W^@PFI>;V3Xq$QP+`l<^)(~|LfGjFcPbU6hCJFM}rEYbD01`hVSLuJXD{b3=eqwEx6Re zL1^1f$I@^Bwf$AS`R-zvy4U%*(zpiRp{CvcYNdr0c?DBUs@hn6D2ct=TN5{#wYzg1 z)Wk8F`5AZ9eQ>`@E#01~s{};h^=ZWf4Y*Z!WJB(G4VdX_Yj%-82pM&gTeF#z;kW~M zz5T}zXVzx_zMs;=9kL1|kGn$gWJ=-TwZ};)Y(h3;T2I2u?rT?`7c$Q3>_ITRKM}cx 
zH4C&JY`zr?&V}Ho{uk#~(?D6Sr>o5-3|yE`=LFOFKz8x(_nJll;8xyZyK0;UzFe#4 z(&I|tT#{8=c78SR2(U|^p{@g|`^inmM5_V9xDpAXekG6}%(SF*C=0c}H8>YmK?o1=i!$5dWbnWdK1je4}pc77i;IvXm ze?>wyg^Aa~pE;)u!J}k*8vOx&{pZYg+mFEIrB;o_~{8l{f{VN}3P;{5K2j z+4Yi3`{p2SjE5ADvyit<623M%4e`qF+F1oAz<2qs^1bRI80%(y%01c)7pu?2ZfHAV z>0{ms_WXL#$>*KY_8Neg_QU#Yv7=CT=fZ$9X#$E_a-@uNf1pKB&S3d=J!>_n83Ra#0?h0a!QHE|%5(xP&FX|!wXb!3| zTyq#(CN<~#-&b5PvbrnD;DO$kx}WE;1Yp2ja~TFx7koBOHMV~|8ul!uhNLFtfq}Qo V^fCD|5E*AVu)DwF|7*~a|9=^u2VejI From 5c83d9df544c31132686ef300cf429b019c62de2 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Thu, 4 Apr 2024 10:18:33 +0200 Subject: [PATCH 06/54] particle exchange GPU --- src/MovWindow/SimWindow.cpp | 11 +- src/Particles/Particles.h | 2 +- src/Particles/nvidiaParticles.cu | 171 +++++++++++++--------- src/Particles/nvidiaParticles.h | 10 +- src/Patch/Patch.cpp | 5 +- src/Patch/VectorPatch.cpp | 74 +--------- src/SmileiMPI/AsyncMPIbuffers.cpp | 31 ++-- src/SmileiMPI/AsyncMPIbuffers.h | 2 +- src/SmileiMPI/SmileiMPI.cpp | 3 +- src/Species/Species.cpp | 92 ++++++------ src/Species/Species.h | 6 +- src/Species/SpeciesV.cpp | 6 +- src/Species/SpeciesV.h | 2 +- src/Species/SpeciesVAdaptive.cpp | 2 +- src/Species/SpeciesVAdaptiveMixedSort.cpp | 2 +- 15 files changed, 189 insertions(+), 230 deletions(-) diff --git a/src/MovWindow/SimWindow.cpp b/src/MovWindow/SimWindow.cpp index 08ffada69..6dbb5da57 100755 --- a/src/MovWindow/SimWindow.cpp +++ b/src/MovWindow/SimWindow.cpp @@ -384,14 +384,9 @@ void SimWindow::shift( VectorPatch &vecPatches, SmileiMPI *smpi, Params ¶ms, } // end loop nSpecies #if defined ( SMILEI_ACCELERATOR_MODE ) - if ( params.gpu_computing ) { - // ADD NEW PARTS ON GPU - for( unsigned int ispec=0 ; ispecvecSpecies[ispec]->particles_to_move->clear(); - // mypatch->vecSpecies[ispec]->particles->copyParticles( 0, mypatch->vecSpecies[ispec]->getNbrOfParticles(), - // *mypatch->vecSpecies[ispec]->particles_to_move, 0 ); - mypatch->vecSpecies[ispec]->particles->initializeDataOnDevice(); - mypatch->vecSpecies[ispec]->particles_to_move->initializeDataOnDevice(); + if( params.gpu_computing ) { + for( auto spec: mypatch->vecSpecies ) { + spec->allocateParticlesOnDevice(); } } #endif diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index a155baf7a..13941b40a 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -476,7 +476,7 @@ class Particles //! Extract particles escaping the box to buffers // ----------------------------------------------------------------------------- virtual void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ); - +virtual void extractParticles( Particles* particles_to_move ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device // ----------------------------------------------------------------------------- diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index d7a63f0b3..191e0943f 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -33,14 +33,24 @@ // Cell key manipulation functor definition //////////////////////////////////////////////////////////////////////////////// -//! Structure with specific function count_if_out for thrust::tuple operator -//! Return True if the entry is -1 as in the cell keys vector for instance -struct count_if_out +//! Predicate for cell_keys +//! 
Return True if the entry is equal to `code` +template +struct cellKeyEquals { constexpr __host__ __device__ bool operator()( const int& x ) const { - return x == -1; + return x == code; + } +}; + +struct cellKeyNegative +{ + constexpr __host__ __device__ bool + operator()( const int& x ) const + { + return x < 0; } }; @@ -250,7 +260,7 @@ namespace detail { }; - //! This functor assign a cluster key to a_particle. + //! This functor checks the cluster key of a_particle. //! template struct OutOfClusterPredicate @@ -286,7 +296,7 @@ namespace detail { __host__ __device__ bool operator()( const Tuple& a_particle ) const { - return thrust::get<0>( a_particle ) /* cluster key */ == -1; + return thrust::get<0>( a_particle ) /* cluster key */ < 0; } }; @@ -467,34 +477,34 @@ namespace detail { // - compute bins // NOTE: This method consumes a lot of memory ! O(N) - const auto new_particle_to_inject_count = particle_to_inject.deviceSize(); - const auto current_local_particles_count = std::distance( first_particle, last_particle ); - const auto new_particle_count = new_particle_to_inject_count + current_local_particles_count; + const auto initial_count = std::distance( first_particle, last_particle ); + const auto inject_count = particle_to_inject.deviceSize(); + const auto new_count = initial_count + inject_count; // NOTE: We really want a non-initializing vector here! // It's possible to give a custom allocator to thrust::device_vector. // Create one with construct(<>) as a noop and derive from // thrust::device_malloc_allocator. For now we do an explicit resize. - particle_to_inject.softReserve( new_particle_count ); - particle_to_inject.resize( new_particle_count ); // We probably invalidated the iterators + particle_to_inject.softReserve( new_count ); + particle_to_inject.resize( new_count ); // We probably invalidated the iterators // Copy out of cluster/tile/chunk particles // partition_copy is way slower than copy_if/remove_copy_if on rocthrust // https://github.com/ROCmSoftwarePlatform/rocThrust/issues/247 - const auto first_particle_to_inject = particle_iterator_provider( particle_to_inject ); + const auto first_to_inject = particle_iterator_provider( particle_to_inject ); + const auto first_to_reorder = first_to_inject + inject_count; // NOTE: copy_if/remove_copy_if are stable. - const auto partitioned_particles_bounds_true = thrust::copy_if( thrust::device, + // First, copy particles that are not in their own cluster anymore + const auto first_already_ordered = thrust::copy_if( thrust::device, first_particle, last_particle, - // Dont overwrite the particle_to_inject (at the start of the array) - first_particle_to_inject + new_particle_to_inject_count, + first_to_reorder, OutOfClusterPredicate{ cluster_type } ); - const auto partitioned_particles_bounds_false = thrust::remove_copy_if( thrust::device, + // Then, copy particles that are still in their own cluster + const auto end = thrust::remove_copy_if( thrust::device, first_particle, last_particle, - // Do the copy with a destination - // starting from partitioned_particles_bounds_true - partitioned_particles_bounds_true, + first_already_ordered, OutOfClusterPredicate{ cluster_type } ); // Compute or recompute the cluster index of the particle_to_inject @@ -502,23 +512,23 @@ namespace detail { // - we can "save" some work here if cluster index is already computed // for the new particles to inject (not the one we got with copy_if). 
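        // Side note on the copy_if/remove_copy_if pair above: together they act
        // as a stable partition done in two passes (rocThrust's partition_copy
        // being slower, per the NOTE). A self-contained sketch of the same idea
        // on a bare key array; the predicate and buffers are illustrative and
        // not Smilei data structures:
        //
        //     // requires <thrust/copy.h>, <thrust/remove.h>,
        //     //          <thrust/device_vector.h>, <thrust/execution_policy.h>
        //     struct IsNegative {
        //         __host__ __device__ bool operator()( int k ) const { return k < 0; }
        //     };
        //
        //     // 'out' must hold at least in.size() elements; matching keys land
        //     // first, in order, followed by the non-matching keys, also in order.
        //     void stablePartitionCopy( const thrust::device_vector<int> &in,
        //                               thrust::device_vector<int> &out )
        //     {
        //         auto mid = thrust::copy_if( thrust::device, in.begin(), in.end(),
        //                                     out.begin(), IsNegative{} );
        //         thrust::remove_copy_if( thrust::device, in.begin(), in.end(),
        //                                 mid, IsNegative{} );
        //     }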
// - doComputeParticleClusterKey( first_particle_to_inject, - partitioned_particles_bounds_true, + doComputeParticleClusterKey( first_to_inject, + first_already_ordered, cluster_type ); - const auto first_particle_to_inject_no_key = particle_no_key_iterator_provider( particle_to_inject ); - const auto particle_to_rekey_count = std::distance( first_particle_to_inject, - partitioned_particles_bounds_true ); + const auto first_to_inject_no_key = particle_no_key_iterator_provider( particle_to_inject ); + const auto particle_to_rekey_count = std::distance( first_to_inject, + first_already_ordered ); doSortParticleByKey( particle_to_inject.getPtrCellKeys(), particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, - first_particle_to_inject_no_key ); + first_to_inject_no_key ); // This free generates a lot of memory fragmentation. // particle_container.free(); // Same as for particle_to_inject, non-initializing vector is best. - particle_container.softReserve( new_particle_count ); - particle_container.resize( new_particle_count ); + particle_container.softReserve( new_count ); + particle_container.resize( new_count ); // Merge by key // NOTE: Dont merge in place on GPU. That means we need an other large buffer! @@ -527,9 +537,9 @@ namespace detail { particle_to_inject.getPtrCellKeys(), // Input range 1, first key particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, // Input range 1, last key particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, // Input range 2, first key - particle_to_inject.getPtrCellKeys() + new_particle_count, // Input range 2, last key - first_particle_to_inject_no_key, // Input range 1, first value - first_particle_to_inject_no_key + particle_to_rekey_count, // Input range 2, first value + particle_to_inject.getPtrCellKeys() + new_count, // Input range 2, last key + first_to_inject_no_key, // Input range 1, first value + first_to_inject_no_key + particle_to_rekey_count, // Input range 2, first value particle_container.getPtrCellKeys(), // Output range first key particle_no_key_iterator_provider( particle_container ) ); // Output range first value @@ -1365,38 +1375,61 @@ unsigned int nvidiaParticles::deviceCapacity() const } // ----------------------------------------------------------------------------- -//! Extract particles from the Particles object and put -//! them in the Particles object `particles_to_move` +//! Move escaping particles to the buffers // ----------------------------------------------------------------------------- -void nvidiaParticles::extractParticles( Particles* particles_to_move ) +void nvidiaParticles::extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ) +{ + // Escaping particles have a cell_key equal to -2-direction + // where direction goes from 0 to 6 and tells which way the particle escapes. + // If the cell_key is -1, the particle must be destroyed so it is not extracted. + + extractParticlesByKey<-2>( copy[0], buffer[0] ); // x_min + extractParticlesByKey<-3>( copy[1], buffer[1] ); // x_max + if( ndim > 1 ) { + extractParticlesByKey<-4>( copy[2], buffer[2] ); // y_min + extractParticlesByKey<-5>( copy[3], buffer[3] ); // y_max + if( ndim > 2 ) { + extractParticlesByKey<-6>( copy[4], buffer[4] ); // z_min + extractParticlesByKey<-7>( copy[5], buffer[5] ); // z_max + } + } +} + + +//! Copy particles which have cell_key = key +template< const int key> +void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) { // TODO(Etienne M): We are doing extra work. 
We could use something like - // std::partition to output the invalidated particles in particles_to_move + // std::partition to output the invalidated particles in buffer // and keep the good ones. This would help us avoid the std::remove_if in // the particle injection and sorting algorithm. - - // Manage the send data structure - nvidiaParticles* const cp_parts = static_cast( particles_to_move ); - const int nparts = gpu_nparts_; - const int position_dimension_count = nvidia_position_.size(); - - const int nparts_to_move = thrust::count_if( thrust::device, - nvidia_cell_keys_.cbegin(), - nvidia_cell_keys_.cbegin() + nparts, - count_if_out() ); - - // Resize it, if too small (copy_if do not resize) - cp_parts->resize( nparts_to_move ); - + + if( ! copy ) { + return; + } + + const int nparts = gpu_nparts_; // Iterator of the main data structure // NOTE: https://nvidia.github.io/thrust/api/classes/classthrust_1_1zip__iterator.html#class-thrustzip_iterator - const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), + const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), nvidia_momentum_[0].begin(), nvidia_momentum_[1].begin(), nvidia_momentum_[2].begin(), nvidia_weight_.begin(), nvidia_charge_.begin() ) ); - const auto source_iterator_last = source_iterator_first + nparts; // std::advance + const auto source_iterator_last = source_iterator_first + nparts; // std::advance + + nvidiaParticles* const cp_parts = static_cast( buffer ); + + const int nparts_to_copy = thrust::count_if( thrust::device, + nvidia_cell_keys_.cbegin(), + nvidia_cell_keys_.cbegin() + nparts, + cellKeyEquals() ); + + // Resize it, if too small (copy_if do not resize) + cp_parts->resize( nparts_to_copy ); + const auto destination_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( cp_parts->nvidia_position_[0].begin(), cp_parts->nvidia_momentum_[0].begin(), cp_parts->nvidia_momentum_[1].begin(), @@ -1404,24 +1437,23 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) cp_parts->nvidia_weight_.begin(), cp_parts->nvidia_charge_.begin() ) ); - // Copy send particles in dedicated data structure if nvidia_cell_keys_=0 (currently = 1 if keeped, new PartBoundCond::apply(...)) + // Copy send particles in dedicated data structure thrust::copy_if( thrust::device, source_iterator_first, source_iterator_last, - // Copy depending on count_if_out()(nvidia_cell_keys_[i]) nvidia_cell_keys_.cbegin(), destination_iterator_first, - count_if_out() ); + cellKeyEquals() ); - // Copy the other position values depending on the simulation's grid - // dimensions - for( int i = 1; i < position_dimension_count; ++i ) { + // Copy the other position values depending on the simulation's grid dimensions + const int ndim_particles = nvidia_position_.size(); + for( int i = 1; i < ndim_particles; ++i ) { thrust::copy_if( thrust::device, nvidia_position_[i].cbegin(), nvidia_position_[i].cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_position_[i].begin(), - count_if_out() ); + cellKeyEquals() ); } // Special treatment for chi if radiation emission @@ -1431,7 +1463,7 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) nvidia_chi_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_chi_.begin(), - count_if_out() ); + cellKeyEquals() ); } if( has_Monte_Carlo_process ) { @@ -1440,7 +1472,7 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) 
nvidia_tau_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_tau_.begin(), - count_if_out() ); + cellKeyEquals() ); } if( tracked ) { @@ -1449,10 +1481,10 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) nvidia_id_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_id_.begin(), - count_if_out() ); + cellKeyEquals() ); } - particles_to_move->copyFromDeviceToHost(); + buffer->copyFromDeviceToHost(); } @@ -1475,7 +1507,7 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) // std::begin( nvidia_position_[i] ), // std::begin( nvidia_position_[i] ) + nparts, // std::cbegin( nvidia_cell_keys_ ), -// count_if_out() ); +// cellKeyEquals<-1>() ); // } // //} @@ -1490,7 +1522,7 @@ int nvidiaParticles::eraseLeavingParticles() const int nparts_to_remove = thrust::count_if( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.begin() + nparts, - count_if_out() ); + cellKeyNegative() ); if( nparts_to_remove > 0 ) { @@ -1508,7 +1540,7 @@ int nvidiaParticles::eraseLeavingParticles() first_particle, last_particle, nvidia_cell_keys_.cbegin(), - count_if_out() ); + cellKeyNegative() ); // Remove the other position values depending on the simulation's grid // dimensions @@ -1517,7 +1549,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_position_[i].begin(), nvidia_position_[i].begin() + nparts, nvidia_cell_keys_.cbegin(), - count_if_out() ); + cellKeyNegative() ); } if( has_quantum_parameter ) { @@ -1525,7 +1557,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_chi_.begin(), nvidia_chi_.begin() + nparts, nvidia_cell_keys_.cbegin(), - count_if_out() ); + cellKeyNegative() ); } if( has_Monte_Carlo_process ) { @@ -1533,7 +1565,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_tau_.begin(), nvidia_tau_.begin() + nparts, nvidia_cell_keys_.cbegin(), - count_if_out() ); + cellKeyNegative() ); } if( tracked ) { @@ -1541,7 +1573,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_id_.begin(), nvidia_id_.begin() + nparts, nvidia_cell_keys_.cbegin(), - count_if_out() ); + cellKeyNegative() ); } // Update current number of particles @@ -1679,8 +1711,7 @@ void nvidiaParticles::importAndSortParticles( Particles* particles_to_inject ) int nvidiaParticles::prepareBinIndex() { if( first_index.size() == 0 ) { - // Some Particles object like particles_to_move do not have allocated - // bins, we skip theses. + // Some Particles object do not have allocated bins, we skip theses. return -1; } diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index 249a9fcf2..64164fad7 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -113,10 +113,12 @@ class nvidiaParticles : public Particles }; // ----------------------------------------------------------------------------- - //! Extract particles from the Particles object and put - //! them in the Particles object `particles_to_move` + //! Move escaping particles to the buffers // ----------------------------------------------------------------------------- - void extractParticles( Particles* particles_to_move ) override; + void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ) override; + + template< const int key> + void extractParticlesByKey( bool copy, Particles* buffer ); // ----------------------------------------------------------------------------- //! 
Erase particles leaving the patch object on device and returns the number of particle removed @@ -124,7 +126,7 @@ class nvidiaParticles : public Particles int eraseLeavingParticles() override; // ----------------------------------------------------------------------------- - //! Inject particles from particles_to_move into *this and return he number of particle added + //! Inject particles from particles_to_inject into *this and return the number of particle added // ----------------------------------------------------------------------------- int injectParticles( Particles* particles_to_inject ) override; diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index 546e0ca08..d61c1f9e1 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -784,10 +784,10 @@ void Patch::cornersParticles( int ispec, Params ¶ms, int iDim ) } - // Copy corner particles to the start or the end of the particles to be sent for the following dimension + // Copy corner particles to the end of the particles to be sent for the following dimension for( size_t otherDim = iDim+1; otherDim < (size_t) ndim; otherDim++ ) { if( indices_corner_min[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][0] != MPI_PROC_NULL ) { - partRecv.copyParticles( indices_corner_min[otherDim-iDim-1], *buffer.partSend[otherDim][0], 0 ); + partRecv.copyParticles( indices_corner_min[otherDim-iDim-1], *buffer.partSend[otherDim][0], buffer.partSend[otherDim][0]->size() ); } if( indices_corner_max[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][1] != MPI_PROC_NULL ) { partRecv.copyParticles( indices_corner_max[otherDim-iDim-1], *buffer.partSend[otherDim][1], buffer.partSend[otherDim][1]->size() ); @@ -1310,7 +1310,6 @@ void Patch::deleteFieldsOnDevice() // for( unsigned int ispec=0 ; ispec<( *this )( ipatch )->vecSpecies.size() ; ispec++ ) { // Species *spec = species( ipatch, ispec ); // spec->particles->initializeDataOnDevice(); -// spec->particles_to_move->initializeDataOnDevice(); // //#pragma acc enter data copyin(spec->nrj_radiation) // } diff --git a/src/Patch/VectorPatch.cpp b/src/Patch/VectorPatch.cpp index 22d976ba2..9067d049d 100755 --- a/src/Patch/VectorPatch.cpp +++ b/src/Patch/VectorPatch.cpp @@ -4671,86 +4671,22 @@ void VectorPatch::allocateDataOnDevice(Params ¶ms, RadiationTables *radiation_tables, MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables) { - + #if defined( SMILEI_ACCELERATOR_MODE ) // TODO(Etienne M): FREE. If we have load balancing or other patch // creation/destruction available (which is not the case on GPU ATM), // we should be taking care of freeing this GPU memory. 
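    // Side note on the Patch::cornersParticles hunk above: passing dest_id equal
    // to partSend->size() appends the corner particles to the send buffer, where
    // the old call with dest_id = 0 inserted them at the front and shifted every
    // particle already stored there. The cost difference, sketched on a plain
    // std::vector (purely illustrative, not Smilei code):
    //
    //     #include <vector>
    //
    //     // Old behaviour on the min side: insert at the front,
    //     // O(dst.size()) element moves on every call.
    //     void prependAll( std::vector<double> &dst, const std::vector<double> &src )
    //     {
    //         dst.insert( dst.begin(), src.begin(), src.end() );
    //     }
    //
    //     // New behaviour: append, amortized O(src.size()) per call.
    //     void appendAll( std::vector<double> &dst, const std::vector<double> &src )
    //     {
    //         dst.insert( dst.end(), src.begin(), src.end() );
    //     }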
- const int npatches = this->size(); - - // const int sizeofJx = patches_[0]->EMfields->Jx_->size(); - // const int sizeofJy = patches_[0]->EMfields->Jy_->size(); - // const int sizeofJz = patches_[0]->EMfields->Jz_->size(); - // const int sizeofRho = patches_[0]->EMfields->rho_->size(); - - // const int sizeofEx = patches_[0]->EMfields->Ex_->size(); - // const int sizeofEy = patches_[0]->EMfields->Ey_->size(); - // const int sizeofEz = patches_[0]->EMfields->Ez_->size(); - - // const int sizeofBx = patches_[0]->EMfields->Bx_->size(); - // const int sizeofBy = patches_[0]->EMfields->By_->size(); - // const int sizeofBz = patches_[0]->EMfields->Bz_->size(); - - for( int ipatch=0 ; ipatchvecSpecies.size(); ispec++ ) { - Species *spec = species( ipatch, ispec ); - spec->particles->initializeDataOnDevice(); - spec->particles_to_move->initializeDataOnDevice(); - - // Create photon species on the device - if ( spec->radiation_model_ == "mc" && spec->photon_species_) { - spec->radiated_photons_->initializeDataOnDevice(); - } - - // Create pair species on the device - if ( spec->mBW_pair_species_[0] && spec->mBW_pair_species_[1]) { - spec->mBW_pair_particles_[0]->initializeDataOnDevice(); - spec->mBW_pair_particles_[1]->initializeDataOnDevice(); - } - - //#pragma acc enter data copyin(spec->nrj_radiation) + for( auto spec: patch->vecSpecies ) { + spec->allocateParticlesOnDevice(); } // Allocate field data structures on GPU - patches_[ipatch]->allocateFieldsOnDevice(); - - // const double *const Jx = patches_[ipatch]->EMfields->Jx_->data(); - // const double *const Jy = patches_[ipatch]->EMfields->Jy_->data(); - // const double *const Jz = patches_[ipatch]->EMfields->Jz_->data(); - // const double *const Rho = patches_[ipatch]->EMfields->rho_->data(); + patch->allocateFieldsOnDevice(); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jx, sizeofJx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jy, sizeofJy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jz, sizeofJz ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Rho, sizeofRho ); - - // const double *const Ex = patches_[ipatch]->EMfields->Ex_->data(); - // const double *const Ey = patches_[ipatch]->EMfields->Ey_->data(); - // const double *const Ez = patches_[ipatch]->EMfields->Ez_->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ex, sizeofEx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ey, sizeofEy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ez, sizeofEz ); - - // const double *const Bmx = patches_[ipatch]->EMfields->Bx_m->data(); - // const double *const Bmy = patches_[ipatch]->EMfields->By_m->data(); - // const double *const Bmz = patches_[ipatch]->EMfields->Bz_m->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmx, sizeofBx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmy, sizeofBy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmz, sizeofBz ); - - // const double *const Bx = patches_[ipatch]->EMfields->Bx_->data(); - // const double *const By = patches_[ipatch]->EMfields->By_->data(); - // const double *const Bz = patches_[ipatch]->EMfields->Bz_->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( Bx, sizeofBx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( By, sizeofBy ); - // 
smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( Bz, sizeofBz ); - } // end patch loop // TODO(Etienne M): We should create a function that does the copy of the radiation table. diff --git a/src/SmileiMPI/AsyncMPIbuffers.cpp b/src/SmileiMPI/AsyncMPIbuffers.cpp index a5a53dbb0..4cb283d17 100755 --- a/src/SmileiMPI/AsyncMPIbuffers.cpp +++ b/src/SmileiMPI/AsyncMPIbuffers.cpp @@ -1,5 +1,6 @@ #include "AsyncMPIbuffers.h" +#include "ParticlesFactory.h" #include "Field.h" #include "Patch.h" @@ -75,29 +76,29 @@ SpeciesMPIbuffers::~SpeciesMPIbuffers() } -void SpeciesMPIbuffers::allocate( unsigned int ndims ) +void SpeciesMPIbuffers::allocate( Params ¶ms, Patch *patch ) { - srequest.resize( ndims ); - rrequest.resize( ndims ); + srequest.resize( params.nDim_field ); + rrequest.resize( params.nDim_field ); - partRecv.resize( ndims ); - partSend.resize( ndims ); + partRecv.resize( params.nDim_field ); + partSend.resize( params.nDim_field ); - partSendSize.resize( ndims ); - partRecvSize.resize( ndims ); + partSendSize.resize( params.nDim_field ); + partRecvSize.resize( params.nDim_field ); - for( unsigned int i=0 ; i > partRecv; diff --git a/src/SmileiMPI/SmileiMPI.cpp b/src/SmileiMPI/SmileiMPI.cpp index c35a69fe9..4fe93fd03 100755 --- a/src/SmileiMPI/SmileiMPI.cpp +++ b/src/SmileiMPI/SmileiMPI.cpp @@ -929,8 +929,7 @@ void SmileiMPI::recv_species( Patch *patch, int from, int &tag, Params ¶ms ) recv( patch->vecSpecies[ispec]->particles, from, tag+2*ispec, recvParts ); MPI_Type_free( &( recvParts ) ); } - patch->vecSpecies[ispec]->particles->initializeDataOnDevice(); - patch->vecSpecies[ispec]->particles_to_move->initializeDataOnDevice(); + patch->vecSpecies[ispec]->allocateParticlesOnDevice(); } diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index 0fb38f673..bfc1ae036 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -90,7 +90,6 @@ Species::Species( Params ¶ms, Patch *patch ) : { // &particles_sorted[0] particles = ParticlesFactory::create( params, *patch ); - particles_to_move = ParticlesFactory::create( params, *patch ); regular_number_array_.clear(); partBoundCond = NULL; @@ -104,7 +103,7 @@ Species::Species( Params ¶ms, Patch *patch ) : dx_inv_[1] = 1./cell_length[1]; dx_inv_[2] = 1./cell_length[2]; - initCluster( params ); + initCluster( params, patch ); inv_nDim_particles = 1./( ( double )nDim_particle ); length_[0]=0; @@ -123,7 +122,7 @@ Species::Species( Params ¶ms, Patch *patch ) : }//END Species creator -void Species::initCluster( Params ¶ms ) +void Species::initCluster( Params ¶ms, Patch *patch ) { // NOTE: On GPU we dont use first_index, it would contain redundant data but // we are forced to initialize it due to ParticleCreator::create() and the @@ -252,7 +251,7 @@ void Species::initCluster( Params ¶ms ) #endif //Initialize specMPI - MPI_buffer_.allocate( nDim_field ); + MPI_buffer_.allocate( params, patch ); //ener_tot = 0.; nrj_bc_lost = 0.; @@ -386,7 +385,6 @@ void Species::initOperators( Params ¶ms, Patch *patch ) typePartRecv.resize( nDim_field*2, MPI_DATATYPE_NULL ); exchangePatch = MPI_DATATYPE_NULL; - particles_to_move->initialize( 0, *particles ); } @@ -396,7 +394,6 @@ void Species::initOperators( Params ¶ms, Patch *patch ) Species::~Species() { delete particles; - delete particles_to_move; delete Push; delete Interp; @@ -631,6 +628,34 @@ Species::deleteSpeciesCurrentAndChargeOnDevice( } } + +void Species::allocateParticlesOnDevice() +{ + particles->initializeDataOnDevice(); + for( auto partSends: 
MPI_buffer_.partSend ) { + for( auto partSend: partSends ) { + partSend->initializeDataOnDevice(); + } + } + for( auto partRecvs: MPI_buffer_.partRecv ) { + for( auto partRecv: partRecvs ) { + partRecv->initializeDataOnDevice(); + } + } + + // Create photon species on the device + if( radiation_model_ == "mc" && photon_species_ ) { + radiated_photons_->initializeDataOnDevice(); + } + + // Create pair species on the device + if( mBW_pair_species_[0] && mBW_pair_species_[1] ) { + mBW_pair_particles_[0]->initializeDataOnDevice(); + mBW_pair_particles_[1]->initializeDataOnDevice(); + } +} + + //! Copy particles from host to device void Species::copyParticlesFromHostToDevice() @@ -1754,33 +1779,22 @@ void Species::sortParticles( Params ¶ms ) // ----------------------------- // GPU version - - // particles_to_move contains, up to here, send particles - // clean it to manage recv particles - particles_to_move->clear(); // Clear on the host - // Merge all MPI_buffer_.partRecv in particles_to_move - for( int idim = 0; idim < params.nDim_field; idim++ ) { - for( int iNeighbor = 0; iNeighbor < 2; iNeighbor++ ) { - int n_part_recv = MPI_buffer_.partRecv[idim][iNeighbor]->size(); - if( n_part_recv != 0 ) { - // insert n_part_recv in particles_to_move from 0 - MPI_buffer_.partRecv[idim][iNeighbor]->copyParticles( 0, - n_part_recv, - *particles_to_move, - particles_to_move->size() ); + + // Merge all MPI_buffer_.partRecv in the first one + Particles * first_buffer = MPI_buffer_.partRecv[0][0]; + for( auto &partRecvs: MPI_buffer_.partRecv ) { + for( auto partRecv: partRecvs ) { + if( partRecv != first_buffer && partRecv->size() > 0 ) { + partRecv->copyParticles( 0, partRecv->size(), *first_buffer, first_buffer->size() ); + partRecv->clear(); } } } - - particles_to_move->copyFromHostToDevice(); - - // // Erase particles that leaves this patch - // particles->last_index[0] = particles->eraseLeavingParticles(); - // - // // Inject newly arrived particles in particles_to_move - // particles->last_index[0] += particles->injectParticles( particles_to_move ); - - particles->importAndSortParticles( particles_to_move ); + + first_buffer->copyFromHostToDevice(); + + particles->importAndSortParticles( first_buffer ); + #else // -------------------------- @@ -1791,24 +1805,6 @@ void Species::sortParticles( Params ¶ms ) int ndim = params.nDim_field; int idim; - // Compute total number of particles received - // int total_number_part_recv = 0; - //Merge all MPI_buffer_.partRecv in particles_to_move - // for( int idim = 0; idim < ndim; idim++ ) { - // for( int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) { - // int n_part_recv = MPI_buffer_.partRecv[idim][iNeighbor]->size(); - // if( ( n_part_recv!=0 ) ) { - // // insert n_part_recv in particles_to_move from 0 - // //MPI_buffer_.partRecv[idim][iNeighbor]->copyParticles( 0, n_part_recv, *particles_to_move, 0 ); - // total_number_part_recv += n_part_recv; - // //particles->last_index[particles->last_index.size()-1] += n_part_recv; - // //particles->cell_keys.resize(particles->cell_keys.size()+n_part_recv); - // } - // } - // } - //cout << "\t Species id : " << species_number_ << " - nparticles recv : " << blabla << endl; - - // Sort to adapt do cell_keys usage std::vector indexes_of_particles_to_exchange; for ( int ipart=0 ; ipart< (int)(getNbrOfParticles()) ; ipart++ ) { diff --git a/src/Species/Species.h b/src/Species/Species.h index b91c9521b..83a2bab9d 100755 --- a/src/Species/Species.h +++ b/src/Species/Species.h @@ -147,8 +147,6 @@ class Species //! 
Vector containing all Particles of the considered Species Particles *particles; - //! Data structure through which passes particles which move from one patch to another - Particles *particles_to_move; Particles particles_sorted[2]; //std::vector index_of_particles_to_exchange; @@ -344,7 +342,7 @@ class Species // ----------------------------------------------------------------------------- // 5. Methods - virtual void initCluster( Params & ); + virtual void initCluster( Params &, Patch * ); virtual void resizeCluster( Params & ); @@ -386,6 +384,8 @@ class Species #if defined( SMILEI_ACCELERATOR_MODE ) + void allocateParticlesOnDevice(); + //! Copy particles from host to device void copyParticlesFromHostToDevice(); diff --git a/src/Species/SpeciesV.cpp b/src/Species/SpeciesV.cpp index 89d12b340..4a4199b63 100755 --- a/src/Species/SpeciesV.cpp +++ b/src/Species/SpeciesV.cpp @@ -46,7 +46,7 @@ using namespace std; SpeciesV::SpeciesV( Params ¶ms, Patch *patch ) : Species( params, patch ) { - initCluster( params ); + initCluster( params, patch ); npack_ = 0 ; packsize_ = 0; @@ -106,7 +106,7 @@ SpeciesV::~SpeciesV() } -void SpeciesV::initCluster( Params ¶ms ) +void SpeciesV::initCluster( Params ¶ms, Patch *patch ) { int ncells = 1; for( unsigned int iDim=0 ; iDim Date: Thu, 4 Apr 2024 11:24:01 +0200 Subject: [PATCH 07/54] forgot to remove function --- src/Particles/Particles.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index 13941b40a..a155baf7a 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -476,7 +476,7 @@ class Particles //! Extract particles escaping the box to buffers // ----------------------------------------------------------------------------- virtual void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ); -virtual void extractParticles( Particles* particles_to_move ); + // ----------------------------------------------------------------------------- //! 
Erase particles leaving the patch object on device
 // -----------------------------------------------------------------------------

From 3afa356406ed886340af9afec929152fbf277ac3 Mon Sep 17 00:00:00 2001
From: Frederic Perez
Date: Thu, 18 Apr 2024 14:24:02 +0200
Subject: [PATCH 08/54] scatter recvBuffers on CPU instead of GPU

---
 makefile                          |  2 +-
 src/Particles/Particles.cpp       | 39 ++++++++++++++-
 src/Particles/Particles.h         |  5 +-
 src/Particles/nvidiaParticles.cu  | 81 +++++++++++++++----------------
 src/Particles/nvidiaParticles.h   | 11 +++--
 src/Patch/Patch.cpp               |  3 +-
 src/Smilei.cpp                    |  2 +-
 src/SmileiMPI/AsyncMPIbuffers.cpp | 17 +++++--
 src/Species/Species.cpp           | 16 +++---
 9 files changed, 110 insertions(+), 66 deletions(-)

diff --git a/makefile b/makefile
index 3aaff0201..36239640d 100755
--- a/makefile
+++ b/makefile
@@ -216,7 +216,7 @@ endif
 ifneq (,$(call parse_config,gpu_amd))
     CXXFLAGS += -DSMILEI_ACCELERATOR_MODE
     GPU_COMPILER ?= $(CC)
-    GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE -std=c++14 $(DIRS:%=-I%) #$(PY_FLAGS)
+    GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE -std=c++14 $(DIRS:%=-I%)
     GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS)
     GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu)
     GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o))
diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp
index b675ac12f..688c53085 100755
--- a/src/Particles/Particles.cpp
+++ b/src/Particles/Particles.cpp
@@ -1305,8 +1305,37 @@ void Particles::copyFromDeviceToHost()
 }
 
 // Loop all particles and copy the outgoing ones to buffers
-void Particles::extractParticles( const size_t /* ndim */, const bool copy[], Particles* buffer[] )
+void Particles::copyLeavingParticlesToBuffers( const bool copy[], Particles* buffer[] )
 {
+    // Leaving particles have a cell_key equal to -2-direction
+    // where direction goes from 0 to 5 and tells which way the particle escapes.
+    // If the cell_key is -1, the particle must be destroyed so it is not extracted.
+
+#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE )
+
+    // GPU
+
+    // Copy leaving particles to buffer[0] on the GPU
+    copyLeavingParticlesToBuffer( buffer[0] );
+
+    // Dispatch between the different buffers on the CPU
+    // (doing this on the GPU is slower; maybe replacing thrust operations with pure cuda would work)
+    vector<size_t> indices;
+    for( size_t ipart = 0; ipart < buffer[0]->size(); ipart++ ) {
+        int direction = -buffer[0]->cell_keys[ipart] - 2;
+        if( direction > 0 ) {
+            if( copy[direction] ) {
+                buffer[0]->copyParticle( ipart, *buffer[direction] );
+            }
+            indices.push_back( ipart );
+        }
+    }
+    buffer[0]->eraseParticles( indices );
+
+#else
+
+    // CPU
+
     for( size_t ipart = 0; ipart < size(); ipart++ ) {
         if( cell_keys[ipart] < -1 ) {
             int direction = -cell_keys[ipart] - 2;
@@ -1315,8 +1344,16 @@ void Particles::extractParticles( const size_t /* ndim */, const bool copy[], Pa
             }
         }
     }
+
+#endif
 }
 
+void Particles::copyLeavingParticlesToBuffer( Particles* )
+{
+    ERROR( "Device only feature, should not have come here!" );
+}
+
+
 void Particles::savePositions()
 {
     unsigned int ndim = Position.size(), npart = size();
     double *p[3], *pold[3];
diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h
index a155baf7a..86f9f9cac 100755
--- a/src/Particles/Particles.h
+++ b/src/Particles/Particles.h
@@ -473,9 +473,10 @@ class Particles
     // Accelerator specific virtual functions
 
     // -----------------------------------------------------------------------------
-    //! Extract particles escaping the box to buffers
+    //! Extract particles leaving the box to buffers
     // -----------------------------------------------------------------------------
-    virtual void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] );
+    void copyLeavingParticlesToBuffers( const bool copy[], Particles* buffer[] );
+    virtual void copyLeavingParticlesToBuffer( Particles* buffer );
 
     // -----------------------------------------------------------------------------
     //! Erase particles leaving the patch object on device
diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu
index 191e0943f..efca22ad5 100644
--- a/src/Particles/nvidiaParticles.cu
+++ b/src/Particles/nvidiaParticles.cu
@@ -54,6 +54,15 @@ struct cellKeyNegative
     }
 };
 
+struct cellKeyBelowMinus1
+{
+    constexpr __host__ __device__ bool
+    operator()( const int& x ) const
+    {
+        return x < -1;
+    }
+};
+
 namespace detail {
 
////////////////////////////////////////////////////////////////////////////////
@@ -1375,49 +1384,33 @@ unsigned int nvidiaParticles::deviceCapacity() const
 }
 
 // -----------------------------------------------------------------------------
-//! Move escaping particles to the buffers
+//! Move leaving particles to the buffer
 // -----------------------------------------------------------------------------
-void nvidiaParticles::extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] )
+void nvidiaParticles::copyLeavingParticlesToBuffer( Particles* buffer )
 {
-    // Escaping particles have a cell_key equal to -2-direction
-    // where direction goes from 0 to 6 and tells which way the particle escapes.
-    // If the cell_key is -1, the particle must be destroyed so it is not extracted.
-
-    extractParticlesByKey<-2>( copy[0], buffer[0] ); // x_min
-    extractParticlesByKey<-3>( copy[1], buffer[1] ); // x_max
-    if( ndim > 1 ) {
-        extractParticlesByKey<-4>( copy[2], buffer[2] ); // y_min
-        extractParticlesByKey<-5>( copy[3], buffer[3] ); // y_max
-        if( ndim > 2 ) {
-            extractParticlesByKey<-6>( copy[4], buffer[4] ); // z_min
-            extractParticlesByKey<-7>( copy[5], buffer[5] ); // z_max
-        }
-    }
+    copyParticlesByPredicate( buffer, cellKeyBelowMinus1() );
+    buffer->copyFromDeviceToHost();
 }
 
 
-//! Copy particles which have cell_key = key
-template< const int key>
-void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer )
+//! Copy particles which satisfy some predicate
+template <typename Predicate>
+void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pred )
 {
     // TODO(Etienne M): We are doing extra work. We could use something like
-    // std::partition to output the invalidated particles in particles_to_move
+    // std::partition to output the invalidated particles in buffer
     // and keep the good ones. This would help us avoid the std::remove_if in
     // the particle injection and sorting algorithm.
 
-
-    if( !
copy ) { - return; - } - const int nparts = gpu_nparts_; // Iterator of the main data structure // NOTE: https://nvidia.github.io/thrust/api/classes/classthrust_1_1zip__iterator.html#class-thrustzip_iterator const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), - nvidia_momentum_[0].begin(), - nvidia_momentum_[1].begin(), - nvidia_momentum_[2].begin(), - nvidia_weight_.begin(), - nvidia_charge_.begin() ) ); + nvidia_momentum_[0].begin(), + nvidia_momentum_[1].begin(), + nvidia_momentum_[2].begin(), + nvidia_weight_.begin(), + nvidia_charge_.begin() ) ); const auto source_iterator_last = source_iterator_first + nparts; // std::advance nvidiaParticles* const cp_parts = static_cast( buffer ); @@ -1425,7 +1418,7 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) const int nparts_to_copy = thrust::count_if( thrust::device, nvidia_cell_keys_.cbegin(), nvidia_cell_keys_.cbegin() + nparts, - cellKeyEquals() ); + pred ); // Resize it, if too small (copy_if do not resize) cp_parts->resize( nparts_to_copy ); @@ -1443,7 +1436,7 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) source_iterator_last, nvidia_cell_keys_.cbegin(), destination_iterator_first, - cellKeyEquals() ); + pred ); // Copy the other position values depending on the simulation's grid dimensions const int ndim_particles = nvidia_position_.size(); @@ -1453,7 +1446,7 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) nvidia_position_[i].cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_position_[i].begin(), - cellKeyEquals() ); + pred ); } // Special treatment for chi if radiation emission @@ -1463,7 +1456,7 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) nvidia_chi_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_chi_.begin(), - cellKeyEquals() ); + pred ); } if( has_Monte_Carlo_process ) { @@ -1472,7 +1465,7 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) nvidia_tau_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_tau_.begin(), - cellKeyEquals() ); + pred ); } if( tracked ) { @@ -1481,10 +1474,9 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) nvidia_id_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_id_.begin(), - cellKeyEquals() ); + pred ); } - buffer->copyFromDeviceToHost(); } @@ -1516,14 +1508,19 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) //! 
Erase particles leaving the patch object on device // ----------------------------------------------------------------------------- int nvidiaParticles::eraseLeavingParticles() +{ + return eraseParticlesByPredicate( cellKeyNegative() ); +} + +template +int nvidiaParticles::eraseParticlesByPredicate( Predicate pred ) { const int position_dimension_count = nvidia_position_.size(); const int nparts = gpu_nparts_; const int nparts_to_remove = thrust::count_if( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.begin() + nparts, - cellKeyNegative() ); - + pred ); if( nparts_to_remove > 0 ) { const auto first_particle = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), @@ -1540,7 +1537,7 @@ int nvidiaParticles::eraseLeavingParticles() first_particle, last_particle, nvidia_cell_keys_.cbegin(), - cellKeyNegative() ); + pred ); // Remove the other position values depending on the simulation's grid // dimensions @@ -1549,7 +1546,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_position_[i].begin(), nvidia_position_[i].begin() + nparts, nvidia_cell_keys_.cbegin(), - cellKeyNegative() ); + pred ); } if( has_quantum_parameter ) { @@ -1557,7 +1554,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_chi_.begin(), nvidia_chi_.begin() + nparts, nvidia_cell_keys_.cbegin(), - cellKeyNegative() ); + pred ); } if( has_Monte_Carlo_process ) { @@ -1565,7 +1562,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_tau_.begin(), nvidia_tau_.begin() + nparts, nvidia_cell_keys_.cbegin(), - cellKeyNegative() ); + pred ); } if( tracked ) { @@ -1573,7 +1570,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_id_.begin(), nvidia_id_.begin() + nparts, nvidia_cell_keys_.cbegin(), - cellKeyNegative() ); + pred ); } // Update current number of particles diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index 64164fad7..ba689f1e8 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -113,18 +113,21 @@ class nvidiaParticles : public Particles }; // ----------------------------------------------------------------------------- - //! Move escaping particles to the buffers + //! Move leaving particles to the buffers // ----------------------------------------------------------------------------- - void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ) override; + void copyLeavingParticlesToBuffer( Particles* buffer ) override; - template< const int key> - void extractParticlesByKey( bool copy, Particles* buffer ); + template + void copyParticlesByPredicate( Particles* buffer, Predicate pred ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device and returns the number of particle removed // ----------------------------------------------------------------------------- int eraseLeavingParticles() override; + template + int eraseParticlesByPredicate( Predicate pred ); + // ----------------------------------------------------------------------------- //! 
Inject particles from particles_to_inject into *this and return the number of particle added // ----------------------------------------------------------------------------- diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index d61c1f9e1..585f76f97 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -539,6 +539,7 @@ void Patch::copyExchParticlesToBuffers( int ispec, Params ¶ms ) cleanMPIBuffers( ispec, params ); + // Make a list of buffers bool copy[params.nDim_field*2]; Particles* sendBuffer[params.nDim_field*2]; for( size_t iDim = 0; iDim < params.nDim_field; iDim++ ) { @@ -552,7 +553,7 @@ void Patch::copyExchParticlesToBuffers( int ispec, Params ¶ms ) copy[1] = copy[1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" ); } - part.extractParticles( params.nDim_field, copy, sendBuffer ); + part.copyLeavingParticlesToBuffers( copy, sendBuffer ); } // copyExchParticlesToBuffers(... iDim) diff --git a/src/Smilei.cpp b/src/Smilei.cpp index 0ab0db1a2..eae1993d9 100755 --- a/src/Smilei.cpp +++ b/src/Smilei.cpp @@ -124,7 +124,7 @@ int main( int argc, char *argv[] ) // oblivious to the program (only one, the one by default). // This could be a missed but very advanced optimization for some // kernels/exchange. - ERROR( "Simlei needs only one accelerator (GPU). Look for HIP_VISIBLE_DEVICES or 'gpu-bind=closest' in your SLURM script or use a custom binding script." ); + ERROR( "Smilei needs only one accelerator (GPU). Look for HIP_VISIBLE_DEVICES or 'gpu-bind=closest' in your SLURM script or use a custom binding script." ); } else { // ::omp_set_default_device(0); } diff --git a/src/SmileiMPI/AsyncMPIbuffers.cpp b/src/SmileiMPI/AsyncMPIbuffers.cpp index 4cb283d17..ff8efb17f 100755 --- a/src/SmileiMPI/AsyncMPIbuffers.cpp +++ b/src/SmileiMPI/AsyncMPIbuffers.cpp @@ -93,12 +93,21 @@ void SpeciesMPIbuffers::allocate( Params ¶ms, Patch *patch ) partRecvSize[i].resize( 2 ); partSendSize[i].resize( 2 ); + // NOTE: send/recv buffers on xmin / xmax use a different constructor because + // they must be sent on GPU for exchanging particles partRecv[i].resize( 2 ); - partRecv[i][0] = ParticlesFactory::create( params, *patch );; - partRecv[i][1] = ParticlesFactory::create( params, *patch );; partSend[i].resize( 2 ); - partSend[i][0] = ParticlesFactory::create( params, *patch );; - partSend[i][1] = ParticlesFactory::create( params, *patch );; + if( i == 0 ) { + partRecv[i][0] = ParticlesFactory::create( params, *patch ); + partRecv[i][1] = ParticlesFactory::create( params, *patch ); + partSend[i][0] = ParticlesFactory::create( params, *patch ); + partSend[i][1] = ParticlesFactory::create( params, *patch ); + } else { + partRecv[i][0] = new Particles(); + partRecv[i][1] = new Particles(); + partSend[i][0] = new Particles(); + partSend[i][1] = new Particles(); + } } } diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index bfc1ae036..65358f555 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -632,16 +632,12 @@ Species::deleteSpeciesCurrentAndChargeOnDevice( void Species::allocateParticlesOnDevice() { particles->initializeDataOnDevice(); - for( auto partSends: MPI_buffer_.partSend ) { - for( auto partSend: partSends ) { - partSend->initializeDataOnDevice(); - } - } - for( auto partRecvs: MPI_buffer_.partRecv ) { - for( auto partRecv: partRecvs ) { - partRecv->initializeDataOnDevice(); - } - } + + // The first send/recv buffers are also on device + MPI_buffer_.partSend[0][0]->initializeDataOnDevice(); + 
MPI_buffer_.partSend[0][1]->initializeDataOnDevice();
+    MPI_buffer_.partRecv[0][0]->initializeDataOnDevice();
+    MPI_buffer_.partRecv[0][1]->initializeDataOnDevice();
 
     // Create photon species on the device
     if( radiation_model_ == "mc" && photon_species_ ) {

From ccac4ba75a1c2af9a183508be5448972f201e9cb Mon Sep 17 00:00:00 2001
From: cprouveur
Date: Thu, 18 Apr 2024 17:02:26 +0200
Subject: [PATCH 09/54] Implementation of GPU acceleration for the 1D cartesian
 geometry

---
 src/Checkpoint/Checkpoint.cpp                 |    8 +-
 src/ElectroMagn/ElectroMagn1D.cpp             |   67 +-
 src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp      |   60 +-
 src/ElectroMagnBC/ElectroMagnBC1D_SM.h        |    6 +-
 src/ElectroMagnSolver/MA_Solver1D_norm.cpp    |   91 +-
 src/ElectroMagnSolver/MF_Solver1D_Yee.cpp     |   53 +-
 src/Interpolator/Interpolator1D.cpp           |    2 +-
 src/Interpolator/Interpolator1D.h             |    2 +-
 src/Interpolator/Interpolator1D2Order.cpp     |  448 +++++--
 src/Interpolator/Interpolator1D2Order.h       |  114 +-
 src/Interpolator/Interpolator1D2OrderV.cpp    |   10 +-
 src/Interpolator/Interpolator1D2OrderV.h      |    4 +-
 src/Interpolator/Interpolator1D3Order.h       |    8 +-
 src/Interpolator/Interpolator1D4Order.h       |   88 +-
 src/Interpolator/Interpolator1DWT2Order.cpp   |    6 +-
 src/Interpolator/Interpolator1DWT2Order.h     |    4 +-
 src/Interpolator/Interpolator1DWT2OrderV.cpp  |   10 +-
 src/Interpolator/Interpolator1DWT2OrderV.h    |    4 +-
 src/Interpolator/Interpolator1DWT4Order.h     |    4 +-
 src/Interpolator/InterpolatorFactory.h        |   10 +
 src/Params/Params.h                           |    2 +-
 src/Particles/nvidiaParticles.cu              |  308 ++++-
 src/Patch/SyncVectorPatch.cpp                 |    2 +-
 src/Projector/Projector1D.h                   |   10 +-
 src/Projector/Projector1D2Order.cpp           |   52 +-
 src/Projector/Projector1D2Order.h             |    5 +-
 src/Projector/Projector1D2OrderGPU.cpp        |  385 ++++++
 src/Projector/Projector1D2OrderGPU.h          |  127 ++
 .../Projector1D2OrderGPUKernelCUDAHIP.cu      | 1103 +++++++++++++++++
 .../Projector1D2OrderGPUKernelCUDAHIP.h       |   71 ++
 src/Projector/Projector1D4Order.cpp           |   20 +-
 src/Projector/Projector1D4Order.h             |    1 -
 src/Projector/Projector2D2OrderGPU.cpp        |   34 +-
 src/Projector/Projector2D2OrderGPU.h          |    2 +-
 src/Projector/Projector2D2OrderGPUKernel.cpp  |   12 +-
 .../Projector2D2OrderGPUKernelCUDAHIP.cu      |   64 +-
 .../Projector2D2OrderGPUKernelCUDAHIP.h       |   10 +-
 src/Projector/Projector3D2OrderGPU.cpp        |   20 +-
 src/Projector/Projector3D2OrderGPU.h          |    2 +-
 .../Projector3D2OrderGPUKernelCUDAHIP.cu      |   20 +-
 .../Projector3D2OrderGPUKernelCUDAHIP.h       |   14 +-
 src/SmileiMPI/SmileiMPI.cpp                   |    8 +-
 42 files changed, 2805 insertions(+), 466 deletions(-)
 mode change 100644 => 100755 src/Interpolator/Interpolator1D2OrderV.cpp
 mode change 100644 => 100755 src/Interpolator/Interpolator1D2OrderV.h
 mode change 100644 => 100755 src/Particles/nvidiaParticles.cu
 create mode 100755 src/Projector/Projector1D2OrderGPU.cpp
 create mode 100755 src/Projector/Projector1D2OrderGPU.h
 create mode 100755 src/Projector/Projector1D2OrderGPUKernelCUDAHIP.cu
 create mode 100755 src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h
 mode change 100644 => 100755 src/Projector/Projector2D2OrderGPUKernel.cpp
 mode change 100644 => 100755 src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu
 mode change 100644 => 100755 src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h
 mode change 100644 => 100755 src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu
 mode change 100644 => 100755 src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h

diff --git a/src/Checkpoint/Checkpoint.cpp b/src/Checkpoint/Checkpoint.cpp
index 13c3d28a5..97d43c258 100755
--- a/src/Checkpoint/Checkpoint.cpp
+++ b/src/Checkpoint/Checkpoint.cpp
@@ -478,8 +478,8 @@ void Checkpoint::dumpPatch(
Patch *patch, Params ¶ms, H5Write &g ) name << setfill( '0' ) << setw( 2 ) << bcId; string groupName=Tools::merge( "EM_boundary-species-", name.str() ); H5Write b = g.group( groupName ); - b.attr( "By_val", embc->By_val ); - b.attr( "Bz_val", embc->Bz_val ); + b.attr( "By_val", embc->By_val_ ); + b.attr( "Bz_val", embc->Bz_val_ ); } else if( dynamic_cast( EMfields->emBoundCond[bcId] ) ) { ElectroMagnBC2D_SM *embc = static_cast( EMfields->emBoundCond[bcId] ); ostringstream name( "" ); @@ -889,8 +889,8 @@ void Checkpoint::restartPatch( Patch *patch, Params ¶ms, H5Read &g ) name << setfill( '0' ) << setw( 2 ) << bcId; string groupName = Tools::merge( "EM_boundary-species-", name.str() ); H5Read b = g.group( groupName ); - b.attr( "By_val", embc->By_val ); - b.attr( "Bz_val", embc->Bz_val ); + b.attr( "By_val", embc->By_val_ ); + b.attr( "Bz_val", embc->Bz_val_ ); } else if( dynamic_cast( EMfields->emBoundCond[bcId] ) ) { ElectroMagnBC2D_SM *embc = static_cast( EMfields->emBoundCond[bcId] ); ostringstream name( "" ); diff --git a/src/ElectroMagn/ElectroMagn1D.cpp b/src/ElectroMagn/ElectroMagn1D.cpp index ea97df8fb..d9ecbe478 100755 --- a/src/ElectroMagn/ElectroMagn1D.cpp +++ b/src/ElectroMagn/ElectroMagn1D.cpp @@ -559,34 +559,65 @@ void ElectroMagn1D::saveMagneticFields( bool is_spectral ) void ElectroMagn1D::centerMagneticFields() { // Static cast of the fields - Field1D *Bx1D = static_cast( Bx_ ); - Field1D *By1D = static_cast( By_ ); - Field1D *Bz1D = static_cast( Bz_ ); - Field1D *Bx1D_m = static_cast( Bx_m ); - Field1D *By1D_m = static_cast( By_m ); - Field1D *Bz1D_m = static_cast( Bz_m ); + const double *const __restrict__ Bx1D = Bx_->data(); + const double *const __restrict__ By1D = By_->data(); + const double *const __restrict__ Bz1D = Bz_->data(); + double *const __restrict__ Bx1D_m = Bx_m->data(); + double *const __restrict__ By1D_m = By_m->data(); + double *const __restrict__ Bz1D_m = Bz_m->data(); + const unsigned int nx_p = dimPrim[0]; + const unsigned int nx_d = dimDual[0]; + // for Bx^(p) - for( unsigned int i=0 ; isize(); + const int sizeofBy = By_->size(); + const int sizeofBz = Bz_->size(); + + #pragma acc parallel present(Bx1D[0:sizeofBx],Bx1D_m[0:sizeofBx]) + #pragma acc loop gang worker vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target + #pragma omp teams distribute parallel for //simd +#endif + for( unsigned int i=0 ; i( By_mBTIS3 ); - Field1D *Bz_oldBTIS3 = static_cast( Bz_mBTIS3 ); - - for( unsigned int i=0 ; idata(); + double *const Bz1D_oldBTIS3 = Bz_mBTIS3->data(); +#if defined( SMILEI_OPENACC_MODE ) + const int sizeofByBTIS3 = By_mBTIS3->size(); + const int sizeofBzBTIS3 = Bz_mBTIS3->size(); + #pragma acc parallel present(By1D_oldBTIS3[0:sizeofByBTIS3],By1D[0:sizeofBy],Bz1D_oldBTIS3[0:sizeofBzBTIS3],Bz1D[0:sizeofBz]) + #pragma acc loop gang vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target + #pragma omp teams distribute parallel for +#endif +#if !defined( SMILEI_ACCELERATOR_MODE ) + #pragma omp simd +#endif + for( unsigned int i=0 ; iisXmin() ) { if( field1D->name=="By" ) { - By_val = ( *my_field )( 0 ); + By_val_ = ( *my_field )( 0 ); } else if( field1D->name=="Bz" ) { - Bz_val = ( *my_field )( 0 ); + Bz_val_ = ( *my_field )( 0 ); } } else if( i_boundary_ == 1 && patch->isXmax() ) { if( field1D->name=="By" ) { - By_val = ( *my_field )( field1D->dims()[0]-1 ); + By_val_ = ( *my_field )( field1D->dims()[0]-1 ); } else if( field1D->name=="Bz" ) { - Bz_val = ( *my_field )( field1D->dims()[0]-1 ); + Bz_val_ = ( *my_field )( 
diff --git a/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp
--- a/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp
+++ b/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp
@@ ... @@ void ElectroMagnBC1D_SM::save_fields( Field *my_field, Patch *patch )
     Field1D *field1D = static_cast<Field1D *>( my_field );
     if( i_boundary_ == 0 && patch->isXmin() ) {
         if( field1D->name=="By" ) {
-            By_val = ( *my_field )( 0 );
+            By_val_ = ( *my_field )( 0 );
         } else if( field1D->name=="Bz" ) {
-            Bz_val = ( *my_field )( 0 );
+            Bz_val_ = ( *my_field )( 0 );
         }
     } else if( i_boundary_ == 1 && patch->isXmax() ) {
         if( field1D->name=="By" ) {
-            By_val = ( *my_field )( field1D->dims()[0]-1 );
+            By_val_ = ( *my_field )( field1D->dims()[0]-1 );
         } else if( field1D->name=="Bz" ) {
-            Bz_val = ( *my_field )( field1D->dims()[0]-1 );
+            Bz_val_ = ( *my_field )( field1D->dims()[0]-1 );
         }
     }
 }
@@ -74,11 +74,17 @@ void ElectroMagnBC1D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch *patch )
     if( patch->isBoundary( i_boundary_ ) ) {

         //Field1D* Ex1D = static_cast<Field1D *>(EMfields->Ex_);
-        Field1D *Ey1D = static_cast<Field1D *>( EMfields->Ey_ );
+        /*Field1D *Ey1D = static_cast<Field1D *>( EMfields->Ey_ );
         Field1D *Ez1D = static_cast<Field1D *>( EMfields->Ez_ );
         Field1D *By1D = static_cast<Field1D *>( EMfields->By_ );
-        Field1D *Bz1D = static_cast<Field1D *>( EMfields->Bz_ );
+        Field1D *Bz1D = static_cast<Field1D *>( EMfields->Bz_ );*/
+        const Field *E[3]{ EMfields->Ex_, EMfields->Ey_, EMfields->Ez_ };
+        const Field *B[3]{ EMfields->Bx_, EMfields->By_, EMfields->Bz_ };
+        const double *const __restrict__ E1 = E[1]->data_;
+        const double *const __restrict__ E2 = E[2]->data_;
+        double *const __restrict__ B1 = B[1]->data_;
+        double *const __restrict__ B2 = B[2]->data_;

         // Lasers
         double by = 0., bz = 0.;
         vector<double> pos( 1 );
@@ -88,11 +94,25 @@ void ElectroMagnBC1D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch *patch )
         for( unsigned int ilaser=0 ; ilaser<vecLaser.size() ; ilaser++ ) {
             by += vecLaser[ilaser]->getAmplitude0( pos, time_dual, 0, 0 );
             bz += vecLaser[ilaser]->getAmplitude1( pos, time_dual, 0, 0 );
         }

+#ifdef SMILEI_OPENACC_MODE
+        const int sizeofE1 = E[1]->number_of_points_;
+        const int sizeofE2 = E[2]->number_of_points_;
+        const int sizeofB1 = B[1]->number_of_points_;
+        const int sizeofB2 = B[2]->number_of_points_;
+#endif
         // Apply Silver-Mueller EM boundary condition at x=xmin or xmax
-        ( *By1D )( iB ) = -sign_*Alpha*( *Ez1D )( iE ) + Beta*( ( *By1D )( iB_old )-By_val ) + Gamma*by + By_val;
-        ( *Bz1D )( iB ) =  sign_*Alpha*( *Ey1D )( iE ) + Beta*( ( *Bz1D )( iB_old )-Bz_val ) + Gamma*bz + Bz_val;
-
+#ifdef SMILEI_OPENACC_MODE
+        #pragma acc parallel present(E1[0:sizeofE1],E2[0:sizeofE2],B1[0:sizeofB1],B2[0:sizeofB2])
+#elif defined( SMILEI_ACCELERATOR_GPU_OMP )
+        #pragma omp target
+#endif
+        {
+            //( *By1D )( iB_ ) = -sign_*Alpha_*( *Ez1D )( iE_ ) + Beta_*( ( *By1D )( iB_old_ )-By_val_ ) + Gamma_*by + By_val_;
+            //( *Bz1D )( iB_ ) =  sign_*Alpha_*( *Ey1D )( iE_ ) + Beta_*( ( *Bz1D )( iB_old_ )-Bz_val_ ) + Gamma_*bz + Bz_val_;
+            B1[ iB_ ] = -sign_ * Alpha_ * E2[iE_] + Beta_ * ( B1[iB_old_] - By_val_) + Gamma_ * by + By_val_;
+            B2[ iB_ ] =  sign_ * Alpha_ * E1[iE_] + Beta_ * ( B2[iB_old_] - Bz_val_) + Gamma_ * bz + Bz_val_;
+        }
     }
 }
diff --git a/src/ElectroMagnBC/ElectroMagnBC1D_SM.h b/src/ElectroMagnBC/ElectroMagnBC1D_SM.h
index ac17f856d..ccbc499c1 100755
--- a/src/ElectroMagnBC/ElectroMagnBC1D_SM.h
+++ b/src/ElectroMagnBC/ElectroMagnBC1D_SM.h
@@ -17,16 +17,16 @@ class ElectroMagnBC1D_SM : public ElectroMagnBC1D

     void save_fields( Field *, Patch *patch ) override;

-    double By_val, Bz_val;
+    double By_val_, Bz_val_;

 private:

     //! Constants used for the Silver-Mueller boundary conditions
-    double Alpha, Beta, Gamma;
+    double Alpha_, Beta_, Gamma_;
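(Note that the B2 update keeps the +sign_ of the commented-out original; the raw-pointer rewrite had flipped it.) For orientation, the boundary update is a one-point absorbing/injecting Silver-Mueller relation; a minimal sketch, assuming alpha/beta/gamma are the precomputed constants and b_val the static offset saved by save_fields():

    // One-sided Silver-Mueller update for one transverse B component at an
    // x boundary (1D sketch, illustrative names only).
    inline double silver_mueller_update( double sign, double alpha, double beta, double gamma,
                                         double e_transverse,   // conjugate E at the boundary
                                         double b_old,          // previous B at the boundary
                                         double b_val,          // static field offset
                                         double laser )         // injected laser amplitude
    {
        return sign * alpha * e_transverse + beta * ( b_old - b_val ) + gamma * laser + b_val;
    }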
    //! Locations to apply the profile
-    unsigned int iE, iB, iB_old;
+    unsigned int iE_, iB_, iB_old_;

     int sign_;
 };
diff --git a/src/ElectroMagnSolver/MA_Solver1D_norm.cpp b/src/ElectroMagnSolver/MA_Solver1D_norm.cpp
index 7e04123f4..9b9f0d53d 100755
--- a/src/ElectroMagnSolver/MA_Solver1D_norm.cpp
+++ b/src/ElectroMagnSolver/MA_Solver1D_norm.cpp
@@ -15,28 +15,101 @@ MA_Solver1D_norm::~MA_Solver1D_norm()
 {
 }

 void MA_Solver1D_norm::operator()( ElectroMagn *fields )
 {
+    {
     const unsigned int nx_p = fields->dimPrim[0];
     const unsigned int nx_d = fields->dimDual[0];
-    Field1D *Ex1D = static_cast<Field1D *>( fields->Ex_ );
+    /*Field1D *Ex1D = static_cast<Field1D *>( fields->Ex_ );
     Field1D *Ey1D = static_cast<Field1D *>( fields->Ey_ );
     Field1D *Ez1D = static_cast<Field1D *>( fields->Ez_ );
     Field1D *By1D = static_cast<Field1D *>( fields->By_ );
     Field1D *Bz1D = static_cast<Field1D *>( fields->Bz_ );
     Field1D *Jx1D = static_cast<Field1D *>( fields->Jx_ );
     Field1D *Jy1D = static_cast<Field1D *>( fields->Jy_ );
-    Field1D *Jz1D = static_cast<Field1D *>( fields->Jz_ );
-
+    Field1D *Jz1D = static_cast<Field1D *>( fields->Jz_ );*/
+
+    double *const __restrict__ Ex1D = fields->Ex_->data(); // [x] : dual in x primal in y,z
+    double *const __restrict__ Ey1D = fields->Ey_->data(); // [x] : dual in y primal in x,z
+    double *const __restrict__ Ez1D = fields->Ez_->data(); // [x] : dual in z primal in x,y
+    //const double *const __restrict__ Bx1D = fields->Bx_->data(); // [x] : dual in y,z primal in x
+    const double *const __restrict__ By1D = fields->By_->data(); // [x] : dual in x,z primal in y
+    const double *const __restrict__ Bz1D = fields->Bz_->data(); // [x] : dual in x,y primal in z
+    const double *const __restrict__ Jx1D = fields->Jx_->data(); // [x] : dual in x primal in y,z
+    const double *const __restrict__ Jy1D = fields->Jy_->data(); // [x] : dual in y primal in x,z
+    const double *const __restrict__ Jz1D = fields->Jz_->data(); // [x] : dual in z primal in x,y
+
+    {
+        fields->Ex_->copyFromDeviceToHost();
+        fields->Ey_->copyFromDeviceToHost();
+        fields->Ez_->copyFromDeviceToHost();
+        fields->Jx_->copyFromDeviceToHost();
+        fields->Jy_->copyFromDeviceToHost();
+        fields->Jz_->copyFromDeviceToHost();
+    }
+    std::cout<< "printing before in MA solver ex, ey and ez for nx_d="<<nx_d<<std::endl;
+
+    // Longitudinal field ex on the dual grid
+#if defined( SMILEI_OPENACC_MODE )
+    const int sizeofEx = fields->Ex_->number_of_points_;
+    const int sizeofEy = fields->Ey_->number_of_points_;
+    const int sizeofEz = fields->Ez_->number_of_points_;
+    //const int sizeofBx = fields->Bx_->number_of_points_;
+    const int sizeofBy = fields->By_->number_of_points_;
+    const int sizeofBz = fields->Bz_->number_of_points_;
+    #pragma acc parallel present( Ex1D[0:sizeofEx], Jx1D[0:sizeofEx] )
+    #pragma acc loop gang worker vector
+#elif defined( SMILEI_ACCELERATOR_GPU_OMP )
+    #pragma omp target
+    #pragma omp teams distribute parallel for
+#endif
+#if !defined( SMILEI_ACCELERATOR_MODE )
+    #pragma omp simd
+#endif
+    for( unsigned int ix=0 ; ix<nx_d ; ix++ ) {
+        Ex1D[ix] += -dt * Jx1D[ix];
+    }
+
+    // Transverse fields ey, ez on the primal grid
+#if defined( SMILEI_OPENACC_MODE )
+    #pragma acc parallel present( Ey1D[0:sizeofEy], Ez1D[0:sizeofEz], By1D[0:sizeofBy], Bz1D[0:sizeofBz], Jy1D[0:sizeofEy], Jz1D[0:sizeofEz] )
+    #pragma acc loop gang worker vector
+#elif defined( SMILEI_ACCELERATOR_GPU_OMP )
+    #pragma omp target
+    #pragma omp teams distribute parallel for
+#endif
+#if !defined( SMILEI_ACCELERATOR_MODE )
+    #pragma omp simd
+#endif
+    for( unsigned int ix=0 ; ix<nx_p ; ix++ ) {
+        Ey1D[ix] += -dt_ov_dx * ( Bz1D[ix+1] - Bz1D[ix] ) - dt * Jy1D[ix];
+        Ez1D[ix] +=  dt_ov_dx * ( By1D[ix+1] - By1D[ix] ) - dt * Jz1D[ix];
+    }
+
+    {
+        fields->Ex_->copyFromDeviceToHost();
+        fields->Ey_->copyFromDeviceToHost();
+        fields->Ez_->copyFromDeviceToHost();
     }
-    // Transverse fields ey, ez are defined on the primal grid
-    for( unsigned int ix=0 ; ix<nx_p ; ix++ ) {
-        ( *Ey1D )( ix ) += -dt_ov_dx * ( ( *Bz1D )( ix+1 ) - ( *Bz1D )( ix ) ) - dt * ( *Jy1D )( ix );
-        ( *Ez1D )( ix ) +=  dt_ov_dx * ( ( *By1D )( ix+1 ) - ( *By1D )( ix ) ) - dt * ( *Jz1D )( ix );
-    }
+    }
+
+    {
+    const unsigned int nx_p = fields->dimPrim[0];
+    const unsigned int nx_d = fields->dimDual[0];
+    double *const __restrict__ Ex1D = fields->Ex_->data(); // [x] : dual in x primal in y,z
+    double *const __restrict__ Ey1D = fields->Ey_->data(); // [x] : dual in y primal in x,z
+    double *const __restrict__ Ez1D = fields->Ez_->data(); // [x] : dual in z primal in x,y
+
+    std::cout<< "printing after in MA solver ex, ey and ez for nx_d="<<nx_d<<std::endl;
+    }
 }
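The Maxwell-Ampère step rewritten above advances E from J and curl(B). A compact sketch of the 1D update in normalized units (function and parameter names are illustrative; dt_ov_dx = dt/dx):

    // 1D Maxwell-Ampere sketch: Ex lives on the dual grid (nx_d points),
    // Ey/Ez on the primal grid (nx_p points), By/Bz dual in x.
    void ampere_1d( int nx_p, int nx_d, double dt, double dt_ov_dx,
                    double *Ex, double *Ey, double *Ez,
                    const double *By, const double *Bz,
                    const double *Jx, const double *Jy, const double *Jz )
    {
        for( int ix = 0; ix < nx_d; ++ix ) {
            Ex[ix] += -dt * Jx[ix];
        }
        for( int ix = 0; ix < nx_p; ++ix ) {
            Ey[ix] += -dt_ov_dx * ( Bz[ix+1] - Bz[ix] ) - dt * Jy[ix];
            Ez[ix] +=  dt_ov_dx * ( By[ix+1] - By[ix] ) - dt * Jz[ix];
        }
    }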
diff --git a/src/ElectroMagnSolver/MF_Solver1D_Yee.cpp b/src/ElectroMagnSolver/MF_Solver1D_Yee.cpp
--- a/src/ElectroMagnSolver/MF_Solver1D_Yee.cpp
+++ b/src/ElectroMagnSolver/MF_Solver1D_Yee.cpp
@@ -28,17 +28,56 @@ void MF_Solver1D_Yee::operator()( ElectroMagn *fields )
     const unsigned int nx_d = fields->dimDual[0];
     // Static-cast of the fields
-    Field1D* Ey1D;
+    /*Field1D* Ey1D;
     Field1D* Ez1D;
     if (isEFilterApplied) {
         Ey1D = static_cast<Field1D *>(fields->filter_->Ey_[0]);
         Ez1D = static_cast<Field1D *>(fields->filter_->Ez_[0]);
     } else {
         Ey1D = static_cast<Field1D *>(fields->Ey_);
         Ez1D = static_cast<Field1D *>(fields->Ez_);
-    }
-    Field1D *By1D = static_cast<Field1D *>( fields->By_ );
-    Field1D *Bz1D = static_cast<Field1D *>( fields->Bz_ );
+    }*/
+    const double *const __restrict__ Ey1D = isEFilterApplied ? fields->filter_->Ey_[0]->data() :
+                                                               fields->Ey_->data(); // [ix] : dual in y primal in x,z
+    const double *const __restrict__ Ez1D = isEFilterApplied ? fields->filter_->Ez_[0]->data() :
+                                                               fields->Ez_->data();// [ix] : dual in z primal in x,y
+
+    //Field1D *By1D = static_cast<Field1D *>( fields->By_ );
+    //Field1D *Bz1D = static_cast<Field1D *>( fields->Bz_ );
+    double *const __restrict__ By1D = fields->By_->data();// [ix] : dual in x,z primal in y
+    double *const __restrict__ Bz1D = fields->Bz_->data();// [ix] : dual in x,y primal in z
+
+    // to be deleted
+    /*std::cout<< "printing before in FM solver by and bz for nx_d-1="<<nx_d-1<<std::endl;*/
+
+#if defined( SMILEI_OPENACC_MODE )
+    const int sizeofEy = fields->Ey_->number_of_points_;
+    const int sizeofEz = fields->Ez_->number_of_points_;
+    const int sizeofBy = fields->By_->number_of_points_;
+    const int sizeofBz = fields->Bz_->number_of_points_;
+    #pragma acc parallel present( By1D[0:sizeofBy], Bz1D[0:sizeofBz],Ey1D[0:sizeofEy],Ez1D[0:sizeofEz] )
+    #pragma acc loop gang vector
+#elif defined( SMILEI_ACCELERATOR_GPU_OMP )
+    #pragma omp target
+    #pragma omp teams distribute parallel for
+#endif
+#if !defined( SMILEI_ACCELERATOR_MODE )
+    #pragma omp simd
+#endif
     for( unsigned int ix=1 ; ix<nx_d-1 ; ix++ ) {
-        ( *By1D )( ix ) +=  dt_ov_dx * ( ( *Ez1D )( ix ) - ( *Ez1D )( ix-1 ) );
-        ( *Bz1D )( ix ) += -dt_ov_dx * ( ( *Ey1D )( ix ) - ( *Ey1D )( ix-1 ) );
+        By1D[ix] +=  dt_ov_dx * ( Ez1D[ix] - Ez1D[ix-1] );
+        Bz1D[ix] += -dt_ov_dx * ( Ey1D[ix] - Ey1D[ix-1] );
     }
+
+    {
+        fields->By_->copyFromDeviceToHost();
+        fields->Bz_->copyFromDeviceToHost();
     }
+    std::cout<< "printing after in FM solver by and bz for nx_d-1="<<nx_d-1<<std::endl;
 }
diff --git a/src/Interpolator/Interpolator1D.cpp b/src/Interpolator/Interpolator1D.cpp
--- a/src/Interpolator/Interpolator1D.cpp
+++ b/src/Interpolator/Interpolator1D.cpp
@@ ... @@ Interpolator1D::Interpolator1D( Patch *patch )
-    index_domain_begin = patch->getCellStartingGlobalIndex( 0 );
+    i_domain_begin_ = patch->getCellStartingGlobalIndex( 0 );
 }
diff --git a/src/Interpolator/Interpolator1D.h b/src/Interpolator/Interpolator1D.h
index c1324e0a3..408b6ac3a 100755
--- a/src/Interpolator/Interpolator1D.h
+++ b/src/Interpolator/Interpolator1D.h
@@ -22,7 +22,7 @@ class Interpolator1D : public Interpolator
 protected:
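Likewise, the Maxwell-Faraday (Yee) step advances B from curl(E) on the interior dual nodes; a matching 1D sketch (illustrative names, same dt_ov_dx convention as above):

    // 1D Maxwell-Faraday sketch: By/Bz are dual in x, so the update runs
    // over interior dual nodes 1 .. nx_d-2; the boundary nodes are handled
    // by the EM boundary conditions.
    void faraday_1d( int nx_d, double dt_ov_dx,
                     const double *Ey, const double *Ez, double *By, double *Bz )
    {
        for( int ix = 1; ix < nx_d - 1; ++ix ) {
            By[ix] +=  dt_ov_dx * ( Ez[ix] - Ez[ix-1] );
            Bz[ix] += -dt_ov_dx * ( Ey[ix] - Ey[ix-1] );
        }
    }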
    //! Inverse of the spatial-step
     double dx_inv_;
-    unsigned int index_domain_begin;
+    unsigned int i_domain_begin_;
 };

 #endif
diff --git a/src/Interpolator/Interpolator1D2Order.cpp b/src/Interpolator/Interpolator1D2Order.cpp
index e867b29be..a74c951cd 100755
--- a/src/Interpolator/Interpolator1D2Order.cpp
+++ b/src/Interpolator/Interpolator1D2Order.cpp
@@ -14,7 +14,6 @@ using namespace std;
 Interpolator1D2Order::Interpolator1D2Order( Params &params, Patch *patch ) : Interpolator1D( patch )
 {
     dx_inv_ = 1.0/params.cell_length[0];
-
 }

 // ---------------------------------------------------------------------------------------------------------------------
@@ -23,31 +22,27 @@ Interpolator1D2Order::Interpolator1D2Order( Params &params, Patch *patch ) : Interpolator1D( patch )
 void Interpolator1D2Order::fields( ElectroMagn *EMfields, Particles &particles, int ipart, int nparts, double *ELoc, double *BLoc )
 {
     // Static cast of the electromagnetic fields
-    Field1D *Ex1D   = static_cast<Field1D *>( EMfields->Ex_ );
-    Field1D *Ey1D   = static_cast<Field1D *>( EMfields->Ey_ );
-    Field1D *Ez1D   = static_cast<Field1D *>( EMfields->Ez_ );
-    Field1D *Bx1D_m = static_cast<Field1D *>( EMfields->Bx_m );
-    Field1D *By1D_m = static_cast<Field1D *>( EMfields->By_m );
-    Field1D *Bz1D_m = static_cast<Field1D *>( EMfields->Bz_m );
+    Field1D *Ex1D   = static_cast<Field1D *>( EMfields->Ex_ );
+    Field1D *Ey1D   = static_cast<Field1D *>( EMfields->Ey_ );
+    Field1D *Ez1D   = static_cast<Field1D *>( EMfields->Ez_ );
+    Field1D *Bx1D_m = static_cast<Field1D *>( EMfields->Bx_m );
+    Field1D *By1D_m = static_cast<Field1D *>( EMfields->By_m );
+    Field1D *Bz1D_m = static_cast<Field1D *>( EMfields->Bz_m );

     // Particle position (in units of the spatial-step)
-    double xpn = particles.position( 0, ipart )*dx_inv_;
+    double xjn = particles.position( 0, ipart ) * dx_inv_;
     // Calculate coeffs
-    int idx_p[1], idx_d[1];
-    double delta_p[1];
-    double coeffxp[3];
-    double coeffxd[3];
-    coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p );
+    coeffs( xjn );

     // Interpolate the fields from the Dual grid : Ex, By, Bz
-    *( ELoc+0*nparts ) = compute( coeffxd, Ex1D,   idx_d[0] );
-    *( BLoc+1*nparts ) = compute( coeffxd, By1D_m, idx_d[0] );
-    *( BLoc+2*nparts ) = compute( coeffxd, Bz1D_m, idx_d[0] );
+    *( ELoc+0*nparts ) = compute( coeffd_, Ex1D,   id_ );
+    *( BLoc+1*nparts ) = compute( coeffd_, By1D_m, id_ );
+    *( BLoc+2*nparts ) = compute( coeffd_, Bz1D_m, id_ );

     // Interpolate the fields from the Primal grid : Ey, Ez, Bx
-    *( ELoc+1*nparts ) = compute( coeffxp, Ey1D,   idx_p[0] );
-    *( ELoc+2*nparts ) = compute( coeffxp, Ez1D,   idx_p[0] );
-    *( BLoc+0*nparts ) = compute( coeffxp, Bx1D_m, idx_p[0] );
+    *( ELoc+1*nparts ) = compute( coeffp_, Ey1D,   ip_ );
+    *( ELoc+2*nparts ) = compute( coeffp_, Ez1D,   ip_ );
+    *( BLoc+0*nparts ) = compute( coeffp_, Bx1D_m, ip_ );

 }//END Interpolator1D2Order
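The compute() helper used throughout these hunks is a plain 3-point weighted stencil; as a self-contained reference:

    // 2nd-order (3-point) field gather around node idx, as in compute():
    // coeff[] holds the quadratic shape factors for nodes idx-1, idx, idx+1.
    inline double gather3( const double *coeff, const double *f, int idx )
    {
        return coeff[0] * f[idx-1] + coeff[1] * f[idx] + coeff[2] * f[idx+1];
    }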
@@ -83,37 +78,33 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles &particles,
     }

     // Particle position (in units of the spatial-step)
-    double xpn = particles.position( 0, ipart )*dx_inv_;
+    double xjn = particles.position( 0, ipart )*dx_inv_;
     // Calculate coeffs
-    int idx_p[1], idx_d[1];
-    double delta_p[1];
-    double coeffxp[3];
-    double coeffxd[3];
-    coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p );
+    coeffs( xjn );

     int nparts( particles.numberOfParticles() );

     // Interpolate the fields from the Dual grid : Ex, By, Bz
-    *( ELoc+0*nparts ) = compute( coeffxd, Ex1D,   idx_d[0] );
-    *( BLoc+1*nparts ) = compute( coeffxd, By1D_m, idx_d[0] );
-    *( BLoc+2*nparts ) = compute( coeffxd, Bz1D_m, idx_d[0] );
+    *( ELoc+0*nparts ) = compute( coeffd_, Ex1D,   id_ );
+    *( BLoc+1*nparts ) = compute( coeffd_, By1D_m, id_ );
+    *( BLoc+2*nparts ) = compute( coeffd_, Bz1D_m, id_ );

     // Interpolate the fields from the Primal grid : Ey, Ez, Bx
-    *( ELoc+1*nparts ) = compute( coeffxp, Ey1D,   idx_p[0] );
-    *( ELoc+2*nparts ) = compute( coeffxp, Ez1D,   idx_p[0] );
-    *( BLoc+0*nparts ) = compute( coeffxp, Bx1D_m, idx_p[0] );
+    *( ELoc+1*nparts ) = compute( coeffp_, Ey1D,   ip_ );
+    *( ELoc+2*nparts ) = compute( coeffp_, Ez1D,   ip_ );
+    *( BLoc+0*nparts ) = compute( coeffp_, Bx1D_m, ip_ );

     // Interpolate the fields from the Primal grid : Jy, Jz, Rho
-    JLoc->y     = compute( coeffxp, Jy1D,  idx_p[0] );
-    JLoc->z     = compute( coeffxp, Jz1D,  idx_p[0] );
-    ( *RhoLoc ) = compute( coeffxp, Rho1D, idx_p[0] );
+    JLoc->y     = compute( coeffp_, Jy1D,  ip_ );
+    JLoc->z     = compute( coeffp_, Jz1D,  ip_ );
+    ( *RhoLoc ) = compute( coeffp_, Rho1D, ip_ );

     // Interpolate the fields from the Dual grid : Jx
-    JLoc->x = compute( coeffxd, Jx1D, idx_d[0] );
+    JLoc->x = compute( coeffd_, Jx1D, id_ );

     if (smpi->use_BTIS3){
-        *( BLocyBTIS3+0*nparts ) = compute( coeffxp, By1DBTIS3, idx_p[0] );
-        *( BLoczBTIS3+0*nparts ) = compute( coeffxp, Bz1DBTIS3, idx_p[0] );
+        *( BLocyBTIS3+0*nparts ) = compute( &coeffp_[1], By1DBTIS3, ip_ );
+        *( BLoczBTIS3+0*nparts ) = compute( &coeffp_[1], Bz1DBTIS3, ip_ );
     }

 }
@@ -122,114 +113,269 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles &particles,
 void Interpolator1D2Order::oneField( Field **field, Particles &particles, int *istart, int *iend, double *FieldLoc, double *, double *, double * )
 {
     Field1D *F = static_cast<Field1D *>( *field );
-    int idx_p[1], idx_d[1];
-    double delta_p[1];
-    double coeffxp[3];
-    double coeffxd[3];
-    double *coeff = F->isDual( 0 ) ? coeffxd : coeffxp;
-    int *i = F->isDual( 0 ) ? &idx_d[0] : &idx_p[0];
+    double *coeff = F->isDual( 0 ) ? coeffd_ : coeffp_;
+    int *i = F->isDual( 0 ) ? &id_ : &ip_;
    for( int ipart=*istart ; ipart<*iend; ipart++ ) {
-        double xpn = particles.position( 0, ipart )*dx_inv_;
-        coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p );
+        double xjn = particles.position( 0, ipart )*dx_inv_;
+        coeffs( xjn );
         FieldLoc[ipart] = compute( coeff, F, *i );
     }
 }

-void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, unsigned int, int )
+void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields,
+                                          Particles &particles, SmileiMPI *smpi,
+                                          int *istart, int *iend, int ithread, unsigned int, int )
 {
-    double *Epart = &( smpi->dynamics_Epart[ithread][0] );
-    double *Bpart = &( smpi->dynamics_Bpart[ithread][0] );
-    int *iold = &( smpi->dynamics_iold[ithread][0] );
-    double *delta = &( smpi->dynamics_deltaold[ithread][0] );
-
-    // Static cast of the electromagnetic fields
-    Field1D *Ex1D = static_cast<Field1D *>( EMfields->Ex_ );
-    Field1D *Ey1D = static_cast<Field1D *>( EMfields->Ey_ );
-    Field1D *Ez1D = static_cast<Field1D *>( EMfields->Ez_ );
-    Field1D *Bx1D = static_cast<Field1D *>( EMfields->Bx_m );
-    Field1D *By1D = static_cast<Field1D *>( EMfields->By_m );
-    Field1D *Bz1D = static_cast<Field1D *>( EMfields->Bz_m );
+    {
+    double *const __restrict__ ELoc = smpi->dynamics_Epart[ithread].data();//&( smpi->dynamics_Epart[ithread][0] );
+    double *const __restrict__ BLoc = smpi->dynamics_Bpart[ithread].data();//&( smpi->dynamics_Bpart[ithread][0] );

+    int *const __restrict__ iold     = smpi->dynamics_iold[ithread].data();//&( smpi->dynamics_iold[ithread][0] );
+    double *const __restrict__ delta = smpi->dynamics_deltaold[ithread].data();//&( smpi->dynamics_deltaold[ithread][0] );
+    const double *const __restrict__ position_x = particles.getPtrPosition( 0 );
+
+    // Static cast of the electromagnetic fields
+    const double *const __restrict__ Ex1D = static_cast<Field1D *>( EMfields->Ex_ )->data();
+    const double *const __restrict__ Ey1D = static_cast<Field1D *>( EMfields->Ey_ )->data();
+    const double *const __restrict__ Ez1D = static_cast<Field1D *>( EMfields->Ez_ )->data();
+    const double *const __restrict__ Bx1D = static_cast<Field1D *>( EMfields->Bx_m )->data();
+    const double *const __restrict__ By1D = static_cast<Field1D *>( EMfields->By_m )->data();
+    const double *const __restrict__ Bz1D = static_cast<Field1D *>( EMfields->Bz_m )->data();
+
+#if defined(SMILEI_OPENACC_MODE)
+    const int sizeofEx = EMfields->Ex_->size();
+    const int sizeofEy = EMfields->Ey_->size();
+    const int sizeofEz = EMfields->Ez_->size();
+    const int sizeofBx = EMfields->Bx_m->size();
+    const int sizeofBy = EMfields->By_m->size();
+    const int sizeofBz = EMfields->Bz_m->size();
+#endif

     //Loop on bin particles
-    int nparts = particles.numberOfParticles();
+    const int nparts = particles.numberOfParticles();
+    const int first_index = *istart;
+    const int last_index  = *iend;
+    double accdx_inv[2];
+    accdx_inv[0]= dx_inv_;
+    /*std::cout<< "printing before in interpolator ex, ey and ez then bx,by,bz" <<std::endl;*/
+    if (!smpi->use_BTIS3){ // without BTIS-3 interpolation
+        EMfields->Ex_->copyFromDeviceToHost();
+        EMfields->Ey_->copyFromDeviceToHost();
+        EMfields->Ez_->copyFromDeviceToHost();
+        EMfields->Jx_->copyFromDeviceToHost();
+        EMfields->Jy_->copyFromDeviceToHost();
+        EMfields->Jz_->copyFromDeviceToHost();
+    }
+    std::cout<< "printing before in interpolator after copyFromDeviceToHost ex, ey and ez then bx,by,bz" <<std::endl;
+
+    smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Epart[ithread] )[0*nparts] ), nparts );
+    smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Epart[ithread] )[1*nparts] ), nparts );
+    smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( (
smpi->dynamics_Epart[ithread] )[2*nparts] ), nparts );
+    smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[0*nparts] ), nparts );
+    smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[1*nparts] ), nparts );
+    smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[2*nparts] ), nparts );
+    smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_iold[ithread] )[0] ), nparts );
+    smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_deltaold[ithread] )[0]), nparts );
+
+    std::cout<<"print in interpolator fields wrapper eloc before computation and after CopyDeviceToHost"<<std::endl;
+
+    if (!smpi->use_BTIS3){
+        //for (int ipart=*istart; ipart < *iend; ipart++){
+#if defined( SMILEI_ACCELERATOR_GPU_OMP )
+        #pragma omp target map( to : i_domain_begin_) is_device_ptr (position_x)
+        #pragma omp teams distribute parallel for
+#elif defined(SMILEI_OPENACC_MODE)
+        #pragma acc enter data create(this)
+        #pragma acc update device(this)
+        size_t interpolation_range_size = ( last_index + 0 * nparts ) - first_index;
+        #pragma acc parallel present(ELoc [first_index:interpolation_range_size],\
+                                     BLoc [first_index:interpolation_range_size],\
+                                     iold [first_index:interpolation_range_size],\
+                                     delta [first_index:interpolation_range_size],\
+                                     Ex1D [0:sizeofEx],\
+                                     Ey1D [0:sizeofEy],\
+                                     Ez1D [0:sizeofEz],\
+                                     Bx1D [0:sizeofBx],\
+                                     By1D [0:sizeofBy],\
+                                     Bz1D [0:sizeofBz])\
+                             deviceptr(position_x) \
+                             copyin(accdx_inv[0:2]) //copyin(dx_inv_[:1]) //copyin(dx_inv_)
+        #pragma acc loop gang worker vector
+#endif
+        for( int ipart = first_index; ipart < last_index; ipart++ ) {
+            // Normalized particle position
+            //double xpn = position_x[ipart] * dx_inv_;//particles.position( 0, ipart )*dx_inv_;
+            const double xpn = position_x[ipart] * accdx_inv[0];
+            // Calculate coeffs
+            int idx_p[1], idx_d[1];
+            double delta_p[1];
+            double coeffxp[3];
+            double coeffxd[3];
+
+            coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p );
+
+            // Interpolation of Ex^(d)
+            ELoc[0*nparts+ipart] = compute( &coeffxd[0], Ex1D, idx_d[0] );
+            // Interpolation of Ey^(p)
+            ELoc[1*nparts+ipart] = compute( &coeffxp[0], Ey1D, idx_p[0] );
+            // Interpolation of Ez^(p)
+            ELoc[2*nparts+ipart] = compute( &coeffxp[0], Ez1D, idx_p[0] );
+            // Interpolation of Bx^(p)
+            BLoc[0*nparts+ipart] = compute( &coeffxp[0], Bx1D, idx_p[0] );
+            // Interpolation of By^(d)
+            BLoc[1*nparts+ipart] = compute( &coeffxd[0], By1D, idx_d[0] );
+            // Interpolation of Bz^(d)
+            BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] );
+
+            //Buffering of iold and delta
+            iold[0*nparts+ipart]  = idx_p[0];
+            delta[0*nparts+ipart] = delta_p[0];
+
+        } // end ipart loop
+#if defined(SMILEI_OPENACC_MODE)
+        #pragma acc exit data delete(this)
+#endif
+
+    } else { // with B-TIS3 interpolation
+        double *const __restrict__ BypartBTIS3 = smpi->dynamics_Bpart_yBTIS3[ithread].data();
+        double *const __restrict__ BzpartBTIS3 = smpi->dynamics_Bpart_zBTIS3[ithread].data();
+        const double *const __restrict__ By1D_mBTIS3 = static_cast<Field1D *>( EMfields->By_mBTIS3 )->data();
+        const double *const __restrict__ Bz1D_mBTIS3 = static_cast<Field1D *>( EMfields->Bz_mBTIS3 )->data();
+#if defined( SMILEI_ACCELERATOR_GPU_OMP )
+        #pragma omp target map( to : i_domain_begin_) is_device_ptr ( position_x)
+        #pragma omp teams distribute parallel for
+#elif defined(SMILEI_OPENACC_MODE)
+        #pragma acc enter data create(this)
+        #pragma acc update device(this)
+        size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index;
+        #pragma acc parallel present(ELoc [first_index:interpolation_range_size],\
+                                     BLoc [first_index:interpolation_range_size],\
+                                     BypartBTIS3 [first_index:interpolation_range_size],\
+                                     BzpartBTIS3 [first_index:interpolation_range_size],\
+                                     iold [first_index:interpolation_range_size],\
+                                     delta [first_index:interpolation_range_size],\
+                                     Ex1D [0:sizeofEx],\
+                                     Ey1D [0:sizeofEy],\
+                                     Ez1D [0:sizeofEz],\
+                                     Bx1D [0:sizeofBx],\
+                                     By1D [0:sizeofBy],\
+                                     Bz1D [0:sizeofBz],\
+                                     By1D_mBTIS3 [0:sizeofEz],\
+                                     Bz1D_mBTIS3 [0:sizeofEy])\
+                             deviceptr(position_x) \
+                             copyin(dx_inv_)
+        #pragma acc loop gang worker vector
+#endif
+
+        // would it be possible to just use another #pragma acc parallel present( ... )
+        // for By1D_mBTIS3 [0:sizeofEz], Bz1D_mBTIS3 [0:sizeofEy],
+        //     BypartBTIS3 [first_index:interpolation_range_size],
+        //     BzpartBTIS3 [first_index:interpolation_range_size] ?
+
+        /* Field1D *By1D_mBTIS3 = static_cast<Field1D *>( EMfields->By_mBTIS3 );
+        Field1D *Bz1D_mBTIS3 = static_cast<Field1D *>( EMfields->Bz_mBTIS3 );
+        double *BypartBTIS3 = &( smpi->dynamics_Bpart_yBTIS3[ithread][0] );
+        double *BzpartBTIS3 = &( smpi->dynamics_Bpart_zBTIS3[ithread][0] );*/
+
         for (int ipart=*istart; ipart < *iend; ipart++){

             // Normalized particle position
-            double xpn = particles.position( 0, ipart )*dx_inv_;
+            double xpn = position_x[ipart] * dx_inv_;//particles.position( 0, ipart )*dx_inv_;

             // Calculate coeffs
             int idx_p[1], idx_d[1];
             double delta_p[1];
             double coeffxp[3];
             double coeffxd[3];
+
             coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p );

             // Interpolation of Ex^(d)
-            *( Epart+0*nparts+ipart ) = compute( coeffxd, Ex1D, idx_d[0] );
+            ELoc[0*nparts+ipart] = compute( coeffxd, Ex1D, idx_d[0] );
             // Interpolation of Ey^(p)
-            *( Epart+1*nparts+ipart ) = compute( coeffxp, Ey1D, idx_p[0] );
+            ELoc[1*nparts+ipart] = compute( coeffxp, Ey1D, idx_p[0] );
             // Interpolation of Ez^(p)
-            *( Epart+2*nparts+ipart ) = compute( coeffxp, Ez1D, idx_p[0] );
+            ELoc[2*nparts+ipart] = compute( coeffxp, Ez1D, idx_p[0] );
             // Interpolation of Bx^(p)
-            *( Bpart+0*nparts+ipart ) = compute( coeffxp, Bx1D, idx_p[0] );
+            BLoc[0*nparts+ipart] = compute( coeffxp, Bx1D, idx_p[0] );
             // Interpolation of By^(d)
-            *( Bpart+1*nparts+ipart ) = compute( coeffxd, By1D, idx_d[0] );
+            BLoc[1*nparts+ipart] = compute( coeffxd, By1D, idx_d[0] );
             // Interpolation of Bz^(d)
-            *( Bpart+2*nparts+ipart ) = compute( coeffxd, Bz1D, idx_d[0] );
+            BLoc[2*nparts+ipart] = compute( coeffxd, Bz1D, idx_d[0] );
+            // Interpolation of ByBTIS3^(p)
+            BypartBTIS3[0*nparts+ipart ] = compute( coeffxp, By1D_mBTIS3, idx_p[0] );
+            // Interpolation of BzBTIS3^(p)
+            BzpartBTIS3[0*nparts+ipart ] = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] );

             //Buffering of iold and delta
-            *( iold+0*nparts+ipart) = idx_p[0];
-            *( delta+0*nparts+ipart) = delta_p[0];
+            iold[0*nparts+ipart]  = idx_p[0];
+            delta[0*nparts+ipart] = delta_p[0];

         } // end ipart loop

-    } else { // with B-TIS3 interpolation
-
-        Field1D *By1D_mBTIS3 = static_cast<Field1D *>( EMfields->By_mBTIS3 );
-        Field1D *Bz1D_mBTIS3 = static_cast<Field1D *>( EMfields->Bz_mBTIS3 );
-        double *BypartBTIS3 = &( smpi->dynamics_Bpart_yBTIS3[ithread][0] );
-        double *BzpartBTIS3 = &( smpi->dynamics_Bpart_zBTIS3[ithread][0] );
-
-        for (int ipart=*istart; ipart < *iend; ipart++){
-
-            // Normalized particle position
-            double xpn = particles.position( 0, ipart )*dx_inv_;
-
-            // Calculate coeffs
-            int idx_p[1], idx_d[1];
-            double delta_p[1];
-            double coeffxp[3];
-            double coeffxd[3];
-
-            coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p );
-
-            // Interpolation of Ex^(d)
-            *( Epart+0*nparts+ipart ) = compute( coeffxd, Ex1D, idx_d[0] );
-            // Interpolation of Ey^(p)
-            *( Epart+1*nparts+ipart ) = compute( coeffxp, Ey1D, idx_p[0] );
-            // Interpolation of Ez^(p)
-            *( Epart+2*nparts+ipart ) = compute( coeffxp, Ez1D, idx_p[0] );
-            // Interpolation of Bx^(p)
-            *( Bpart+0*nparts+ipart ) = compute( coeffxp, Bx1D, idx_p[0] );
-            // Interpolation of By^(d)
-            *( Bpart+1*nparts+ipart ) = compute( coeffxd, By1D, idx_d[0] );
-            // Interpolation of Bz^(d)
-            *( Bpart+2*nparts+ipart ) = compute( coeffxd, Bz1D, idx_d[0] );
-            // Interpolation of ByBTIS3^(p)
-            *( BypartBTIS3+0*nparts ) = compute( coeffxp, By1D_mBTIS3, idx_p[0] );
-            // Interpolation of BzBTIS3^(p)
-            *( BzpartBTIS3+0*nparts ) = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] );
-
-            //Buffering of iold and delta
-            *( iold+0*nparts+ipart) = idx_p[0];
-            *( delta+0*nparts+ipart) = delta_p[0];
-
-        } // end ipart loop
-
+#if defined(SMILEI_OPENACC_MODE)
+        #pragma acc exit data delete(this)
+#endif
+    } // end with B-TIS interpolation
+
+    /*{
+        EMfields->Ex_->copyFromDeviceToHost();
+        EMfields->Ey_->copyFromDeviceToHost();
+        EMfields->Ez_->copyFromDeviceToHost();
     }
+    double *const __restrict__ ELoc = smpi->dynamics_Epart[ithread].data();//&( smpi->dynamics_Epart[ithread][0] );
+    double *const __restrict__ BLoc = smpi->dynamics_Bpart[ithread].data();//&( smpi->dynamics_Bpart[ithread][0] );
+    */
+    }
+    // to be deleted
+    {
+    const int nparts = particles.numberOfParticles();
+    double *const __restrict__ ELoc = smpi->dynamics_Epart[ithread].data();//&( smpi->dynamics_Epart[ithread][0] );
+    double *const __restrict__ BLoc = smpi->dynamics_Bpart[ithread].data();//&( smpi->dynamics_Bpart[ithread][0] );
+    std::cout<< std::setprecision (15)<<"print in interpolator fields wrapper eloc before CopyDeviceToHost"<<std::endl;
+    {
+        smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Epart[ithread] )[0*nparts] ), nparts );
+        smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Epart[ithread] )[1*nparts] ), nparts );
+        smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Epart[ithread] )[2*nparts] ), nparts );
+        smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[0*nparts] ), nparts );
+        smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[1*nparts] ), nparts );
+        smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[2*nparts] ), nparts );
+    }
+    std::cout<<"print in interpolator fields wrapper eloc after CopyDeviceToHost"<<std::endl;
+    }
 }
diff --git a/src/Interpolator/Interpolator1D2Order.h b/src/Interpolator/Interpolator1D2Order.h
--- a/src/Interpolator/Interpolator1D2Order.h
+++ b/src/Interpolator/Interpolator1D2Order.h
@@ ... @@ class Interpolator1D2Order final : public Interpolator1D
     void fieldsSelection( ElectroMagn *EMfields, Particles &particles, double *buffer, int offset, std::vector<unsigned int> *selection ) override final;
     void oneField( Field **field, Particles &particles, int *istart, int *iend, double *FieldLoc, double *l1=NULL, double *l2=NULL, double *l3=NULL ) override final;

-    inline double __attribute__((always_inline)) compute( double *coeff, Field1D *f, int idx )
+    inline double __attribute__((always_inline))
+    compute( double *coeff, Field1D *f, int idx )
     {
         double interp_res = coeff[0] * ( *f )( idx-1 ) + coeff[1] * ( *f )( idx ) + coeff[2] * ( *f )( idx+1 );
         return interp_res;
-    };
+    }
+
+    SMILEI_ACCELERATOR_DECLARE_ROUTINE
+    static inline double __attribute__((always_inline))
+    compute( const double *__restrict__ coeff,
+             const double *__restrict__ f,
+             int idx )
+    {
+        double interp_res = coeff[0] * f[idx-1] + coeff[1] * f[idx] + coeff[2] * f[idx+1];
+        return interp_res;
+    }
+    SMILEI_ACCELERATOR_DECLARE_ROUTINE_END
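The two coeffs() overloads that follow build the same quadratic shape factors, once around the primal node and once around the dual node (shifted by half a cell). A minimal standalone version of the shared math, mirroring the formulas in the patch:

    #include <cmath>

    // Quadratic (2nd-order) shape coefficients around the nearest node:
    // delta is the particle's distance to that node in cell units, and the
    // three weights apply to nodes idx-1, idx, idx+1. For the dual grid,
    // evaluate with xpn + 0.5 instead of xpn.
    inline void shape3( double xpn, int &idx, double coeff[3] )
    {
        idx = static_cast<int>( std::round( xpn ) );
        const double delta  = xpn - static_cast<double>( idx );
        const double delta2 = delta * delta;
        coeff[0] = 0.5 * ( delta2 - delta + 0.25 );
        coeff[1] = 0.75 - delta2;
        coeff[2] = 0.5 * ( delta2 + delta + 0.25 );
    }

The weights sum to 1 for any delta in [-0.5, 0.5], which is what guarantees charge conservation of the gather.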
    void fieldsAndEnvelope( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, int ipart_ref = 0 ) override final;
     void timeCenteredEnvelope( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, int ipart_ref = 0 ) override final;
@@ -34,38 +48,82 @@ class Interpolator1D2Order final : public Interpolator1D
     void envelopeFieldForIonization( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, int ipart_ref = 0 ) override final;

 private:
-    inline void coeffs( double xpn, int* idx_p, int* idx_d,
-                        double *coeffxp, double *coeffxd, double* delta_p )
+    inline void __attribute__((always_inline)) coeffs( double xjn )
+    {
+        double xjmxi2;
+
+        // Dual
+        id_ = std::round( xjn + 0.5 );                  // index of the central point
+        xjmxi  = xjn - static_cast<double>(id_) + 0.5;  // normalized distance to the central node
+        xjmxi2 = xjmxi*xjmxi;                           // square of the normalized distance to the central node
+
+        // 2nd order interpolation on 3 nodes
+        coeffd_[0] = 0.5 * ( xjmxi2-xjmxi + 0.25 );
+        coeffd_[1] = ( 0.75 - xjmxi2 );
+        coeffd_[2] = 0.5 * ( xjmxi2+xjmxi + 0.25 );
+
+        id_ -= i_domain_begin_;
+
+        // Primal
+        ip_ = std::round( xjn );                        // index of the central point
+        xjmxi  = xjn - static_cast<double>(ip_);        // normalized distance to the central node
+        xjmxi2 = xjmxi * xjmxi;                         // square of the normalized distance to the central node
+
+        // 2nd order interpolation on 3 nodes
+        coeffp_[0] = 0.5 * ( xjmxi2 - xjmxi + 0.25 );
+        coeffp_[1] = ( 0.75 - xjmxi2 );
+        coeffp_[2] = 0.5 * ( xjmxi2 + xjmxi + 0.25 );
+
+        ip_ -= i_domain_begin_;
+    }
+
+    // 2nd order interpolation on 3 nodes
+    SMILEI_ACCELERATOR_DECLARE_ROUTINE
+    inline void __attribute__( ( always_inline ) )
+    coeffs( double xpn, int* idx_p, int* idx_d,
+            double *coeffxp, double *coeffxd, double* delta_p ) const
     {
         double delta, delta2;

-        // Primal
-        idx_p[0] = round( xpn );                 // index of the central point
-        delta_p[0] = xpn -( double )idx_p[0];    // normalized distance to the central node
-        delta2 = pow( delta_p[0], 2 );           // square of the normalized distance to the central node
+        // index of the central point
+        idx_p[0] = std::round( xpn );
+        idx_d[0] = std::round( xpn + 0.5 );
+
+        delta  = xpn - static_cast<double>( idx_d[0] ) + 0.5;  // normalized distance to the central node
+        delta2 = delta * delta;                                // square of the normalized distance to the central node

-        // 2nd order interpolation on 3 nodes
-        coeffxp[0] = 0.5 * ( delta2-delta_p[0]+0.25 );
-        coeffxp[1] = ( 0.75-delta2 );
-        coeffxp[2] = 0.5 * ( delta2+delta_p[0]+0.25 );
+        coeffxd[0] = 0.5 * ( delta2 - delta + 0.25 );
+        coeffxd[1] = ( 0.75 - delta2 );
+        coeffxd[2] = 0.5 * ( delta2 + delta + 0.25 );
+
+        delta  = xpn - static_cast<double>( idx_p[0] );
+        delta2 = delta * delta;  // square of the normalized distance to the central node

-        idx_p[0] -= index_domain_begin;
-
-        if(idx_d){
-            // Dual
-            idx_d[0] = round( xpn+0.5 );            // index of the central point
-            delta  = xpn - ( double )idx_d[0] +0.5; // normalized distance to the central node
-            delta2 = delta*delta;                   // square of the normalized distance to the central node
-
-            // 2nd order interpolation on 3 nodes
-            coeffxd[0] = 0.5 * ( delta2-delta+0.25 );
-            coeffxd[1] = ( 0.75-delta2 );
-            coeffxd[2] = 0.5 * ( delta2+delta+0.25 );
-
-            idx_d[0] -= index_domain_begin;
-        }
+        // note: these weights must use the freshly computed delta, not the
+        // not-yet-assigned delta_p[0]
+        coeffxp[0] = 0.5 * ( delta2 - delta + 0.25 );
+        coeffxp[1] = ( 0.75 - delta2 );
+        coeffxp[2] = 0.5 * ( delta2 + delta + 0.25 );
+
+        delta_p[0] = delta;  // normalized distance to the central node
+
+        idx_p[0] = idx_p[0] - i_domain_begin_;
+        idx_d[0] =
idx_d[0] - i_domain_begin_; } + SMILEI_ACCELERATOR_DECLARE_ROUTINE_END + // Last prim index computed + int ip_; + // Last dual index computed + int id_; + // Last delta computed + double xjmxi; + // Interpolation coefficient on Prim grid + double coeffp_[3]; + // Interpolation coefficient on Dual grid + double coeffd_[3]; + };//END class diff --git a/src/Interpolator/Interpolator1D2OrderV.cpp b/src/Interpolator/Interpolator1D2OrderV.cpp old mode 100644 new mode 100755 index 31c3b7d4c..2b99cc66b --- a/src/Interpolator/Interpolator1D2OrderV.cpp +++ b/src/Interpolator/Interpolator1D2OrderV.cpp @@ -176,7 +176,7 @@ void Interpolator1D2OrderV::fieldsWrapper( ElectroMagn *EMfields, Particles &par coeffd[1] = ( 0.75-xjmxi2 ); coeffd[2] = 0.5 * ( xjmxi2+xjmxi+0.25 ); - idx -= index_domain_begin; + idx -= i_domain_begin_; // Primal ipx = round( xjn ); // index of the central point @@ -188,7 +188,7 @@ void Interpolator1D2OrderV::fieldsWrapper( ElectroMagn *EMfields, Particles &par coeffp[1] = ( 0.75-xjmxi2 ); coeffp[2] = 0.5 * ( xjmxi2+xjmxi+0.25 ); - ipx -= index_domain_begin; + ipx -= i_domain_begin_; // // Interpolate the fields from the Dual grid : Ex, By, Bz Epart_x[ipart] = coeffd[0] * Ex[idx-1] + coeffd[1] * Ex[idx] + coeffd[2] * Ex[idx+1]; @@ -329,7 +329,7 @@ void Interpolator1D2OrderV::timeCenteredEnvelope( ElectroMagn *EMfields, Particl //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Phiold^(p) @@ -388,7 +388,7 @@ void Interpolator1D2OrderV::envelopeAndSusceptibility( ElectroMagn *EMfields, Pa //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Env_A_abs_^(p) @@ -441,7 +441,7 @@ void Interpolator1D2OrderV::envelopeFieldForIonization( ElectroMagn *EMfields, P //!\todo CHECK if this is correct for both primal & dual grids !!! 
// First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // --------------------------------- // Interpolation of Env_E_abs^(p) diff --git a/src/Interpolator/Interpolator1D2OrderV.h b/src/Interpolator/Interpolator1D2OrderV.h old mode 100644 new mode 100755 index b7dce6588..7c72f9ca2 --- a/src/Interpolator/Interpolator1D2OrderV.h +++ b/src/Interpolator/Interpolator1D2OrderV.h @@ -48,7 +48,7 @@ class Interpolator1D2OrderV final : public Interpolator1D coeffd_[1] = ( 0.75-xjmxi2 ); coeffd_[2] = 0.5 * ( xjmxi2+xjmxi+0.25 ); - id_ -= index_domain_begin; + id_ -= i_domain_begin_; // Primal ip_ = round( xjn ); // index of the central point @@ -60,7 +60,7 @@ class Interpolator1D2OrderV final : public Interpolator1D coeffp_[1] = ( 0.75-xjmxi2 ); coeffp_[2] = 0.5 * ( xjmxi2+xjmxi+0.25 ); - ip_ -= index_domain_begin; + ip_ -= i_domain_begin_; } // Last prim index computed diff --git a/src/Interpolator/Interpolator1D3Order.h b/src/Interpolator/Interpolator1D3Order.h index e9c821925..3228ed39b 100755 --- a/src/Interpolator/Interpolator1D3Order.h +++ b/src/Interpolator/Interpolator1D3Order.h @@ -42,7 +42,7 @@ class Interpolator1D3Order final : public Interpolator1D coeffd_[2] = dble_1ov6 + 0.5*( xi+xi2-xi3 ); coeffd_[3] = xi3*dble_1ov6; - id_ -= index_domain_begin; + id_ -= i_domain_begin_; // Primal ip_ = ( int )xjn; // index of the 2nd node @@ -56,7 +56,7 @@ class Interpolator1D3Order final : public Interpolator1D coeffp_[2] = dble_1ov6 + 0.5*( xi+xi2-xi3 ); coeffp_[3] = xi3*dble_1ov6; - ip_ -= index_domain_begin; + ip_ -= i_domain_begin_; } inline void coeffs( double xpn, int* idx_p, int* idx_d, @@ -77,7 +77,7 @@ class Interpolator1D3Order final : public Interpolator1D coeffxd[2] = dble_1ov6 + 0.5*( xi+xi2-xi3 ); coeffxd[3] = xi3*dble_1ov6; - idx_d[0] -= index_domain_begin; + idx_d[0] -= i_domain_begin_; // Primal idx_p[0] = ( int )xpn; // index of the 2nd node @@ -92,7 +92,7 @@ class Interpolator1D3Order final : public Interpolator1D coeffxp[2] = dble_1ov6 + 0.5*( xi+xi2-xi3 ); coeffxp[3] = xi3*dble_1ov6; - idx_p[0] -= index_domain_begin; + idx_p[0] -= i_domain_begin_; } // Last prim index computed diff --git a/src/Interpolator/Interpolator1D4Order.h b/src/Interpolator/Interpolator1D4Order.h index f8bd48ee4..7bca2b949 100755 --- a/src/Interpolator/Interpolator1D4Order.h +++ b/src/Interpolator/Interpolator1D4Order.h @@ -33,12 +33,64 @@ class Interpolator1D4Order final : public Interpolator1D void envelopeAndSusceptibility( ElectroMagn *EMfields, Particles &particles, int ipart, double *Env_A_abs_Loc, double *Env_Chi_Loc, double *Env_E_abs_Loc, double *Env_Ex_abs_Loc ) override final; private: + inline void __attribute__((always_inline)) coeffs( double xjn ) + { + double xjmxi2, xjmxi3, xjmxi4; + + // Dual + id_ = round( xjn+0.5 ); // index of the central point + xjmxi = xjn -( double )id_+0.5; // normalized distance to the central node + xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the central node + xjmxi3 = xjmxi2*xjmxi; // cube of the normalized distance to the central node + xjmxi4 = xjmxi3*xjmxi; // 4th power of the normalized distance to the central node + + // coefficients for the 4th order interpolation on 5 nodes + coeffd_[0] = dble_1_ov_384 - dble_1_ov_48 * xjmxi + dble_1_ov_16 * xjmxi2 - dble_1_ov_12 * xjmxi3 + dble_1_ov_24 * xjmxi4; + coeffd_[1] = dble_19_ov_96 - dble_11_ov_24 * xjmxi + dble_1_ov_4 * xjmxi2 + dble_1_ov_6 * xjmxi3 - dble_1_ov_6 * xjmxi4; + coeffd_[2] = dble_115_ov_192 - dble_5_ov_8 * xjmxi2 + dble_1_ov_4 * xjmxi4; + 
coeffd_[3] = dble_19_ov_96 + dble_11_ov_24 * xjmxi + dble_1_ov_4 * xjmxi2 - dble_1_ov_6 * xjmxi3 - dble_1_ov_6 * xjmxi4; + coeffd_[4] = dble_1_ov_384 + dble_1_ov_48 * xjmxi + dble_1_ov_16 * xjmxi2 + dble_1_ov_12 * xjmxi3 + dble_1_ov_24 * xjmxi4; + + id_ -= i_domain_begin_; + + // Primal + ip_ = round( xjn ); // index of the central point + xjmxi = xjn -( double )ip_; // normalized distance to the central node + xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the central node + xjmxi3 = xjmxi2*xjmxi; // cube of the normalized distance to the central node + xjmxi4 = xjmxi3*xjmxi; // 4th power of the normalized distance to the central node + + // coefficients for the 4th order interpolation on 5 nodes + coeffp_[0] = dble_1_ov_384 - dble_1_ov_48 * xjmxi + dble_1_ov_16 * xjmxi2 - dble_1_ov_12 * xjmxi3 + dble_1_ov_24 * xjmxi4; + coeffp_[1] = dble_19_ov_96 - dble_11_ov_24 * xjmxi + dble_1_ov_4 * xjmxi2 + dble_1_ov_6 * xjmxi3 - dble_1_ov_6 * xjmxi4; + coeffp_[2] = dble_115_ov_192 - dble_5_ov_8 * xjmxi2 + dble_1_ov_4 * xjmxi4; + coeffp_[3] = dble_19_ov_96 + dble_11_ov_24 * xjmxi + dble_1_ov_4 * xjmxi2 - dble_1_ov_6 * xjmxi3 - dble_1_ov_6 * xjmxi4; + coeffp_[4] = dble_1_ov_384 + dble_1_ov_48 * xjmxi + dble_1_ov_16 * xjmxi2 + dble_1_ov_12 * xjmxi3 + dble_1_ov_24 * xjmxi4; + + ip_ -= i_domain_begin_; + } + inline void coeffs( double xpn, int* idx_p, int* idx_d, double *coeffxp, double *coeffxd, double* delta_p ) { double delta, delta2, delta3, delta4 ; - - + + // Dual + idx_d[0] = round( xpn+0.5 ); // index of the central point + delta = xpn -( double )idx_d[0]+0.5; // normalized distance to the central node + delta2 = delta*delta; // square of the normalized distance to the central node + delta3 = delta2*delta; // cube of the normalized distance to the central node + delta4 = delta3*delta; // 4th power of the normalized distance to the central node + + // coefficients for the 4th order interpolation on 5 nodes + coeffxd[0] = dble_1_ov_384 - dble_1_ov_48 * delta + dble_1_ov_16 * delta2 - dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; + coeffxd[1] = dble_19_ov_96 - dble_11_ov_24 * delta + dble_1_ov_4 * delta2 + dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; + coeffxd[2] = dble_115_ov_192 - dble_5_ov_8 * delta2 + dble_1_ov_4 * delta4; + coeffxd[3] = dble_19_ov_96 + dble_11_ov_24 * delta + dble_1_ov_4 * delta2 - dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; + coeffxd[4] = dble_1_ov_384 + dble_1_ov_48 * delta + dble_1_ov_16 * delta2 + dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; + + idx_d[0] -= i_domain_begin_; + // Primal idx_p[0] = round( xpn ); // index of the central point delta_p[0] = xpn -( double )idx_p[0]; // normalized distance to the central node @@ -53,25 +105,7 @@ class Interpolator1D4Order final : public Interpolator1D coeffxp[3] = dble_19_ov_96 + dble_11_ov_24 * delta_p[0] + dble_1_ov_4 * delta2 - dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; coeffxp[4] = dble_1_ov_384 + dble_1_ov_48 * delta_p[0] + dble_1_ov_16 * delta2 + dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; - idx_p[0] -= index_domain_begin; - - if(idx_d){ - // Dual - idx_d[0] = round( xpn+0.5 ); // index of the central point - delta = xpn -( double )idx_d[0]+0.5; // normalized distance to the central node - delta2 = delta*delta; // square of the normalized distance to the central node - delta3 = delta2*delta; // cube of the normalized distance to the central node - delta4 = delta3*delta; // 4th power of the normalized distance to the central node - - // coefficients for the 4th order interpolation on 5 nodes - 
coeffxd[0] = dble_1_ov_384 - dble_1_ov_48 * delta + dble_1_ov_16 * delta2 - dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; - coeffxd[1] = dble_19_ov_96 - dble_11_ov_24 * delta + dble_1_ov_4 * delta2 + dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; - coeffxd[2] = dble_115_ov_192 - dble_5_ov_8 * delta2 + dble_1_ov_4 * delta4; - coeffxd[3] = dble_19_ov_96 + dble_11_ov_24 * delta + dble_1_ov_4 * delta2 - dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; - coeffxd[4] = dble_1_ov_384 + dble_1_ov_48 * delta + dble_1_ov_16 * delta2 + dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; - - idx_d[0] -= index_domain_begin; - } + idx_p[0] -= i_domain_begin_; } double dble_1_ov_384 ; @@ -86,6 +120,18 @@ class Interpolator1D4Order final : public Interpolator1D double dble_115_ov_192 ; double dble_5_ov_8 ; + // Last prim index computed + int ip_; + // Last dual index computed + int id_; + // Last delta computed + double xjmxi; + // Interpolation coefficient on Prim grid + double coeffp_[5]; + // Interpolation coefficient on Dual grid + double coeffd_[5]; + + };//END class #endif diff --git a/src/Interpolator/Interpolator1DWT2Order.cpp b/src/Interpolator/Interpolator1DWT2Order.cpp index 2ba3881b5..4bc058096 100755 --- a/src/Interpolator/Interpolator1DWT2Order.cpp +++ b/src/Interpolator/Interpolator1DWT2Order.cpp @@ -239,7 +239,7 @@ void Interpolator1DWT2Order::timeCenteredEnvelope( ElectroMagn *EMfields, Partic //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Phiold^(p) @@ -298,7 +298,7 @@ void Interpolator1DWT2Order::envelopeAndSusceptibility( ElectroMagn *EMfields, P //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Env_A_abs_^(p) @@ -351,7 +351,7 @@ void Interpolator1DWT2Order::envelopeFieldForIonization( ElectroMagn *EMfields, //!\todo CHECK if this is correct for both primal & dual grids !!! 
// First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // --------------------------------- // Interpolation of Env_E_abs^(p) diff --git a/src/Interpolator/Interpolator1DWT2Order.h b/src/Interpolator/Interpolator1DWT2Order.h index 19ea0ee7d..ff45230cf 100755 --- a/src/Interpolator/Interpolator1DWT2Order.h +++ b/src/Interpolator/Interpolator1DWT2Order.h @@ -47,7 +47,7 @@ class Interpolator1DWT2Order final : public Interpolator1D coeffd_[1] = ( 0.75-var1 ); coeffd_[2] = 0.5 * ( var1+xjmxi+0.25 ); - id_ -= index_domain_begin; + id_ -= i_domain_begin_; // Primal ip_ = round( xjn ); // index of the central point @@ -65,7 +65,7 @@ class Interpolator1DWT2Order final : public Interpolator1D coeffpt_[1] = 1.0 - 2.0 * var1; coeffpt_[2] = var1 + 0.5 * xjmxi; - ip_ -= index_domain_begin; + ip_ -= i_domain_begin_; } // Coefficients for WT diff --git a/src/Interpolator/Interpolator1DWT2OrderV.cpp b/src/Interpolator/Interpolator1DWT2OrderV.cpp index c64433035..40dd63589 100755 --- a/src/Interpolator/Interpolator1DWT2OrderV.cpp +++ b/src/Interpolator/Interpolator1DWT2OrderV.cpp @@ -178,7 +178,7 @@ void Interpolator1DWT2OrderV::fieldsWrapper( ElectroMagn *EMfields, Particles &p coeffd[1] = ( 0.75-var1 ); coeffd[2] = 0.5 * ( var1+xjmxi+0.25 ); - idx -= index_domain_begin; + idx -= i_domain_begin_; // Primal ipx = round( xjn ); // index of the central point @@ -190,7 +190,7 @@ void Interpolator1DWT2OrderV::fieldsWrapper( ElectroMagn *EMfields, Particles &p coeffpt[1] = 1.0 - 2.0 * var1; coeffpt[2] = var1 + 0.5 * xjmxi; - ipx -= index_domain_begin; + ipx -= i_domain_begin_; // // Interpolate the fields from the Dual grid : Ex, By, Bz Epart_x[ipart] = coeffd[0] * Ex[idx-1] + coeffd[1] * Ex[idx] + coeffd[2] * Ex[idx+1]; @@ -331,7 +331,7 @@ void Interpolator1DWT2OrderV::timeCenteredEnvelope( ElectroMagn *EMfields, Parti //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Phiold^(p) @@ -390,7 +390,7 @@ void Interpolator1DWT2OrderV::envelopeAndSusceptibility( ElectroMagn *EMfields, //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Env_A_abs_^(p) @@ -443,7 +443,7 @@ void Interpolator1DWT2OrderV::envelopeFieldForIonization( ElectroMagn *EMfields, //!\todo CHECK if this is correct for both primal & dual grids !!! 
    // First index for summation
-    ip_ = ip_ - index_domain_begin;
+    ip_ = ip_ - i_domain_begin_;

     // ---------------------------------
     // Interpolation of Env_E_abs^(p)
diff --git a/src/Interpolator/Interpolator1DWT2OrderV.h b/src/Interpolator/Interpolator1DWT2OrderV.h
index 87a083fa5..4f20849c1 100755
--- a/src/Interpolator/Interpolator1DWT2OrderV.h
+++ b/src/Interpolator/Interpolator1DWT2OrderV.h
@@ -48,7 +48,7 @@ class Interpolator1DWT2OrderV final : public Interpolator1D
         coeffd_[1] = ( 0.75-var1 );
         coeffd_[2] = 0.5 * ( var1+xjmxi+0.25 );

-        id_ -= index_domain_begin;
+        id_ -= i_domain_begin_;

         // Primal
         ip_ = round( xjn );    // index of the central point
@@ -66,7 +66,7 @@ class Interpolator1DWT2OrderV final : public Interpolator1D
         coeffpt_[1] = 1.0 - 2.0 * var1;
         coeffpt_[2] = var1 + 0.5 * xjmxi;

-        ip_ -= index_domain_begin;
+        ip_ -= i_domain_begin_;
     }

     // Coefficients for WT
diff --git a/src/Interpolator/Interpolator1DWT4Order.h b/src/Interpolator/Interpolator1DWT4Order.h
index dd5e78b13..6bc889885 100755
--- a/src/Interpolator/Interpolator1DWT4Order.h
+++ b/src/Interpolator/Interpolator1DWT4Order.h
@@ -55,7 +55,7 @@ class Interpolator1DWT4Order final : public Interpolator1D
         coeffd_[3] = dble_19_ov_96 + var1 + var3 * ( 1.5-xjmxi -var2 );
         coeffd_[4] = dble_1_ov_24 * var5 * var5;

-        id_ -= index_domain_begin;
+        id_ -= i_domain_begin_;

         // Primal
         ip_ = round( xjn );    // index of the central point
@@ -94,7 +94,7 @@ class Interpolator1DWT4Order final : public Interpolator1D

         coeffpt_[4] = var3 + var2 - var1;

-        ip_ -= index_domain_begin;
+        ip_ -= i_domain_begin_;
     }

     double dble_1_ov_6 ;
diff --git a/src/Interpolator/InterpolatorFactory.h b/src/Interpolator/InterpolatorFactory.h
index f2cbd7c19..37e1042fb 100755
--- a/src/Interpolator/InterpolatorFactory.h
+++ b/src/Interpolator/InterpolatorFactory.h
@@ -48,12 +48,22 @@ class InterpolatorFactory
         // 1Dcartesian simulation
         // ---------------
         if( ( params.geometry == "1Dcartesian" ) && ( params.interpolation_order == 2 ) ) {
+            if( !vectorization ) {
+                if ( params.interpolator_ == "momentum-conserving" ) {
+                    Interp = new Interpolator1D2Order( params, patch );
+                }
+                else if ( params.interpolator_ == "wt" ) {
+                    Interp = new Interpolator1DWT2Order( params, patch );
+                }
+            }
+            else {
                 if ( params.interpolator_ == "momentum-conserving" ) {
                     Interp = new Interpolator1D2OrderV( params, patch );
                 }
                 else if ( params.interpolator_ == "wt" ) {
                     Interp = new Interpolator1DWT2OrderV( params, patch );
                 }
+            }
         } else if( ( params.geometry == "1Dcartesian" ) && ( params.interpolation_order == 4 ) ) {
             if( params.interpolator_ == "momentum-conserving" ) {
                 Interp = new Interpolator1D4Order( params, patch );
diff --git a/src/Params/Params.h b/src/Params/Params.h
index e2b0603e6..41896f7e9 100755
--- a/src/Params/Params.h
+++ b/src/Params/Params.h
@@ -407,7 +407,7 @@ class Params
         //#if defined( SMILEI_ACCELERATOR_GPU_OMP )
         switch( dimension_id ) {
             case 1:
-                return -1;
+                return 4; // check for optimal value
             case 2:
                 return 4;
             case 3:
diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu
old mode 100644
new mode 100755
index d7a63f0b3..16941b152
--- a/src/Particles/nvidiaParticles.cu
+++ b/src/Particles/nvidiaParticles.cu
@@ -12,10 +12,9 @@
 #include
 #include
 #include
-#include <thrust/remove.h>
-#include <thrust/sort.h>
-#include <thrust/count.h>
-
+#include <thrust/remove.h> // for thrust::remove_if
+#include <thrust/sort.h>   // for thrust::sort_by_key
+#include <thrust/count.h>  // for thrust::count_if
 #include "Patch.h"
 #include "gpu.h"
@@ -125,11 +124,46 @@ namespace detail {
                       ParticleNoKeyIteratorProvider particle_no_key_iterator_provider );
     };

+    template
+    struct Cluster1D : public Cluster
+    {
+    public:
+        Cluster1D( double inverse_x_cell_dimension,
+                   SizeType local_x_dimension_in_cell,
+                   int CellStartingGlobalIndex_for_x);
+
+        //! Compute the cell key of a_particle. a_particle shall be a tuple (from a
+        //! zipiterator).
+        //! The first value of a_particle is the cell key value, the other values are
+        //! the positions x
+        template <typename Tuple>
+        __host__ __device__ IDType
+        Index( const Tuple& a_particle ) const;
+
+        //! Compute the cell key of a particle range.
+        //!
+        static void
+        computeParticleClusterKey( nvidiaParticles& particle_container,
+                                   const Params&    parameters,
+                                   const Patch&     a_parent_patch );
+
+        static void
+        sortParticleByKey( nvidiaParticles& particle_container,
+                           const Params&    parameters );
+
+        static void
+        importAndSortParticles( nvidiaParticles& particle_container,
+                                nvidiaParticles& particle_to_inject,
+                                const Params&    parameters,
+                                const Patch&     a_parent_patch );
+
+        double inverse_of_x_cell_dimension_;
+        int    CellStartingGlobalIndex_for_x_;
+    };

     template
     struct Cluster2D : public Cluster
     {
-    public:
     public:
         Cluster2D( double inverse_x_cell_dimension,
                    double inverse_y_cell_dimension,
@@ -164,18 +198,16 @@ namespace detail {
                                    const Params& parameters,
                                    const Patch&  a_parent_patch );

-    public:
         double inverse_of_x_cell_dimension_;
         double inverse_of_y_cell_dimension_;
         SizeType local_y_dimension_in_cluster_;
-        int CellStartingGlobalIndex_for_x_;
+        int CellStartingGlobalIndex_for_x_;
         int CellStartingGlobalIndex_for_y_;
     };

     template
     struct Cluster3D : public Cluster
     {
-    public:
     public:
         Cluster3D( double inverse_x_cell_dimension,
                    double inverse_y_cell_dimension,
                    double inverse_z_cell_dimension,
                    SizeType local_x_dimension_in_cell,
                    SizeType local_y_dimension_in_cell,
                    SizeType local_z_dimension_in_cell,
-                   int CellStartingGlobalIndex_for_x,
+                   int CellStartingGlobalIndex_for_x,
                    int CellStartingGlobalIndex_for_y,
                    int CellStartingGlobalIndex_for_z);
@@ -213,14 +245,13 @@ namespace detail {
                                    const Params& parameters,
                                    const Patch&  a_parent_patch );

-    public:
         double inverse_of_x_cell_dimension_;
         double inverse_of_y_cell_dimension_;
         double inverse_of_z_cell_dimension_;
         SizeType local_y_dimension_in_cluster_;
         SizeType local_z_dimension_in_cluster_;
         int CellStartingGlobalIndex_for_x_;
-        int CellStartingGlobalIndex_for_y_;
+        int CellStartingGlobalIndex_for_y_;
         int CellStartingGlobalIndex_for_z_;
     };

     template <class ClusterType>
     class AssignClusterIndex
     {
-    public:
     public:
         AssignClusterIndex( ClusterType cluster_type )
             : cluster_type_{ cluster_type }
         {
-            // EMPTY
         }

         template <typename Tuple>
         __host__ __device__ void
         operator()( Tuple& a_particle ) const
         {
-            thrust::get<0>( a_particle ) /* cluster key */ = cluster_type_.Index( a_particle );
+            thrust::get<0>( a_particle ) = cluster_type_.Index( a_particle ); //cluster key
         }

     protected:
         ClusterType cluster_type_;
     };

     //! This functor assigns a cluster key to a_particle.
-    //!
     template <class ClusterType>
     struct OutOfClusterPredicate
     {
-    public:
     public:
         OutOfClusterPredicate( ClusterType cluster_type )
             : cluster_type_{ cluster_type }
         {
-            // EMPTY
         }

         template <typename Tuple>
         __host__ __device__ bool
         operator()( const Tuple& a_particle ) const
         {
             // NOTE: it's UB (undefined behavior) to set the cluster key of wrongly keyed particles
             // now..
- return thrust::get<0>( a_particle ) /* cluster key */ != cluster_type_.Index( a_particle ); + return thrust::get<0>( a_particle ) != cluster_type_.Index( a_particle );//cluster key } protected: @@ -286,7 +312,7 @@ namespace detail { __host__ __device__ bool operator()( const Tuple& a_particle ) const { - return thrust::get<0>( a_particle ) /* cluster key */ == -1; + return thrust::get<0>( a_particle ) == -1;//cluster key } }; @@ -304,6 +330,12 @@ namespace detail { // dimensions. switch( particle_container.dimension() ) { + case 1: { + Cluster1D::computeParticleClusterKey( particle_container, + parameters, + a_parent_patch ); + break; + } case 2: { Cluster2D::computeParticleClusterKey( particle_container, parameters, @@ -317,7 +349,7 @@ namespace detail { break; } default: - // Not implemented, only Cartesian 2D or 3D for the moment + // Not implemented, only Cartesian 1D, 2D or 3D for the moment SMILEI_ASSERT( false ); break; } @@ -331,6 +363,11 @@ namespace detail { // dimensions. switch( particle_container.dimension() ) { + case 1: { + Cluster1D::sortParticleByKey( particle_container, + parameters ); + break; + } case 2: { Cluster2D::sortParticleByKey( particle_container, parameters ); @@ -342,7 +379,7 @@ namespace detail { break; } default: - // Not implemented, only Cartesian 2D or 3D for the moment + // Not implemented, only Cartesian 1D, 2D or 3D for the moment SMILEI_ASSERT( false ); break; } @@ -392,15 +429,22 @@ namespace detail { // dimensions. switch( particle_container.dimension() ) { + case 1: { + Cluster1D::importAndSortParticles( particle_container, + particle_to_inject, + parameters, + a_parent_patch ); + break; + } case 2: { - Cluster2D::importAndSortParticles( particle_container, + Cluster2D::importAndSortParticles( particle_container, particle_to_inject, parameters, a_parent_patch ); break; } case 3: { - Cluster3D::importAndSortParticles( particle_container, + Cluster3D::importAndSortParticles( particle_container, particle_to_inject, parameters, a_parent_patch ); @@ -408,7 +452,7 @@ namespace detail { } default: - // Not implemented, only 2D for the moment + // Not implemented, only Cartesian 1D, 2D or 3D for the moment SMILEI_ASSERT( false ); break; } @@ -546,9 +590,18 @@ namespace detail { //////////////////////////////////////////////////////////////////////////////// - // Cluster2D method definitions + // Cluster method definitions //////////////////////////////////////////////////////////////////////////////// + template + Cluster1D::Cluster1D( double inverse_x_cell_dimension, + SizeType local_x_dimension_in_cell, + int CellStartingGlobalIndex_for_x) + : inverse_of_x_cell_dimension_{ inverse_x_cell_dimension } + , CellStartingGlobalIndex_for_x_{CellStartingGlobalIndex_for_x} + { + } + template Cluster2D::Cluster2D( double inverse_x_cell_dimension, double inverse_y_cell_dimension, @@ -561,7 +614,6 @@ namespace detail { , CellStartingGlobalIndex_for_x_{CellStartingGlobalIndex_for_x} , CellStartingGlobalIndex_for_y_{CellStartingGlobalIndex_for_y} { - // EMPTY } template @@ -571,7 +623,7 @@ namespace detail { SizeType local_x_dimension_in_cell, SizeType local_y_dimension_in_cell, SizeType local_z_dimension_in_cell, - int CellStartingGlobalIndex_for_x, + int CellStartingGlobalIndex_for_x, int CellStartingGlobalIndex_for_y, int CellStartingGlobalIndex_for_z ) : inverse_of_x_cell_dimension_{ inverse_x_cell_dimension } , inverse_of_y_cell_dimension_{ inverse_y_cell_dimension } @@ -582,7 +634,30 @@ namespace detail { , 
CellStartingGlobalIndex_for_y_{CellStartingGlobalIndex_for_y}
         , CellStartingGlobalIndex_for_z_{CellStartingGlobalIndex_for_z}
     {
-        // EMPTY
+    }
+
+    template
+    template <typename Tuple>
+    __host__ __device__ typename Cluster1D::IDType
+    Cluster1D::Index( const Tuple& a_particle ) const
+    {
+        const SizeType local_x_particle_coordinate_in_cell = static_cast<SizeType>( thrust::get<1>( a_particle ) *
+                                                                                    inverse_of_x_cell_dimension_ ) -
+                                                             CellStartingGlobalIndex_for_x_;
+
+        // These divisions will be optimized.
+        // The integer division rounding behavior is expected.
+
+        // NOTE: Flat tiles have been studied but were not as efficient for the
+        // projection. The square provides the minimal perimeter (and thus ghost
+        // cell amount) for a given area.
+        static constexpr SizeType x_cluster_dimension_in_cell = kClusterWidth;
+
+        const SizeType local_x_particle_cluster_coordinate_in_cluster = local_x_particle_coordinate_in_cell / x_cluster_dimension_in_cell;
+
+        const SizeType cluster_index = local_x_particle_cluster_coordinate_in_cluster;
+
+        return static_cast<IDType>( cluster_index );
     }

@@ -658,6 +733,23 @@ namespace detail {
         return static_cast<IDType>( cluster_index );
     }

+    template
+    void
+    Cluster1D::computeParticleClusterKey( nvidiaParticles& particle_container,
+                                          const Params&    parameters,
+                                          const Patch&     a_parent_patch )
+    {
+        const auto first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrCellKeys(),
+                                                                          static_cast<const double *>( particle_container.getPtrPosition( 0 ) ) ) );
+        const auto last  = first + particle_container.deviceSize();
+        int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0);
+        printf ( "CellStartingGlobalIndex_for_x %d res %f patch size %d \n",CellStartingGlobalIndex_for_x,parameters.res_space[0], parameters.patch_size_[0] );
+        doComputeParticleClusterKey( first, last,
+                                     Cluster1D{ parameters.res_space[0],
+                                                parameters.patch_size_[0],
+                                                CellStartingGlobalIndex_for_x} );
+    }
+
     template
     void
     Cluster2D::computeParticleClusterKey( nvidiaParticles& particle_container,
@@ -670,7 +762,7 @@ namespace detail {
         const auto last = first + particle_container.deviceSize();
         int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0);
         int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1);
-        doComputeParticleClusterKey( first, last,
+        doComputeParticleClusterKey( first, last,
                                      Cluster2D{ parameters.res_space[0],
                                                 parameters.res_space[1],
                                                 parameters.patch_size_[0],
                                                 parameters.patch_size_[1],
                                                 CellStartingGlobalIndex_for_x,
                                                 CellStartingGlobalIndex_for_y } );
     }
@@ -693,7 +785,7 @@ namespace detail {
         int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0);
         int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1);
         int CellStartingGlobalIndex_for_z = a_parent_patch.getCellStartingGlobalIndex_noGC(2);
-        doComputeParticleClusterKey( first, last,
+        doComputeParticleClusterKey( first, last,
                                      Cluster3D{ parameters.res_space[0],
                                                 parameters.res_space[1],
                                                 parameters.res_space[2],
                                                 parameters.patch_size_[0],
                                                 parameters.patch_size_[1],
                                                 parameters.patch_size_[2],
                                                 CellStartingGlobalIndex_for_x,
                                                 CellStartingGlobalIndex_for_y,
                                                 CellStartingGlobalIndex_for_z } );
     }
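Cluster1D::Index() above reduces, in 1D, to a cell index followed by an integer division by the cluster width. A trimmed, hypothetical standalone form of the same mapping (names are illustrative):

    // 1D cluster-key sketch: map a particle position to the index of the
    // cluster (tile of kClusterWidth cells) it belongs to, relative to the
    // patch's first cell.
    template <int kClusterWidth>
    __host__ __device__ int cluster_key_1d( double position_x,
                                            double inverse_cell_dx,
                                            int cell_start_x )
    {
        const int cell = static_cast<int>( position_x * inverse_cell_dx ) - cell_start_x;
        return cell / kClusterWidth;  // integer division: cells -> cluster index
    }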
thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ),
+                                                                                             particle_container.getPtrMomentum( 0 ),
+                                                                                             particle_container.getPtrMomentum( 1 ),
+                                                                                             particle_container.getPtrMomentum( 2 ),
+                                                                                             particle_container.getPtrWeight(),
+                                                                                             particle_container.getPtrCharge(),
+                                                                                             particle_container.getPtrId() ) );
+                    doSortParticleByKey( particle_container.getPtrCellKeys(),
+                                         particle_container.getPtrCellKeys() + particle_container.deviceSize(),
+                                         value_first );
+                }
+                else {
+                    const auto value_first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ),
+                                                                                             particle_container.getPtrMomentum( 0 ),
+                                                                                             particle_container.getPtrMomentum( 1 ),
+                                                                                             particle_container.getPtrMomentum( 2 ),
+                                                                                             particle_container.getPtrWeight(),
+                                                                                             particle_container.getPtrCharge() ) );
+                    doSortParticleByKey( particle_container.getPtrCellKeys(),
+                                         particle_container.getPtrCellKeys() + particle_container.deviceSize(),
+                                         value_first );
+                }
+            }
+        }
+    }
 
     template 
     void
     Cluster2D::sortParticleByKey( nvidiaParticles& particle_container,
                                   const Params& )
@@ -732,17 +869,31 @@ namespace detail {
             // The appropriate thrust::zip_iterator for the current
             // simulation's parameters
 
-            const auto value_first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ),
-                                                                                     particle_container.getPtrPosition( 1 ),
-                                                                                     particle_container.getPtrMomentum( 0 ),
-                                                                                     particle_container.getPtrMomentum( 1 ),
-                                                                                     particle_container.getPtrMomentum( 2 ),
-                                                                                     particle_container.getPtrWeight(),
-                                                                                     particle_container.getPtrCharge() ) );
-
-            doSortParticleByKey( particle_container.getPtrCellKeys(),
-                                 particle_container.getPtrCellKeys() + particle_container.deviceSize(),
-                                 value_first );
+            if (particle_container.tracked) {
+                const auto value_first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ),
+                                                                                         particle_container.getPtrPosition( 1 ),
+                                                                                         particle_container.getPtrMomentum( 0 ),
+                                                                                         particle_container.getPtrMomentum( 1 ),
+                                                                                         particle_container.getPtrMomentum( 2 ),
+                                                                                         particle_container.getPtrWeight(),
+                                                                                         particle_container.getPtrCharge(),
+                                                                                         particle_container.getPtrId() ) );
+                doSortParticleByKey( particle_container.getPtrCellKeys(),
+                                     particle_container.getPtrCellKeys() + particle_container.deviceSize(),
+                                     value_first );
+            }
+            else {
+                const auto value_first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ),
+                                                                                         particle_container.getPtrPosition( 1 ),
+                                                                                         particle_container.getPtrMomentum( 0 ),
+                                                                                         particle_container.getPtrMomentum( 1 ),
+                                                                                         particle_container.getPtrMomentum( 2 ),
+                                                                                         particle_container.getPtrWeight(),
+                                                                                         particle_container.getPtrCharge() ) );
+                doSortParticleByKey( particle_container.getPtrCellKeys(),
+                                     particle_container.getPtrCellKeys() + particle_container.deviceSize(),
+                                     value_first );
+            }
         }
     }
 
@@ -805,6 +955,67 @@ namespace detail {
             }
         }
     }
+    template 
+    void
+    Cluster1D::importAndSortParticles( nvidiaParticles& particle_container,
+                                       nvidiaParticles& particle_to_inject,
+                                       const Params&    parameters,
+                                       const Patch&     a_parent_patch )
+    {
+        // This is where we do a runtime dispatch depending on the simulation's
+        // qed/radiation settings.
+
+        // NOTE: For now we don't support qed/radiations. Performance
+        // comes from specialization.
+
+        // TODO(Etienne M): Find a better way to dispatch at runtime. This is
+        // complex to read and to maintain. 
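+        // Illustrative sketch of what this TODO is about (not part of the
+        // build): each qed / Monte-Carlo combination only differs by the zip
+        // iterator it builds, so the nested ifs below could in principle be
+        // replaced by a provider chosen once, e.g.:
+        //
+        //   // 'selectIteratorProvider' is hypothetical, not an existing API
+        //   const auto provider = selectIteratorProvider( particle_container.has_quantum_parameter,
+        //                                                 particle_container.has_Monte_Carlo_process );
+        //
+        // Only the no-qed / no-Monte-Carlo path is implemented below.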
+ int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); + printf("CellStartingGlobalIndex_for_x %d \n" , CellStartingGlobalIndex_for_x ); + + const Cluster1D cluster_manipulator{ parameters.res_space[0], + parameters.patch_size_[0], + CellStartingGlobalIndex_for_x}; + + if( particle_container.has_quantum_parameter ) { + if( particle_container.has_Monte_Carlo_process ) { + SMILEI_ASSERT( false ); + } else { + SMILEI_ASSERT( false ); + } + } else { + if( particle_container.has_Monte_Carlo_process ) { + SMILEI_ASSERT( false ); + } else { + // Returns the appropriate thrust::zip_iterator for the + // current simulation's parameters + const auto particle_iterator_provider = []( nvidiaParticles& particle_container ) { + return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrCellKeys(), + particle_container.getPtrPosition( 0 ), + particle_container.getPtrMomentum( 0 ), + particle_container.getPtrMomentum( 1 ), + particle_container.getPtrMomentum( 2 ), + particle_container.getPtrWeight(), + particle_container.getPtrCharge() ) ); + }; + + const auto particle_no_key_iterator_provider = []( nvidiaParticles& particle_container ) { + return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), + particle_container.getPtrMomentum( 0 ), + particle_container.getPtrMomentum( 1 ), + particle_container.getPtrMomentum( 2 ), + particle_container.getPtrWeight(), + particle_container.getPtrCharge() ) ); + }; + + doImportAndSortParticles( particle_container, + particle_to_inject, + cluster_manipulator, + particle_iterator_provider, + particle_no_key_iterator_provider ); + } + } + } template void @@ -824,6 +1035,7 @@ namespace detail { int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); + printf("CellStartingGlobalIndex_for_x %d \n" , CellStartingGlobalIndex_for_x ); const Cluster2D cluster_manipulator{ parameters.res_space[0], parameters.res_space[1], parameters.patch_size_[0], @@ -990,7 +1202,6 @@ nvidiaParticles::nvidiaParticles( const Params& parameters, , parent_patch_{ &a_parent_patch } , gpu_nparts_{} { - // EMPTY } nvidiaParticles::~nvidiaParticles() { @@ -1271,10 +1482,9 @@ void nvidiaParticles::initializeDataOnDevice() // setHostBinIndex(); } else { - + printf( " parent patch %p cells starting global index %d \n", parent_patch_, parent_patch_->getCellStartingGlobalIndex_noGC(0) ); // At this point, a copy of the host particles and last_index is on the // device and we know we support the space dimension. - detail::Cluster::computeParticleClusterKey( *this, *parameters_, *parent_patch_ ); // The particles are not correctly sorted when created. 
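+        // Hedged sketch (illustration only, not part of the build): the
+        // cluster-key pass above and the sort that follows it boil down to a
+        // key computation plus a key/value sort. With thrust this is roughly:
+        //
+        //   // keys: one cluster key per particle; values: zipped property arrays
+        //   thrust::transform( thrust::device, prop_first, prop_last,
+        //                      getPtrCellKeys(), cluster_index_functor );
+        //   thrust::sort_by_key( thrust::device, getPtrCellKeys(),
+        //                        getPtrCellKeys() + deviceSize(), zipped_properties );
+        //
+        // 'cluster_index_functor' and 'zipped_properties' stand for the
+        // ClusterXD::Index functor and the zip iterators built in detail::.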
@@ -1380,13 +1590,13 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move )
     const int nparts = gpu_nparts_;
     const int position_dimension_count = nvidia_position_.size();
 
-    const int nparts_to_move = thrust::count_if( thrust::device,
+    const int number_of_particles_to_move = thrust::count_if( thrust::device,
                                                  nvidia_cell_keys_.cbegin(),
                                                  nvidia_cell_keys_.cbegin() + nparts,
                                                  count_if_out() );
 
     // Resize it, if too small (copy_if do not resize)
-    cp_parts->resize( nparts_to_move );
+    cp_parts->resize( number_of_particles_to_move );
 
     // Iterator of the main data structure
     // NOTE: https://nvidia.github.io/thrust/api/classes/classthrust_1_1zip__iterator.html#class-thrustzip_iterator
@@ -1404,7 +1614,7 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move )
                                                                        cp_parts->nvidia_weight_.begin(),
                                                                        cp_parts->nvidia_charge_.begin() ) );
 
-    // Copy send particles in dedicated data structure if nvidia_cell_keys_=0 (currently = 1 if keeped, new PartBoundCond::apply(...))
+    // Copy send particles in dedicated data structure if nvidia_cell_keys_=0 (currently = 1 if kept, new PartBoundCond::apply(...))
     thrust::copy_if( thrust::device,
                      source_iterator_first,
                      source_iterator_last,
@@ -1653,7 +1863,6 @@ void nvidiaParticles::createParticles( int n_additional_particles )
         nvidia_id_.resize( new_size );
         thrust::fill( nvidia_id_.begin() + n_particles, nvidia_id_.begin() + new_size, 0 );
     }
-
     nvidia_cell_keys_.resize( new_size );
     thrust::fill( nvidia_cell_keys_.begin() + n_particles, nvidia_cell_keys_.begin() + new_size, -1 );
 
@@ -1747,6 +1956,9 @@ extern "C"
 {
     void* CreateGPUParticles( const void* parameters, const void* a_parent_patch )
     {
+        const Patch *temp = static_cast( a_parent_patch );
+
+        printf( " in create GPU parent patch %p cells starting global index %d \n", a_parent_patch, temp->getCellStartingGlobalIndex_noGC(0) );
         return new nvidiaParticles{ *static_cast( parameters ),
                                     *static_cast( a_parent_patch ) };
     }
diff --git a/src/Patch/SyncVectorPatch.cpp b/src/Patch/SyncVectorPatch.cpp
index 09817b201..2982162de 100755
--- a/src/Patch/SyncVectorPatch.cpp
+++ b/src/Patch/SyncVectorPatch.cpp
@@ -48,7 +48,7 @@ void SyncVectorPatch::exchangeParticles( VectorPatch &vecPatches, int ispec, Par
 
 // ---------------------------------------------------------------------------------------------------------------------
 //! This function performs:
-//! - the exhcange of particles for each direction using the diagonal trick.
+//! - the exchange of particles for each direction using the diagonal trick.
 //! - the importation of the new particles in the particle property arrays
 //! - the sorting of particles
 // ---------------------------------------------------------------------------------------------------------------------
diff --git a/src/Projector/Projector1D.h b/src/Projector/Projector1D.h
index d51327bb7..c08c0e9a8 100755
--- a/src/Projector/Projector1D.h
+++ b/src/Projector/Projector1D.h
@@ -18,21 +18,19 @@ class Projector1D : public Projector
     virtual ~Projector1D() {};
 
     virtual void mv_win( unsigned int shift )
     {
-        index_domain_begin+=shift;
+        i_domain_begin_ += shift;
     }
     virtual void setMvWinLimits( unsigned int shift )
     {
-        index_domain_begin = shift;
+        i_domain_begin_ = shift;
     }
 
 protected:
     //! 
Inverse of the spatial step 1/dx double dx_inv_; - int index_domain_begin; + double dx_ov_dt_; + int i_domain_begin_; double *Jx_, *Jy_, *Jz_, *rho_; - -private: - }; #endif diff --git a/src/Projector/Projector1D2Order.cpp b/src/Projector/Projector1D2Order.cpp index cd587dc71..451bca539 100755 --- a/src/Projector/Projector1D2Order.cpp +++ b/src/Projector/Projector1D2Order.cpp @@ -18,14 +18,12 @@ using namespace std; Projector1D2Order::Projector1D2Order( Params ¶ms, Patch *patch ) : Projector1D( params, patch ) { dx_inv_ = 1.0/params.cell_length[0]; - dx_ov_dt = params.cell_length[0] / params.timestep; + dx_ov_dt_ = params.cell_length[0] / params.timestep; - index_domain_begin = patch->getCellStartingGlobalIndex( 0 ); - - dt = params.timestep; - dts2 = params.timestep/2.; - dts4 = params.timestep/4.; + i_domain_begin_ = patch->getCellStartingGlobalIndex( 0 ); + dts2_ = params.timestep/2.; + dts4_ = params.timestep/4.; } @@ -43,7 +41,7 @@ void Projector1D2Order::currents( double *Jx, double *Jy, double *Jz, Particles int ip_m_ipo; double charge_weight = inv_cell_volume * ( double )( particles.charge( ipart ) )*particles.weight( ipart ); double xjn, xj_m_xipo, xj_m_xipo2, xj_m_xip, xj_m_xip2; - double crx_p = charge_weight*dx_ov_dt; // current density for particle moving in the x-direction + double crx_p = charge_weight*dx_ov_dt_; // current density for particle moving in the x-direction double cry_p = charge_weight*particles.momentum( 1, ipart )*invgf; // current density in the y-direction of the macroparticle double crz_p = charge_weight*particles.momentum( 2, ipart )*invgf; // current density allow the y-direction of the macroparticle double S0[5], S1[5], Wl[5], Wt[5], Jx_p[5]; // arrays used for the Esirkepov projection method @@ -76,7 +74,7 @@ void Projector1D2Order::currents( double *Jx, double *Jy, double *Jz, Particles // coefficients 2nd order interpolation on 3 nodes ipo = *iold; // index of the central node - ip_m_ipo = ip-ipo-index_domain_begin; + ip_m_ipo = ip-ipo-i_domain_begin_; S1[ip_m_ipo+1] = 0.5 * ( xj_m_xip2-xj_m_xip+0.25 ); S1[ip_m_ipo+2] = ( 0.75-xj_m_xip2 ); S1[ip_m_ipo+3] = 0.5 * ( xj_m_xip2+xj_m_xip+0.25 ); @@ -115,7 +113,7 @@ void Projector1D2Order::currentsAndDensity( double *Jx, double *Jy, double *Jz, int ip_m_ipo; double charge_weight = inv_cell_volume * ( double )( particles.charge( ipart ) )*particles.weight( ipart ); double xjn, xj_m_xipo, xj_m_xipo2, xj_m_xip, xj_m_xip2; - double crx_p = charge_weight*dx_ov_dt; // current density for particle moving in the x-direction + double crx_p = charge_weight*dx_ov_dt_; // current density for particle moving in the x-direction double cry_p = charge_weight*particles.momentum( 1, ipart )*invgf; // current density in the y-direction of the macroparticle double crz_p = charge_weight*particles.momentum( 2, ipart )*invgf; // current density allow the y-direction of the macroparticle double S0[5], S1[5], Wl[5], Wt[5], Jx_p[5]; // arrays used for the Esirkepov projection method @@ -132,7 +130,7 @@ void Projector1D2Order::currentsAndDensity( double *Jx, double *Jy, double *Jz, // Locate particle old position on the primal grid xj_m_xipo = *deltaold; // normalized distance to the nearest grid point - xj_m_xipo2 = xj_m_xipo*xj_m_xipo; // square of the normalized distance to the nearest grid point + xj_m_xipo2 = xj_m_xipo*xj_m_xipo; // square of the normalized distance to the nearest grid point // Locate particle new position on the primal grid xjn = particles.position( 0, ipart ) * dx_inv_; @@ -142,16 +140,16 @@ void 
Projector1D2Order::currentsAndDensity( double *Jx, double *Jy, double *Jz, // coefficients 2nd order interpolation on 3 nodes - S0[1] = 0.5 * ( xj_m_xipo2-xj_m_xipo+0.25 ); - S0[2] = ( 0.75-xj_m_xipo2 ); - S0[3] = 0.5 * ( xj_m_xipo2+xj_m_xipo+0.25 ); + S0[1] = 0.5 * ( xj_m_xipo2 - xj_m_xipo + 0.25 ); + S0[2] = ( 0.75 - xj_m_xipo2 ); + S0[3] = 0.5 * ( xj_m_xipo2 + xj_m_xipo + 0.25 ); // coefficients 2nd order interpolation on 3 nodes ipo = *iold; - ip_m_ipo = ip-ipo-index_domain_begin; - S1[ip_m_ipo+1] = 0.5 * ( xj_m_xip2-xj_m_xip+0.25 ); - S1[ip_m_ipo+2] = ( 0.75-xj_m_xip2 ); - S1[ip_m_ipo+3] = 0.5 * ( xj_m_xip2+xj_m_xip+0.25 ); + ip_m_ipo = ip-ipo-i_domain_begin_; + S1[ip_m_ipo+1] = 0.5 * ( xj_m_xip2 - xj_m_xip + 0.25 ); + S1[ip_m_ipo+2] = ( 0.75 - xj_m_xip2 ); + S1[ip_m_ipo+3] = 0.5 * ( xj_m_xip2 + xj_m_xip + 0.25 ); // coefficients used in the Esirkepov method for( unsigned int i=0; i<5; i++ ) { @@ -228,7 +226,7 @@ void Projector1D2Order::basic( double *rhoj, Particles &particles, unsigned int S1[2] = ( 0.75-xj_m_xip2 ); S1[3] = 0.5 * ( xj_m_xip2+xj_m_xip+0.25 ); - ip -= index_domain_begin + 2 + bin_shift; + ip -= i_domain_begin_ + 2 + bin_shift; // 2nd order projection for charge density // At the 2nd order, oversize = 2. @@ -270,7 +268,7 @@ void Projector1D2Order::ionizationCurrents( Field *Jx, Field *Jy, Field *Jz, Par xjmxi = xjn - ( double )i + 0.5; // normalized distance to the nearest grid point xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the nearest grid point - i -= index_domain_begin; + i -= i_domain_begin_; im1 = i-1; ip1 = i+1; @@ -291,7 +289,7 @@ void Projector1D2Order::ionizationCurrents( Field *Jx, Field *Jy, Field *Jz, Par xjmxi = xjn - ( double )i; // normalized distance to the nearest grid point xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the nearest grid point - i -= index_domain_begin; + i -= i_domain_begin_; im1 = i-1; ip1 = i+1; @@ -377,9 +375,9 @@ void Projector1D2Order::susceptibility( ElectroMagn *EMfields, Particles &partic for( int ipart=istart ; ipartJx_/Jy_/Jz_) inline void currents( double *Jx, double *Jy, double *Jz, Particles &particles, unsigned int ipart, double invgf, int *iold, double *deltaold, int bin_shift = 0 ); //! Project global current densities (EMfields->Jx_/Jy_/Jz_/rho), diagFields timestep - inline void currentsAndDensity( double *Jx, double *Jy, double *Jz, double *rho, Particles &particles, unsigned int ipart, double invgf, int *iold, double *deltaold, int bin_shift = 0 ); + inline void __attribute__((always_inline)) currentsAndDensity( double *Jx, double *Jy, double *Jz, double *rho, Particles &particles, unsigned int ipart, double invgf, int *iold, double *deltaold, int bin_shift = 0 ); //! 
Project global current charge (EMfields->rho_ , J), for initialization and diags void basic( double *rhoj, Particles &particles, unsigned int ipart, unsigned int type, int bin_shift = 0 ) override final; @@ -36,8 +36,7 @@ class Projector1D2Order : public Projector1D void susceptibilityOnBuffer( ElectroMagn *EMfields, double *b_Chi, int bin_shift, int bdim0, Particles &particles, double species_mass, SmileiMPI *smpi, int istart, int iend, int ithread, int icell = 0, int ipart_ref = 0 ) override final; private: - double dx_ov_dt; - double dt, dts2, dts4; + double dts2_, dts4_; }; #endif diff --git a/src/Projector/Projector1D2OrderGPU.cpp b/src/Projector/Projector1D2OrderGPU.cpp new file mode 100755 index 000000000..79d879024 --- /dev/null +++ b/src/Projector/Projector1D2OrderGPU.cpp @@ -0,0 +1,385 @@ + + +#if defined( SMILEI_ACCELERATOR_MODE ) +#include "Projector1D2OrderGPUKernelCUDAHIP.h" +#include +#include "Tools.h" +#endif + +#include "Projector1D2OrderGPU.h" + +#include "ElectroMagn.h" +#include "Patch.h" +#include "gpu.h" + + +Projector1D2OrderGPU::Projector1D2OrderGPU( Params ¶meters, Patch *a_patch ) + : Projector1D{ parameters, a_patch } +{ + Projector1D::dx_inv_ = 1.0 / parameters.cell_length[0]; + Projector1D::dx_ov_dt_ = parameters.cell_length[0] / parameters.timestep; + Projector1D::i_domain_begin_ = a_patch->getCellStartingGlobalIndex( 0 ); + + not_spectral_ = !parameters.is_pxr; + dts2_ = parameters.timestep / 2.0; + dts4_ = dts2_ / 2.0; +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_OPENACC_MODE ) + x_dimension_bin_count_ = parameters.getGPUBinCount( 1 ); +#else + ERROR( "Only usable in GPU mode! " ); +#endif +} + +Projector1D2OrderGPU::~Projector1D2OrderGPU() +{ +} +#if defined( SMILEI_ACCELERATOR_MODE ) + + +//! Project global current densities (EMfields->Jx_/Jy_/Jz_) +extern "C" void +currentDepositionKernel1DOnDevice( double *__restrict__ host_Jx, + double *__restrict__ host_Jy, + double *__restrict__ host_Jz, + int Jx_size, + int Jy_size, + int Jz_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count_, + const double *__restrict__ host_invgf_, + const int *__restrict__ host_iold_, + const double *__restrict__ host_deltaold_, + double inv_cell_volume, + double dx_inv_, + double dx_ov_dt_, + int i_domain_begin_, + int not_spectral_ ) +{ + cudahip1d::currentDepositionKernel1D( host_Jx, host_Jy, host_Jz, + Jx_size, Jy_size, Jz_size, + device_particle_position_x, device_particle_momentum_y, + device_particle_momentum_z, + device_particle_charge, + device_particle_weight, + host_bin_index, + x_dimension_bin_count_, + host_invgf_, + host_iold_, host_deltaold_, + inv_cell_volume, + dx_inv_, + dx_ov_dt_, + i_domain_begin_, + not_spectral_ ); +} + + +//! Project global current and charge densities (EMfields->Jx_/Jy_/Jz_/rho_) +//! 
+extern "C" void
+currentAndDensityDepositionKernel1DOnDevice( double *__restrict__ host_Jx,
+                                             double *__restrict__ host_Jy,
+                                             double *__restrict__ host_Jz,
+                                             double *__restrict__ host_rho,
+                                             int Jx_size,
+                                             int Jy_size,
+                                             int Jz_size,
+                                             int rho_size,
+                                             const double *__restrict__ device_particle_position_x,
+                                             const double *__restrict__ device_particle_momentum_y,
+                                             const double *__restrict__ device_particle_momentum_z,
+                                             const short *__restrict__ device_particle_charge,
+                                             const double *__restrict__ device_particle_weight,
+                                             const int *__restrict__ host_bin_index,
+                                             unsigned int x_dimension_bin_count_,
+                                             const double *__restrict__ host_invgf_,
+                                             const int *__restrict__ host_iold_,
+                                             const double *__restrict__ host_deltaold_,
+                                             double inv_cell_volume,
+                                             double dx_inv_,
+                                             double dx_ov_dt_,
+                                             int i_domain_begin_,
+                                             int not_spectral_ )
+{
+    cudahip1d::currentAndDensityDepositionKernel1D( host_Jx, host_Jy, host_Jz, host_rho,
+                                                    Jx_size, Jy_size, Jz_size, rho_size,
+                                                    device_particle_position_x, device_particle_momentum_y,
+                                                    device_particle_momentum_z,
+                                                    device_particle_charge,
+                                                    device_particle_weight,
+                                                    host_bin_index,
+                                                    x_dimension_bin_count_,
+                                                    host_invgf_,
+                                                    host_iold_, host_deltaold_,
+                                                    inv_cell_volume,
+                                                    dx_inv_,
+                                                    dx_ov_dt_,
+                                                    i_domain_begin_,
+                                                    not_spectral_ );
+}
+#endif
+
+// ---------------------------------------------------------------------------------------------------------------------
+//! Project charge : frozen & diagFields timestep
+// ---------------------------------------------------------------------------------------------------------------------
+void Projector1D2OrderGPU::basic( double *rhoj, Particles &particles, unsigned int ipart, unsigned int type, int bin_shift )
+{
+
+    // Warning: this function is used for frozen species or initialization only and doesn't use the standard scheme.
+    // rho type = 0
+    // Jx type = 1
+    // Jy type = 2
+    // Jz type = 3
+
+    // The variable 'bin' received is the bin number times the cluster width.
+    // Declare local variables
+    int ip;
+    double xjn, xj_m_xip, xj_m_xip2;
+    double S1[5];            // array used for the Esirkepov projection method
+
+    double charge_weight = inv_cell_volume * ( double )( particles.charge( ipart ) )*particles.weight( ipart );
+    if( type > 0 ) {
+        charge_weight *= 1./sqrt( 1.0 + particles.momentum( 0, ipart )*particles.momentum( 0, ipart )
+                                      + particles.momentum( 1, ipart )*particles.momentum( 1, ipart )
+                                      + particles.momentum( 2, ipart )*particles.momentum( 2, ipart ) );
+
+        if( type == 1 ) {
+            charge_weight *= particles.momentum( 0, ipart );
+        } else if( type == 2 ) {
+            charge_weight *= particles.momentum( 1, ipart );
+        } else {
+            charge_weight *= particles.momentum( 2, ipart );
+        }
+    }
+
+    // Initialize variables
+    for( unsigned int i=0; i<5; i++ ) {
+        S1[i]=0.;
+    }//i
+
+    // Locate particle new position on the primal grid
+    xjn       = particles.position( 0, ipart ) * dx_inv_;
+    ip        = round( xjn + 0.5 * ( type==1 ) );                 // index of the central node
+    xj_m_xip  = xjn - ( double )ip;                               // normalized distance to the nearest grid point
+    xj_m_xip2 = xj_m_xip * xj_m_xip;                              // square of the normalized distance to the nearest grid point
+
+    // coefficients 2nd order interpolation on 3 nodes
+    //ip_m_ipo = ip-ipo;
+    S1[1] = 0.5 * ( xj_m_xip2 - xj_m_xip + 0.25 );
+    S1[2] = ( 0.75 - xj_m_xip2 );
+    S1[3] = 0.5 * ( xj_m_xip2 + xj_m_xip + 0.25 );
+
+    ip -= i_domain_begin_ + 2 + bin_shift;
+
+    // 2nd order projection for charge density
+    // At the 2nd order, oversize = 2. 
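+    // Worked example (illustration only): with dx = 1 and a particle at
+    // x = 10.3, xjn = 10.3 and ip = round(10.3) = 10, so xj_m_xip = 0.3 and
+    // the three non-zero weights are S1[1] = 0.5*(0.09 - 0.3 + 0.25) = 0.02,
+    // S1[2] = 0.75 - 0.09 = 0.66 and S1[3] = 0.5*(0.09 + 0.3 + 0.25) = 0.32,
+    // which sum to 1, as a charge-conserving deposition requires.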
+ for( unsigned int i=0; i<5; i++ ) { + rhoj[i + ip ] += charge_weight * S1[i]; + } + +} + + +void Projector1D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, + Particles &particles, + SmileiMPI *smpi, + int, + int, + int ithread, + bool diag_flag, + bool is_spectral, + int ispec, + int icell, + int ipart_ref ) +{ +{ + std::vector &iold = smpi->dynamics_iold[ithread]; + std::vector &delta = smpi->dynamics_deltaold[ithread]; + std::vector &invgf = smpi->dynamics_invgf[ithread]; + + EMfields->rho_->copyFromDeviceToHost(); + EMfields->rho_s[ispec]->copyFromDeviceToHost(); + if( diag_flag ) { + + double *const __restrict__ b_Jx = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->data() : EMfields->Jx_->data(); + unsigned int Jx_size = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->size() : EMfields->Jx_->size(); + + double *const __restrict__ b_Jy = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->data() : EMfields->Jy_->data(); + unsigned int Jy_size = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->size() : EMfields->Jy_->size(); + + double *const __restrict__ b_Jz = EMfields->Jz_s[ispec] ? EMfields->Jz_s[ispec]->data() : EMfields->Jz_->data(); + unsigned int Jz_size = EMfields->Jz_s[ispec] ? EMfields->Jz_s[ispec]->size() : EMfields->Jz_->size(); + + double *const __restrict__ b_rho = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->data() : EMfields->rho_->data(); + unsigned int rho_size = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->size() : EMfields->rho_->size(); + + // Does not compute Rho ! + +#if defined( SMILEI_ACCELERATOR_MODE ) + /*currentsAndDensity( b_Jx, b_Jy, b_Jz, b_rho, + Jx_size, Jy_size, Jz_size, rho_size, + particles, x_dimension_bin_count_, + invgf.data(), iold.data(), delta.data(), + inv_cell_volume, + dx_inv_, + dx_ov_dt_, + i_domain_begin_, + not_spectral_ );*/ + // to be deleted + std::cout<<"in projector1D2orderGPUKernel.cpp l229: rho_size= "<rho_->copyFromDeviceToHost(); + EMfields->rho_s[ispec]->copyFromDeviceToHost(); + EMfields->Jx_->copyFromDeviceToHost(); + EMfields->Jx_s[ispec]->copyFromDeviceToHost(); + EMfields->Jy_->copyFromDeviceToHost(); + EMfields->Jy_s[ispec]->copyFromDeviceToHost(); + EMfields->Jz_->copyFromDeviceToHost(); + EMfields->Jz_s[ispec]->copyFromDeviceToHost(); + std::cout<<"in projector1D2orderGPUKernel.cpp l251 after projection: rho_size= "<Jx_->data(); + Jy_ = EMfields->Jy_->data(); + Jz_ = EMfields->Jz_->data(); + rho_ = EMfields->rho_->data(); + + /*currents( Jx_, Jy_, Jz_, + EMfields->Jx_->size(), EMfields->Jy_->size(), EMfields->Jz_->size(), + particles, x_dimension_bin_count_, y_dimension_bin_count_, + invgf.data(), iold.data(), delta.data(), + inv_cell_volume, + dx_inv_, dy_inv_, + dx_ov_dt_, dy_ov_dt_, + i_domain_begin_, j_domain_begin_, + nprimy, + one_third, + not_spectral_ ); + } + double *const __restrict__ b_Jx = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->data() : EMfields->Jx_->data(); + unsigned int Jx_size = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->size() : EMfields->Jx_->size(); + + double *const __restrict__ b_Jy = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->data() : EMfields->Jy_->data(); + unsigned int Jy_size = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->size() : EMfields->Jy_->size(); + + double *const __restrict__ b_Jz = EMfields->Jz_s[ispec] ? EMfields->Jz_s[ispec]->data() : EMfields->Jz_->data(); + unsigned int Jz_size = EMfields->Jz_s[ispec] ? 
EMfields->Jz_s[ispec]->size() : EMfields->Jz_->size();//*/ + /*Jx_ = EMfields->Jx_->data(); + Jy_ = EMfields->Jy_->data(); + Jz_ = EMfields->Jz_->data();*/ + + /*currents( Jx_, Jy_, Jz_, + EMfields->Jx_->size(), EMfields->Jy_->size(), EMfields->Jz_->size(), + particles, x_dimension_bin_count_, + invgf.data(), iold.data(), delta.data(), + inv_cell_volume, + dx_inv_, + dx_ov_dt_, + i_domain_begin_, + not_spectral_ );*/ +#if defined( SMILEI_ACCELERATOR_MODE ) + //double *device_Jx = smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( b_Jx ) ; + //printf("testing device Jx:, %p \n", device_Jx); + currentDepositionKernel1DOnDevice(Jx_, Jy_, Jz_, //b_Jx,b_Jy,b_Jz, + //Jx_size, Jy_size, Jz_size, + EMfields->Jx_->size(), EMfields->Jy_->size(), EMfields->Jz_->size(), + particles.getPtrPosition( 0 ), + particles.getPtrMomentum( 1 ), + particles.getPtrMomentum( 2 ), + particles.getPtrCharge(), + particles.getPtrWeight(), + particles.last_index.data(), + x_dimension_bin_count_, + invgf.data(), + iold.data(), + delta.data(), + inv_cell_volume, + dx_inv_, + dx_ov_dt_, + i_domain_begin_, + not_spectral_ ); +#else + SMILEI_ASSERT( false ); +#endif + } + } +} +// to be deleted +{ + double *const __restrict__ b_Jx = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->data() : EMfields->Jx_->data(); + unsigned int Jx_size = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->size() : EMfields->Jx_->size(); + + double *const __restrict__ b_Jy = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->data() : EMfields->Jy_->data(); + unsigned int Jy_size = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->size() : EMfields->Jy_->size(); + + double *const __restrict__ b_Jz = EMfields->Jz_s[ispec] ? EMfields->Jz_s[ispec]->data() : EMfields->Jz_->data(); + unsigned int Jz_size = EMfields->Jz_s[ispec] ? EMfields->Jz_s[ispec]->size() : EMfields->Jz_->size(); + + double *const __restrict__ b_rho = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->data() : EMfields->rho_->data(); + unsigned int rho_size = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->size() : EMfields->rho_->size(); + + std::cout<<"in projector1D2orderGPUKernel.cpp l336: rho_size= "<rho_s[ispec] ? 
" + << EMfields->rho_s[ispec] << " Jx_size " << Jx_size<< " Jy_size " << Jy_size<< " Jz_size " << Jz_size<< std::endl; + for( int ipart=0 ; ipart +#elif defined( __NVCC__ ) + #include + #include +#endif + +#include "Params.h" +#include "gpu.h" +#include + +#if defined( __HIP__ ) + // HIP compiler support enabled (for .cu files) +#else + #define PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION 1 +#endif + +#if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) + #include + #include "Tools.h" +#else + #include + + #include "Params.h" + #include "gpu.h" +#endif + +// #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) + +//namespace naive { +// +// void //static inline void +// currentDepositionKernel2D( double *__restrict__ Jx, +// double *__restrict__ Jy, +// double *__restrict__ Jz, +// int Jx_size, +// int Jy_size, +// int Jz_size, +// const double *__restrict__ device_particle_position_x, +// const double *__restrict__ device_particle_momentum_y, +// const double *__restrict__ device_particle_momentum_z, +// const short *__restrict__ device_particle_charge, +// const double *__restrict__ device_particle_weight, +// const int *__restrict__ host_bin_index, +// unsigned int x_dimension_bin_count, +// const double *__restrict__ invgf_, +// const int *__restrict__ iold_, +// const double *__restrict__ deltaold_, +// double inv_cell_volume, +// double dx_inv, +// double dx_ov_dt, +// int i_domain_begin, +// int not_spectral_ ) +// { +// // The OMP implementation is NOT bin aware. As per the precondition on +// // host_bin_index, index zero always contains the number of particles. +// // See nvidiaParticles::prepareBinIndex / setHostBinIndex. +// const unsigned int bin_count = 1; +// const int particle_count = host_bin_index[bin_count - 1]; +// +// #if defined( SMILEI_ACCELERATOR_GPU_OMP ) +// #pragma omp target is_device_ptr /* map */ ( /* to: */ \ +// device_particle_position_x /* [0:particle_count] */, \ +// device_particle_momentum_y /* [0:particle_count] */, \ +// device_particle_momentum_z /* [0:particle_count] */, \ +// device_particle_charge /* [0:particle_count] */, \ +// device_particle_weight /* [0:particle_count] */ ) +// #pragma omp teams thread_limit( 64 ) distribute parallel for +// #elif defined( SMILEI_OPENACC_MODE ) +// #pragma acc parallel \ +// deviceptr( device_particle_position_x, \ +// device_particle_momentum_y, \ +// device_particle_momentum_z, \ +// device_particle_charge, \ +// device_particle_weight ) \ +// present( iold [0:3 * particle_count], \ +// deltaold [0:3 * particle_count] ) +// #pragma acc loop gang worker vector +// #endif +// for( int particle_index = 0; particle_index < particle_count; ++particle_index ) { +// const double invgf = invgf_[particle_index]; +// const int *const __restrict__ iold = &iold_[particle_index]; +// const double *const __restrict__ deltaold = &deltaold_[particle_index]; +// +// double Sx0[5]; +// double Sx1[5]; +// +// // Variable declaration & initialization +// // Esirkepov's paper: https://arxiv.org/pdf/physics/9901047.pdf +// +// // Locate the particle on the primal grid at former time-step & calculate coeff. S0 +// { +// const double delta = deltaold[0 * particle_count]; +// const double delta2 = delta * delta; +// Sx0[0] = 0.0; +// Sx0[1] = 0.5 * ( delta2 - delta + 0.25 ); +// Sx0[2] = 0.75 - delta2; +// Sx0[3] = 0.5 * ( delta2 + delta + 0.25 ); +// Sx0[4] = 0.0; +// } +// +// // Locate the particle on the primal grid at current time-step & calculate coeff. 
S1 +// { +// const double xpn = device_particle_position_x[particle_index] * dx_inv; +// const int ip = std::round( xpn ); +// const int ipo = iold[0 * particle_count]; +// const int ip_m_ipo = ip - ipo - i_domain_begin; +// const double delta = xpn - static_cast( ip ); +// const double delta2 = delta * delta; +// +// Sx1[0] = 0.0; +// Sx1[1] = 0.0; +// // Sx1[2] = 0.0; // Always set below +// Sx1[3] = 0.0; +// Sx1[4] = 0.0; +// +// Sx1[ip_m_ipo + 1] = 0.5 * ( delta2 - delta + 0.25 ); +// Sx1[ip_m_ipo + 2] = 0.75 - delta2; +// Sx1[ip_m_ipo + 3] = 0.5 * ( delta2 + delta + 0.25 ); +// } +// +// // (x,y,z) components of the current density for the macro-particle +// const double charge_weight = inv_cell_volume * static_cast( device_particle_charge[particle_index] ) * device_particle_weight[particle_index]; +// const double crx_p = charge_weight * dx_ov_dt; +// const double cry_p = charge_weight * dy_ov_dt; +// const double crz_p = charge_weight * ( 1.0 / 3.0 ) * device_particle_momentum_z[particle_index] * invgf; +// +// // This is the particle position as grid index +// // This minus 2 come from the order 2 scheme, based on a 5 points stencil from -2 to +2. +// const int ipo = iold[0 * particle_count] - 2; +// +// for( unsigned int i = 0; i < 1; ++i ) { +// const int iloc = ( i + ipo ) ; +// /* Jx[iloc] += tmpJx[0]; */ +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc] += crz_p * ( Sy1[0] * ( /* 0.5 * Sx0[i] + */ Sx1[i] ) ); +// double tmp = 0.0; +// for( unsigned int j = 1; j < 5; j++ ) { +// tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// Jy[iloc + j + not_spectral_ * ( /* i + */ ipo )] += tmp; +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] /* + Sx0[i] */ ) + +// Sy1[j] * ( /* 0.5 * Sx0[i] + */ Sx1[i] ) ); +// } +// } +// +// double tmpJx[5]{}; +// +// for( unsigned int i = 1; i < 5; ++i ) { +// const int iloc = ( i + ipo ) ; +// tmpJx[0] -= crx_p * ( Sx1[i - 1] - Sx0[i - 1] ) * ( 0.5 * ( Sy1[0] - Sy0[0] ) ); +// SMILEI_ACCELERATOR_ATOMIC +// Jx[iloc] += tmpJx[0]; +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc] += crz_p * ( Sy1[0] * ( 0.5 * Sx0[i] + Sx1[i] ) ); +// double tmp = 0.0; +// for( unsigned int j = 1; j < 5; ++j ) { +// tmpJx[j] -= crx_p * ( Sx1[i - 1] - Sx0[i - 1] ) * ( Sy0[j] + 0.5 * ( Sy1[j] - Sy0[j] ) ); +// SMILEI_ACCELERATOR_ATOMIC +// Jx[iloc + j] += tmpJx[j]; +// tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); +// SMILEI_ACCELERATOR_ATOMIC +// Jy[iloc + j + not_spectral_ * ( i + ipo )] += tmp; +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] + Sx0[i] ) + +// Sy1[j] * ( 0.5 * Sx0[i] + Sx1[i] ) ); +// } +// } +// } +// } // end currentDepositionKernel +// +// //static inline +// void +// currentAndDensityDepositionKernel( double *__restrict__ Jx, +// double *__restrict__ Jy, +// double *__restrict__ Jz, +// double *__restrict__ rho, +// int Jx_size, +// int Jy_size, +// int Jz_size, +// int rho_size, +// const double *__restrict__ device_particle_position_x, +// const double *__restrict__ device_particle_momentum_y, +// const double *__restrict__ device_particle_momentum_z, +// const short *__restrict__ device_particle_charge, +// const double *__restrict__ device_particle_weight, +// const int *__restrict__ host_bin_index, +// unsigned int, +// unsigned int, +// const double *__restrict__ invgf_, +// const int *__restrict__ iold_, +// const double *__restrict__ deltaold_, +// double 
inv_cell_volume, +// double dx_inv, +// double dx_ov_dt, +// int i_domain_begin, +// int not_spectral_ ) +// { +// // The OMP implementation is NOT bin aware. As per the precondition on +// // host_bin_index, index zero always contains the number of particles. +// // See nvidiaParticles::prepareBinIndex / setHostBinIndex. +// const unsigned int bin_count = 1; +// const int particle_count = host_bin_index[bin_count - 1]; +// +// #if defined( SMILEI_ACCELERATOR_GPU_OMP ) +// #pragma omp target is_device_ptr /* map */ ( /* to: */ \ +// device_particle_position_x /* [0:particle_count] */, \ +// device_particle_momentum_y /* [0:particle_count] */, \ +// device_particle_momentum_z /* [0:particle_count] */, \ +// device_particle_charge /* [0:particle_count] */, \ +// device_particle_weight /* [0:particle_count] */ ) +// #pragma omp teams thread_limit( 64 ) distribute parallel for +// #elif defined( SMILEI_OPENACC_MODE ) +// #pragma acc parallel \ +// deviceptr( device_particle_position_x, \ +// device_particle_momentum_y, \ +// device_particle_momentum_z, \ +// device_particle_charge, \ +// device_particle_weight ) \ +// present( iold [0:3 * particle_count], \ +// deltaold [0:3 * particle_count] ) +// #pragma acc loop gang worker vector +// #endif +// for( int particle_index = 0; particle_index < particle_count; ++particle_index ) { +// const double invgf = invgf_[particle_index]; +// const int *const __restrict__ iold = &iold_[particle_index]; +// const double *const __restrict__ deltaold = &deltaold_[particle_index]; +// +// double Sx0[5]; +// double Sx1[5]; +// double Sy0[5]; +// double Sy1[5]; +// +// // Variable declaration & initialization +// // Esirkepov's paper: https://arxiv.org/pdf/physics/9901047.pdf +// +// // Locate the particle on the primal grid at former time-step & calculate coeff. S0 +// { +// const double delta = deltaold[0 * particle_count]; +// const double delta2 = delta * delta; +// Sx0[0] = 0.0; +// Sx0[1] = 0.5 * ( delta2 - delta + 0.25 ); +// Sx0[2] = 0.75 - delta2; +// Sx0[3] = 0.5 * ( delta2 + delta + 0.25 ); +// Sx0[4] = 0.0; +// } +// // Locate the particle on the primal grid at current time-step & calculate coeff. S1 +// { +// const double xpn = device_particle_position_x[particle_index] * dx_inv; +// const int ip = std::round( xpn ); +// const int ipo = iold[0 * particle_count]; +// const int ip_m_ipo = ip - ipo - i_domain_begin; +// const double delta = xpn - static_cast( ip ); +// const double delta2 = delta * delta; +// +// Sx1[0] = 0.0; +// Sx1[1] = 0.0; +// // Sx1[2] = 0.0; // Always set below +// Sx1[3] = 0.0; +// Sx1[4] = 0.0; +// +// Sx1[ip_m_ipo + 1] = 0.5 * ( delta2 - delta + 0.25 ); +// Sx1[ip_m_ipo + 2] = 0.75 - delta2; +// Sx1[ip_m_ipo + 3] = 0.5 * ( delta2 + delta + 0.25 ); +// } +// +// // (x,y,z) components of the current density for the macro-particle +// const double charge_weight = inv_cell_volume * static_cast( device_particle_charge[particle_index] ) * device_particle_weight[particle_index]; +// const double crx_p = charge_weight * dx_ov_dt; +// const double cry_p = charge_weight * dy_ov_dt; +// const double crz_p = charge_weight * ( 1.0 / 3.0 ) * device_particle_momentum_z[particle_index] * invgf; +// +// // This is the particle position as grid index +// // This minus 2 come from the order 2 scheme, based on a 5 points stencil from -2 to +2. 
+// const int ipo = iold[0 * particle_count] - 2; +// const int jpo = iold[1 * particle_count] - 2; +// +// // case i =0 +// for( unsigned int i = 0; i < 1; ++i ) { +// const int iloc = ( i + ipo ) ; +// /* Jx[iloc] += tmpJx[0]; */ +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc] += crz_p * ( Sy1[0] * ( /* 0.5 * Sx0[i] + */ Sx1[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// rho[iloc] += charge_weight * Sx1[0] * Sy1[0]; +// double tmp = 0.0; +// for( unsigned int j = 1; j < 5; j++ ) { +// tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// Jy[iloc + j + not_spectral_ * ( /* i + */ ipo )] += tmp; +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] /* + Sx0[i] */ ) + +// Sy1[j] * ( /* 0.5 * Sx0[i] + */ Sx1[i] ) ); +// SMILEI_ACCELERATOR_ATOMIC +// rho[iloc + j] += charge_weight * Sx1[0] * Sy1[j]; +// } +// } +// +// double tmpJx[5]{}; +// +// // case i> 0 +// for( unsigned int i = 1; i < 5; ++i ) { +// const int iloc = i + ipo ; +// tmpJx[0] -= crx_p * ( Sx1[i - 1] - Sx0[i - 1] ); +// +// SMILEI_ACCELERATOR_ATOMIC +// Jx[iloc] += tmpJx[0]; +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc] += crz_p * ( Sy1[0] * ( 0.5 * Sx0[i] + Sx1[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// rho[iloc] += charge_weight * Sx1[i] * Sy1[0]; +// +// double tmp = 0.0; +// for( unsigned int j = 1; j < 5; ++j ) { +// tmpJx[j] -= crx_p * ( Sx1[i - 1] - Sx0[i - 1] ) * ( Sy0[j] + 0.5 * ( Sy1[j] - Sy0[j] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// Jx[iloc + j] += tmpJx[j]; +// tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// Jy[iloc + j + not_spectral_ * ( i + ipo )] += tmp; +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] + Sx0[i] ) + +// Sy1[j] * ( 0.5 * Sx0[i] + Sx1[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// rho[iloc + j] += charge_weight * Sx1[i] * Sy1[j]; +// } +// } +// } +// } // end currentDepositionKernel +// +// +//} // namespace naive +// +// #else + +namespace cudahip1d { + namespace detail { +#if defined( __HIP__ ) + static inline void + checkErrors( ::hipError_t an_error_code, + const char *file_name, + int line ) + { + if( an_error_code != ::hipError_t::hipSuccess ) { + std::cout << "HIP error at " << file_name << ":" << line + << " -> " << ::hipGetErrorString( an_error_code ) << std::endl; + std::exit( EXIT_FAILURE ); + } + } +// For NVIDIA compiler +#elif defined( __NVCC__ ) + static inline void + checkErrors( ::cudaError_t an_error_code, + const char *file_name, + int line ) + { + if( an_error_code != ::cudaError_t::cudaSuccess ) { + std::cout << "CUDA error at " << file_name << ":" << line << " -> " << ::cudaGetErrorString( an_error_code ) << std::endl; + std::exit( EXIT_FAILURE ); + } + } +#endif + + } // namespace detail + + #define checkHIPErrors( an_expression ) \ + do { \ + detail::checkErrors( an_expression, __FILE__, __LINE__ ); \ + } while( 0 ) + + namespace kernel { + namespace atomic { + namespace LDS { + __device__ void + AddNoReturn( float *a_pointer, float a_value ) + { + #if defined( __gfx90a__ ) + ::unsafeAtomicAdd( a_pointer, a_value ); + #else + ::atomicAdd( a_pointer, a_value ); + #endif + } + + __device__ void + AddNoReturn( double *a_pointer, double a_value ) + { + #if defined( __gfx90a__ ) + ::unsafeAtomicAdd( a_pointer, a_value ); + #else + ::atomicAdd( a_pointer, a_value ); + #endif + } + } // namespace LDS + + namespace GDS { + __device__ void + AddNoReturn( double 
*a_pointer, double a_value ) + { + #if defined( __gfx90a__ ) + ::unsafeAtomicAdd( a_pointer, a_value ); + #else + ::atomicAdd( a_pointer, a_value ); + #endif + } + } // namespace GDS + } // namespace atomic + + + template + __device__ void inline __attribute__((always_inline)) init_S0(const ComputeFloat delta, ComputeFloat *__restrict__ S0) + { + const ComputeFloat delta2 = delta * delta; + S0[0] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) ); + S0[1] = static_cast( 0.75 ) - delta2; + S0[2] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) ); + S0[3] = static_cast( 0.0 ) ; + } + + template + __device__ void inline __attribute__((always_inline)) init_S1(const ComputeFloat xpn, const int ipo, const int i_domain_begin, + ComputeFloat *__restrict__ S1) + { + // const int ip = static_cast( xpn + 0.5 ); // std::round | rounding approximation which is correct enough and faster in this case + const int ip = std::round( xpn ); + const int ip_m_ipo = ip - ipo - i_domain_begin; + const ComputeFloat delta = xpn - static_cast( ip ); + const ComputeFloat delta2 = delta * delta; + + S1[0] = static_cast( 0.0 ); + S1[1] = static_cast( 0.0 ); // S1[2] = 0.0; // Always set below + S1[3] = static_cast( 0.0 ); + S1[4] = static_cast( 0.0 ); + + S1[ip_m_ipo + 1] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) ); + S1[ip_m_ipo + 2] = static_cast( 0.75 ) - delta2; + S1[ip_m_ipo + 3] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) ); + } + + + template + __global__ void + // __launch_bounds__(kWorkgroupSize, 1) + DepositCurrentDensity_1D_Order2( double *__restrict__ device_Jx, + double *__restrict__ device_Jy, + double *__restrict__ device_Jz, + int Jx_size, + int Jy_size, + int Jz_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ device_bin_index, + const double *__restrict__ device_invgf_, + const int *__restrict__ device_iold_, + const double *__restrict__ device_deltaold_, + ComputeFloat inv_cell_volume, + ComputeFloat dx_inv, + ComputeFloat dx_ov_dt, + int i_domain_begin, + int not_spectral_ ) + { + /*printf(" Hallooo \n"); + for (int i=0; i( 0.0 ); + Jy_scratch_space[field_index] = static_cast( 0.0 ); + Jz_scratch_space[field_index] = static_cast( 0.0 ); + } + + __syncthreads(); + + const unsigned int particle_count = device_bin_index[bin_count - 1]; + + // This workgroup has to process distance(last_particle, + // first_particle) particles + const unsigned int first_particle = workgroup_dedicated_bin_index == 0 ? 
0 : device_bin_index[workgroup_dedicated_bin_index - 1];
+            const unsigned int last_particle  = device_bin_index[workgroup_dedicated_bin_index];
+
+            //printf("first_particle %d last_particle %d particle_count %d\n", first_particle, last_particle, particle_count);
+
+            for( unsigned int particle_index = first_particle + thread_index_offset;
+                 particle_index < last_particle;
+                 particle_index += loop_stride ) {
+                const ComputeFloat invgf                  = static_cast( device_invgf_[particle_index] );
+                const int *const __restrict__ iold        = &device_iold_[particle_index];
+                const double *const __restrict__ deltaold = &device_deltaold_[particle_index];
+
+                ComputeFloat Sx0[5];
+                ComputeFloat Sx1[5];
+
+                // Variable declaration & initialization
+                // Esirkepov's paper: https://arxiv.org/pdf/physics/9901047.pdf
+
+                // Locate the particle on the primal grid at former time-step & calculate coeff. S0
+                {
+                    const ComputeFloat delta  = deltaold[0 * particle_count];
+                    const ComputeFloat delta2 = delta * delta;
+
+                    Sx0[0] = static_cast( 0.0 );
+                    Sx0[1] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) );
+                    Sx0[2] = static_cast( 0.75 ) - delta2;
+                    Sx0[3] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) );
+                    Sx0[4] = static_cast( 0.0 );
+                }
+                //init_S0(deltaold[0 * particle_count], Sx0);
+                //init_S0(deltaold[1 * particle_count], Sy0);
+
+                // Locate the particle on the primal grid at current time-step & calculate coeff. S1
+                {
+                    // const int ip = static_cast( xpn + 0.5 ); // std::round | rounding approximation which is correct enough and faster in this case
+                    const ComputeFloat xpn      = static_cast( device_particle_position_x[particle_index] ) * dx_inv;
+                    const int          ip       = std::round( xpn );
+                    const int          ipo      = iold[0 * particle_count];
+                    const int          ip_m_ipo = ip - ipo - i_domain_begin;
+                    const ComputeFloat delta    = xpn - static_cast( ip );
+                    const ComputeFloat delta2   = delta * delta;
+
+                    Sx1[0] = static_cast( 0.0 );
+                    Sx1[1] = static_cast( 0.0 );
+                    // Sx1[2] = 0.0; // Always set below
+                    Sx1[3] = static_cast( 0.0 );
+                    Sx1[4] = static_cast( 0.0 );
+
+                    Sx1[ip_m_ipo + 1] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) );
+                    Sx1[ip_m_ipo + 2] = static_cast( 0.75 ) - delta2;
+                    Sx1[ip_m_ipo + 3] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) );
+                }
+
+                // (x,y,z) components of the current density for the macro-particle
+                const ComputeFloat charge_weight = inv_cell_volume * static_cast( device_particle_charge[particle_index] ) * static_cast( device_particle_weight[particle_index] );
+                const ComputeFloat crx_p         = charge_weight * dx_ov_dt;
+                const ComputeFloat cry_p         = charge_weight * static_cast( device_particle_momentum_y[particle_index] ) * invgf;
+                const ComputeFloat crz_p         = charge_weight * static_cast( device_particle_momentum_z[particle_index] ) * invgf;
+
+                // This is the particle position as grid index
+                // The minus 2 comes from the order-2 scheme, based on a 5-point stencil from -2 to +2. 
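+                // Charge-conservation sketch (illustration only): in 1D the
+                // Esirkepov scheme reduces to a prefix sum of shape-factor
+                // differences,
+                //
+                //   Jx(i+1/2) = Jx(i-1/2) + crx_p * ( Sx0[i] - Sx1[i] )
+                //
+                // which is what the tmpJx accumulation below implements.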
+                const int ipo = iold[0 * particle_count] -
+                                2 /* Offset so we don't use negative numbers in the loop */ -
+                                global_x_scratch_space_coordinate_offset /* Offset to get cluster relative coordinates */;
+
+                // Jx
+                ComputeFloat tmpJx[5]{};
+                for( unsigned int i = 1; i < 5; ++i ) {
+                    const int iloc = i + ipo;
+                    tmpJx[i] = tmpJx[i-1] + crx_p * (Sx0[i-1] - Sx1[i-1]);
+                    atomic::LDS::AddNoReturn( &Jx_scratch_space[iloc], static_cast( tmpJx[i] ) );
+                }
+
+                // Jy, transverse weight = time-averaged shape factor 0.5*(Sx0+Sx1),
+                // as in the CPU Projector1D2Order (Wt)
+                for( unsigned int i = 0; i < 5; ++i ) {
+                    const int iloc = i + ipo;
+                    tmpJx[i] = cry_p * 0.5 * (Sx0[i] + Sx1[i]);
+                    atomic::LDS::AddNoReturn( &Jy_scratch_space[iloc], static_cast( tmpJx[i] ) );
+                }
+
+                // Jz, same time-averaged transverse weight
+                for( unsigned int i = 0; i < 5; ++i ) {
+                    const int iloc = i + ipo;
+                    tmpJx[i] = crz_p * 0.5 * (Sx0[i] + Sx1[i]);
+                    atomic::LDS::AddNoReturn( &Jz_scratch_space[iloc], static_cast( tmpJx[i] ) );
+                }
+            } // particle_index
+
+            __syncthreads();
+
+            for( unsigned int field_index = thread_index_offset; field_index < kFieldScratchSpaceSize; field_index += workgroup_size ) {
+                const unsigned int local_x_scratch_space_coordinate  = field_index % GPUClusterWithGCWidth; // /GPUClusterWithGCWidth
+                const unsigned int global_x_scratch_space_coordinate = global_x_scratch_space_coordinate_offset + local_x_scratch_space_coordinate;
+
+                const unsigned int global_memory_index = global_x_scratch_space_coordinate;
+                const unsigned int scratch_space_index = field_index; // local_x_scratch_space_coordinate * GPUClusterWithGCWidth + local_y_scratch_space_coordinate;
+
+                //printf("field_index %d, thread_index_offset %d, kFieldScratchSpaceSize %d, workgroup_size %d, GPUClusterWithGCWidth %d, global_x_scratch_space_coordinate_offset %d, global_memory_index %d, Jx_size %d\n",field_index, thread_index_offset, kFieldScratchSpaceSize, workgroup_size, GPUClusterWithGCWidth, global_x_scratch_space_coordinate_offset, global_memory_index, Jx_size);
+
+                // These atomics are basically free (very few of them).
+                atomic::GDS::AddNoReturn( &device_Jx[global_memory_index], static_cast( Jx_scratch_space[scratch_space_index] ) );
+                atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + not_spectral_ * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); // We handle the FDTD/picsar
+                atomic::GDS::AddNoReturn( &device_Jz[global_memory_index], static_cast( Jz_scratch_space[scratch_space_index] ) );
+            }
+        } // end DepositCurrent
+
+
+        template 
+        __global__ void
+        // __launch_bounds__(kWorkgroupSize, 1)
+        DepositCurrentAndDensity_1D_Order2( double *__restrict__ device_Jx,
+                                            double *__restrict__ device_Jy,
+                                            double *__restrict__ device_Jz,
+                                            double *__restrict__ device_rho,
+                                            int Jx_size,
+                                            int Jy_size,
+                                            int Jz_size,
+                                            int rho_size,
+                                            const double *__restrict__ device_particle_position_x,
+                                            const double *__restrict__ device_particle_momentum_y,
+                                            const double *__restrict__ device_particle_momentum_z,
+                                            const short *__restrict__ device_particle_charge,
+                                            const double *__restrict__ device_particle_weight,
+                                            const int *__restrict__ device_bin_index,
+                                            const double *__restrict__ device_invgf_,
+                                            const int *__restrict__ device_iold_,
+                                            const double *__restrict__ device_deltaold_,
+                                            ComputeFloat inv_cell_volume,
+                                            ComputeFloat dx_inv,
+                                            ComputeFloat dx_ov_dt,
+                                            int i_domain_begin,
+                                            int not_spectral_ )
+        {
+            // TODO(Etienne M): refactor this function. Break it into smaller
+            // pieces (lds init/store, coeff computation, deposition etc..)
+            // TODO(Etienne M): __ldg could be used to slightly improve GDS load
+            // speed. 
This would only have an effect on Nvidia cards as this
+            // operation is a no-op on AMD.
+            const unsigned int workgroup_size = kWorkgroupSize; // blockDim.x;
+            const unsigned int bin_count      = gridDim.x;
+            const unsigned int loop_stride    = workgroup_size; // This stride should enable better memory access coalescing
+
+            const unsigned int x_cluster_coordinate          = blockIdx.x;
+            const unsigned int workgroup_dedicated_bin_index = x_cluster_coordinate ;
+            const unsigned int thread_index_offset           = threadIdx.x;
+
+            // The unit is the cell
+            const unsigned int global_x_scratch_space_coordinate_offset = x_cluster_coordinate * Params::getGPUClusterWidth( 1 /* 1D */ );
+
+            // NOTE: We gain from the particles not being sorted inside a
+            // cluster because it reduces the bank conflicts one gets when
+            // multiple threads access the same part of the shared memory. Such
+            // "conflicted" accesses are serialized!
+            // NOTE: We use a bit too much LDS. For Jx, the first row could be
+            // discarded, for Jy we could remove the first column.
+
+            const int GPUClusterWithGCWidth = Params::getGPUClusterWithGhostCellWidth( 1 /* 1D */, 2 /* 2nd order interpolation */ );
+            static constexpr unsigned int kFieldScratchSpaceSize = Params::getGPUInterpolationClusterCellVolume( 1 /* 1D */, 2 /* 2nd order interpolation */ );
+
+            // NOTE: I tried having only one cache and reusing it. Doing that
+            // requires you to iterate multiple times over the particles, which is
+            // possible but costs more bandwidth. The speedup was ~x0.92.
+            __shared__ ReductionFloat Jx_scratch_space[kFieldScratchSpaceSize];
+            __shared__ ReductionFloat Jy_scratch_space[kFieldScratchSpaceSize];
+            __shared__ ReductionFloat Jz_scratch_space[kFieldScratchSpaceSize];
+            __shared__ ReductionFloat rho_scratch_space[kFieldScratchSpaceSize];
+
+            // Init the shared memory
+
+            for( unsigned int field_index = thread_index_offset;
+                 field_index < kFieldScratchSpaceSize;
+                 field_index += workgroup_size ) {
+                Jx_scratch_space[field_index]  = static_cast( 0.0 );
+                Jy_scratch_space[field_index]  = static_cast( 0.0 );
+                Jz_scratch_space[field_index]  = static_cast( 0.0 );
+                rho_scratch_space[field_index] = static_cast( 0.0 );
+            }
+
+            __syncthreads();
+
+            const unsigned int particle_count = device_bin_index[bin_count - 1];
+
+            // This workgroup has to process distance(last_particle,
+            // first_particle) particles
+            const unsigned int first_particle = workgroup_dedicated_bin_index == 0 ? 
0 : device_bin_index[workgroup_dedicated_bin_index - 1];
+            const unsigned int last_particle  = device_bin_index[workgroup_dedicated_bin_index];
+
+            //printf(" first_particle %d last_particle %d loopstride %d \n",first_particle, last_particle, loop_stride);
+
+            for( unsigned int particle_index = first_particle + thread_index_offset;
+                 particle_index < last_particle;
+                 particle_index += loop_stride ) {
+                const ComputeFloat invgf                  = static_cast( device_invgf_[particle_index] );
+                const int *const __restrict__ iold        = &device_iold_[particle_index];
+                const double *const __restrict__ deltaold = &device_deltaold_[particle_index];
+
+                //printf("in projector cuda l735: particle charge= %f weight %f position_x= %f, momentum y = %f, momentum z = %f, charge*sqrt(2) %+4.15e \n", static_cast( device_particle_charge[particle_index]) , static_cast( device_particle_weight[particle_index]),
+                //        static_cast( device_particle_position_x[particle_index] ), static_cast( device_particle_momentum_y[particle_index] ),
+                //        static_cast( device_particle_momentum_z[particle_index] ), static_cast( device_particle_charge[particle_index]) * static_cast(sqrt(2.0)));
+
+                ComputeFloat Sx0[5];
+                ComputeFloat Sx1[5];
+
+                // Variable declaration & initialization
+                // Esirkepov's paper: https://arxiv.org/pdf/physics/9901047.pdf
+
+                // Locate the particle on the primal grid at former time-step & calculate coeff. S0
+                {
+                    const ComputeFloat delta  = deltaold[0 * particle_count];
+                    const ComputeFloat delta2 = delta * delta;
+
+                    Sx0[0] = static_cast( 0.0 );
+                    Sx0[1] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) );
+                    Sx0[2] = static_cast( 0.75 ) - delta2;
+                    Sx0[3] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) );
+                    Sx0[4] = static_cast( 0.0 );
+                }
+
+                // Locate the particle on the primal grid at current time-step & calculate coeff. S1
+                {
+                    // const int ip = static_cast( xpn + 0.5 ); // std::round | rounding approximation which is correct enough and faster in this case
+                    const ComputeFloat xpn      = static_cast( device_particle_position_x[particle_index] ) * dx_inv;
+                    const int          ip       = std::round( xpn );
+                    const int          ipo      = iold[0 * particle_count];
+                    const int          ip_m_ipo = ip - ipo - i_domain_begin;
+                    const ComputeFloat delta    = xpn - static_cast( ip );
+                    const ComputeFloat delta2   = delta * delta;
+
+                    Sx1[0] = static_cast( 0.0 );
+                    Sx1[1] = static_cast( 0.0 );
+                    // Sx1[2] = 0.0; // Always set below
+                    Sx1[3] = static_cast( 0.0 );
+                    Sx1[4] = static_cast( 0.0 );
+
+                    Sx1[ip_m_ipo + 1] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) );
+                    Sx1[ip_m_ipo + 2] = static_cast( 0.75 ) - delta2;
+                    Sx1[ip_m_ipo + 3] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) );
+                }
+
+                // (x,y,z) components of the current density for the macro-particle
+                const ComputeFloat charge_weight = inv_cell_volume * static_cast( device_particle_charge[particle_index] ) * static_cast( device_particle_weight[particle_index] );
+                const ComputeFloat crx_p         = charge_weight * dx_ov_dt;
+                const ComputeFloat cry_p         = charge_weight * static_cast( device_particle_momentum_y[particle_index] ) * invgf;
+                const ComputeFloat crz_p         = charge_weight * static_cast( device_particle_momentum_z[particle_index] ) * invgf;
+
+                // This is the particle position as grid index
+                // The minus 2 comes from the order-2 scheme, based on a 5-point stencil from -2 to +2. 
+                const int ipo = iold[0 * particle_count] -
+                                2 /* Offset so we don't use negative numbers in the loop */ -
+                                global_x_scratch_space_coordinate_offset /* Offset to get cluster relative coordinates */;
+
+                // Jx
+                ComputeFloat tmpJx[5]{};
+                for( unsigned int i = 1; i < 5; ++i ) {
+                    const int iloc = i + ipo;
+                    tmpJx[i] = tmpJx[i-1] + crx_p * (Sx0[i-1] - Sx1[i-1]);
+                    atomic::LDS::AddNoReturn( &Jx_scratch_space[iloc], static_cast( tmpJx[i] ) );
+                }
+
+                // Jy, transverse weight = time-averaged shape factor 0.5*(Sx0+Sx1),
+                // as in the CPU Projector1D2Order (Wt)
+                for( unsigned int i = 0; i < 5; ++i ) {
+                    const int iloc = i + ipo;
+                    tmpJx[i] = cry_p * 0.5 * (Sx0[i] + Sx1[i]);
+                    atomic::LDS::AddNoReturn( &Jy_scratch_space[iloc], static_cast( tmpJx[i] ) );
+                }
+
+                // Jz, same time-averaged transverse weight
+                for( unsigned int i = 0; i < 5; ++i ) {
+                    const int iloc = i + ipo;
+                    tmpJx[i] = crz_p * 0.5 * (Sx0[i] + Sx1[i]);
+                    atomic::LDS::AddNoReturn( &Jz_scratch_space[iloc], static_cast( tmpJx[i] ) );
+                }
+
+                // Rho
+                for( unsigned int i = 0; i < 5; ++i ) {
+                    const int iloc = i + ipo;
+                    atomic::LDS::AddNoReturn( &rho_scratch_space[iloc], static_cast( charge_weight * Sx1[i] ) );
+                }
+
+                // improvement ideas: 1. unrolling to reduce the size of Sx0 and Sx1
+                //                    2. combine the loops
+
+                /*
+                //
+                {
+                    //ComputeFloat tmp = 0.5 * (Sx0[0] - Sx1[0]); // = - 0.5 * Sx1[0]
+                    atomic::LDS::AddNoReturn( &Jy_scratch_space[ipo], static_cast( -cry_p * 0.5 * Sx1[0] ) );
+                    atomic::LDS::AddNoReturn( &Jz_scratch_space[ipo], static_cast( -crz_p * 0.5 * Sx1[0] ) );
+                    atomic::LDS::AddNoReturn( &rho_scratch_space[ipo], static_cast( charge_weight * Sx1[0] ) );
+                }*/
+                /*for( unsigned int i = 1; i < 4; ++i ) {
+                    const int iloc = i + ipo;
+                    tmpJx[i] = tmpJx[i-1] + crx_p * (Sx0[i-1] - Sx1[i-1]);
+                    ComputeFloat tmp = 0.5 * (Sx0[i] - Sx1[i]);
+                    atomic::LDS::AddNoReturn( &Jx_scratch_space[iloc], static_cast( tmpJx[i] ) );
+                    atomic::LDS::AddNoReturn( &Jy_scratch_space[iloc], static_cast( cry_p * tmp ) );
+                    atomic::LDS::AddNoReturn( &Jz_scratch_space[iloc], static_cast( crz_p * tmp ) );
+                    atomic::LDS::AddNoReturn( &rho_scratch_space[iloc], static_cast( charge_weight * Sx1[i] ) );
+                }*/
+                /* i=4
+                {
+                    const int iloc = i + ipo;
+                    tmpJx[4] = tmpJx[3] + crx_p * (Sx0[i-1] - Sx1[i-1]); // can save some registers by tmpJx[0] instead of tmpJx[4] ? reducing its size from 5 to 4?
+                    //ComputeFloat tmp = 0.5 * (Sx0[4] - Sx1[4]); // = -0.5 * Sx1[4]
+                    atomic::LDS::AddNoReturn( &Jx_scratch_space[iloc], static_cast( tmpJx[i] ) );
+                    atomic::LDS::AddNoReturn( &Jy_scratch_space[iloc], static_cast( -cry_p * 0.5 * Sx1[4] ) ); //null
+                    atomic::LDS::AddNoReturn( &Jz_scratch_space[iloc], static_cast( -crz_p * 0.5 * Sx1[4] ) ); //null
+                    atomic::LDS::AddNoReturn( &rho_scratch_space[iloc], static_cast( charge_weight * Sx1[4] ) ); //null
+                }
+
+
+                */
+
+            } // particle_index
+
+            __syncthreads();
+
+            for( unsigned int field_index = thread_index_offset;
+                 field_index < kFieldScratchSpaceSize;
+                 field_index += workgroup_size ) {
+
+                const unsigned int local_x_scratch_space_coordinate  = field_index % GPUClusterWithGCWidth;
+                const unsigned int global_x_scratch_space_coordinate = global_x_scratch_space_coordinate_offset + local_x_scratch_space_coordinate;
+
+                const unsigned int global_memory_index = global_x_scratch_space_coordinate;
+                const unsigned int scratch_space_index = field_index; // local_x_scratch_space_coordinate * GPUClusterWithGCWidth + local_y_scratch_space_coordinate;
+
+                // These atomics are basically free (very few of them). 
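+                // Two-level reduction (illustration only): each block first
+                // accumulates into its private LDS tile, then flushes the tile
+                // once, i.e. one global atomic per scratch cell instead of one
+                // per particle contribution, roughly:
+                //
+                //   atomicAdd( &device_field[tile_offset + c], scratch[c] );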
+            atomic::GDS::AddNoReturn( &device_Jx[global_memory_index], static_cast<double>( Jx_scratch_space[scratch_space_index] ) );
+            atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FDTD/picsar layouts */ not_spectral_ * global_x_scratch_space_coordinate], static_cast<double>( Jy_scratch_space[scratch_space_index] ) );
+            atomic::GDS::AddNoReturn( &device_Jz[global_memory_index], static_cast<double>( Jz_scratch_space[scratch_space_index] ) );
+            atomic::GDS::AddNoReturn( &device_rho[global_memory_index], static_cast<double>( rho_scratch_space[scratch_space_index] ) );
+        }
+    }
+    } // namespace kernel
+
+
+    //static inline
+    void
+    currentDepositionKernel1D( double *__restrict__ host_Jx,
+                               double *__restrict__ host_Jy,
+                               double *__restrict__ host_Jz,
+                               int Jx_size,
+                               int Jy_size,
+                               int Jz_size,
+                               const double *__restrict__ device_particle_position_x,
+                               const double *__restrict__ device_particle_momentum_y,
+                               const double *__restrict__ device_particle_momentum_z,
+                               const short *__restrict__ device_particle_charge,
+                               const double *__restrict__ device_particle_weight,
+                               const int *__restrict__ host_bin_index,
+                               unsigned int x_dimension_bin_count,
+                               const double *__restrict__ host_invgf_,
+                               const int *__restrict__ host_iold_,
+                               const double *__restrict__ host_deltaold_,
+                               double inv_cell_volume,
+                               double dx_inv,
+                               double dx_ov_dt,
+                               int i_domain_begin,
+                               int not_spectral_ )
+    {
+        SMILEI_ASSERT( Params::getGPUClusterWidth( 1 /* 1D */ ) != -1 &&
+                       Params::getGPUClusterGhostCellBorderWidth( 2 /* 2nd order interpolation */ ) != -1 );
+
+        // NOTE:
+        // This kernel is very strongly bound by atomic operations in LDS (shared memory).
+        // TODO(Etienne M): Find a way to lessen the atomic usage
+
+        const ::dim3 kGridDimension{ static_cast<uint32_t>( x_dimension_bin_count ), 1, 1 };
+
+        static constexpr std::size_t kWorkgroupSize = 128;
+        const ::dim3 kBlockDimension{ static_cast<uint32_t>( kWorkgroupSize ), 1, 1 };
+
+        // NOTE: On cards lacking hardware-backed Binary64 atomic operations,
+        // falling back to Binary32 (supposing hardware support for atomic
+        // operations) can lead to a drastic performance improvement.
+        // One just needs to assign 'float' to ReductionFloat.
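+        // A minimal sketch of the Binary32 fallback mentioned above, assuming the
+        // target device supports hardware float atomics; the kernel template is
+        // simply instantiated with a narrower reduction type:
+        //
+        //     using ComputeFloat   = double; // keep per-particle arithmetic in double
+        //     using ReductionFloat = float;  // accumulate J and rho in float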
+
+        using ComputeFloat   = double;
+        using ReductionFloat = double;
+
+        auto KernelFunction = kernel::DepositCurrentDensity_1D_Order2<ComputeFloat, ReductionFloat, kWorkgroupSize>;
+#if defined ( __HIP__ )
+        hipLaunchKernelGGL( KernelFunction,
+                            kGridDimension,
+                            kBlockDimension,
+                            0, // Shared memory
+                            0, // Stream
+                            // Kernel arguments
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jx ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jy ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jz ),
+                            Jx_size, Jy_size, Jz_size,
+                            device_particle_position_x,
+                            device_particle_momentum_y,
+                            device_particle_momentum_z,
+                            device_particle_charge,
+                            device_particle_weight,
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_bin_index ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_invgf_ ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_iold_ ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_deltaold_ ),
+                            inv_cell_volume,
+                            dx_inv,
+                            dx_ov_dt,
+                            i_domain_begin,
+                            not_spectral_ );
+
+        checkHIPErrors( ::hipDeviceSynchronize() );
+#elif defined ( __NVCC__ )
+        KernelFunction<<<kGridDimension, kBlockDimension>>>
+        (
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jx ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jy ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jz ),
+            Jx_size, Jy_size, Jz_size,
+            device_particle_position_x,
+            device_particle_momentum_y,
+            device_particle_momentum_z,
+            device_particle_charge,
+            device_particle_weight,
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_bin_index ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_invgf_ ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_iold_ ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_deltaold_ ),
+            inv_cell_volume,
+            dx_inv,
+            dx_ov_dt,
+            i_domain_begin,
+            not_spectral_
+        );
+        checkHIPErrors( ::cudaDeviceSynchronize() );
+#endif
+    }
+
+    //static inline
+    void
+    currentAndDensityDepositionKernel1D( double *__restrict__ host_Jx,
+                                         double *__restrict__ host_Jy,
+                                         double *__restrict__ host_Jz,
+                                         double *__restrict__ host_rho,
+                                         int Jx_size,
+                                         int Jy_size,
+                                         int Jz_size,
+                                         int rho_size,
+                                         const double *__restrict__ device_particle_position_x,
+                                         const double *__restrict__ device_particle_momentum_y,
+                                         const double *__restrict__ device_particle_momentum_z,
+                                         const short *__restrict__ device_particle_charge,
+                                         const double *__restrict__ device_particle_weight,
+                                         const int *__restrict__ host_bin_index,
+                                         unsigned int x_dimension_bin_count,
+                                         const double *__restrict__ host_invgf_,
+                                         const int *__restrict__ host_iold_,
+                                         const double *__restrict__ host_deltaold_,
+                                         double inv_cell_volume,
+                                         double dx_inv,
+                                         double dx_ov_dt,
+                                         int i_domain_begin,
+                                         int not_spectral_ )
+    {
+        // 1 because 1D; 2 because of 2nd-order interpolation
+        SMILEI_ASSERT( Params::getGPUClusterWidth( 1 ) != -1 &&
+                       Params::getGPUClusterGhostCellBorderWidth( 2 ) != -1 );
+
+        const ::dim3 kGridDimension{ static_cast<uint32_t>( x_dimension_bin_count ), 1, 1 };
+
+        static constexpr std::size_t kWorkgroupSize = 128;
+        const ::dim3 kBlockDimension{ static_cast<uint32_t>( kWorkgroupSize ), 1, 1 };
+
+        //printf("ClusterWidth %d clusterGhostCellBorderWidth %d x_dimension_bin_count %d \n", Params::getGPUClusterWidth( 1 ), Params::getGPUClusterGhostCellBorderWidth( 2 ), x_dimension_bin_count);
+
+        // NOTE: On cards lacking hardware-backed Binary64 atomic operations,
+        // falling back to Binary32 (supposing hardware support for atomic
+        // operations) can lead to a drastic performance improvement.
+        // One just needs to assign 'float' to ReductionFloat.
+        //
+        using ComputeFloat   = double;
+        using ReductionFloat = double;
+        auto KernelFunction = kernel::DepositCurrentAndDensity_1D_Order2<ComputeFloat, ReductionFloat, kWorkgroupSize>;
+#if defined ( __HIP__ )
+        hipLaunchKernelGGL( KernelFunction,
+                            kGridDimension,
+                            kBlockDimension,
+                            0, // Shared memory
+                            0, // Stream
+                            // Kernel arguments
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jx ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jy ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jz ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_rho ),
+                            Jx_size, Jy_size, Jz_size, rho_size,
+                            device_particle_position_x,
+                            device_particle_momentum_y,
+                            device_particle_momentum_z,
+                            device_particle_charge,
+                            device_particle_weight,
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_bin_index ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_invgf_ ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_iold_ ),
+                            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_deltaold_ ),
+                            inv_cell_volume,
+                            dx_inv,
+                            dx_ov_dt,
+                            i_domain_begin,
+                            not_spectral_ );
+
+        checkHIPErrors( ::hipDeviceSynchronize() );
+#elif defined ( __NVCC__ )
+        KernelFunction<<<kGridDimension, kBlockDimension>>>
+        (
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jx ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jy ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jz ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_rho ),
+            Jx_size, Jy_size, Jz_size, rho_size,
+            device_particle_position_x,
+            device_particle_momentum_y,
+            device_particle_momentum_z,
+            device_particle_charge,
+            device_particle_weight,
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_bin_index ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_invgf_ ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_iold_ ),
+            smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_deltaold_ ),
+            inv_cell_volume,
+            dx_inv,
+            dx_ov_dt,
+            i_domain_begin,
+            not_spectral_
+        );
+        checkHIPErrors( ::cudaDeviceSynchronize() );
+#endif
+    }
+
+} // namespace cudahip1d
+
+
diff --git a/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h
new file mode 100755
index 000000000..37cabb963
--- /dev/null
+++ b/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h
@@ -0,0 +1,71 @@
+//!
HIP CUDA implementation + +#ifndef Projector1D2OrderGPUKernelCUDAHIP_H +#define Projector1D2OrderGPUKernelCUDAHIP_H + +#if defined( SMILEI_ACCELERATOR_MODE ) + +#if defined( __HIP__ ) + #include +#elif defined( __NVCC__ ) + #include + #include +#endif + +#include "Params.h" +#include "gpu.h" + +namespace cudahip1d { + +void currentDepositionKernel1D( double *__restrict__ host_Jx, + double *__restrict__ host_Jy, + double *__restrict__ host_Jz, + int Jx_size, + int Jy_size, + int Jz_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count, + const double *__restrict__ host_invgf_, + const int *__restrict__ host_iold_, + const double *__restrict__ host_deltaold_, + double inv_cell_volume, + double dx_inv, + double dx_ov_dt, + int i_domain_begin, + int not_spectral_ ); + +void currentAndDensityDepositionKernel1D( + double *__restrict__ host_Jx, + double *__restrict__ host_Jy, + double *__restrict__ host_Jz, + double *__restrict__ host_rho, + int Jx_size, + int Jy_size, + int Jz_size, + int rho_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count, + const double *__restrict__ host_invgf_, + const int *__restrict__ host_iold_, + const double *__restrict__ host_deltaold_, + double inv_cell_volume, + double dx_inv, + double dx_ov_dt, + int i_domain_begin, + int not_spectral_ ); + +} // namespace cudahip1d + +#endif +#endif + diff --git a/src/Projector/Projector1D4Order.cpp b/src/Projector/Projector1D4Order.cpp index e78ddea67..ea4eafa4a 100755 --- a/src/Projector/Projector1D4Order.cpp +++ b/src/Projector/Projector1D4Order.cpp @@ -19,11 +19,11 @@ Projector1D4Order::Projector1D4Order( Params ¶ms, Patch *patch ) : Projector1D( params, patch ) { dx_inv_ = 1.0/params.cell_length[0]; - dx_ov_dt = params.cell_length[0] / params.timestep; + dx_ov_dt_ = params.cell_length[0] / params.timestep; //double defined for use in coefficients - index_domain_begin = patch->getCellStartingGlobalIndex( 0 ); + i_domain_begin_ = patch->getCellStartingGlobalIndex( 0 ); DEBUG( "cell_length "<< params.cell_length[0] ); @@ -43,7 +43,7 @@ void Projector1D4Order::currents( double *Jx, double *Jy, double *Jz, Particles int ip_m_ipo; double charge_weight = inv_cell_volume * ( double )( particles.charge( ipart ) )*particles.weight( ipart ); double xjn, xj_m_xipo, xj_m_xipo2, xj_m_xipo3, xj_m_xipo4, xj_m_xip, xj_m_xip2, xj_m_xip3, xj_m_xip4; - double crx_p = charge_weight*dx_ov_dt; // current density for particle moving in the x-direction + double crx_p = charge_weight*dx_ov_dt_; // current density for particle moving in the x-direction double cry_p = charge_weight*particles.momentum( 1, ipart )*invgf; // current density in the y-direction of the macroparticle double crz_p = charge_weight*particles.momentum( 2, ipart )*invgf; // current density allow the y-direction of the macroparticle double S0[7], S1[7], Wl[7], Wt[7], Jx_p[7]; // arrays used for the Esirkepov projection method @@ -82,7 +82,7 @@ void 
Projector1D4Order::currents( double *Jx, double *Jy, double *Jz, Particles // coefficients 2nd order interpolation on 5 nodes ipo = *iold; // index of the central node - ip_m_ipo = ip-ipo-index_domain_begin; + ip_m_ipo = ip-ipo-i_domain_begin_; S1[ip_m_ipo+1] = dble_1_ov_384 - dble_1_ov_48 * xj_m_xip + dble_1_ov_16 * xj_m_xip2 - dble_1_ov_12 * xj_m_xip3 + dble_1_ov_24 * xj_m_xip4; S1[ip_m_ipo+2] = dble_19_ov_96 - dble_11_ov_24 * xj_m_xip + dble_1_ov_4 * xj_m_xip2 + dble_1_ov_6 * xj_m_xip3 - dble_1_ov_6 * xj_m_xip4; @@ -125,7 +125,7 @@ void Projector1D4Order::currentsAndDensity( double *Jx, double *Jy, double *Jz, int ip_m_ipo; double charge_weight = inv_cell_volume * ( double )( particles.charge( ipart ) )*particles.weight( ipart ); double xjn, xj_m_xipo, xj_m_xipo2, xj_m_xipo3, xj_m_xipo4, xj_m_xip, xj_m_xip2, xj_m_xip3, xj_m_xip4; - double crx_p = charge_weight*dx_ov_dt; // current density for particle moving in the x-direction + double crx_p = charge_weight*dx_ov_dt_; // current density for particle moving in the x-direction double cry_p = charge_weight*particles.momentum( 1, ipart )*invgf; // current density in the y-direction of the macroparticle double crz_p = charge_weight*particles.momentum( 2, ipart )*invgf; // current density allow the y-direction of the macroparticle double S0[7], S1[7], Wl[7], Wt[7], Jx_p[7]; // arrays used for the Esirkepov projection method @@ -164,7 +164,7 @@ void Projector1D4Order::currentsAndDensity( double *Jx, double *Jy, double *Jz, // coefficients 2nd order interpolation on 5 nodes ipo = *iold; // index of the central node - ip_m_ipo = ip-ipo-index_domain_begin; + ip_m_ipo = ip-ipo-i_domain_begin_; S1[ip_m_ipo+1] = dble_1_ov_384 - dble_1_ov_48 * xj_m_xip + dble_1_ov_16 * xj_m_xip2 - dble_1_ov_12 * xj_m_xip3 + dble_1_ov_24 * xj_m_xip4; S1[ip_m_ipo+2] = dble_19_ov_96 - dble_11_ov_24 * xj_m_xip + dble_1_ov_4 * xj_m_xip2 + dble_1_ov_6 * xj_m_xip3 - dble_1_ov_6 * xj_m_xip4; @@ -253,7 +253,7 @@ void Projector1D4Order::basic( double *rhoj, Particles &particles, unsigned int S1[4] = dble_19_ov_96 + dble_11_ov_24 * xj_m_xip + dble_1_ov_4 * xj_m_xip2 - dble_1_ov_6 * xj_m_xip3 - dble_1_ov_6 * xj_m_xip4; S1[5] = dble_1_ov_384 + dble_1_ov_48 * xj_m_xip + dble_1_ov_16 * xj_m_xip2 + dble_1_ov_12 * xj_m_xip3 + dble_1_ov_24 * xj_m_xip4; - ip -= index_domain_begin + 3 + bin_shift ; + ip -= i_domain_begin_ + 3 + bin_shift ; // 4th order projection for the charge density // At the 4th order, oversize = 3. 
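As a reference for the hunks above and below, here is a minimal standalone sketch (not part of the patch) of how the order-4 shape factors behind the dble_* constants are evaluated. The central coefficient does not appear in these hunks, so the standard quartic B-spline value 115/192 - 5/8*d^2 + 1/4*d^4 is assumed here; the assert checks the normalization that makes the Esirkepov deposition charge-conserving.

#include <cassert>
#include <cmath>

// Order-4 B-spline shape factors on the 7-point stencil used by Projector1D4Order.
// 'd' is the normalized distance to the central node, |d| <= 0.5.
static void shape4( double d, double S[7] )
{
    const double d2 = d * d, d3 = d2 * d, d4 = d2 * d2;
    S[0] = 0.0;
    S[1] = 1.0/384.0  - 1.0/48.0  * d + 1.0/16.0 * d2 - 1.0/12.0 * d3 + 1.0/24.0 * d4;
    S[2] = 19.0/96.0  - 11.0/24.0 * d + 1.0/4.0  * d2 + 1.0/6.0  * d3 - 1.0/6.0  * d4;
    S[3] = 115.0/192.0 - 5.0/8.0 * d2 + 1.0/4.0 * d4; // assumed central coefficient
    S[4] = 19.0/96.0  + 11.0/24.0 * d + 1.0/4.0  * d2 - 1.0/6.0  * d3 - 1.0/6.0  * d4;
    S[5] = 1.0/384.0  + 1.0/48.0  * d + 1.0/16.0 * d2 + 1.0/12.0 * d3 + 1.0/24.0 * d4;
    S[6] = 0.0;
}

int main()
{
    double S[7], sum = 0.0;
    shape4( 0.3, S );
    for( int i = 0; i < 7; i++ ) { sum += S[i]; }
    assert( std::fabs( sum - 1.0 ) < 1e-12 ); // normalization <=> charge conservation
    return 0;
}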
@@ -299,7 +299,7 @@ void Projector1D4Order::ionizationCurrents( Field *Jx, Field *Jy, Field *Jz, Par xjmxi3 = xjmxi2*xjmxi; // cube xjmxi4 = xjmxi2*xjmxi2; // fourth-power - i -= index_domain_begin; + i -= i_domain_begin_; im2 = i-2; im1 = i-1; ip1 = i+1; @@ -326,7 +326,7 @@ void Projector1D4Order::ionizationCurrents( Field *Jx, Field *Jy, Field *Jz, Par xjmxi = xjn - ( double )i; // normalized distance to the nearest grid point xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the nearest grid point - i -= index_domain_begin; + i -= i_domain_begin_; im2 = i-2; im1 = i-1; ip1 = i+1; @@ -476,7 +476,7 @@ void Projector1D4Order::ionizationCurrentsForTasks( double *b_Jx, double *b_Jy, Sxd[3] = dble_19_ov_96 + dble_11_ov_24 * xpmxid + dble_1_ov_4 * xpmxid2 - dble_1_ov_6 * xpmxid3 - dble_1_ov_6 * xpmxid4; Sxd[4] = dble_1_ov_384 + dble_1_ov_48 * xpmxid + dble_1_ov_16 * xpmxid2 + dble_1_ov_12 * xpmxid3 + dble_1_ov_24 * xpmxid4; - ip -= index_domain_begin+bin_shift; + ip -= i_domain_begin_+bin_shift; // id -= i_domain_begin; for (unsigned int i=0 ; i<5 ; i++) { diff --git a/src/Projector/Projector1D4Order.h b/src/Projector/Projector1D4Order.h index 6cd570d62..3ef38a7c7 100755 --- a/src/Projector/Projector1D4Order.h +++ b/src/Projector/Projector1D4Order.h @@ -33,7 +33,6 @@ class Projector1D4Order : public Projector1D void susceptibility( ElectroMagn *EMfields, Particles &particles, double species_mass, SmileiMPI *smpi, int istart, int iend, int ithread, int icell = 0, int ipart_ref = 0 ) override final; private: - double dx_ov_dt; static constexpr double dble_1_ov_384 = 1.0/384.0; static constexpr double dble_1_ov_48 = 1.0/48.0 ; static constexpr double dble_1_ov_16 = 1.0/16.0 ; diff --git a/src/Projector/Projector2D2OrderGPU.cpp b/src/Projector/Projector2D2OrderGPU.cpp index cfe20eb7d..82e1fd0e2 100755 --- a/src/Projector/Projector2D2OrderGPU.cpp +++ b/src/Projector/Projector2D2OrderGPU.cpp @@ -21,7 +21,7 @@ Projector2D2OrderGPU::Projector2D2OrderGPU( Params ¶meters, Patch *a_patch ) // initialize it's member variable) we better initialize // Projector2D2OrderGPU's member variable after explicitly initializing // Projector2D. 
- not_spectral = !parameters.is_pxr; + not_spectral_ = !parameters.is_pxr; dt = parameters.timestep; dts2 = dt / 2.0; dts4 = dts2 / 2.0; @@ -69,10 +69,10 @@ currentDepositionKernel2DOnDevice( double *__restrict__ Jx, int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ); + int not_spectral_ ); extern "C" void -currentAndDensityDepositionKernelOnDevice( double *__restrict__ Jx, +currentAndDensityDepositionKernel2DOnDevice( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, double *__restrict__ rho, @@ -99,7 +99,7 @@ currentAndDensityDepositionKernelOnDevice( double *__restrict__ Jx, int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ); + int not_spectral_ ); #endif @@ -130,7 +130,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy int j_domain_begin, int nprimy, double, - int not_spectral ) + int not_spectral_ ) { #if defined( SMILEI_ACCELERATOR_MODE )//SMILEI_ACCELERATOR_GPU_OMP ) currentDepositionKernel2DOnDevice( Jx, @@ -158,7 +158,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy i_domain_begin, j_domain_begin, nprimy, - not_spectral ); + not_spectral_ ); #else SMILEI_ASSERT( false ); #endif @@ -191,10 +191,10 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy int j_domain_begin, int nprimy, double, - int not_spectral ) + int not_spectral_ ) { #if defined( SMILEI_ACCELERATOR_MODE )//SMILEI_ACCELERATOR_GPU_OMP ) - currentAndDensityDepositionKernelOnDevice( Jx, + currentAndDensityDepositionKernel2DOnDevice( Jx, Jy, Jz, rho, @@ -221,7 +221,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy i_domain_begin, j_domain_begin, nprimy, - not_spectral ); + not_spectral_ ); #else SMILEI_ASSERT( false ); #endif @@ -368,7 +368,7 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, // i_domain_begin_, j_domain_begin_, // nprimy, // one_third, - // not_spectral ); + // not_spectral_ ); // } // Does not compute Rho ! 
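These hunks only rename the flag, but its meaning is worth keeping in mind: not_spectral_ = !parameters.is_pxr is 1 for the native FDTD solver and 0 when the spectral picsar/PXR backend owns the fields. With FDTD, Jy is dual in y, so each x-row of the 2D array is one cell longer; the flag folds both layouts into a single indexing formula. A minimal sketch, with a hypothetical helper name:

// Row-major Jy addressing for both backends: FDTD rows hold nprimy + 1 values
// (dual in y), spectral rows hold nprimy; not_spectral is 1 for FDTD, 0 for PXR.
inline int jy_index( int ix, int iy, int nprimy, int not_spectral )
{
    // ix * ( nprimy + not_spectral ) + iy == ( ix * nprimy + iy ) + not_spectral * ix
    return ix * ( nprimy + not_spectral ) + iy;
}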
@@ -385,7 +385,7 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, i_domain_begin_, j_domain_begin_, nprimy, one_third, - not_spectral ); + not_spectral_ ); } else { // If no field diagnostics this timestep, then the projection is done directly on the total arrays @@ -401,7 +401,7 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, // i_domain_begin_, j_domain_begin_, // nprimy, // one_third, - // not_spectral ); + // not_spectral_ ); // } } else { @@ -420,7 +420,7 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, i_domain_begin_, j_domain_begin_, nprimy, one_third, - not_spectral ); + not_spectral_ ); } } } @@ -467,7 +467,7 @@ void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) //{ // #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) // naive:: // the naive, OMP version serves as a reference along with the CPU version @@ -490,7 +490,7 @@ void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, // dx_ov_dt, dy_ov_dt, // i_domain_begin, j_domain_begin, // nprimy, -// not_spectral ); +// not_spectral_ ); //} // // @@ -524,7 +524,7 @@ void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) //{ // #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) // naive:: // the naive, OMP version serves as a reference along with the CPU version @@ -547,7 +547,7 @@ void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, // dx_ov_dt, dy_ov_dt, // i_domain_begin, j_domain_begin, // nprimy, -// not_spectral ); +// not_spectral_ ); //} //#endif diff --git a/src/Projector/Projector2D2OrderGPU.h b/src/Projector/Projector2D2OrderGPU.h index 9a799f9b5..15b6d2afa 100755 --- a/src/Projector/Projector2D2OrderGPU.h +++ b/src/Projector/Projector2D2OrderGPU.h @@ -78,7 +78,7 @@ class Projector2D2OrderGPU : public Projector2D double dt; double dts2; double dts4; - int not_spectral; + int not_spectral_; unsigned int x_dimension_bin_count_; unsigned int y_dimension_bin_count_; }; diff --git a/src/Projector/Projector2D2OrderGPUKernel.cpp b/src/Projector/Projector2D2OrderGPUKernel.cpp old mode 100644 new mode 100755 index 8f38f52fe..2f36ae2b0 --- a/src/Projector/Projector2D2OrderGPUKernel.cpp +++ b/src/Projector/Projector2D2OrderGPUKernel.cpp @@ -33,7 +33,7 @@ currentDepositionKernel2DOnDevice( double *__restrict__ host_Jx, int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { //#if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) //naive:: // the naive, OMP version serves as a reference along with the CPU version @@ -56,14 +56,14 @@ currentDepositionKernel2DOnDevice( double *__restrict__ host_Jx, dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral ); + not_spectral_ ); } //! Project global current and charge densities (EMfields->Jx_/Jy_/Jz_/rho_) //! 
extern "C" void -currentAndDensityDepositionKernelOnDevice( double *__restrict__ host_Jx, +currentAndDensityDepositionKernel2DOnDevice( double *__restrict__ host_Jx, double *__restrict__ host_Jy, double *__restrict__ host_Jz, double *__restrict__ host_rho, @@ -90,14 +90,14 @@ currentAndDensityDepositionKernelOnDevice( double *__restrict__ host_Jx, int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { //#if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) //naive:: // the naive, OMP version serves as a reference along with the CPU version //#else cudahip2d:: //#endif - currentAndDensityDepositionKernel( host_Jx, host_Jy, host_Jz, host_rho, + currentAndDensityDepositionKernel2D( host_Jx, host_Jy, host_Jz, host_rho, Jx_size, Jy_size, Jz_size, rho_size, device_particle_position_x, device_particle_position_y, device_particle_momentum_z, @@ -113,7 +113,7 @@ currentAndDensityDepositionKernelOnDevice( double *__restrict__ host_Jx, dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral ); + not_spectral_ ); } #endif diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu old mode 100644 new mode 100755 index 666a409f4..ad966328a --- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu +++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu @@ -20,20 +20,20 @@ #if defined( __HIP__ ) // HIP compiler support enabled (for .cu files) - #else - #define PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION 1 - #endif +#else + #define PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION 1 +#endif - #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) - #include +#if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) + #include - #include "Tools.h" - #else - #include + #include "Tools.h" +#else + #include - #include "Params.h" - #include "gpu.h" - #endif + #include "Params.h" + #include "gpu.h" +#endif // #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) @@ -65,7 +65,7 @@ // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) // { // // The OMP implementation is NOT bin aware. As per the precondition on // // host_bin_index, index zero always contains the number of particles. @@ -185,7 +185,7 @@ // tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); // // SMILEI_ACCELERATOR_ATOMIC -// Jy[iloc + j + not_spectral * ( /* i + */ ipo )] += tmp; +// Jy[iloc + j + not_spectral_ * ( /* i + */ ipo )] += tmp; // // SMILEI_ACCELERATOR_ATOMIC // Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] /* + Sx0[i] */ ) + @@ -209,7 +209,7 @@ // Jx[iloc + j] += tmpJx[j]; // tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); // SMILEI_ACCELERATOR_ATOMIC -// Jy[iloc + j + not_spectral * ( i + ipo )] += tmp; +// Jy[iloc + j + not_spectral_ * ( i + ipo )] += tmp; // // SMILEI_ACCELERATOR_ATOMIC // Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] + Sx0[i] ) + @@ -248,7 +248,7 @@ // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) // { // // The OMP implementation is NOT bin aware. As per the precondition on // // host_bin_index, index zero always contains the number of particles. 
@@ -372,7 +372,7 @@ // tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); // // SMILEI_ACCELERATOR_ATOMIC -// Jy[iloc + j + not_spectral * ( /* i + */ ipo )] += tmp; +// Jy[iloc + j + not_spectral_ * ( /* i + */ ipo )] += tmp; // // SMILEI_ACCELERATOR_ATOMIC // Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] /* + Sx0[i] */ ) + @@ -407,7 +407,7 @@ // tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); // // SMILEI_ACCELERATOR_ATOMIC -// Jy[iloc + j + not_spectral * ( i + ipo )] += tmp; +// Jy[iloc + j + not_spectral_ * ( i + ipo )] += tmp; // // SMILEI_ACCELERATOR_ATOMIC // Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] + Sx0[i] ) + @@ -567,7 +567,7 @@ namespace cudahip2d { int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { // TODO(Etienne M): refactor this function. Break it into smaller // pieces (lds init/store, coeff computation, deposition etc..) @@ -867,7 +867,7 @@ namespace cudahip2d { // These atomics are basically free (very few of them). atomic::GDS::AddNoReturn( &device_Jx[global_memory_index], static_cast( Jx_scratch_space[scratch_space_index] ) ); - atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); + atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral_ * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); atomic::GDS::AddNoReturn( &device_Jz[global_memory_index], static_cast( Jz_scratch_space[scratch_space_index] ) ); } } // end DepositCurrent @@ -903,7 +903,7 @@ namespace cudahip2d { int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { // TODO(Etienne M): refactor this function. Break it into smaller // pieces (lds init/store, coeff computation, deposition etc..) @@ -1146,7 +1146,7 @@ namespace cudahip2d { // These atomics are basically free (very few of them). 
atomic::GDS::AddNoReturn( &device_Jx[global_memory_index], static_cast( Jx_scratch_space[scratch_space_index] ) ); - atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); + atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral_ * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); atomic::GDS::AddNoReturn( &device_Jz[global_memory_index], static_cast( Jz_scratch_space[scratch_space_index] ) ); atomic::GDS::AddNoReturn( &device_rho[global_memory_index], static_cast( rho_scratch_space[scratch_space_index] ) ); } @@ -1181,7 +1181,7 @@ namespace cudahip2d { int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { SMILEI_ASSERT( Params::getGPUClusterWidth( 2 /* 2D */ ) != -1 && Params::getGPUClusterGhostCellBorderWidth( 2 /* 2nd order interpolation */ ) != -1 ); @@ -1229,7 +1229,7 @@ namespace cudahip2d { dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral ); + not_spectral_ ); checkHIPErrors( ::hipDeviceSynchronize() ); #elif defined ( __NVCC__ ) @@ -1258,7 +1258,7 @@ namespace cudahip2d { dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral + not_spectral_ ); checkHIPErrors( ::cudaDeviceSynchronize() ); #endif @@ -1266,7 +1266,7 @@ namespace cudahip2d { //static inline void - currentAndDensityDepositionKernel( double *__restrict__ host_Jx, + currentAndDensityDepositionKernel2D( double *__restrict__ host_Jx, double *__restrict__ host_Jy, double *__restrict__ host_Jz, double *__restrict__ host_rho, @@ -1293,7 +1293,7 @@ namespace cudahip2d { int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { SMILEI_ASSERT( Params::getGPUClusterWidth( 2 /* 2D */ ) != -1 && Params::getGPUClusterGhostCellBorderWidth( 2 /* 2nd order interpolation */ ) != -1 ); @@ -1341,7 +1341,7 @@ namespace cudahip2d { dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral ); + not_spectral_ ); checkHIPErrors( ::hipDeviceSynchronize() ); #elif defined ( __NVCC__ ) @@ -1371,7 +1371,7 @@ namespace cudahip2d { dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral + not_spectral_ ); checkHIPErrors( ::cudaDeviceSynchronize() ); #endif @@ -1409,7 +1409,7 @@ namespace cudahip2d { // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) //{ // #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) // naive:: // the naive, OMP version serves as a reference along with the CPU version @@ -1432,7 +1432,7 @@ namespace cudahip2d { // dx_ov_dt, dy_ov_dt, // i_domain_begin, j_domain_begin, // nprimy, -// not_spectral ); +// not_spectral_ ); //} // ////! 
Project global current and charge densities (EMfields->Jx_/Jy_/Jz_/rho_) @@ -1465,7 +1465,7 @@ namespace cudahip2d { // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) //{ // #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) // naive:: // the naive, OMP version serves as a reference along with the CPU version @@ -1488,6 +1488,6 @@ namespace cudahip2d { // dx_ov_dt, dy_ov_dt, // i_domain_begin, j_domain_begin, // nprimy, -// not_spectral ); +// not_spectral_ ); //} diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h old mode 100644 new mode 100755 index d607a4ab4..7aae8d2c6 --- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h @@ -20,8 +20,7 @@ namespace cudahip2d { //static -void - currentDepositionKernel2D( double *__restrict__ host_Jx, +void currentDepositionKernel2D( double *__restrict__ host_Jx, double *__restrict__ host_Jy, double *__restrict__ host_Jz, int Jx_size, @@ -46,11 +45,10 @@ void int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ); + int not_spectral_ ); //static -inline void - currentAndDensityDepositionKernel( +void currentAndDensityDepositionKernel2D( double *__restrict__ host_Jx, double *__restrict__ host_Jy, double *__restrict__ host_Jz, @@ -78,7 +76,7 @@ inline void int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ); + int not_spectral_ ); } // namespace cudahip2d diff --git a/src/Projector/Projector3D2OrderGPU.cpp b/src/Projector/Projector3D2OrderGPU.cpp index 39342b204..910fc7d14 100755 --- a/src/Projector/Projector3D2OrderGPU.cpp +++ b/src/Projector/Projector3D2OrderGPU.cpp @@ -25,7 +25,7 @@ Projector3D2OrderGPU::Projector3D2OrderGPU( Params ¶meters, Patch *a_patch ) // initialize it's member variable) we better initialize // Projector2D2OrderGPU's member variable after explicitly initializing // Projector2D. 
- not_spectral = !parameters.is_pxr; + not_spectral_ = !parameters.is_pxr; dt = parameters.timestep; dts2 = dt / 2.0; dts4 = dts2 / 2.0; @@ -83,7 +83,7 @@ currentDeposition3DOnDevice( double *__restrict__ Jx, int k_domain_begin, int nprimy, int nprimz, - int not_spectral ); + int not_spectral_ ); extern "C" void densityDeposition3DOnDevice( @@ -114,7 +114,7 @@ densityDeposition3DOnDevice( int k_domain_begin, int nprimy, int nprimz, - int not_spectral ); + int not_spectral_ ); #endif namespace { // Unnamed namespace == static == internal linkage == no exported symbols @@ -148,7 +148,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy int nprimy, int nprimz, double, - int not_spectral ) + int not_spectral_ ) { #if defined( SMILEI_ACCELERATOR_MODE ) currentDeposition3DOnDevice( Jx, @@ -181,7 +181,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy j_domain_begin, k_domain_begin, nprimy, nprimz, - not_spectral ); + not_spectral_ ); #else SMILEI_ASSERT( false ); #endif @@ -213,7 +213,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy int nprimy, int nprimz, double, - int not_spectral ) + int not_spectral_ ) { #if defined( SMILEI_ACCELERATOR_MODE ) densityDeposition3DOnDevice( @@ -243,7 +243,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy j_domain_begin, k_domain_begin, nprimy, nprimz, - not_spectral ); + not_spectral_ ); #else SMILEI_ASSERT( false ); #endif @@ -401,7 +401,7 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, i_domain_begin_, j_domain_begin_, k_domain_begin_, nprimy, nprimz, one_third, - not_spectral ); + not_spectral_ ); double *const __restrict__ b_rho = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->data() : EMfields->rho_->data(); unsigned int rho_size = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->size() : EMfields->rho_->size(); @@ -416,7 +416,7 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, i_domain_begin_, j_domain_begin_, k_domain_begin_, nprimy, nprimz, one_third, - not_spectral ); + not_spectral_ ); // If requested performs then the charge density deposition } else { @@ -440,7 +440,7 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, i_domain_begin_, j_domain_begin_, k_domain_begin_, nprimy, nprimz, one_third, - not_spectral ); + not_spectral_ ); } // TODO(Etienne M): DIAGS. Find a way to get rho. We could: diff --git a/src/Projector/Projector3D2OrderGPU.h b/src/Projector/Projector3D2OrderGPU.h index 2fac2402e..5aa1927ac 100755 --- a/src/Projector/Projector3D2OrderGPU.h +++ b/src/Projector/Projector3D2OrderGPU.h @@ -78,7 +78,7 @@ class Projector3D2OrderGPU : public Projector3D double dt; double dts2; double dts4; - int not_spectral; + int not_spectral_; unsigned int x_dimension_bin_count_; unsigned int y_dimension_bin_count_; unsigned int z_dimension_bin_count_; diff --git a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu old mode 100644 new mode 100755 index 195a02667..4c6e07224 --- a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu +++ b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu @@ -162,7 +162,7 @@ namespace cudahip { int k_domain_begin, int nprimy, int nprimz, - int not_spectral ) + int not_spectral_ ) { // Potential future work for optimization: Break the kernel into smaller // pieces (lds init/store, coeff computation, deposition etc..) 
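In the 3D hunks below, Jy and Jz get different not_spectral_ offsets because, with FDTD, each current component is one cell longer (dual) along its own direction. A minimal sketch of the addressing, assuming the kernel's row-major (x, y, z) layout and hypothetical helper names:

// base: spectral layout, a plain nx * ny * nz row-major cube.
inline int jy_index3d( int ix, int iy, int iz, int ny, int nz, int not_spectral )
{
    const int base = ( ix * ny + iy ) * nz + iz;
    return base + not_spectral * ix * nz;           // Jy dual in y: rows of ny + 1
}

inline int jz_index3d( int ix, int iy, int iz, int ny, int nz, int not_spectral )
{
    const int base = ( ix * ny + iy ) * nz + iz;
    return base + not_spectral * ( ix * ny + iy );  // Jz dual in z: depth of nz + 1
}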
@@ -501,8 +501,8 @@ namespace cudahip { // These atomics are basically free (very few of them). atomic::GDS::AddNoReturn( &device_Jx[global_memory_index], static_cast( Jx_scratch_space[field_index] ) ); - atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral * global_x_scratch_space_coordinate * nprimz], static_cast( Jy_scratch_space[field_index] ) ); - atomic::GDS::AddNoReturn( &device_Jz[global_memory_index + /* We handle the FTDT/picsar */ not_spectral * (global_x_scratch_space_coordinate * nprimy + global_y_scratch_space_coordinate)], static_cast( Jz_scratch_space[field_index] ) ); + atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral_ * global_x_scratch_space_coordinate * nprimz], static_cast( Jy_scratch_space[field_index] ) ); + atomic::GDS::AddNoReturn( &device_Jz[global_memory_index + /* We handle the FTDT/picsar */ not_spectral_ * (global_x_scratch_space_coordinate * nprimy + global_y_scratch_space_coordinate)], static_cast( Jz_scratch_space[field_index] ) ); } } // end DepositCurrent @@ -536,7 +536,7 @@ namespace cudahip { int k_domain_begin, int nprimy, int nprimz, - int not_spectral ) + int not_spectral_ ) { // TODO(Etienne M): refactor this function. Break it into smaller // pieces (lds init/store, coeff computation, deposition etc..) @@ -716,7 +716,7 @@ namespace cudahip { int k_domain_begin, int nprimy, int nprimz, - int not_spectral ) + int not_spectral_ ) { SMILEI_ASSERT( Params::getGPUClusterWidth( 3 /* 2D */ ) != -1 && Params::getGPUClusterGhostCellBorderWidth( 2 /* 2nd order interpolation */ ) != -1 ); @@ -767,7 +767,7 @@ namespace cudahip { dx_ov_dt, dy_ov_dt, dz_ov_dt, i_domain_begin, j_domain_begin, k_domain_begin, nprimy, nprimz, - not_spectral + not_spectral_ ); checkHIPErrors( ::hipDeviceSynchronize() ); @@ -799,7 +799,7 @@ namespace cudahip { dx_ov_dt, dy_ov_dt, dz_ov_dt, i_domain_begin, j_domain_begin, k_domain_begin, nprimy, nprimz, - not_spectral + not_spectral_ ); checkHIPErrors( ::cudaDeviceSynchronize() ); #endif @@ -836,7 +836,7 @@ namespace cudahip { int k_domain_begin, int nprimy, int nprimz, - int not_spectral ) + int not_spectral_ ) { SMILEI_ASSERT( Params::getGPUClusterWidth( 3 /* 2D */ ) != -1 && Params::getGPUClusterGhostCellBorderWidth( 2 /* 2nd order interpolation */ ) != -1 ); @@ -886,7 +886,7 @@ namespace cudahip { dx_ov_dt, dy_ov_dt, dz_ov_dt, i_domain_begin, j_domain_begin, k_domain_begin, nprimy, nprimz, - not_spectral ); + not_spectral_ ); checkHIPErrors( ::hipDeviceSynchronize() ); #elif defined ( __NVCC__ ) @@ -914,7 +914,7 @@ namespace cudahip { dx_ov_dt, dy_ov_dt, dz_ov_dt, i_domain_begin, j_domain_begin, k_domain_begin, nprimy, nprimz, - not_spectral + not_spectral_ ); checkHIPErrors( ::cudaDeviceSynchronize() ); #endif diff --git a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h old mode 100644 new mode 100755 index 94368f4dd..eba3f0d0d --- a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h @@ -17,9 +17,8 @@ #include "gpu.h" namespace cudahip { -//static -inline void - currentDepositionKernel3D( double *__restrict__ host_Jx, +//static inline +void currentDepositionKernel3D( double *__restrict__ host_Jx, double *__restrict__ host_Jy, double *__restrict__ host_Jz, int Jx_size, @@ -50,11 +49,10 @@ inline void int k_domain_begin, int nprimy, int nprimz, - int not_spectral ); + int not_spectral_ ); -//static -inline void - 
densityDepositionKernel3D( +//static inline +void densityDepositionKernel3D( double *__restrict__ host_rho, int rho_size, const double *__restrict__ device_particle_position_x, @@ -82,7 +80,7 @@ inline void int k_domain_begin, int nprimy, int nprimz, - int not_spectral ); + int not_spectral_ ); } // namespace cudahip diff --git a/src/SmileiMPI/SmileiMPI.cpp b/src/SmileiMPI/SmileiMPI.cpp index c35a69fe9..262a57b34 100755 --- a/src/SmileiMPI/SmileiMPI.cpp +++ b/src/SmileiMPI/SmileiMPI.cpp @@ -1319,9 +1319,9 @@ void SmileiMPI::isend( ElectroMagn *EM, int to, int &irequest, vector( EM->emBoundCond[bcId] ) ) { ElectroMagnBC1D_SM *embc = static_cast( EM->emBoundCond[bcId] ); - MPI_Isend( &( embc->By_val ), 1, MPI_DOUBLE, to, tag+irequest, MPI_COMM_WORLD, &requests[irequest] ); + MPI_Isend( &( embc->By_val_ ), 1, MPI_DOUBLE, to, tag+irequest, MPI_COMM_WORLD, &requests[irequest] ); irequest++; - MPI_Isend( &( embc->Bz_val ), 1, MPI_DOUBLE, to, tag+irequest, MPI_COMM_WORLD, &requests[irequest] ); + MPI_Isend( &( embc->Bz_val_ ), 1, MPI_DOUBLE, to, tag+irequest, MPI_COMM_WORLD, &requests[irequest] ); irequest++; } else if( dynamic_cast( EM->emBoundCond[bcId] ) ) { // BCs at the x-border @@ -1855,9 +1855,9 @@ void SmileiMPI::recv( ElectroMagn *EM, int from, int &tag, bool recv_xmin_bc ) if( dynamic_cast( EM->emBoundCond[bcId] ) ) { ElectroMagnBC1D_SM *embc = static_cast( EM->emBoundCond[bcId] ); MPI_Status status; - MPI_Recv( &( embc->By_val ), 1, MPI_DOUBLE, from, tag, MPI_COMM_WORLD, &status ); + MPI_Recv( &( embc->By_val_ ), 1, MPI_DOUBLE, from, tag, MPI_COMM_WORLD, &status ); tag++; - MPI_Recv( &( embc->Bz_val ), 1, MPI_DOUBLE, from, tag, MPI_COMM_WORLD, &status ); + MPI_Recv( &( embc->Bz_val_ ), 1, MPI_DOUBLE, from, tag, MPI_COMM_WORLD, &status ); tag++; } else if( dynamic_cast( EM->emBoundCond[bcId] ) ) { // BCs at the x-border From 530529d6bc2b9b683de311df7af8e33332aa5d14 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Thu, 18 Apr 2024 21:39:27 +0200 Subject: [PATCH 10/54] fix analysis --- .../validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py index ee807d65b..da25c961c 100644 --- a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py +++ b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py @@ -247,7 +247,7 @@ def adaptive_error(values, statistics, thresholds): thresholds = {} thresholds["points"] = np.array([0. 
,10 ,100,1000]) -thresholds["factor"] = np.array([1e9, 1.,0.5, 0.2]) +thresholds["factor"] = np.array([1e9, 1.,0.6, 0.2]) Validate("Average gamma for the electrons vs time", average_gamma["electron"], adaptive_error(average_gamma["electron"], Nelectron, thresholds)) Validate("Average gamma for the positrons vs time", average_gamma["positron"], adaptive_error(average_gamma["positron"], Npositron, thresholds)) From ff0266ec8be29aee51ce53e63c9b7d78d821f261 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Fri, 19 Apr 2024 10:20:49 +0200 Subject: [PATCH 11/54] more --- .../validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py index da25c961c..8d5b8ddb1 100644 --- a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py +++ b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py @@ -247,7 +247,7 @@ def adaptive_error(values, statistics, thresholds): thresholds = {} thresholds["points"] = np.array([0. ,10 ,100,1000]) -thresholds["factor"] = np.array([1e9, 1.,0.6, 0.2]) +thresholds["factor"] = np.array([1e9, 1.,0.7, 0.2]) Validate("Average gamma for the electrons vs time", average_gamma["electron"], adaptive_error(average_gamma["electron"], Nelectron, thresholds)) Validate("Average gamma for the positrons vs time", average_gamma["positron"], adaptive_error(average_gamma["positron"], Npositron, thresholds)) From 73b7de886b88a1183e2febafcb8e981354823d77 Mon Sep 17 00:00:00 2001 From: cprouveur Date: Fri, 19 Apr 2024 17:15:30 +0200 Subject: [PATCH 12/54] added the appropriate field1D and projector factory source files + cleanup ; no results on adastra, probably an issue with openmp --- src/ElectroMagnSolver/MA_Solver1D_norm.cpp | 43 ------ src/ElectroMagnSolver/MF_Solver1D_Yee.cpp | 29 ---- src/Field/Field1D.cpp | 127 ++++++++++++++++-- src/Interpolator/Interpolator1D2Order.cpp | 93 +------------ src/Interpolator/Interpolator1D2Order.h | 2 +- src/Particles/nvidiaParticles.cu | 13 +- src/Projector/Projector1D2OrderGPU.cpp | 93 +------------ .../Projector1D2OrderGPUKernelCUDAHIP.cu | 33 ----- src/Projector/Projector3D2OrderGPUKernel.cpp | 3 - .../Projector3D2OrderGPUKernelCUDAHIP.cu | 3 - src/Projector/ProjectorFactory.h | 7 +- 11 files changed, 129 insertions(+), 317 deletions(-) diff --git a/src/ElectroMagnSolver/MA_Solver1D_norm.cpp b/src/ElectroMagnSolver/MA_Solver1D_norm.cpp index 9b9f0d53d..4ef123b2d 100755 --- a/src/ElectroMagnSolver/MA_Solver1D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver1D_norm.cpp @@ -15,17 +15,8 @@ MA_Solver1D_norm::~MA_Solver1D_norm() void MA_Solver1D_norm::operator()( ElectroMagn *fields ) { - { const unsigned int nx_p = fields->dimPrim[0]; const unsigned int nx_d = fields->dimDual[0]; - /*Field1D *Ex1D = static_cast( fields->Ex_ ); - Field1D *Ey1D = static_cast( fields->Ey_ ); - Field1D *Ez1D = static_cast( fields->Ez_ ); - Field1D *By1D = static_cast( fields->By_ ); - Field1D *Bz1D = static_cast( fields->Bz_ ); - Field1D *Jx1D = static_cast( fields->Jx_ ); - Field1D *Jy1D = static_cast( fields->Jy_ ); - Field1D *Jz1D = static_cast( fields->Jz_ );*/ double *const __restrict__ Ex1D = fields->Ex_->data(); // [x] : dual in x primal in y,z double *const __restrict__ Ey1D = fields->Ey_->data(); // [x] : dual in y primal in x,z @@ -37,18 +28,6 @@ void MA_Solver1D_norm::operator()( ElectroMagn *fields ) const double *const 
__restrict__ Jy1D = fields->Jy_->data(); // [x] : dual in y primal in x,z const double *const __restrict__ Jz1D = fields->Jz_->data(); // [x] : dual in z primal in x,y - { - fields->Ex_->copyFromDeviceToHost(); - fields->Ey_->copyFromDeviceToHost(); - fields->Ez_->copyFromDeviceToHost(); - fields->Jx_->copyFromDeviceToHost(); - fields->Jy_->copyFromDeviceToHost(); - fields->Jz_->copyFromDeviceToHost(); - } - std::cout<< "printing before in MA solver ex, ey and ez for nx_d="<Ex_->copyFromDeviceToHost(); - fields->Ey_->copyFromDeviceToHost(); - fields->Ez_->copyFromDeviceToHost(); - } - } - // to be deleted - { - const unsigned int nx_p = fields->dimPrim[0]; - const unsigned int nx_d = fields->dimDual[0]; - double *const __restrict__ Ex1D = fields->Ex_->data(); // [x] : dual in x primal in y,z - double *const __restrict__ Ey1D = fields->Ey_->data(); // [x] : dual in y primal in x,z - double *const __restrict__ Ez1D = fields->Ez_->data(); // [x] : dual in z primal in x,y - - std::cout<< "printing after in MA solver ex, ey and ez for nx_d="<dimPrim[0]; const unsigned int nx_d = fields->dimDual[0]; - // Static-cast of the fields - /*Field1D* Ey1D; - Field1D* Ez1D; - if (isEFilterApplied) { - Ey1D = static_cast(fields->filter_->Ey_[0]); - Ez1D = static_cast(fields->filter_->Ez_[0]); - } else { - Ey1D = static_cast(fields->Ey_); - Ez1D = static_cast(fields->Ez_); - }*/ const double *const __restrict__ Ey1D = isEFilterApplied ? fields->filter_->Ey_[0]->data() : fields->Ey_->data(); // [ix] : dual in y primal in x,z const double *const __restrict__ Ez1D = isEFilterApplied ? fields->filter_->Ez_[0]->data() : fields->Ez_->data();// [ix] : dual in z primal in x,y - //Field1D *By1D = static_cast( fields->By_ ); - //Field1D *Bz1D = static_cast( fields->Bz_ ); double *const __restrict__ By1D = fields->By_->data();// [ix] : dual in x,z primal in y double *const __restrict__ Bz1D = fields->Bz_->data();// [ix] : dual in x,y primal in z - // to be deleted - /*std::cout<< "printing before in FM solver by and bz for nx_d-1="<By_->copyFromDeviceToHost(); - fields->Bz_->copyFromDeviceToHost(); - } - std::cout<< "printing after in FM solver by and bz for nx_d-1="<allocateAndCopyFromHostToDevice(); + recvFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); + } +#endif } else if( ghost_size != (int) sendFields_[iDim*2+iNeighbor]->dims_[iDim] ) { +#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) + ERROR( "To Do GPU : envelope" ); +#endif delete sendFields_[iDim*2+iNeighbor]; sendFields_[iDim*2+iNeighbor] = new Field1D(size); delete recvFields_[iDim*2+iNeighbor]; recvFields_[iDim*2+iNeighbor] = new Field1D(size); } } - void Field1D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) { std::vector size = dims_; @@ -267,13 +300,30 @@ void Field1D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) unsigned int NX = size[0]; - double* sub = sendFields_[iDim*2+iNeighbor]->data_; - double* field = data_; + double *__restrict__ sub = sendFields_[iDim*2+iNeighbor]->data_; + const double*__restrict__ field = data_; + +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + // At initialization, this data is NOT on the GPU + const bool should_manipulate_gpu_memory = name[0] == 'B' && + smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub ); + SMILEI_ASSERT( smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( field ) == + smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub ) ); + const unsigned 
field_first = ix; + const unsigned field_last = ix + NX - 1; + #pragma omp target if( should_manipulate_gpu_memory ) + #pragma omp teams distribute parallel for +#elif defined( SMILEI_OPENACC_MODE ) + const int subSize = sendFields_[iDim*2+iNeighbor]->size(); + const int fSize = number_of_points_; + bool fieldName( (name.substr(0,1) == "B") ); + #pragma acc parallel present( field[0:fSize], sub[0:subSize] ) if (fieldName) + #pragma acc loop gang worker vector +#endif for( unsigned int i=0; i size = dims_; @@ -286,8 +336,25 @@ void Field1D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) unsigned int NX = size[0]; - double* sub = recvFields_[iDim*2+(iNeighbor+1)%2]->data_; - double* field = data_; + const double *__restrict__ sub = recvFields_[iDim*2+(iNeighbor+1)%2]->data_; + double *__restrict__ field = data_; + +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + // At initialization, this data is NOT on the GPU + const bool should_manipulate_gpu_memory = name[0] == 'B' && + smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub ); + const unsigned field_first = ix; + const unsigned field_last = ix + NX - 1; + #pragma omp target if( should_manipulate_gpu_memory ) \ + map( tofrom : field [field_first:field_last - field_first] ) + #pragma omp teams distribute parallel for +#elif defined( SMILEI_OPENACC_MODE ) + int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); + const int fSize = number_of_points_; + bool fieldName( name.substr(0,1) == "B" ); + #pragma acc parallel present( field[0:fSize], sub[0:subSize] ) if (fieldName) + #pragma acc loop gang worker vector +#endif for( unsigned int i=0; idata_; - double* field = data_; + double *__restrict__ sub = sendFields_[iDim*2+iNeighbor]->data_; + const double *__restrict__ field = data_; + +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + // At initialization, this data is NOT on the GPU + const bool should_manipulate_gpu_memory = (name[0] == 'J' || name[0] == 'R') && + smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub ); + const unsigned field_first = ix; + const unsigned field_last = ix + NX - 1; + #pragma omp target if( should_manipulate_gpu_memory ) \ + map( to : field [field_first:field_last - field_first] ) + #pragma omp teams distribute parallel for +#elif defined( SMILEI_OPENACC_MODE ) + const int subSize = sendFields_[iDim*2+iNeighbor]->size(); + const int fSize = number_of_points_; + bool fieldName( ((name.substr(0,1) == "J") || (name.substr(0,1) == "R") ) && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub )); + #pragma acc parallel copy(field[0:fSize]) present( sub[0:subSize] ) if (fieldName) + #pragma acc loop gang worker vector +#endif for( unsigned int i=0; i size = dims_; @@ -324,9 +407,27 @@ void Field1D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) unsigned int NX = size[0]; - double* sub = recvFields_[iDim*2+(iNeighbor+1)%2]->data_; - double* field = data_; + const double *__restrict__ sub = recvFields_[iDim*2+(iNeighbor+1)%2]->data_; + double *__restrict__ field = data_; + +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + // At initialization, this data is NOT on the GPU + const bool should_manipulate_gpu_memory = (name[0] == 'J' || name[0] == 'R') && + smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub ); + const unsigned field_first = ix; + const unsigned field_last = ix + NX - 1; + #pragma omp target if( should_manipulate_gpu_memory ) \ + map( tofrom : field [field_first:field_last - 
field_first] ) + #pragma omp teams distribute parallel for +#elif defined( SMILEI_OPENACC_MODE ) + int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); + int fSize = number_of_points_; + bool fieldName( name.substr(0,1) == "J" || name.substr(0,1) == "R"); + #pragma acc parallel copy(field[0:fSize]) present( sub[0:subSize] ) if (fieldName) + #pragma acc loop gang worker vector +#endif for( unsigned int i=0; idynamics_Epart[ithread].data();//&( smpi->dynamics_Epart[ithread][0] ); - double *const __restrict__ BLoc = smpi->dynamics_Bpart[ithread].data();//&( smpi->dynamics_Bpart[ithread][0] ); + double *const __restrict__ ELoc = smpi->dynamics_Epart[ithread].data(); + double *const __restrict__ BLoc = smpi->dynamics_Bpart[ithread].data(); - int *const __restrict__ iold = smpi->dynamics_iold[ithread].data();//&( smpi->dynamics_iold[ithread][0] ); - double *const __restrict__ delta = smpi->dynamics_deltaold[ithread].data();//&( smpi->dynamics_deltaold[ithread][0] ); + int *const __restrict__ iold = smpi->dynamics_iold[ithread].data(); + double *const __restrict__ delta = smpi->dynamics_deltaold[ithread].data(); const double *const __restrict__ position_x = particles.getPtrPosition( 0 ); @@ -159,53 +158,8 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, const int last_index = *iend; double accdx_inv[2]; accdx_inv[0]= dx_inv_; - /*std::cout<< "printing before in interpolator ex, ey and ez then bx,by,bz" <Ex_->copyFromDeviceToHost(); - EMfields->Ey_->copyFromDeviceToHost(); - EMfields->Ez_->copyFromDeviceToHost(); - EMfields->Jx_->copyFromDeviceToHost(); - EMfields->Jy_->copyFromDeviceToHost(); - EMfields->Jz_->copyFromDeviceToHost(); - } - std::cout<< "printing before in interpolator after copyFromDeviceToHost ex, ey and ez then bx,by,bz" <dynamics_Epart[ithread] )[0*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Epart[ithread] )[1*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Epart[ithread] )[2*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[0*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[1*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[2*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_iold[ithread] )[0] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_deltaold[ithread] )[0]), nparts ); - - - - std::cout<<"print in interpolator fields wrapper eloc before computation and after CopyDeviceToHost"<use_BTIS3){ - //for (int ipart=*istart; ipart < *iend; ipart++){ #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target map( to : i_domain_begin_) is_device_ptr (position_x) #pragma omp teams distribute parallel for @@ -229,7 +183,6 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, #endif for( int ipart = first_index; ipart < last_index; ipart++ ) { // Normalized particle position - //double xpn = position_x[ipart] * dx_inv_;//particles.position( 0, ipart )*dx_inv_; const double xpn = position_x[ipart] * accdx_inv[0]; // Calculate coeffs int idx_p[1], idx_d[1]; @@ -297,11 +250,6 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, BzpartBTIS3 
[first_index:interpolation_range_size],\ // ? - /* Field1D *By1D_mBTIS3 = static_cast( EMfields->By_mBTIS3 ); - Field1D *Bz1D_mBTIS3 = static_cast( EMfields->Bz_mBTIS3 ); - double *BypartBTIS3 = &( smpi->dynamics_Bpart_yBTIS3[ithread][0] ); - double *BzpartBTIS3 = &( smpi->dynamics_Bpart_zBTIS3[ithread][0] );*/ - for (int ipart=*istart; ipart < *iend; ipart++){ // Normalized particle position @@ -343,39 +291,6 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, #endif } // end with B-TIS interpolation - /*{ - EMfields->Ex_->copyFromDeviceToHost(); - EMfields->Ey_->copyFromDeviceToHost(); - EMfields->Ez_->copyFromDeviceToHost(); - } - double *const __restrict__ ELoc = smpi->dynamics_Epart[ithread].data();//&( smpi->dynamics_Epart[ithread][0] ); - double *const __restrict__ BLoc = smpi->dynamics_Bpart[ithread].data();//&( smpi->dynamics_Bpart[ithread][0] ); -*/ - } - // to be deleted - { - const int nparts = particles.numberOfParticles(); - double *const __restrict__ ELoc = smpi->dynamics_Epart[ithread].data();//&( smpi->dynamics_Epart[ithread][0] ); - double *const __restrict__ BLoc = smpi->dynamics_Bpart[ithread].data();//&( smpi->dynamics_Bpart[ithread][0] ); - std::cout<< std::setprecision (15)<<"print in interpolator fields wrapper eloc before CopyDeviceToHost"<dynamics_Epart[ithread] )[0*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Epart[ithread] )[1*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Epart[ithread] )[2*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[0*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[1*nparts] ), nparts ); - smilei::tools::gpu::HostDeviceMemoryManagement::CopyDeviceToHost( &( ( smpi->dynamics_Bpart[ithread] )[2*nparts] ), nparts ); - - } - std::cout<<"print in interpolator fields wrapper eloc after CopyDeviceToHost"<( idx_p[0] ); + delta = xpn - static_cast( idx_p[0] ); delta2 = delta * delta; // pow( delta_p[0], 2 ); // square of the normalized distance to the central node coeffxp[0] = 0.5 * ( delta2 - delta_p[0] + 0.25 ); diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index 16941b152..693f87dab 100755 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -743,8 +743,7 @@ namespace detail { static_cast( particle_container.getPtrPosition( 0 ) ) ) ); const auto last = first + particle_container.deviceSize(); int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); - printf ( "CellStartingGlobalIndex_for_x %d res %f patch size %d \n",CellStartingGlobalIndex_for_x,parameters.res_space[0], parameters.patch_size_[0] ); - doComputeParticleClusterKey( first, last, + doComputeParticleClusterKey( first, last, Cluster1D{ parameters.res_space[0], parameters.patch_size_[0], CellStartingGlobalIndex_for_x} ); @@ -762,7 +761,7 @@ namespace detail { const auto last = first + particle_container.deviceSize(); int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); - doComputeParticleClusterKey( first, last, + doComputeParticleClusterKey( first, last, Cluster2D{ parameters.res_space[0], parameters.res_space[1], parameters.patch_size_[0], @@ -785,7 +784,7 @@ namespace detail { int 
CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); int CellStartingGlobalIndex_for_z = a_parent_patch.getCellStartingGlobalIndex_noGC(2); - doComputeParticleClusterKey( first, last, + doComputeParticleClusterKey( first, last, Cluster3D{ parameters.res_space[0], parameters.res_space[1], parameters.res_space[2], @@ -971,7 +970,6 @@ namespace detail { // TODO(Etienne M): Find a better way to dispatch at runtime. This is // complex to read and to maintain. int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); - printf("CellStartingGlobalIndex_for_x %d \n" , CellStartingGlobalIndex_for_x ); const Cluster1D cluster_manipulator{ parameters.res_space[0], parameters.patch_size_[0], @@ -1035,7 +1033,6 @@ namespace detail { int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); - printf("CellStartingGlobalIndex_for_x %d \n" , CellStartingGlobalIndex_for_x ); const Cluster2D cluster_manipulator{ parameters.res_space[0], parameters.res_space[1], parameters.patch_size_[0], @@ -1482,7 +1479,6 @@ void nvidiaParticles::initializeDataOnDevice() // setHostBinIndex(); } else { - printf( " parent patch %p cells starting global index %d \n", parent_patch_, parent_patch_->getCellStartingGlobalIndex_noGC(0) ); // At this point, a copy of the host particles and last_index is on the // device and we know we support the space dimension. detail::Cluster::computeParticleClusterKey( *this, *parameters_, *parent_patch_ ); @@ -1956,9 +1952,6 @@ extern "C" { void* CreateGPUParticles( const void* parameters, const void* a_parent_patch ) { - const Patch *temp = static_cast( a_parent_patch ); - - printf( " in create GPU parent patch %p cells starting global index %d \n", a_parent_patch, temp->getCellStartingGlobalIndex_noGC(0) ); return new nvidiaParticles{ *static_cast( parameters ), *static_cast( a_parent_patch ) }; } diff --git a/src/Projector/Projector1D2OrderGPU.cpp b/src/Projector/Projector1D2OrderGPU.cpp index 79d879024..c63223885 100755 --- a/src/Projector/Projector1D2OrderGPU.cpp +++ b/src/Projector/Projector1D2OrderGPU.cpp @@ -195,14 +195,11 @@ void Projector1D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, int ispec, int icell, int ipart_ref ) -{ { std::vector &iold = smpi->dynamics_iold[ithread]; std::vector &delta = smpi->dynamics_deltaold[ithread]; std::vector &invgf = smpi->dynamics_invgf[ithread]; - EMfields->rho_->copyFromDeviceToHost(); - EMfields->rho_s[ispec]->copyFromDeviceToHost(); if( diag_flag ) { double *const __restrict__ b_Jx = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->data() : EMfields->Jx_->data(); @@ -220,20 +217,6 @@ void Projector1D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, // Does not compute Rho ! 
#if defined( SMILEI_ACCELERATOR_MODE ) - /*currentsAndDensity( b_Jx, b_Jy, b_Jz, b_rho, - Jx_size, Jy_size, Jz_size, rho_size, - particles, x_dimension_bin_count_, - invgf.data(), iold.data(), delta.data(), - inv_cell_volume, - dx_inv_, - dx_ov_dt_, - i_domain_begin_, - not_spectral_ );*/ - // to be deleted - std::cout<<"in projector1D2orderGPUKernel.cpp l229: rho_size= "<rho_->copyFromDeviceToHost(); - EMfields->rho_s[ispec]->copyFromDeviceToHost(); - EMfields->Jx_->copyFromDeviceToHost(); - EMfields->Jx_s[ispec]->copyFromDeviceToHost(); - EMfields->Jy_->copyFromDeviceToHost(); - EMfields->Jy_s[ispec]->copyFromDeviceToHost(); - EMfields->Jz_->copyFromDeviceToHost(); - EMfields->Jz_s[ispec]->copyFromDeviceToHost(); - std::cout<<"in projector1D2orderGPUKernel.cpp l251 after projection: rho_size= "<Jx_->data(); - Jy_ = EMfields->Jy_->data(); - Jz_ = EMfields->Jz_->data(); - rho_ = EMfields->rho_->data(); - - /*currents( Jx_, Jy_, Jz_, - EMfields->Jx_->size(), EMfields->Jy_->size(), EMfields->Jz_->size(), - particles, x_dimension_bin_count_, y_dimension_bin_count_, - invgf.data(), iold.data(), delta.data(), - inv_cell_volume, - dx_inv_, dy_inv_, - dx_ov_dt_, dy_ov_dt_, - i_domain_begin_, j_domain_begin_, - nprimy, - one_third, - not_spectral_ ); - } - double *const __restrict__ b_Jx = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->data() : EMfields->Jx_->data(); - unsigned int Jx_size = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->size() : EMfields->Jx_->size(); - - double *const __restrict__ b_Jy = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->data() : EMfields->Jy_->data(); - unsigned int Jy_size = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->size() : EMfields->Jy_->size(); - - double *const __restrict__ b_Jz = EMfields->Jz_s[ispec] ? EMfields->Jz_s[ispec]->data() : EMfields->Jz_->data(); - unsigned int Jz_size = EMfields->Jz_s[ispec] ? EMfields->Jz_s[ispec]->size() : EMfields->Jz_->size();//*/ - /*Jx_ = EMfields->Jx_->data(); - Jy_ = EMfields->Jy_->data(); - Jz_ = EMfields->Jz_->data();*/ - - /*currents( Jx_, Jy_, Jz_, - EMfields->Jx_->size(), EMfields->Jy_->size(), EMfields->Jz_->size(), - particles, x_dimension_bin_count_, - invgf.data(), iold.data(), delta.data(), - inv_cell_volume, - dx_inv_, - dx_ov_dt_, - i_domain_begin_, - not_spectral_ );*/ #if defined( SMILEI_ACCELERATOR_MODE ) - //double *device_Jx = smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( b_Jx ) ; - //printf("testing device Jx:, %p \n", device_Jx); - currentDepositionKernel1DOnDevice(Jx_, Jy_, Jz_, //b_Jx,b_Jy,b_Jz, - //Jx_size, Jy_size, Jz_size, + currentDepositionKernel1DOnDevice(Jx_, Jy_, Jz_, EMfields->Jx_->size(), EMfields->Jy_->size(), EMfields->Jz_->size(), particles.getPtrPosition( 0 ), particles.getPtrMomentum( 1 ), @@ -339,27 +269,6 @@ void Projector1D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, } } } -// to be deleted -{ - double *const __restrict__ b_Jx = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->data() : EMfields->Jx_->data(); - unsigned int Jx_size = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->size() : EMfields->Jx_->size(); - - double *const __restrict__ b_Jy = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->data() : EMfields->Jy_->data(); - unsigned int Jy_size = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->size() : EMfields->Jy_->size(); - - double *const __restrict__ b_Jz = EMfields->Jz_s[ispec] ? EMfields->Jz_s[ispec]->data() : EMfields->Jz_->data(); - unsigned int Jz_size = EMfields->Jz_s[ispec] ? 
EMfields->Jz_s[ispec]->size() : EMfields->Jz_->size(); - - double *const __restrict__ b_rho = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->data() : EMfields->rho_->data(); - unsigned int rho_size = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->size() : EMfields->rho_->size(); - - std::cout<<"in projector1D2orderGPUKernel.cpp l336: rho_size= "<rho_s[ispec] ? " - << EMfields->rho_s[ispec] << " Jx_size " << Jx_size<< " Jy_size " << Jy_size<< " Jz_size " << Jz_size<< std::endl; - for( int ipart=0 ; ipart( Jx_scratch_space[scratch_space_index] ) ); atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + not_spectral_ * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); // We handle the FTDT/picsar @@ -731,8 +715,6 @@ namespace cudahip1d { const unsigned int first_particle = workgroup_dedicated_bin_index == 0 ? 0 : device_bin_index[workgroup_dedicated_bin_index - 1]; const unsigned int last_particle = device_bin_index[workgroup_dedicated_bin_index]; - //printf(" first_particle %d last_particle %d loopstride %d \n",first_particle, last_particle, loop_stride); - for( unsigned int particle_index = first_particle + thread_index_offset; particle_index < last_particle; particle_index += loop_stride ) { @@ -740,10 +722,6 @@ namespace cudahip1d { const int *const __restrict__ iold = &device_iold_[particle_index]; const double *const __restrict__ deltaold = &device_deltaold_[particle_index]; - //printf("in projector cuda l735: particle charge= %f weight %f position_x= %f, momentum y = %f, momentum z = %f, charge*sqrt(2) %+4.15e \n", static_cast( device_particle_charge[particle_index]) , static_cast( device_particle_weight[particle_index]), - // static_cast( device_particle_position_x[particle_index] ), static_cast( device_particle_momentum_y[particle_index] ), - // static_cast( device_particle_momentum_z[particle_index] ), static_cast( device_particle_charge[particle_index]) * static_cast(sqrt(2.0))); - ComputeFloat Sx0[5]; ComputeFloat Sx1[5]; @@ -954,12 +932,6 @@ namespace cudahip1d { checkHIPErrors( ::hipDeviceSynchronize() ); #elif defined ( __NVCC__ ) - //double *device_Jx = smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jx ) ; - //printf("testing device Jx:, %p \n", device_Jx); - /*for (int i=0; i( kWorkgroupSize ), 1, 1 }; - //printf("ClusterWidth %d clusterGhostCellBorderWidth %d x_dimension_bin_count %d \n",Params::getGPUClusterWidth( 1), Params::getGPUClusterGhostCellBorderWidth( 2), x_dimension_bin_count); - // NOTE: On cards lacking hardware backed Binary64 atomic operations, // falling back to Binary32 (supposing hardware support for atomic // operations) can lead to drastic performance improvement. 
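The NOTE just above about Binary64 atomics is the main performance consideration in this deposition kernel. Below is a minimal CUDA sketch of what such a Binary32 fallback could look like, assuming a templated accumulator type; the kernel and all names are illustrative only, not Smilei's actual atomic::GDS helpers.

    // Illustrative sketch only (assumption: not Smilei's actual kernel or
    // its atomic::GDS helpers). On architectures lacking hardware Binary64
    // atomicAdd (compute capability < 6.0), instantiating AccumT = float
    // keeps the atomics in hardware at the cost of accumulation precision.
    #include <cuda_runtime.h>

    template <typename AccumT>
    __global__ void depositRhoSketch( AccumT *rho, const double *weight,
                                      const int *cell_of_particle, int particle_count )
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if( i < particle_count ) {
            // One atomic add per particle into its cell of the density grid.
            atomicAdd( &rho[cell_of_particle[i]], static_cast<AccumT>( weight[i] ) );
        }
    }

On cards without native double-precision atomicAdd, a software compare-and-swap loop is the usual fallback, and instantiating with AccumT = float avoids it entirely at the cost of accumulation precision.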
@@ -1064,9 +1034,6 @@ namespace cudahip1d { checkHIPErrors( ::hipDeviceSynchronize() ); #elif defined ( __NVCC__ ) - //printf("device bin index in projector cuda: %d \n",*host_bin_index); - //for(int i=0; i<*host_bin_index;++i) - // std::cout<<"in projector cuda, device_particle_position_x[i]"<< device_particle_position_x[i]< Date: Thu, 25 Apr 2024 10:05:25 +0200 Subject: [PATCH 13/54] add smilei_omp_threads in namelist --- .../gpu/tst3d_gpu_o2_thermal_plasma_medium.py | 2 +- .../gpu/tst3d_gpu_o2_thermal_plasma_short.py | 1 - .../gpu/tst3d_v_o2_thermal_plasma_medium.py | 2 +- .../gpu/tst3d_v_o2_thermal_plasma_short.py | 1 - benchmarks/tst2d_18_em_pml.py | 1 - benchmarks/tst2d_s_o4_laser_wake_vay.py | 2 -- benchmarks/tst2d_s_o4_radiation_pressure_acc.py | 2 -- .../tst2d_tasks_01_radiation_pressure_acc.py | 1 - benchmarks/tst2d_v_o2_em_propagation.py | 2 -- benchmarks/tst2d_v_o4_em_propagation.py | 2 -- benchmarks/tst2d_v_o4_laser_wake_vay.py | 2 -- .../tst2d_v_o4_multiphoton_Breit_Wheeler.py | 3 --- benchmarks/tst2d_v_o4_radiation_pressure_acc.py | 2 -- benchmarks/tst3d_s_o4_em_propagation.py | 2 -- doc/Sphinx/Overview/releases.rst | 11 ++++++----- doc/Sphinx/Use/namelist.rst | 17 ++++++++++++----- src/Params/Params.cpp | 14 +++++++++----- src/Python/pyinit.py | 3 ++- 18 files changed, 31 insertions(+), 39 deletions(-) diff --git a/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_medium.py b/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_medium.py index a50614236..cb8c8f26a 100644 --- a/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_medium.py +++ b/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_medium.py @@ -66,7 +66,7 @@ def InitialChargeDensity(x, y, z): number_of_patches = kPatchPerGridDimension, EM_boundary_conditions = [ ["periodic"] ], print_every = 10, - random_seed = smilei_mpi_rank) + ) Vectorization(mode = "off") diff --git a/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_short.py b/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_short.py index 548746977..a627232f9 100644 --- a/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_short.py +++ b/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_short.py @@ -61,7 +61,6 @@ gpu_computing = True, # random_seed = 0xDEADBEEF, - random_seed = smilei_mpi_rank, ) Vectorization( diff --git a/benchmarks/gpu/tst3d_v_o2_thermal_plasma_medium.py b/benchmarks/gpu/tst3d_v_o2_thermal_plasma_medium.py index 524f564d0..92c598c19 100644 --- a/benchmarks/gpu/tst3d_v_o2_thermal_plasma_medium.py +++ b/benchmarks/gpu/tst3d_v_o2_thermal_plasma_medium.py @@ -67,7 +67,7 @@ def InitialChargeDensity(x, y, z): number_of_patches = kPatchPerGridDimension, EM_boundary_conditions = [ ["periodic"] ], print_every = 10, - random_seed = smilei_mpi_rank) + ) Vectorization(mode = "on") diff --git a/benchmarks/gpu/tst3d_v_o2_thermal_plasma_short.py b/benchmarks/gpu/tst3d_v_o2_thermal_plasma_short.py index 3672fd9d0..bc553aa62 100644 --- a/benchmarks/gpu/tst3d_v_o2_thermal_plasma_short.py +++ b/benchmarks/gpu/tst3d_v_o2_thermal_plasma_short.py @@ -61,7 +61,6 @@ gpu_computing = False, # random_seed = 0xDEADBEEF, - random_seed = smilei_mpi_rank, ) Vectorization( diff --git a/benchmarks/tst2d_18_em_pml.py b/benchmarks/tst2d_18_em_pml.py index 18dc5ee09..703fe50fc 100755 --- a/benchmarks/tst2d_18_em_pml.py +++ b/benchmarks/tst2d_18_em_pml.py @@ -25,7 +25,6 @@ ['PML','PML'], ], number_of_pml_cells = [[10,10],[10,10]], - random_seed = smilei_mpi_rank ) Antenna( diff --git a/benchmarks/tst2d_s_o4_laser_wake_vay.py b/benchmarks/tst2d_s_o4_laser_wake_vay.py index 4a4725b51..bb75f29e6 100644 --- 
a/benchmarks/tst2d_s_o4_laser_wake_vay.py +++ b/benchmarks/tst2d_s_o4_laser_wake_vay.py @@ -28,8 +28,6 @@ solve_poisson = False, print_every = 100, - - random_seed = smilei_mpi_rank ) MovingWindow( diff --git a/benchmarks/tst2d_s_o4_radiation_pressure_acc.py b/benchmarks/tst2d_s_o4_radiation_pressure_acc.py index 7adbc8844..755cbf763 100755 --- a/benchmarks/tst2d_s_o4_radiation_pressure_acc.py +++ b/benchmarks/tst2d_s_o4_radiation_pressure_acc.py @@ -28,8 +28,6 @@ ['silver-muller'], ['periodic'], ], - - random_seed = smilei_mpi_rank ) Vectorization( diff --git a/benchmarks/tst2d_tasks_01_radiation_pressure_acc.py b/benchmarks/tst2d_tasks_01_radiation_pressure_acc.py index 9d4c9af87..6a8530bf0 100644 --- a/benchmarks/tst2d_tasks_01_radiation_pressure_acc.py +++ b/benchmarks/tst2d_tasks_01_radiation_pressure_acc.py @@ -39,7 +39,6 @@ ['periodic'], ], cluster_width = 16, - random_seed = smilei_mpi_rank ) diff --git a/benchmarks/tst2d_v_o2_em_propagation.py b/benchmarks/tst2d_v_o2_em_propagation.py index cc152fa80..76741f1bd 100644 --- a/benchmarks/tst2d_v_o2_em_propagation.py +++ b/benchmarks/tst2d_v_o2_em_propagation.py @@ -32,8 +32,6 @@ ], EM_boundary_conditions_k = [[cos(ang), sin(ang)],[-1.,0.],[0.,1.],[0.,-1.]], - - random_seed = smilei_mpi_rank ) Vectorization( diff --git a/benchmarks/tst2d_v_o4_em_propagation.py b/benchmarks/tst2d_v_o4_em_propagation.py index c6ed8b064..2e15305b0 100644 --- a/benchmarks/tst2d_v_o4_em_propagation.py +++ b/benchmarks/tst2d_v_o4_em_propagation.py @@ -32,8 +32,6 @@ ], EM_boundary_conditions_k = [[cos(ang), sin(ang)],[-1.,0.],[0.,1.],[0.,-1.]], - - random_seed = smilei_mpi_rank ) Vectorization( diff --git a/benchmarks/tst2d_v_o4_laser_wake_vay.py b/benchmarks/tst2d_v_o4_laser_wake_vay.py index 969263838..9e56337d0 100644 --- a/benchmarks/tst2d_v_o4_laser_wake_vay.py +++ b/benchmarks/tst2d_v_o4_laser_wake_vay.py @@ -28,8 +28,6 @@ solve_poisson = False, print_every = 100, - - random_seed = smilei_mpi_rank ) Vectorization( diff --git a/benchmarks/tst2d_v_o4_multiphoton_Breit_Wheeler.py b/benchmarks/tst2d_v_o4_multiphoton_Breit_Wheeler.py index 7db269c5e..91907aee9 100755 --- a/benchmarks/tst2d_v_o4_multiphoton_Breit_Wheeler.py +++ b/benchmarks/tst2d_v_o4_multiphoton_Breit_Wheeler.py @@ -95,9 +95,6 @@ def n0_positron(x,y): simulation_time = Tsim, EM_boundary_conditions = [field_cond, field_cond], - - random_seed = smilei_mpi_rank, - reference_angular_frequency_SI = wr ) diff --git a/benchmarks/tst2d_v_o4_radiation_pressure_acc.py b/benchmarks/tst2d_v_o4_radiation_pressure_acc.py index c0feb79d2..353418604 100644 --- a/benchmarks/tst2d_v_o4_radiation_pressure_acc.py +++ b/benchmarks/tst2d_v_o4_radiation_pressure_acc.py @@ -28,8 +28,6 @@ ['silver-muller'], ['periodic'], ], - - random_seed = smilei_mpi_rank ) diff --git a/benchmarks/tst3d_s_o4_em_propagation.py b/benchmarks/tst3d_s_o4_em_propagation.py index 6a87f2dfa..ef97569d5 100755 --- a/benchmarks/tst3d_s_o4_em_propagation.py +++ b/benchmarks/tst3d_s_o4_em_propagation.py @@ -21,8 +21,6 @@ simulation_time = Tsim, EM_boundary_conditions = [ ['silver-muller'] ], - - random_seed = smilei_mpi_rank ) LaserGaussian3D( diff --git a/doc/Sphinx/Overview/releases.rst b/doc/Sphinx/Overview/releases.rst index 8027d2f3d..5c3e9d046 100755 --- a/doc/Sphinx/Overview/releases.rst +++ b/doc/Sphinx/Overview/releases.rst @@ -30,6 +30,12 @@ Changes made in the repository (not released) * Features: * Relativistic field initialization now supports multiple species and both direction propagations. 
+ * Added the argument ``phase_offset`` in laser definitions such as ``LaserGaussian2D``. + * The ``LaserGaussianAM`` definition will only use one coordinate for its ``focus`` argument + (the transverse coordinate of the focus in this geometry is zero). + * Small improvements in PML for envelope model (AM and 2D). + * Deprecated ``smilei_rand_max``. + * New namelist variables ``smilei_omp_threads`` and ``smilei_total_cores``. * Happi: @@ -44,11 +50,6 @@ Changes made in the repository (not released) * Dark theme (click the switch on the bottom left, or set browser preferences). -* Added the argument ``phase_offset`` in laser definitions such as ``LaserGaussian2D``. -* The ``LaserGaussianAM`` definition will only use one coordinate for its ``focus`` argument - (the transverse coordinate of the focus in this geometry is zero). -* Small improvements in PML for envelope model (AM and 2D). - * Bug fixes: * ``dump_minutes`` often failed to write some checkpoint files. diff --git a/doc/Sphinx/Use/namelist.rst b/doc/Sphinx/Use/namelist.rst index ad318954c..f7deebcae 100755 --- a/doc/Sphinx/Use/namelist.rst +++ b/doc/Sphinx/Use/namelist.rst @@ -60,7 +60,8 @@ for each MPI process). The following steps are executed: * The rank of the current MPI process as :py:data:`smilei_mpi_rank`. * The total number of MPI processes as :py:data:`smilei_mpi_size`. - * The maximum random integer as :py:data:`smilei_rand_max`. + * The number of OpenMP threads per MPI process as :py:data:`smilei_omp_threads`. + * The total number of cores as :py:data:`smilei_total_cores`. #. The namelist(s) is executed. @@ -3619,9 +3620,15 @@ namelist. They should not be re-defined by the user! The total number of MPI processes. -.. - <> - .. py:data:: smilei_rand_max +.. py:data:: smilei_omp_threads + + The number of OpenMP threads per MPI process. + +.. py:data:: smilei_total_cores - The largest random integer. + The total number of cores. +.. note:: + + These variables can be accessed during ``happi`` post-processing, e.g. + ``S.namelist.smilei_mpi_size``.
\ No newline at end of file diff --git a/src/Params/Params.cpp b/src/Params/Params.cpp index bc9fb8ed4..803cdf9e5 100755 --- a/src/Params/Params.cpp +++ b/src/Params/Params.cpp @@ -129,16 +129,20 @@ Params::Params( SmileiMPI *smpi, std::vector<std::string> namelistsFiles ) : PyObject_SetAttrString( Py_main, "_test_mode", Py_False ); PyTools::checkPyError(); - // here we add the rank, in case some script need it + // we add the rank, in case some script needs it PyModule_AddIntConstant( Py_main, "smilei_mpi_rank", smpi->getRank() ); - // here we add the MPI size, in case some script need it + // we add the MPI size, in case some script needs it PyModule_AddIntConstant( Py_main, "smilei_mpi_size", smpi->getSize() ); namelist += string( "smilei_mpi_size = " ) + to_string( smpi->getSize() ) + "\n"; - // here we add the larget int, important to get a valid seed for randomization - PyModule_AddIntConstant( Py_main, "smilei_rand_max", RAND_MAX ); - namelist += string( "smilei_rand_max = " ) + to_string( RAND_MAX ) + "\n\n"; + // we add the number of OpenMP threads, in case some script needs it + PyModule_AddIntConstant( Py_main, "smilei_omp_threads", smpi->getOMPMaxThreads() ); + namelist += string( "smilei_omp_threads = " ) + to_string( smpi->getOMPMaxThreads() ) + "\n"; + + // we add the total number of cores, in case some script needs it + PyModule_AddIntConstant( Py_main, "smilei_total_cores", smpi->getGlobalNumCores() ); + namelist += string( "smilei_total_cores = " ) + to_string( smpi->getGlobalNumCores() ) + "\n"; // Running pyprofiles.py runScript( string( reinterpret_cast<const char *>( pyprofiles_py ), pyprofiles_py_len ), "pyprofiles.py", globals ); diff --git a/src/Python/pyinit.py b/src/Python/pyinit.py index 56febc475..f5aeeb7e1 100755 --- a/src/Python/pyinit.py +++ b/src/Python/pyinit.py @@ -645,7 +645,8 @@ class MultiphotonBreitWheeler(SmileiComponent): # Smilei-defined smilei_mpi_rank = 0 smilei_mpi_size = 1 -smilei_rand_max = 2**31-1 +smilei_omp_threads = 1 +smilei_total_cores = 1 # Variable to set to False for the actual run (useful for the test mode) _test_mode = True From 348faa03cf42084839984b61870942c6a9af05d1 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Fri, 26 Apr 2024 00:08:51 +0200 Subject: [PATCH 14/54] fix particle exchange --- src/Particles/Particles.cpp | 4 ++-- src/Particles/Particles.h | 4 ++-- src/Particles/nvidiaParticles.cu | 14 ++++++++++---- src/Particles/nvidiaParticles.h | 2 +- src/Patch/Patch.cpp | 4 ++-- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index 688c53085..34eaeb161 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -1299,13 +1299,13 @@ void Particles::copyFromHostToDevice() { ERROR( "Device only feature, should not have come here!" ); } -void Particles::copyFromDeviceToHost() +void Particles::copyFromDeviceToHost( bool ) { ERROR( "Device only feature, should not have come here!" ); } // Loop all particles and copy the outgoing ones to buffers -void Particles::copyLeavingParticlesToBuffers( const bool copy[], Particles* buffer[] ) +void Particles::copyLeavingParticlesToBuffers( const vector<bool> copy, const vector<Particles*> buffer ) { // Leaving particles have a cell_key equal to -2-direction // where direction goes from 0 to 6 and tells which way the particle escapes.
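The encoding in that last comment is compact enough to deserve an explicit decoder; a minimal sketch, assuming only the -2-direction convention stated there (the helper name is hypothetical):

    // Sketch of the cell_key convention described in the comment above
    // (assumption: derived only from that comment; helper is hypothetical).
    // Particles still inside the box keep cell_key >= 0; a particle leaving
    // through exchange direction d is tagged cell_key = -2 - d.
    inline int leavingDirection( int cell_key )
    {
        return -cell_key - 2; // e.g. cell_key == -2 -> direction 0
    }

With this convention a single integer appears to both flag the particle for removal (any negative key) and select which exchange buffer it is copied to.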
diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index 86f9f9cac..c0e5958e3 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -435,7 +435,7 @@ class Particles virtual void initializeDataOnDevice(); virtual void initializeIDsOnDevice(); virtual void copyFromHostToDevice(); - virtual void copyFromDeviceToHost(); + virtual void copyFromDeviceToHost( bool copy_keys = false ); //! Return the pointer toward the Position[idim] vector virtual double* getPtrPosition( int idim ) { @@ -475,7 +475,7 @@ class Particles // ----------------------------------------------------------------------------- //! Extract particles leaving the box to buffers // ----------------------------------------------------------------------------- - void copyLeavingParticlesToBuffers( const bool copy[], Particles* buffer[] ); + void copyLeavingParticlesToBuffers( const std::vector<bool> copy, const std::vector<Particles*> buffer ); virtual void copyLeavingParticlesToBuffer( Particles* buffer ); // ----------------------------------------------------------------------------- diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index efca22ad5..af45bfadd 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -1348,7 +1348,7 @@ void nvidiaParticles::copyFromHostToDevice() // ------------------------------------------------------------------------------------------------- //! Copy device to host // ------------------------------------------------------------------------------------------------- -void nvidiaParticles::copyFromDeviceToHost() +void nvidiaParticles::copyFromDeviceToHost( bool copy_keys ) { for (int idim=0;idimcopyFromDeviceToHost(); + buffer->copyFromDeviceToHost( true ); } @@ -1410,7 +1414,8 @@ void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pre nvidia_momentum_[1].begin(), nvidia_momentum_[2].begin(), nvidia_weight_.begin(), - nvidia_charge_.begin() ) ); + nvidia_charge_.begin(), + nvidia_cell_keys_.begin() ) ); const auto source_iterator_last = source_iterator_first + nparts; // std::advance nvidiaParticles* const cp_parts = static_cast<nvidiaParticles*>( buffer ); @@ -1428,7 +1433,8 @@ void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pre cp_parts->nvidia_momentum_[1].begin(), cp_parts->nvidia_momentum_[2].begin(), cp_parts->nvidia_weight_.begin(), - cp_parts->nvidia_charge_.begin() ) ); + cp_parts->nvidia_charge_.begin(), + cp_parts->nvidia_cell_keys_.begin() ) ); // Copy send particles in dedicated data structure thrust::copy_if( thrust::device, diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index ba689f1e8..5fa0a933b 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -78,7 +78,7 @@ class nvidiaParticles : public Particles void copyFromHostToDevice() override; //!
Update the particles from device to host - void copyFromDeviceToHost() override; + void copyFromDeviceToHost( bool copy_keys = false ) override; unsigned int deviceCapacity() const override; diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index 585f76f97..8fa4022aa 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -540,8 +540,8 @@ void Patch::copyExchParticlesToBuffers( int ispec, Params &params ) cleanMPIBuffers( ispec, params ); // Make a list of buffers - bool copy[params.nDim_field*2]; - Particles* sendBuffer[params.nDim_field*2]; + vector<bool> copy( params.nDim_field*2, false ); + vector<Particles*> sendBuffer( params.nDim_field*2, nullptr ); for( size_t iDim = 0; iDim < params.nDim_field; iDim++ ) { copy[2*iDim+0] = neighbor_[iDim][0] != MPI_PROC_NULL; copy[2*iDim+1] = neighbor_[iDim][1] != MPI_PROC_NULL; From 4b2f6487debd5d5290f29ef9a4583981ccd19cf2 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Fri, 26 Apr 2024 00:12:42 +0200 Subject: [PATCH 15/54] make happi work with virtualenv --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index 3aaff0201..4c9ada85b 100755 --- a/makefile +++ b/makefile @@ -52,7 +52,7 @@ DIRS := $(shell find src -type d) SRCS := $(shell find src/* -name \*.cpp) OBJS := $(addprefix $(BUILD_DIR)/, $(SRCS:.cpp=.o)) DEPS := $(addprefix $(BUILD_DIR)/, $(SRCS:.cpp=.d)) -SITEDIR = $(shell $(PYTHONEXE) -c 'import site; site._script()' --user-site) +SITEDIR = $(shell d=`$(PYTHONEXE) -m site --user-site` && echo $$d || $(PYTHONEXE) -c "import sysconfig; print(sysconfig.get_path('purelib'))") # Smilei tools TABLES_DIR := tools/tables From 8bcaeb4790678eb695a143d10d9ff3956269d820 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Mon, 29 Apr 2024 06:54:32 +0200 Subject: [PATCH 16/54] add publication --- doc/Sphinx/Overview/material.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 3322c2857..048d8e1b5 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of April 2024, 181 papers have been published covering a broad range of topics: +As of April 2024, 182 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,12 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Yao2024] + + W. Yao, M. Nakatsutsumi, S. Buffechoux, P. Antici, M. Borghesi, A. Ciardi, S. N. Chen, E. d’Humières, L. Gremillet, R. Heathcote, V. Horný, P. McKenna, M. N. Quinn, L. Romagnani, R. Royle, G. Sarri, Y. Sentoku, H.-P. Schlenvoigt, T. Toncian, O. Tresca, L. Vassura, O. Willi, J. Fuchs, + `Optimizing laser coupling, matter heating, and particle acceleration from solids using multiplexed ultraintense lasers`, + `Matter and Radiation at Extremes 9, 047202 (2024) `_ .. [Luo2024] M. Luo, C. Riconda, I. Pusztai, A. Grassi, J. S. Wurtele, and T.
Fülöp, From 152c4bee0ba2cbabdd72720d9ece45b1e3956139 Mon Sep 17 00:00:00 2001 From: cprouveur Date: Mon, 29 Apr 2024 15:31:27 +0200 Subject: [PATCH 17/54] Fix in coefficients, found on Adastra thanks to a different compiler behaviour compared to nvc++ --- src/Interpolator/Interpolator1D2Order.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Interpolator/Interpolator1D2Order.h b/src/Interpolator/Interpolator1D2Order.h index c55af0222..9a1b2a9e4 100755 --- a/src/Interpolator/Interpolator1D2Order.h +++ b/src/Interpolator/Interpolator1D2Order.h @@ -89,7 +89,6 @@ class Interpolator1D2Order final : public Interpolator1D idx_p[0] = std::round( xpn ); idx_d[0] = std::round( xpn + 0.5 ); - delta = xpn - static_cast<double>( idx_d[0] ) + 0.5; // normalized distance to the central node delta2 = delta * delta; // square of the normalized distance to the central node @@ -97,17 +96,14 @@ coeffxd[1] = ( 0.75 - delta2 ); coeffxd[2] = 0.5 * ( delta2 + delta + 0.25 ); - - delta = xpn - static_cast<double>( idx_p[0] ); + delta = xpn - static_cast<double>( idx_p[0] ); delta2 = delta * delta; // pow( delta_p[0], 2 ); // square of the normalized distance to the central node - + + delta_p[0] = delta; // normalized distance to the central node coeffxp[0] = 0.5 * ( delta2 - delta_p[0] + 0.25 ); coeffxp[1] = ( 0.75 - delta2 ); coeffxp[2] = 0.5 * ( delta2 + delta_p[0] + 0.25 ); - delta_p[0] = delta; // normalized distance to the central node - - idx_p[0] = idx_p[0] - i_domain_begin_; idx_d[0] = idx_d[0] - i_domain_begin_; From 27dd743d2f95a8e7c8db4acdb4d835809dd9c1b0 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Thu, 2 May 2024 13:45:00 +0200 Subject: [PATCH 18/54] add publication --- doc/Sphinx/Overview/material.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 048d8e1b5..9e6e17daf 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of April 2024, 182 papers have been published covering a broad range of topics: +As of April 2024, 183 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,12 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Pan2024] + + Z. Pan, J. Liu, P. Wang, Z. Mei, Z. Cao, D. Kong, S. Xu, Z. Liu, Y. Liang, Z. Peng, T. Xu, T. Song, X. Chen, Q. Wu, Y. Zhang, Q. Han, H. Chen, J. Zhao, Y. Gao, S. Chen, Y. Zhao, X. Yan, Y. Shou, W. Ma, + `Electron acceleration and x-ray generation from near-critical-density carbon nanotube foams driven by moderately relativistic lasers`, + `Physics of Plasmas 31, 043108 (2024) `_ .. [Yao2024] W. Yao, M. Nakatsutsumi, S. Buffechoux, P. Antici, M. Borghesi, A. Ciardi, S. N. Chen, E. d’Humières, L. Gremillet, R. Heathcote, V. Horný, P. McKenna, M. N. Quinn, L. Romagnani, R. Royle, G. Sarri, Y. Sentoku, H.-P. Schlenvoigt, T. Toncian, O. Tresca, L. Vassura, O. Willi, J.
Fuchs, From 337a1ee153cceb54ba38ae936ff2bf9b48142d9b Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Tue, 7 May 2024 10:35:01 +0200 Subject: [PATCH 19/54] Sort on gpu with thrust::gather --- src/Particles/nvidiaParticles.cu | 141 ++++++++++++++++--------------- src/Particles/nvidiaParticles.h | 26 ++++++ 2 files changed, 98 insertions(+), 69 deletions(-) diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index af45bfadd..5a6524a88 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -15,6 +15,7 @@ #include #include #include +#include #include "Patch.h" @@ -468,10 +469,8 @@ namespace detail { ParticleNoKeyIteratorProvider particle_no_key_iterator_provider ) { const auto first_particle = particle_iterator_provider( particle_container ); - - auto last_particle = first_particle + - particle_container.deviceSize(); // Obviously, we use half open ranges - + auto last_particle = first_particle + particle_container.deviceSize(); + // Remove out of bound particles // Using more memory, we could use the faster remove_copy_if // NOTE: remove_if is stable. @@ -479,82 +478,86 @@ namespace detail { first_particle, last_particle, OutOfBoundaryPredicate{} ); - - // Idea 1: - remove_copy_if instead of copy_if - // - sort(the_particles_to_inject) - // - merge - // - compute bins - // NOTE: This method consumes a lot of memory ! O(N) - + const auto initial_count = std::distance( first_particle, last_particle ); const auto inject_count = particle_to_inject.deviceSize(); const auto new_count = initial_count + inject_count; - + + // Resize particles // NOTE: We really want a non-initializing vector here! // It's possible to give a custom allocator to thrust::device_vector. // Create one with construct(<>) as a noop and derive from // thrust::device_malloc_allocator. For now we do an explicit resize. - particle_to_inject.softReserve( new_count ); - particle_to_inject.resize( new_count ); // We probably invalidated the iterators - - // Copy out of cluster/tile/chunk particles - // partition_copy is way slower than copy_if/remove_copy_if on rocthrust - // https://github.com/ROCmSoftwarePlatform/rocThrust/issues/247 - - const auto first_to_inject = particle_iterator_provider( particle_to_inject ); - const auto first_to_reorder = first_to_inject + inject_count; - - // NOTE: copy_if/remove_copy_if are stable. - // First, copy particles that are not in their own cluster anymore - const auto first_already_ordered = thrust::copy_if( thrust::device, - first_particle, last_particle, - first_to_reorder, - OutOfClusterPredicate{ cluster_type } ); - // Then, copy particles that are still in their own cluster - const auto end = thrust::remove_copy_if( thrust::device, - first_particle, last_particle, - first_already_ordered, - OutOfClusterPredicate{ cluster_type } ); - - // Compute or recompute the cluster index of the particle_to_inject - // NOTE: - // - we can "save" some work here if cluster index is already computed - // for the new particles to inject (not the one we got with copy_if). 
- // - doComputeParticleClusterKey( first_to_inject, - first_already_ordered, - cluster_type ); - - const auto first_to_inject_no_key = particle_no_key_iterator_provider( particle_to_inject ); - const auto particle_to_rekey_count = std::distance( first_to_inject, - first_already_ordered ); - - doSortParticleByKey( particle_to_inject.getPtrCellKeys(), - particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, - first_to_inject_no_key ); - - // This free generates a lot of memory fragmentation. - // particle_container.free(); - // Same as for particle_to_inject, non-initializing vector is best. particle_container.softReserve( new_count ); particle_container.resize( new_count ); - - // Merge by key - // NOTE: Dont merge in place on GPU. That means we need an other large buffer! - // - thrust::merge_by_key( thrust::device, - particle_to_inject.getPtrCellKeys(), // Input range 1, first key - particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, // Input range 1, last key - particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, // Input range 2, first key - particle_to_inject.getPtrCellKeys() + new_count, // Input range 2, last key - first_to_inject_no_key, // Input range 1, first value - first_to_inject_no_key + particle_to_rekey_count, // Input range 2, first value - particle_container.getPtrCellKeys(), // Output range first key - particle_no_key_iterator_provider( particle_container ) ); // Output range first value - + + // Combine imported particles to main particles + const auto first = particle_no_key_iterator_provider( particle_container ); + const auto first_to_inject = particle_no_key_iterator_provider( particle_to_inject ); + thrust::copy( thrust::device, + first_to_inject, + first_to_inject + inject_count, + first + initial_count ); + + // Compute keys of imported particles + const auto first_new = particle_iterator_provider( particle_container ); + doComputeParticleClusterKey( first_new, first_new + new_count, cluster_type ); + + // Make a sorting map using the cell keys (like numpy.argsort) + thrust::device_vector particle_index( new_count ); + thrust::counting_iterator iter( 0 ); + thrust::copy(iter, iter + new_count, particle_index.begin()); + thrust::sort_by_key( thrust::device, + particle_container.getPtrCellKeys(), + particle_container.getPtrCellKeys() + new_count, + particle_index.begin() ); + + // Make a buffer + thrust::device_vector buffer( new_count ); + + // Sort particles using thrust::gather, according to the sorting map + for( int idim = 0; idim < particle_container.dimension(); idim++ ) { + thrust::gather( thrust::device, + particle_index.begin(), particle_index.end(), + particle_container.getPtrPosition( idim ), + buffer.begin() ); + particle_container.swapPosition( idim, buffer ); + } + for( int idim = 0; idim < 3; idim++ ) { + thrust::gather( thrust::device, + particle_index.begin(), particle_index.end(), + particle_container.getPtrMomentum( idim ), + buffer.begin() ); + particle_container.swapMomentum( idim, buffer ); + } + thrust::gather( thrust::device, + particle_index.begin(), particle_index.end(), + particle_container.getPtrWeight(), + buffer.begin() ); + particle_container.swapWeight( buffer ); + buffer.resize( 0 ); + + thrust::device_vector buffer_short( new_count ); + thrust::gather( thrust::device, + particle_index.begin(), particle_index.end(), + particle_container.getPtrCharge(), + buffer_short.begin() ); + particle_container.swapCharge( buffer_short ); + buffer_short.resize( 0 ); + + if( particle_container.tracked ) { + 
thrust::device_vector<uint64_t> buffer_uint64( new_count ); + thrust::gather( thrust::device, + particle_index.begin(), particle_index.end(), + particle_container.getPtrId(), + buffer_uint64.begin() ); + particle_container.swapId( buffer_uint64 ); + buffer_uint64.resize( 0 ); + } + // Recompute bins computeBinIndex( particle_container ); - + // This free generates a lot of memory fragmentation. If we enable it we // reduce significantly the memory usage over time but a memory spike // will still be present. Unfortunately, this free generates soo much diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index 5fa0a933b..f1ec4ad8b 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -112,6 +112,32 @@ class nvidiaParticles : public Particles return thrust::raw_pointer_cast( nvidia_id_.data() ); }; + void swapPosition( int idim, thrust::device_vector<double> &new_vector ) { + nvidia_position_[idim].swap( new_vector ); + }; + void swapMomentum( int idim, thrust::device_vector<double> &new_vector ) { + nvidia_momentum_[idim].swap( new_vector ); + }; + void swapWeight( thrust::device_vector<double> &new_vector ) { + nvidia_weight_.swap( new_vector ); + }; + void swapChi( thrust::device_vector<double> &new_vector ) { + nvidia_chi_.swap( new_vector ); + }; + void swapCharge( thrust::device_vector<short> &new_vector ) { + nvidia_charge_.swap( new_vector ); + }; + void swapTau( thrust::device_vector<double> &new_vector ) { + nvidia_tau_.swap( new_vector ); + }; + void swapCellKeys( thrust::device_vector<int> &new_vector ) { + nvidia_cell_keys_.swap( new_vector ); + }; + void swapId( thrust::device_vector<uint64_t> &new_vector ) { + nvidia_id_.swap( new_vector ); + }; + + // ----------------------------------------------------------------------------- //! Move leaving particles to the buffers // ----------------------------------------------------------------------------- From 8a6b4a82115099c145c39af4d1daeefa812376b9 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Thu, 9 May 2024 07:42:20 +0200 Subject: [PATCH 20/54] add article --- doc/Sphinx/Overview/material.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 9e6e17daf..d0446f1ce 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of April 2024, 183 papers have been published covering a broad range of topics: +As of May 2024, 184 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,13 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. + +.. [Azamoum2024] + + Y. Azamoum, G. A. Becker, S. Keppler, G. Duchateau, S. Skupin, M. Grech, F. Catoire, S. Hell, I. Tamer, M. Hornung, M. Hellwing, A. Kessler, F. Schorcht, and M. C. Kaluza, + `Optical probing of ultrafast laser-induced solid-to-overdense-plasma transitions`, + `Light: Science & Applications volume 13, Article number: 109 (2024) `_ + .. [Pan2024] Z. Pan, J. Liu, P. Wang, Z. Mei, Z. Cao, D. Kong, S. Xu, Z. Liu, Y. Liang, Z. Peng, T. Xu, T.
Song, X. Chen, Q. Wu, Y. Zhang, Q. Han, H. Chen, J. Zhao, Y. Gao, S. Chen, Y. Zhao, X. Yan, Y. Shou, W. Ma, From 8447b754bc834a65cb1ab44c34117c658f324a11 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Sat, 11 May 2024 12:14:24 +0200 Subject: [PATCH 21/54] add publication --- doc/Sphinx/Overview/material.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index d0446f1ce..61fa240c7 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of May 2024, 184 papers have been published covering a broad range of topics: +As of May 2024, 185 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -55,7 +55,7 @@ Following is the distribution of these topics in the listed publications up to N Y. Azamoum, G. A. Becker, S. Keppler, G. Duchateau, S. Skupin, M. Grech, F. Catoire, S. Hell, I. Tamer, M. Hornung, M. Hellwing, A. Kessler, F. Schorcht, and M. C. Kaluza, `Optical probing of ultrafast laser-induced solid-to-overdense-plasma transitions`, - `Light: Science & Applications volume 13, Article number: 109 (2024) `_ + `Light: Science & Applications 13, 109 (2024) `_ .. [Pan2024] @@ -146,7 +146,13 @@ Following is the distribution of these topics in the listed publications up to N A. Seidel, B. Lei, C. Zepter, M. C. Kaluza, A. Sävert, M. Zepf, and D. Seipt, `Polarization and CEP dependence of the transverse phase space in laser driven accelerators`, `Physical Review Research 6, 013056 (2024) `_ - + +.. [Krishnamurthy2023] + + S. Krishnamurthy, S. Chintalwad, A. P. L. Robinson, R. M. G. M. Trines, and B. Ramakrishna, + `Observation of proton modulations in laser–solid interaction`, + `Plasma Physics and Controlled Fusion 65 085020 (2023) `_ + .. [Gao2023b] X. Gao, From 47e30b4b2ba663aabd546e389f1fb9d985b38a9b Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Mon, 13 May 2024 22:25:14 +0200 Subject: [PATCH 22/54] add publication --- doc/Sphinx/Overview/material.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 61fa240c7..04973edbf 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of May 2024, 185 papers have been published covering a broad range of topics: +As of May 2024, 186 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -51,6 +51,12 @@ Following is the distribution of these topics in the listed publications up to N You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Timmis2024] + + R. J. L. Timmis, R. W. Paddock, I. Ouatu, J. Lee, S. Howard, E. Atonga, R. T. Ruskov, H. Martin, R. H. W. Wang, R. Aboushelbaya, M. W. von der Leyen, E. Gumbrell and P. A. 
Norreys, + `Attosecond and nano‐Coulomb electron bunches via the Zero Vector Potential mechanism`, + `Scientific Reports volume 14, 10805 (2024) `_ + .. [Azamoum2024] Y. Azamoum, G. A. Becker, S. Keppler, G. Duchateau, S. Skupin, M. Grech, F. Catoire, S. Hell, I. Tamer, M. Hornung, M. Hellwing, A. Kessler, F. Schorcht, and M. C. Kaluza, From 18f1e1c120143504865619c06e937435f43a53b4 Mon Sep 17 00:00:00 2001 From: Arnaud Beck Date: Tue, 14 May 2024 17:25:22 +0200 Subject: [PATCH 23/54] Typo in deprecated error message --- src/Python/pyprofiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Python/pyprofiles.py b/src/Python/pyprofiles.py index 0e122a1a9..2fff14c1f 100755 --- a/src/Python/pyprofiles.py +++ b/src/Python/pyprofiles.py @@ -702,7 +702,7 @@ def LaserGaussianAM( box_side="xmin", a0=1., omega=1., focus=None, waist=3., print("ERROR: focus should be a list of length 1") exit(1) elif (len(focus)==2): - print("WARNING: deprecated focus in LaserEnvelopeGaussianAM should be a list of length 1") + print("WARNING: deprecated focus in LaserGaussianAM should be a list of length 1") # Polarization and amplitude [dephasing, amplitudeY, amplitudeZ] = transformPolarization(polarization_phi, ellipticity) amplitudeY *= a0 * omega From af0070a2315a8952ee8ab4ae8c5b2d67916bdb3c Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Wed, 15 May 2024 11:53:43 +0200 Subject: [PATCH 24/54] try sorting with zip_iterator --- .../compile_tools/machine/jean_zay_gpu_V100 | 17 ++++++- src/Particles/nvidiaParticles.cu | 46 ++++--------------- src/Particles/nvidiaParticles.h | 13 ++++++ 3 files changed, 36 insertions(+), 40 deletions(-) diff --git a/scripts/compile_tools/machine/jean_zay_gpu_V100 b/scripts/compile_tools/machine/jean_zay_gpu_V100 index 7fa7ce513..cc9d15c8b 100644 --- a/scripts/compile_tools/machine/jean_zay_gpu_V100 +++ b/scripts/compile_tools/machine/jean_zay_gpu_V100 @@ -5,12 +5,25 @@ # Documentation: # http://www.idris.fr/jean-zay # +# Use the following commented commands to have the proper environment for compilation and running +# +# module purge +# module load anaconda-py3/2020.11 +# module load nvidia-compilers/23.11 +# module load cuda/12.2.0 +# module load openmpi/4.1.5-cuda +# module load hdf5/1.12.0-mpi-cuda +# export HDF5_ROOT_DIR=/gpfslocalsup/spack_soft/hdf5/1.12.0/nvhpc-23.11-i5lyakq3iu254ru3eqe2yukvg7airopl +# export I_MPI_CXX=pgc++ +# export SMILEICXX=mpic++ +# export CICCFLAG="--c++14" + SMILEICXX_DEPS = g++ #GPU_COMPILER = nvcc CXXFLAGS += -w -CXXFLAGS += -ta=tesla:cc70 -std=c++14 -lcurand -Minfo=accel # what is offloaded/copied +CXXFLAGS += -acc=gpu -gpu=cc70 -std=c++14 -lcurand -Minfo=accel # what is offloaded/copied # CXXFLAGS += -Minfo=all # very verbose output CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1 @@ -18,4 +31,4 @@ CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1 GPU_COMPILER_FLAGS += -O3 --std c++14 -arch=sm_70 GPU_COMPILER_FLAGS += --expt-relaxed-constexpr -LDFLAGS += -ta=tesla:cc70 -std=c++14 -Mcudalib=curand -lcudart -lcurand -lacccuda +LDFLAGS += -acc=gpu -gpu=cc70 -std=c++14 -cudalib=curand -lcudart -lcurand -lacccuda diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index 5a6524a88..85e5a5bf7 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -512,48 +512,18 @@ namespace detail { particle_container.getPtrCellKeys() + new_count, particle_index.begin() ); - // Make a buffer - thrust::device_vector buffer( new_count ); + + particle_to_inject.softReserve( new_count ); + 
particle_to_inject.resize( new_count ); - // Sort particles using thrust::gather, according to the sorting map - for( int idim = 0; idim < particle_container.dimension(); idim++ ) { - thrust::gather( thrust::device, - particle_index.begin(), particle_index.end(), - particle_container.getPtrPosition( idim ), - buffer.begin() ); - particle_container.swapPosition( idim, buffer ); - } - for( int idim = 0; idim < 3; idim++ ) { - thrust::gather( thrust::device, - particle_index.begin(), particle_index.end(), - particle_container.getPtrMomentum( idim ), - buffer.begin() ); - particle_container.swapMomentum( idim, buffer ); - } - thrust::gather( thrust::device, - particle_index.begin(), particle_index.end(), - particle_container.getPtrWeight(), - buffer.begin() ); - particle_container.swapWeight( buffer ); - buffer.resize( 0 ); - - thrust::device_vector buffer_short( new_count ); + const auto first_unsorted = particle_no_key_iterator_provider( particle_container ); + const auto first_buffer = particle_no_key_iterator_provider( particle_to_inject ); thrust::gather( thrust::device, particle_index.begin(), particle_index.end(), - particle_container.getPtrCharge(), - buffer_short.begin() ); - particle_container.swapCharge( buffer_short ); - buffer_short.resize( 0 ); + first_unsorted, + first_buffer ); - if( particle_container.tracked ) { - thrust::device_vector buffer_uint64( new_count ); - thrust::gather( thrust::device, - particle_index.begin(), particle_index.end(), - particle_container.getPtrId(), - buffer_uint64.begin() ); - particle_container.swapId( buffer_uint64 ); - buffer_uint64.resize( 0 ); - } + particle_container.swap( particle_to_inject ); // Recompute bins computeBinIndex( particle_container ); diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index f1ec4ad8b..0bb254cef 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -137,6 +137,19 @@ class nvidiaParticles : public Particles nvidia_id_.swap( new_vector ); }; + void swap( nvidiaParticles &p ) { + for( int idim = 0; idim < dimension(); idim++ ) { + swapPosition( idim, p.nvidia_position_[idim] ); + } + for( int idim = 0; idim < 3; idim++ ) { + swapMomentum( idim, p.nvidia_momentum_[idim] ); + } + swapWeight( p.nvidia_weight_ ); + swapCharge( p.nvidia_charge_ ); + if( tracked ) { + swapId( p.nvidia_id_ ); + } + }; // ----------------------------------------------------------------------------- //! Move leaving particles to the buffers From 227811ca0f496f7d5856cf70ed4791f4e0e066b4 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Thu, 16 May 2024 22:17:22 +0200 Subject: [PATCH 25/54] huge simplification of nvidiaParticles using thrust asynchronism --- src/Particles/Particles.cpp | 3 +- src/Particles/Particles.h | 8 +- src/Particles/nvidiaParticles.cu | 872 ++++++------------------------- src/Particles/nvidiaParticles.h | 64 +-- src/Species/Species.cpp | 4 +- 5 files changed, 205 insertions(+), 746 deletions(-) diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index 34eaeb161..d4eea30e9 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -1398,10 +1398,9 @@ int Particles::eraseLeavingParticles() return 0; } -int Particles::injectParticles( Particles *particles_to_inject ) +void Particles::copyParticles( Particles* particles_to_inject ) { ERROR( "Device only feature, should not have come here! On CPU it's done in sortParticles." 
); - return 0; } void Particles::importAndSortParticles( Particles *particles_to_inject ) diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index c0e5958e3..91689ef3f 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -484,11 +484,9 @@ class Particles virtual int eraseLeavingParticles(); // ----------------------------------------------------------------------------- - //! Inject particles from particles_to_inject object and put - //! them in the Particles object - //! \param[in,out] particles_to_inject Particles object containing particles to inject - virtual int injectParticles( Particles *particles_to_inject ); - + //! Resize & Copy particles from particles_to_inject to the end of the vectors + virtual void copyParticles( Particles* particles_to_inject ); + //! Implementation of a somewhat efficient particle injection, sorting //! (including removing leaving particles) and binning for GPU if //! available for the configuration of offloading technology diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index 85e5a5bf7..617cb0851 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -46,21 +46,13 @@ struct cellKeyEquals } }; -struct cellKeyNegative -{ - constexpr __host__ __device__ bool - operator()( const int& x ) const - { - return x < 0; - } -}; - -struct cellKeyBelowMinus1 -{ - constexpr __host__ __device__ bool - operator()( const int& x ) const - { - return x < -1; - } -}; +template <int key> +struct cellKeyBelow +{ + constexpr __host__ __device__ bool + operator()( const int& x ) const + { + return x < key; + } +}; @@ -91,12 +83,6 @@ namespace detail { const Params& parameters, const Patch& a_parent_patch ); - //! Sort the particle on GPU by their cluster/cell key. - //! - static inline void - sortParticleByKey( nvidiaParticles& particle_container, - const Params& parameters ); - //! precondition: //! - nvidia_cell_keys_ shall be sorted in non decreasing order //! - last_index.data() is a pointer mapped to GPU via @@ -127,22 +113,6 @@ namespace detail { InputIterator last, ClusterType cluster_type ); - template <typename RandomAccessIterator0, typename RandomAccessIterator1> - static void - doSortParticleByKey( RandomAccessIterator0 key_first, - RandomAccessIterator0 key_last, - RandomAccessIterator1 value_first ); - - template <typename ClusterType, typename ParticleIteratorProvider, typename ParticleNoKeyIteratorProvider> - static void - doImportAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - ClusterType cluster_type, - ParticleIteratorProvider particle_iterator_provider, - ParticleNoKeyIteratorProvider particle_no_key_iterator_provider ); }; @@ -155,8 +125,8 @@ namespace detail { double inverse_y_cell_dimension, SizeType local_x_dimension_in_cell, SizeType local_y_dimension_in_cell, - int CellStartingGlobalIndex_for_x, - int CellStartingGlobalIndex_for_y); + int CellStartingGlobalIndex_for_x, + int CellStartingGlobalIndex_for_y); //! Compute the cell key of a_particle. a_particle shall be a tuple (from a //! zipiterator).
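The gather-based sort that patches 19 and 25 converge on is easier to read in isolation. Below is a self-contained sketch of the argsort-and-gather pattern for a single particle property, assuming one int key array per particle; the function and variable names are illustrative, not the Smilei API.

    // Illustrative sketch (assumption: simplified from the patches above).
    #include <thrust/device_vector.h>
    #include <thrust/execution_policy.h>
    #include <thrust/gather.h>
    #include <thrust/sequence.h>
    #include <thrust/sort.h>

    void sortOnePropertyByKey( thrust::device_vector<int>    &keys,  // cell keys, sorted in place
                               thrust::device_vector<double> &prop ) // one particle property
    {
        const size_t n = keys.size();
        // Build the permutation that sorts the keys (like numpy.argsort).
        thrust::device_vector<int> perm( n );
        thrust::sequence( perm.begin(), perm.end() );
        thrust::sort_by_key( thrust::device, keys.begin(), keys.end(), perm.begin() );
        // Apply the permutation to the property via gather, then swap buffers.
        thrust::device_vector<double> sorted( n );
        thrust::gather( thrust::device, perm.begin(), perm.end(), prop.begin(), sorted.begin() );
        prop.swap( sorted );
    }

Sorting the index permutation once and gathering every property through it avoids re-sorting the keys for each property; the final swap recycles the buffer in the same way as the swapPosition()/swapMomentum() helpers introduced above.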
@@ -174,21 +144,11 @@ namespace detail { const Params& parameters, const Patch& a_parent_patch ); - static void - sortParticleByKey( nvidiaParticles& particle_container, - const Params& parameters ); - - static void - importAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - const Params& parameters, - const Patch& a_parent_patch ); - public: double inverse_of_x_cell_dimension_; double inverse_of_y_cell_dimension_; SizeType local_y_dimension_in_cluster_; - int CellStartingGlobalIndex_for_x_; + int CellStartingGlobalIndex_for_x_; int CellStartingGlobalIndex_for_y_; }; @@ -203,7 +163,7 @@ namespace detail { SizeType local_x_dimension_in_cell, SizeType local_y_dimension_in_cell, SizeType local_z_dimension_in_cell, - int CellStartingGlobalIndex_for_x, + int CellStartingGlobalIndex_for_x, int CellStartingGlobalIndex_for_y, int CellStartingGlobalIndex_for_z); @@ -223,16 +183,6 @@ namespace detail { const Params& parameters, const Patch& a_parent_patch ); - static void - sortParticleByKey( nvidiaParticles& particle_container, - const Params& parameters ); - - static void - importAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - const Params& parameters, - const Patch& a_parent_patch ); - public: double inverse_of_x_cell_dimension_; double inverse_of_y_cell_dimension_; @@ -240,7 +190,7 @@ namespace detail { SizeType local_y_dimension_in_cluster_; SizeType local_z_dimension_in_cluster_; int CellStartingGlobalIndex_for_x_; - int CellStartingGlobalIndex_for_y_; + int CellStartingGlobalIndex_for_y_; int CellStartingGlobalIndex_for_z_; }; @@ -270,47 +220,6 @@ namespace detail { }; - //! This functor checks the cluster key of a_particle. - //! - template - struct OutOfClusterPredicate - { - public: - public: - OutOfClusterPredicate( ClusterType cluster_type ) - : cluster_type_{ cluster_type } - { - // EMPTY - } - - template - __host__ __device__ bool - operator()( const Tuple& a_particle ) const - { - // NOTE: its ub to set the cluster key to wrongly keyed particles - // now.. - return thrust::get<0>( a_particle ) /* cluster key */ != cluster_type_.Index( a_particle ); - } - - protected: - ClusterType cluster_type_; - }; - - - //! If the particle's cell/cluster key is -1 it means that it needs to be - //! evicted. - //! - struct OutOfBoundaryPredicate - { - template - __host__ __device__ bool - operator()( const Tuple& a_particle ) const - { - return thrust::get<0>( a_particle ) /* cluster key */ < 0; - } - }; - - //////////////////////////////////////////////////////////////////////////////// // Cluster manipulation functor method definitions //////////////////////////////////////////////////////////////////////////////// @@ -343,31 +252,6 @@ namespace detail { } } - inline void - Cluster::sortParticleByKey( nvidiaParticles& particle_container, - const Params& parameters ) - { - // This is where we do a runtime dispatch depending on the simulation's - // dimensions. 
- - switch( particle_container.dimension() ) { - case 2: { - Cluster2D::sortParticleByKey( particle_container, - parameters ); - break; - } - case 3: { - Cluster3D::sortParticleByKey( particle_container, - parameters ); - break; - } - default: - // Not implemented, only Cartesian 2D or 3D for the moment - SMILEI_ASSERT( false ); - break; - } - } - inline void Cluster::computeBinIndex( nvidiaParticles& particle_container ) { @@ -408,78 +292,10 @@ namespace detail { const Params& parameters, const Patch& a_parent_patch ) { - // This is where we do a runtime dispatch depending on the simulation's - // dimensions. - - switch( particle_container.dimension() ) { - case 2: { - Cluster2D::importAndSortParticles( particle_container, - particle_to_inject, - parameters, - a_parent_patch ); - break; - } - case 3: { - Cluster3D::importAndSortParticles( particle_container, - particle_to_inject, - parameters, - a_parent_patch ); - break; - } - - default: - // Not implemented, only 2D for the moment - SMILEI_ASSERT( false ); - break; - } - } - - template - void - Cluster::doComputeParticleClusterKey( InputIterator first, - InputIterator last, - ClusterType cluster_type ) - { - thrust::for_each( thrust::device, - first, last, - AssignClusterIndex{ cluster_type } ); - } - - template - void - Cluster::doSortParticleByKey( RandomAccessIterator0 key_first, - RandomAccessIterator0 key_last, - RandomAccessIterator1 value_first ) - { - thrust::sort_by_key( thrust::device, - key_first, key_last, - value_first ); - } - - template - void - Cluster::doImportAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - ClusterType cluster_type, - ParticleIteratorProvider particle_iterator_provider, - ParticleNoKeyIteratorProvider particle_no_key_iterator_provider ) - { - const auto first_particle = particle_iterator_provider( particle_container ); - auto last_particle = first_particle + particle_container.deviceSize(); - // Remove out of bound particles - // Using more memory, we could use the faster remove_copy_if - // NOTE: remove_if is stable. 
-        last_particle = thrust::remove_if( thrust::device,
-                                           first_particle,
-                                           last_particle,
-                                           OutOfBoundaryPredicate{} );
+        const auto erased_count = particle_container.eraseParticlesByPredicate( cellKeyBelow<0>() );

-        const auto initial_count = std::distance( first_particle, last_particle );
+        const auto initial_count = particle_container.deviceSize() - erased_count;
         const auto inject_count  = particle_to_inject.deviceSize();
         const auto new_count     = initial_count + inject_count;
@@ -492,38 +308,17 @@
         particle_container.resize( new_count );

         // Combine imported particles to main particles
-        const auto first           = particle_no_key_iterator_provider( particle_container );
-        const auto first_to_inject = particle_no_key_iterator_provider( particle_to_inject );
-        thrust::copy( thrust::device,
-                      first_to_inject,
-                      first_to_inject + inject_count,
-                      first + initial_count );
+        particle_container.copyParticles( &particle_to_inject, initial_count );

-        // Compute keys of imported particles
-        const auto first_new = particle_iterator_provider( particle_container );
-        doComputeParticleClusterKey( first_new, first_new + new_count, cluster_type );
+        // Compute keys of particles
+        computeParticleClusterKey( particle_container, parameters, a_parent_patch );

-        // Make a sorting map using the cell keys (like numpy.argsort)
-        thrust::device_vector<int> particle_index( new_count );
-        thrust::counting_iterator<int> iter( 0 );
-        thrust::copy(iter, iter + new_count, particle_index.begin());
-        thrust::sort_by_key( thrust::device,
-                             particle_container.getPtrCellKeys(),
-                             particle_container.getPtrCellKeys() + new_count,
-                             particle_index.begin() );
-
-
+        // Use particle_to_inject as a buffer
         particle_to_inject.softReserve( new_count );
         particle_to_inject.resize( new_count );
-        const auto first_unsorted = particle_no_key_iterator_provider( particle_container );
-        const auto first_buffer   = particle_no_key_iterator_provider( particle_to_inject );
-        thrust::gather( thrust::device,
-                        particle_index.begin(), particle_index.end(),
-                        first_unsorted,
-                        first_buffer );
-
-        particle_container.swap( particle_to_inject );
+        // Sort particles using thrust::gather, according to the sorting map
+        particle_container.sortParticleByKey( particle_to_inject );

         // Recompute bins
         computeBinIndex( particle_container );
@@ -536,6 +331,17 @@ namespace detail {
         // particle_to_inject.free();
     }

+    template <typename InputIterator, typename ClusterType>
+    void
+    Cluster::doComputeParticleClusterKey( InputIterator first,
+                                          InputIterator last,
+                                          ClusterType   cluster_type )
+    {
+        thrust::for_each( thrust::device,
+                          first, last,
+                          AssignClusterIndex<ClusterType>{ cluster_type } );
+    }

    ////////////////////////////////////////////////////////////////////////////////
    // Cluster2D method definitions
    ////////////////////////////////////////////////////////////////////////////////

@@ -546,12 +352,12 @@ namespace detail {
                double   inverse_y_cell_dimension,
                SizeType local_x_dimension_in_cell,
                SizeType local_y_dimension_in_cell,
-               int CellStartingGlobalIndex_for_x, int CellStartingGlobalIndex_for_y )
+               int      CellStartingGlobalIndex_for_x, int CellStartingGlobalIndex_for_y )
         : inverse_of_x_cell_dimension_{ inverse_x_cell_dimension }
         , inverse_of_y_cell_dimension_{ inverse_y_cell_dimension }
         , local_y_dimension_in_cluster_{ local_y_dimension_in_cell / kClusterWidth }
         , CellStartingGlobalIndex_for_x_{CellStartingGlobalIndex_for_x}
-        , CellStartingGlobalIndex_for_y_{CellStartingGlobalIndex_for_y}
+        , CellStartingGlobalIndex_for_y_{CellStartingGlobalIndex_for_y}
     {
         // EMPTY
     }

@@ -563,7 +369,7 @@ namespace detail {
                SizeType local_x_dimension_in_cell,
                SizeType local_y_dimension_in_cell,
                SizeType
local_z_dimension_in_cell, - int CellStartingGlobalIndex_for_x, + int CellStartingGlobalIndex_for_x, int CellStartingGlobalIndex_for_y, int CellStartingGlobalIndex_for_z ) : inverse_of_x_cell_dimension_{ inverse_x_cell_dimension } , inverse_of_y_cell_dimension_{ inverse_y_cell_dimension } @@ -662,7 +468,7 @@ namespace detail { const auto last = first + particle_container.deviceSize(); int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); - doComputeParticleClusterKey( first, last, + doComputeParticleClusterKey( first, last, Cluster2D{ parameters.res_space[0], parameters.res_space[1], parameters.patch_size_[0], @@ -685,7 +491,7 @@ namespace detail { int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); int CellStartingGlobalIndex_for_z = a_parent_patch.getCellStartingGlobalIndex_noGC(2); - doComputeParticleClusterKey( first, last, + doComputeParticleClusterKey( first, last, Cluster3D{ parameters.res_space[0], parameters.res_space[1], parameters.res_space[2], @@ -697,277 +503,6 @@ namespace detail { CellStartingGlobalIndex_for_z } ); } - template - void - Cluster2D::sortParticleByKey( nvidiaParticles& particle_container, - const Params& ) - { - // This is where we do a runtime dispatch depending on the simulation's - // qed/radiation settings. - - // NOTE: For now we support dont support qed/radiations. Performance - // comes from specialization. - - // TODO(Etienne M): Find a better way to dispatch at runtime. This is - // complex to read and to maintain. - - if( particle_container.has_quantum_parameter ) { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - SMILEI_ASSERT( false ); - } - } else { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - // The appropriate thrust::zip_iterator for the current - // simulation's parameters - - const auto value_first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - - doSortParticleByKey( particle_container.getPtrCellKeys(), - particle_container.getPtrCellKeys() + particle_container.deviceSize(), - value_first ); - } - } - } - - template - void - Cluster3D::sortParticleByKey( nvidiaParticles& particle_container, - const Params& ) - { - // This is where we do a runtime dispatch depending on the simulation's - // qed/radiation settings. - - // NOTE: For now we support dont support qed/radiations. Performance - // comes from specialization. - - // TODO(Etienne M): Find a better way to dispatch at runtime. This is - // complex to read and to maintain. 
- - if( particle_container.has_quantum_parameter ) { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - SMILEI_ASSERT( false ); - } - } else { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - // The appropriate thrust::zip_iterator for the current - // simulation's parameters - - if (particle_container.tracked) { - const auto value_first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge(), - particle_container.getPtrId() ) ); - doSortParticleByKey( particle_container.getPtrCellKeys(), - particle_container.getPtrCellKeys() + particle_container.deviceSize(), - value_first ); - - } - else { - const auto value_first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - doSortParticleByKey( particle_container.getPtrCellKeys(), - particle_container.getPtrCellKeys() + particle_container.deviceSize(), - value_first ); - } - } - } - } - - template - void - Cluster2D::importAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - const Params& parameters, - const Patch& a_parent_patch ) - { - // This is where we do a runtime dispatch depending on the simulation's - // qed/radiation settings. - - // NOTE: For now we support dont support qed/radiations. Performance - // comes from specialization. - - // TODO(Etienne M): Find a better way to dispatch at runtime. This is - // complex to read and to maintain. 
- int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); - int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); - - const Cluster2D cluster_manipulator{ parameters.res_space[0], - parameters.res_space[1], - parameters.patch_size_[0], - parameters.patch_size_[1], - CellStartingGlobalIndex_for_x, CellStartingGlobalIndex_for_y}; - - if( particle_container.has_quantum_parameter ) { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - SMILEI_ASSERT( false ); - } - } else { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - // Returns the appropriate thrust::zip_iterator for the - // current simulation's parameters - const auto particle_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrCellKeys(), - particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - }; - - const auto particle_no_key_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - }; - - doImportAndSortParticles( particle_container, - particle_to_inject, - cluster_manipulator, - particle_iterator_provider, - particle_no_key_iterator_provider ); - } - } - } - - template - void - Cluster3D::importAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - const Params& parameters, - const Patch& a_parent_patch ) - { - // This is where we do a runtime dispatch depending on the simulation's - // qed/radiation settings. - - // NOTE: For now we support dont support qed/radiations. Performance - // comes from specialization. - - // TODO(Etienne M): Find a better way to dispatch at runtime. This is - // complex to read and to maintain. 
- int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); - int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); - int CellStartingGlobalIndex_for_z = a_parent_patch.getCellStartingGlobalIndex_noGC(2); - - const Cluster3D cluster_manipulator{ parameters.res_space[0], - parameters.res_space[1], - parameters.res_space[2], - parameters.patch_size_[0], - parameters.patch_size_[1], - parameters.patch_size_[2], - CellStartingGlobalIndex_for_x, - CellStartingGlobalIndex_for_y, CellStartingGlobalIndex_for_z}; - - if( particle_container.has_quantum_parameter ) { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - SMILEI_ASSERT( false ); - } - } else { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - // Returns the appropriate thrust::zip_iterator for the - // current simulation's parameters - if (particle_container.tracked) { - const auto particle_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrCellKeys(), - particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge(), - particle_container.getPtrId() ) ); - }; - const auto particle_no_key_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge(), - particle_container.getPtrId() ) ); - }; - doImportAndSortParticles( particle_container, - particle_to_inject, - cluster_manipulator, - particle_iterator_provider, - particle_no_key_iterator_provider ); - } - else { - const auto particle_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrCellKeys(), - particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - }; - - const auto particle_no_key_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - }; - - doImportAndSortParticles( particle_container, - particle_to_inject, - cluster_manipulator, - particle_iterator_provider, - particle_no_key_iterator_provider ); - } - } - } - } - } // namespace detail @@ -1270,7 +805,7 @@ void nvidiaParticles::initializeDataOnDevice() detail::Cluster::computeParticleClusterKey( *this, *parameters_, 
 *parent_patch_ );

     // The particles are not correctly sorted when created.
-    detail::Cluster::sortParticleByKey( *this, *parameters_ );
+    sortParticleByKey();

     detail::Cluster::computeBinIndex( *this );
     setHostBinIndex();
@@ -1365,7 +900,7 @@ unsigned int nvidiaParticles::deviceCapacity() const
 // -----------------------------------------------------------------------------
 void nvidiaParticles::copyLeavingParticlesToBuffer( Particles* buffer )
 {
-    copyParticlesByPredicate( buffer, cellKeyBelowMinus1() );
+    copyParticlesByPredicate( buffer, cellKeyBelow<-1>() );
     buffer->copyFromDeviceToHost( true );
 }

@@ -1379,86 +914,64 @@ void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pre
     // and keep the good ones. This would help us avoid the std::remove_if in
     // the particle injection and sorting algorithm.

-    const int nparts = gpu_nparts_;
-    // Iterator of the main data structure
-    // NOTE: https://nvidia.github.io/thrust/api/classes/classthrust_1_1zip__iterator.html#class-thrustzip_iterator
-    const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(),
-                                                                                      nvidia_momentum_[0].begin(),
-                                                                                      nvidia_momentum_[1].begin(),
-                                                                                      nvidia_momentum_[2].begin(),
-                                                                                      nvidia_weight_.begin(),
-                                                                                      nvidia_charge_.begin(),
-                                                                                      nvidia_cell_keys_.begin() ) );
-    const auto source_iterator_last = source_iterator_first + nparts; // std::advance
+    // Count particles satisfying the predicate
+    const auto keys = getPtrCellKeys();
+    const int nparts_to_copy = thrust::count_if( thrust::device, keys, keys + gpu_nparts_, pred );

-    nvidiaParticles* const cp_parts = static_cast<nvidiaParticles*>( buffer );
+    // Resize destination buffer (copy_if does not resize)
+    nvidiaParticles* const dest = static_cast<nvidiaParticles*>( buffer );
+    dest->resize( nparts_to_copy );

-    const int nparts_to_copy = thrust::count_if( thrust::device,
-                                                 nvidia_cell_keys_.cbegin(),
-                                                 nvidia_cell_keys_.cbegin() + nparts,
-                                                 pred );
-
-    // Resize it, if too small (copy_if do not resize)
-    cp_parts->resize( nparts_to_copy );
-
-    const auto destination_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( cp_parts->nvidia_position_[0].begin(),
-                                                                                           cp_parts->nvidia_momentum_[0].begin(),
-                                                                                           cp_parts->nvidia_momentum_[1].begin(),
-                                                                                           cp_parts->nvidia_momentum_[2].begin(),
-                                                                                           cp_parts->nvidia_weight_.begin(),
-                                                                                           cp_parts->nvidia_charge_.begin(),
-                                                                                           cp_parts->nvidia_cell_keys_.begin() ) );
-
-    // Copy send particles in dedicated data structure
-    thrust::copy_if( thrust::device,
-                     source_iterator_first,
-                     source_iterator_last,
-                     nvidia_cell_keys_.cbegin(),
-                     destination_iterator_first,
-                     pred );
-
-    // Copy the other position values depending on the simulation's grid dimensions
-    const int ndim_particles = nvidia_position_.size();
-    for( int i = 1; i < ndim_particles; ++i ) {
-        thrust::copy_if( thrust::device,
-                         nvidia_position_[i].cbegin(),
-                         nvidia_position_[i].cbegin() + nparts,
-                         nvidia_cell_keys_.cbegin(),
-                         cp_parts->nvidia_position_[i].begin(),
-                         pred );
-    }
-
-    // Special treatment for chi if radiation emission
-    if( has_quantum_parameter ) {
-        thrust::copy_if( thrust::device,
-                         nvidia_chi_.cbegin(),
-                         nvidia_chi_.cbegin() + nparts,
-                         nvidia_cell_keys_.cbegin(),
-                         cp_parts->nvidia_chi_.begin(),
-                         pred );
+    if( nparts_to_copy ) {
+        // Copy the particles to the destination
+        for( int ip = 0; ip < getNDoubleProp(); ip++ ) {
+            const auto in = getPtrDoubleProp( ip );
+            const auto out = dest->getPtrDoubleProp( ip );
+            thrust::copy_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, out, pred );
+        }
+        for( int ip = 0; ip <
 getNShortProp(); ip++ ) {
+            const auto in = getPtrShortProp( ip );
+            const auto out = dest->getPtrShortProp( ip );
+            thrust::copy_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, out, pred );
+        }
+        if( tracked ) {
+            const auto in = getPtrId();
+            const auto out = dest->getPtrId();
+            thrust::copy_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, out, pred );
+        }
+        cudaDeviceSynchronize();
     }
+}

-    if( has_Monte_Carlo_process ) {
-        thrust::copy_if( thrust::device,
-                         nvidia_tau_.cbegin(),
-                         nvidia_tau_.cbegin() + nparts,
-                         nvidia_cell_keys_.cbegin(),
-                         cp_parts->nvidia_tau_.begin(),
-                         pred );
-    }
+void nvidiaParticles::copyParticles( Particles* particles_to_inject )
+{
+    const auto nparts = gpu_nparts_;
+    nvidiaParticles* to_inject = static_cast<nvidiaParticles*>( particles_to_inject );
+    resize( nparts + to_inject->gpu_nparts_ );
+    copyParticles( to_inject, nparts );
+}

+void nvidiaParticles::copyParticles( nvidiaParticles* particles_to_inject, size_t offset )
+{
+    // Copy the particles to the destination
+    for( int ip = 0; ip < getNDoubleProp(); ip++ ) {
+        const auto in = particles_to_inject->getPtrDoubleProp( ip );
+        const auto out = getPtrDoubleProp( ip );
+        thrust::copy_n( thrust::cuda::par_nosync, in, particles_to_inject->gpu_nparts_, out + offset );
+    }
+    for( int ip = 0; ip < getNShortProp(); ip++ ) {
+        const auto in = particles_to_inject->getPtrShortProp( ip );
+        const auto out = getPtrShortProp( ip );
+        thrust::copy_n( thrust::cuda::par_nosync, in, particles_to_inject->gpu_nparts_, out + offset );
+    }
     if( tracked ) {
-        thrust::copy_if( thrust::device,
-                         nvidia_id_.cbegin(),
-                         nvidia_id_.cbegin() + nparts,
-                         nvidia_cell_keys_.cbegin(),
-                         cp_parts->nvidia_id_.begin(),
-                         pred );
+        const auto in = particles_to_inject->getPtrId();
+        const auto out = getPtrId();
+        thrust::copy_n( thrust::cuda::par_nosync, in, particles_to_inject->gpu_nparts_, out + offset );
     }
-
+    cudaDeviceSynchronize();
 }

-
 // -----------------------------------------------------------------------------
 //! Erase `npart` particles from `ipart`
 // -----------------------------------------------------------------------------
@@ -1484,157 +997,43 @@ void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pre
 //}

 // -----------------------------------------------------------------------------
-//! Erase particles leaving the patch object on device
+//! Erase particles leaving the patch on device
 // -----------------------------------------------------------------------------
 int nvidiaParticles::eraseLeavingParticles()
 {
-    return eraseParticlesByPredicate( cellKeyNegative() );
+    const auto nremoved = eraseParticlesByPredicate( cellKeyBelow<0>() );
+    resize( gpu_nparts_ - nremoved );
+    return nremoved;
 }

+//! "Erase" particles but does not resize the arrays!
 template <typename Predicate>
 int nvidiaParticles::eraseParticlesByPredicate( Predicate pred )
 {
-    const int position_dimension_count = nvidia_position_.size();
-
-    const int nparts = gpu_nparts_;
-    const int nparts_to_remove = thrust::count_if( thrust::device,
-                                                   nvidia_cell_keys_.begin(),
-                                                   nvidia_cell_keys_.begin() + nparts,
-                                                   pred );
-
-    if( nparts_to_remove > 0 ) {
-        const auto first_particle = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(),
-                                                                                   nvidia_momentum_[0].begin(),
-                                                                                   nvidia_momentum_[1].begin(),
-                                                                                   nvidia_momentum_[2].begin(),
-                                                                                   nvidia_weight_.begin(),
-                                                                                   nvidia_charge_.begin() ) );
-
-        const auto last_particle = first_particle + nparts;
-
-        // Remove particles which leaves current patch
-        thrust::remove_if( thrust::device,
-                           first_particle,
-                           last_particle,
-                           nvidia_cell_keys_.cbegin(),
-                           pred );
-
-        // Remove the other position values depending on the simulation's grid
-        // dimensions
-        for( int i = 1; i < position_dimension_count; ++i ) {
-            thrust::remove_if( thrust::device,
-                               nvidia_position_[i].begin(),
-                               nvidia_position_[i].begin() + nparts,
-                               nvidia_cell_keys_.cbegin(),
-                               pred );
-        }
-
-        if( has_quantum_parameter ) {
-            thrust::remove_if( thrust::device,
-                               nvidia_chi_.begin(),
-                               nvidia_chi_.begin() + nparts,
-                               nvidia_cell_keys_.cbegin(),
-                               pred );
-        }
-
-        if( has_Monte_Carlo_process ) {
-            thrust::remove_if( thrust::device,
-                               nvidia_tau_.begin(),
-                               nvidia_tau_.begin() + nparts,
-                               nvidia_cell_keys_.cbegin(),
-                               pred );
-        }
-
-        if( tracked ) {
-            thrust::remove_if( thrust::device,
-                               nvidia_id_.begin(),
-                               nvidia_id_.begin() + nparts,
-                               nvidia_cell_keys_.cbegin(),
-                               pred );
-        }
-
-        // Update current number of particles
-        gpu_nparts_ -= nparts_to_remove;
-
-        // Resize data structures (remove_if does not resize)
-        resize( gpu_nparts_ );
-    }
-
-    return nparts_to_remove;
-}
-
-int nvidiaParticles::injectParticles( Particles* particles_to_inject )
-{
-    const int nparts = gpu_nparts_;
-
-    // Manage the recv data structure
-    nvidiaParticles* const cp_parts = static_cast<nvidiaParticles*>( particles_to_inject );
-
-    const int nparts_add = cp_parts->gpu_nparts_;
-    const int tot_parts  = nparts + nparts_add;
-
-    const int position_dimension_count = nvidia_position_.size();
-
-    // Resize main data structure, if too small (copy_n do not resize)
-    resize( tot_parts );
-
-    const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( cp_parts->nvidia_position_[0].cbegin(),
-                                                                                      cp_parts->nvidia_momentum_[0].cbegin(),
-                                                                                      cp_parts->nvidia_momentum_[1].cbegin(),
-                                                                                      cp_parts->nvidia_momentum_[2].cbegin(),
-                                                                                      cp_parts->nvidia_weight_.cbegin(),
-                                                                                      cp_parts->nvidia_charge_.cbegin() ) );
-
-    // Iterator of the main data structure (once it has been resized)
-    const auto destination_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(),
-                                                                                           nvidia_momentum_[0].begin(),
-                                                                                           nvidia_momentum_[1].begin(),
-                                                                                           nvidia_momentum_[2].begin(),
-                                                                                           nvidia_weight_.begin(),
-                                                                                           nvidia_charge_.begin() ) ) +
-                                            nparts;
-
-    // Copy recv particles in main data structure
-    thrust::copy_n( thrust::device,
-                    source_iterator_first,
-                    nparts_add,
-                    destination_iterator_first );
-
-    // Remove the other position values depending on the simulation's grid
-    // dimensions
-    for( int i = 1; i < position_dimension_count; ++i ) {
-        thrust::copy_n( thrust::device,
-                        cp_parts->nvidia_position_[i].cbegin(),
-                        nparts_add,
-                        nvidia_position_[i].begin() + nparts );
-    }
-
-    if( has_quantum_parameter ) {
-        thrust::copy_n( thrust::device,
-                        cp_parts->nvidia_chi_.cbegin(),
-                        nparts_add,
-                        nvidia_chi_.begin() + nparts
 );
+    const auto keys = getPtrCellKeys();
+    const int nparts_to_remove = thrust::count_if( thrust::device, keys, keys + gpu_nparts_, pred );
+
+    // Copy the particles to the destination
+    // Using more memory, we could use the faster remove_copy_if
+    // NOTE: remove_if is stable.
+    for( int ip = 0; ip < getNDoubleProp(); ip++ ) {
+        const auto in = getPtrDoubleProp( ip );
+        thrust::remove_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, pred );
     }
-
-    if( has_Monte_Carlo_process ) {
-        thrust::copy_n( thrust::device,
-                        cp_parts->nvidia_tau_.cbegin(),
-                        nparts_add,
-                        nvidia_tau_.begin() + nparts );
+    for( int ip = 0; ip < getNShortProp(); ip++ ) {
+        const auto in = getPtrShortProp( ip );
+        thrust::remove_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, pred );
     }
-    if( tracked ) {
-        thrust::copy_n( thrust::device,
-                        cp_parts->nvidia_id_.cbegin(),
-                        nparts_add,
-                        nvidia_id_.begin() + nparts );
+    if( tracked ) {
+        const auto in = getPtrId();
+        thrust::remove_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, pred );
     }
-
-    // No more particles to move
-    cp_parts->resize( 0 );
-
-    return nparts_add;
+    cudaDeviceSynchronize();
+
+    return nparts_to_remove;
 }

+
 // ---------------------------------------------------------------------------------------------------------------------
 //! Create n_additional_particles new particles at the end of vectors
 //! Fill the new elements with 0
@@ -1684,6 +1083,60 @@ void nvidiaParticles::importAndSortParticles( Particles* particles_to_inject )
     setHostBinIndex();
 }

+//! Sort by cell_keys_
+//! This version synchronizes for every vector, but uses less buffers
+void nvidiaParticles::sortParticleByKey()
+{
+    // Make a sorting map using the cell keys (like numpy.argsort)
+    thrust::device_vector<int> index( gpu_nparts_ );
+    thrust::sequence( thrust::device, index.begin(), index.end() );
+    thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() );
+
+    // Sort particles using thrust::gather, according to the sorting map
+    thrust::device_vector<double> buffer( gpu_nparts_ );
+    for( int ip = 0; ip < getNDoubleProp(); ip++ ) {
+        thrust::gather( thrust::device, index.begin(), index.end(), getPtrDoubleProp( ip ), buffer.begin() );
+        swapDoubleProp( ip, buffer );
+    }
+    buffer.clear();
+    thrust::device_vector<short> buffer_short( gpu_nparts_ );
+    for( int ip = 0; ip < getNShortProp(); ip++ ) {
+        thrust::gather( thrust::device, index.begin(), index.end(), getPtrShortProp( ip ), buffer_short.begin() );
+        swapShortProp( ip, buffer_short );
+    }
+    buffer_short.clear();
+    if( tracked ) {
+        thrust::device_vector<uint64_t> buffer_uint64( gpu_nparts_ );
+        thrust::gather( thrust::device, index.begin(), index.end(), getPtrId(), buffer_uint64.begin() );
+        swapId( buffer_uint64 );
+        buffer_uint64.clear();
+    }
+}
+
+//! Sort by cell_keys_
+//!
 This version is asynchronous, but requires a buffer of equal size to be provided
+void nvidiaParticles::sortParticleByKey( nvidiaParticles& buffer )
+{
+    // Make a sorting map using the cell keys (like numpy.argsort)
+    thrust::device_vector<int> index( gpu_nparts_ );
+    thrust::sequence( thrust::device, index.begin(), index.end() );
+    thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() );
+
+    // Sort particles using thrust::gather, according to the sorting map
+    for( int ip = 0; ip < getNDoubleProp(); ip++ ) {
+        thrust::gather( thrust::cuda::par_nosync, index.begin(), index.end(), getPtrDoubleProp( ip ), buffer.getPtrDoubleProp( ip ) );
+    }
+    for( int ip = 0; ip < getNShortProp(); ip++ ) {
+        thrust::gather( thrust::cuda::par_nosync, index.begin(), index.end(), getPtrShortProp( ip ), buffer.getPtrShortProp( ip ) );
+    }
+    if( tracked ) {
+        thrust::gather( thrust::cuda::par_nosync, index.begin(), index.end(), getPtrId(), buffer.getPtrId() );
+    }
+    cudaDeviceSynchronize();
+
+    swap( buffer );
+}
+
 int nvidiaParticles::prepareBinIndex()
 {
     if( first_index.size() == 0 ) {
@@ -1747,7 +1200,10 @@ void nvidiaParticles::naiveImportAndSortParticles( nvidiaParticles* particles_to
     eraseLeavingParticles();

     // Inject newly arrived particles in particles_to_inject
-    injectParticles( particles_to_inject );
+    const size_t current_size = gpu_nparts_;
+    resize( current_size + particles_to_inject->size() );
+    copyParticles( particles_to_inject, current_size );
+    particles_to_inject->clear();
 }

 extern "C"
diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h
index 0bb254cef..906d3709e 100644
--- a/src/Particles/nvidiaParticles.h
+++ b/src/Particles/nvidiaParticles.h
@@ -111,45 +111,42 @@ class nvidiaParticles : public Particles
     uint64_t * getPtrId() override {
         return thrust::raw_pointer_cast( nvidia_id_.data() );
     };
-
-    void swapPosition( int idim, thrust::device_vector<double> &new_vector ) {
-        nvidia_position_[idim].swap( new_vector );
-    };
-    void swapMomentum( int idim, thrust::device_vector<double> &new_vector ) {
-        nvidia_momentum_[idim].swap( new_vector );
+
+    size_t getNDoubleProp() {
+        return nvidia_double_prop_.size();
     };
-    void swapWeight( thrust::device_vector<double> &new_vector ) {
-        nvidia_weight_.swap( new_vector );
+    size_t getNShortProp() {
+        return nvidia_short_prop_.size();
     };
-    void swapChi( thrust::device_vector<double> &new_vector ) {
-        nvidia_chi_.swap( new_vector );
+
+    double * getPtrDoubleProp( int iprop ) {
+        return thrust::raw_pointer_cast( nvidia_double_prop_[iprop]->data() );
     };
-    void swapCharge( thrust::device_vector<short> &new_vector ) {
-        nvidia_charge_.swap( new_vector );
+    short * getPtrShortProp( int iprop ) {
+        return thrust::raw_pointer_cast( nvidia_short_prop_[iprop]->data() );
     };
-    void swapTau( thrust::device_vector<double> &new_vector ) {
-        nvidia_tau_.swap( new_vector );
+
+    void swapDoubleProp( int iprop, thrust::device_vector<double> &new_vector ) {
+        nvidia_double_prop_[iprop]->swap( new_vector );
     };
-    void swapCellKeys( thrust::device_vector<int> &new_vector ) {
-        nvidia_cell_keys_.swap( new_vector );
+    void swapShortProp( int iprop, thrust::device_vector<short> &new_vector ) {
+        nvidia_short_prop_[iprop]->swap( new_vector );
     };
     void swapId( thrust::device_vector<uint64_t> &new_vector ) {
         nvidia_id_.swap( new_vector );
     };

-    void swap( nvidiaParticles &p ) {
-        for( int idim = 0; idim < dimension(); idim++ ) {
-            swapPosition( idim, p.nvidia_position_[idim] );
+    void swap( nvidiaParticles & p ) {
+        for( int iprop = 0; iprop < getNDoubleProp(); iprop++ ) {
             nvidia_double_prop_[iprop]->swap( *p.nvidia_double_prop_[iprop] );
         }
-        for( int idim = 0; idim < 3; idim++ ) {
-            swapMomentum( idim, p.nvidia_momentum_[idim] );
+        for( int iprop = 0; iprop < getNShortProp(); iprop++ ) {
+            nvidia_short_prop_[iprop]->swap( *p.nvidia_short_prop_[iprop] );
         }
-        swapWeight( p.nvidia_weight_ );
-        swapCharge( p.nvidia_charge_ );
         if( tracked ) {
-            swapId( p.nvidia_id_ );
+            nvidia_id_.swap( p.nvidia_id_ );
         }
-    };
+    }

     // -----------------------------------------------------------------------------
     //! Move leaving particles to the buffers
     // -----------------------------------------------------------------------------
@@ -158,6 +155,12 @@ class nvidiaParticles
     template <typename Predicate>
     void copyParticlesByPredicate( Particles* buffer, Predicate pred );
+
+    //! Resize & Copy particles from particles_to_inject to end of vectors
+    void copyParticles( Particles* particles_to_inject ) override;
+
+    //! Copy particles from particles_to_inject to specific offset
+    void copyParticles( nvidiaParticles* particles_to_inject, size_t offset );

     // -----------------------------------------------------------------------------
     //! Erase particles leaving the patch object on device and returns the number of particle removed
     // -----------------------------------------------------------------------------
@@ -167,11 +170,6 @@ class nvidiaParticles
     template <typename Predicate>
     int eraseParticlesByPredicate( Predicate pred );

-    // -----------------------------------------------------------------------------
-    //! Inject particles from particles_to_inject into *this and return the number of particle added
-    // -----------------------------------------------------------------------------
-    int injectParticles( Particles* particles_to_inject ) override;
-
     // ---------------------------------------------------------------------------------------------------------------------
     //! Create n_additional_particles new particles at the end of vectors
     //! Fill the new elements with 0
@@ -181,6 +179,12 @@
     //! See the Particles class for documentation.
     void importAndSortParticles( Particles* particles_to_inject ) override;

+    //! Sort by cell_keys_
+    //! This version synchronizes for every vector, but uses less buffers
+    void sortParticleByKey();
+    //! This version is asynchronous, but requires a buffer of equal size to be provided
+    void sortParticleByKey( nvidiaParticles& buffer );
+
 protected:

     //! Redefine first_index and last_index according to the binning algorithm
     //! used on GPU.
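Both new ``sortParticleByKey`` methods are an argsort followed by a gather, repeated over each per-property device array. A condensed, self-contained sketch of the pattern for a single ``double`` property (a toy helper assuming only thrust, not code from the patch):

.. code-block:: c++

    #include <thrust/device_vector.h>
    #include <thrust/execution_policy.h>
    #include <thrust/gather.h>
    #include <thrust/sequence.h>
    #include <thrust/sort.h>

    // Reorder 'prop' to follow the non-decreasing order of 'keys'.
    // 'keys' is sorted in place, exactly once, as in the patch.
    void sortOnePropertyByKey( thrust::device_vector<int>&    keys,
                               thrust::device_vector<double>& prop )
    {
        // Argsort: 'index' receives the permutation that sorts 'keys'
        thrust::device_vector<int> index( keys.size() );
        thrust::sequence( thrust::device, index.begin(), index.end() );
        thrust::sort_by_key( thrust::device, keys.begin(), keys.end(), index.begin() );

        // Apply the permutation out of place: buffer[i] = prop[index[i]]
        thrust::device_vector<double> buffer( prop.size() );
        thrust::gather( thrust::device, index.begin(), index.end(), prop.begin(), buffer.begin() );

        // Constant-time swap of the sorted data into place
        prop.swap( buffer );
    }

In the patch, the permutation is computed once and the gather runs once per double, short and uint64 property: the synchronous variant recycles one scratch vector per value type, while the asynchronous variant gathers with ``thrust::cuda::par_nosync`` into the caller-provided ``nvidiaParticles`` buffer and issues a single ``cudaDeviceSynchronize()`` before swapping the buffer in.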
diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp
index 65358f555..19b39c2ed 100755
--- a/src/Species/Species.cpp
+++ b/src/Species/Species.cpp
@@ -2103,8 +2103,10 @@ void Species::importParticles( Params &params, Patch *patch, Particles &source_p
     // Warning: the current GPU version does not handle tracked particles

     // Inject particles from source_particles
-    particles->last_index.back() += particles->injectParticles( &source_particles );
+    particles->copyParticles( &source_particles );
+    particles->last_index.back() += source_particles.size();
     particles->last_index[0] = particles->last_index.back();
+    source_particles.clear();

 #else
     // ---------------------------------------------------

From 0b7d91e7b0592bbf3f7c50557145f3d443e5d27d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Brian=20Edward=20Marr=C3=A9?=
Date: Fri, 17 May 2024 11:50:01 +0200
Subject: [PATCH 26/54] fix for documentation typos (#716)

---
 doc/Sphinx/implementation.rst | 36 +++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/doc/Sphinx/implementation.rst b/doc/Sphinx/implementation.rst
index 46bf953e9..c524cb560 100644
--- a/doc/Sphinx/implementation.rst
+++ b/doc/Sphinx/implementation.rst
@@ -10,10 +10,10 @@ and conveniency for non-advanced C++ users.
 The repository is composed of the following directories:

 - ``Licence``: contains code licence information
-- ``doc``: conatins the Sphinx doc files
+- ``doc``: contains the Sphinx doc files
 - ``src``: contains all source files
 - ``happi``: contains the sources of the happi Python tool for visualization
-- ``benchmarks``: contains the benchmarks used by the validation process. these becnhamrks are also examples for users.
+- ``benchmarks``: contains the benchmarks used by the validation process; these benchmarks are also examples for users.
 - ``scripts``: contains multiple tool scripts for compilation and more
 - ``compile_tools``: contains scripts and machine files used by the makefile for compilation

@@ -23,7 +23,7 @@ The repository is composed of the following directories:
 The source files directory is as well composed of several sub-directories to organise the `.cpp` and `.h` files by related thematics.
 The main is the file `Smilei.cpp`.
-There is always only one class definition per file and the file name correcponds to the class name.
+There is always only one class definition per file and the file name corresponds to the class name.

 The general implementation is later summarized in :numref:`smilei_main_loop`

@@ -54,10 +54,10 @@ Notion of operators

 An operator is a class that operates on input data to provide a processed information.
 Input data can be parameters and data containers.
 Output data can be processed data from data containers or updated data containers.
-An operator is a class functor (overloadind of the ``()`` ).
-Sometime, operator provides additional methods called wrappers to provide differents simplified or adapted interfaces.
-An operator do not store data or temporarely.
-for instance, the particle interpolation, push and proection are operators.
+An operator is a class functor (overloading of the ``()`` operator).
+Sometimes, an operator provides additional methods, called wrappers, that expose different simplified or adapted interfaces.
+An operator does not store data, even temporarily.
+For instance, the particle interpolation, push and projection are operators.

 .. _operator:

@@ -71,7 +71,7 @@ Notion of domain parts

 Domain parts are classes that represents some specific levels of the domain decomposition.
 They can be seen as high-level data container or container of data container.
-They contain some methods to handle, manange and access the local data.
+They contain some methods to handle, manage and access the local data.
 For instance, patches and ``Species`` are domain parts:

 - ``Species`` contains the particles.
@@ -80,10 +80,10 @@ For instance, patches and ``Species`` are domain parts:
 Notion of factory
 ------------------------------------

-Some objects such as operators or data containers have sereral variations.
+Some objects such as operators or data containers have several variations.
 For this we use inheritance.
 A base class is used for common parameters and methods and derived classes are used for all variations.
-The factory uses user-defined input parameters to determine the right derive class to choose and initiate them as shown in :numref:`factory`.
+The factory uses user-defined input parameters to determine the right derived class and instantiate it, as shown in :numref:`factory`.
 For instance, there are several ``push`` operators implemented all derived from a base ``push`` class.
 The ``push`` factory will determine the right one to use.

@@ -97,7 +97,7 @@ The ``push`` factory will determine the right one to use.
 Other
 ------------------------------------

-Some classes are used for specific actions in the code such as the initilization process.
+Some classes are used for specific actions in the code such as the initialization process.

 -----------------------------------------------------------------

 III. Domain decomposition and parallelism
 -----------------------------------------------------------------

 The simulation domain is divided multiple times following a succession of decomposition levels.
 The whole domain is the superimposition of different grids for each electromagnetic field component
-and macro-particules.
+and macro-particles.
 Let us represent schematically the domain as an array of cells as in Fig. :numref:`full_domain`.
 Each cell contains a certain population of particles (that can differ from cell to cell).

@@ -127,8 +127,8 @@ The domain becomes a collection of patches as shown in :numref:`patch_domain_dec
 The domain in :program:`Smilei` is a collection of patches.
-A patch is an independant piece of the whole simulation domain.
-It therefore owns local electrmognatic grids and list of macro-particles.
+A patch is an independent piece of the whole simulation domain.
+It therefore owns the local electromagnetic grids and the list of macro-particles.
 Electromagnetic grids have ghost cells that represent the information located in the neighboring patches (not shown in :numref:`patch_domain_decomposition`).
 All patches have the same spatial size .i.e. the same number of cells.
 The size of a patch is calculated so that all local field grids (ghost cells included) can fit in L2 cache.

@@ -144,7 +144,7 @@ The distribution can be ensured in an equal cartesian way or using a load balanc
 Patches are then distributed among MPI processes in so-called MPI patch collections.
 Inside MPI patch collection, OpenMP loop directives are used to distribute the computation of the patches
 among the available threads.
-Since each patch have a different number of particles, this approach enables a dynamic scheduling depending on the specified OpenMP scheduler.
+Since each patch has a different number of particles, this approach enables a dynamic scheduling depending on the specified OpenMP scheduler.
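To make the scheduling point concrete, the patch loop can be sketched as follows (a simplified illustration of the pattern described above; the actual loop in ``vecPatches::dynamics`` passes many more arguments, and the schedule clause shown here is an assumption standing in for "the specified OpenMP scheduler"):

.. code-block:: c++

    // Each thread of the team picks up patches as it becomes free; since
    // patches hold different numbers of particles, a non-static schedule
    // balances the load across threads.
    #pragma omp for schedule(runtime)
    for( unsigned int ipatch = 0 ; ipatch < vecPatches.size() ; ipatch++ ) {
        for( unsigned int ispec = 0 ; ispec < vecPatches( ipatch )->vecSpecies.size() ; ispec++ ) {
            vecPatches( ipatch )->vecSpecies[ispec]->dynamics( /* ... */ );
        }
    }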
As shown in :numref:`smilei_main_loop`, a synchronization step is required to exchange grid ghost cells and particles traveling from patch to patch. The patch granularity is used for: @@ -163,7 +163,7 @@ The patch can be decomposed into bins as shown in :numref:`bin_decomposition`. Bin decomposition. -Contrary to patch, a bin is not an independant data structure with its own arrays. +Contrary to patch, a bin is not an independent data structure with its own arrays. It represents a smaller portion of the patch grids through specific start and end indexes. For the macro-particles, a sorting algorithm is used to ensure that in the macro-particles located in the same bin are grouped and contiguous in memory. @@ -288,7 +288,7 @@ located in the file `src/Tools.h`. - `ERROR_NAMELIST`: this function should be used for namelist error. It takes in argument a simple message and a link to the documentation. It throws as well a SIGABRT signal. - `MESSAGE`: this function should be used to output an information message (it uses `std::cout`). - `DEBUG` : should be used for debugging messages (for the so-called DEBUG mode) -- `WARNING` : should be used to thrown a warning. A warning alerts the users of a possible issue or to be carreful with some parameters without stoping the program. +- `WARNING` : should be used to thrown a warning. A warning alerts the users of a possible issue or to be careful with some parameters without stopping the program. -------------------------------------------------------------------------------- @@ -618,7 +618,7 @@ We first loop on the patches and then the species of each patch ``ipatch``: ``(*this )( ipatch )->vecSpecies.size()``. For each species, the method ``Species::dynamics`` is called to perform the dynamic step of the respective particles. -The OpenMP parallelism is explicitely applied in ``vecPatches::dynamics`` on the patch loop as shown +The OpenMP parallelism is explicitly applied in ``vecPatches::dynamics`` on the patch loop as shown in the following pieces of code. .. code-block:: c++ From 5b60a4d771e3ba59e9aba251a5110a0d2366bfb9 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Sun, 19 May 2024 10:49:22 +0200 Subject: [PATCH 27/54] add article, use extended journal names for each article --- doc/Sphinx/Overview/material.rst | 197 ++++++++++++++++--------------- 1 file changed, 101 insertions(+), 96 deletions(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 04973edbf..2d33d6aff 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of May 2024, 186 papers have been published covering a broad range of topics: +As of May 2024, 187 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,7 +50,12 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Ivanov2024] + K. A. Ivanov, D. A. Gorlova, I. N. Tsymbalov, I. P. Tsygvintsev, S. A. Shulyapov, R. V. Volkov, and A. B. 
Savel’ev, + `Laser-driven pointed acceleration of electrons with preformed plasma lens`, + `Physical Review Accelerators and Beams 27, 051301 (2024) `_ + .. [Timmis2024] R. J. L. Timmis, R. W. Paddock, I. Ouatu, J. Lee, S. Howard, E. Atonga, R. T. Ruskov, H. Martin, R. H. W. Wang, R. Aboushelbaya, M. W. von der Leyen, E. Gumbrell and P. A. Norreys, @@ -79,7 +84,7 @@ Following is the distribution of these topics in the listed publications up to N M. Luo, C. Riconda, I. Pusztai, A. Grassi, J. S. Wurtele, and T. Fülöp, `Control of autoresonant plasma beat-wave wakefield excitation`, - `Phys. Rev. Research 6, 013338 (2024) `_ + `Physical Review Research 6, 013338 (2024) `_ .. [Krafft2024] @@ -247,7 +252,7 @@ Following is the distribution of these topics in the listed publications up to N E. Starodubtseva, I. Tsymbalov, D. Gorlova, K. Ivanov, and A. Savel'ev, `Low energy electron injection for direct laser acceleration`, - `Phys. Plasmas 30, 083105 (2023) `_ + `Physics of Plasmas 30, 083105 (2023) `_ .. [Maffini2023] @@ -259,7 +264,7 @@ Following is the distribution of these topics in the listed publications up to N S. Yu. Gus'kov, Ph. Korneev, and M. Murakami, `Laser-driven electrodynamic implosion of fast ions in a thin shell`, - `Matter Radiat. Extremes 8, 056602 (2023) `_ + `Matter and Radiation at Extremes 8, 056602 (2023) `_ .. [RezaeiPandari2023] @@ -271,19 +276,19 @@ Following is the distribution of these topics in the listed publications up to N J. Jonnerby, A. von Boetticher, J. Holloway, L. Corner, A. Picksley, A. J. Ross, R. J. Shalloo , C. Thornton, N. Bourgeois, R. Walczak, and S. M. Hooker, `Measurement of the decay of laser-driven linear plasma wakefields`, - `Phys. Rev. E 108, 055211 (2023) `_ + `Physical Review E 108, 055211 (2023) `_ .. [Drobniak2023] P. Drobniak, E. Baynard, C. Bruni, K. Cassou, C. Guyot, G. Kane, S. Kazamias, V. Kubytskyi, N. Lericheux, B. Lucas, M. Pittman, F. Massimo, A. Beck, A. Specka, P. Nghiem, and D. Minenna, `Random scan optimization of a laser-plasma electron injector based on fast particle-in-cell simulations`, - `Phys. Rev. Accel. Beams 26, 091302 (2023) `_ + `Physical Review Accelerators and Beams 26, 091302 (2023) `_ .. [Bukharskii2023] N. Bukharskii and Ph. Korneev, `Intense widely controlled terahertz radiation from laser-driven wires`, - `Matter Radiat. Extremes 8, 044401 (2023) `_ + `Matter and Radiation at Extremes 8, 044401 (2023) `_ .. [Schmitz2023] @@ -307,7 +312,7 @@ Following is the distribution of these topics in the listed publications up to N X. Gao, `Ionization dynamics of sub-micrometer-sized clusters in intense ultrafast laser pulses`, - `Phys. Plasmas 30, 052102 (2023) `_ + `Physics of Plasmas 30, 052102 (2023) `_ .. [Krafft2023] @@ -325,7 +330,7 @@ Following is the distribution of these topics in the listed publications up to N A. Ghizzo, D. Del Sarto, and H. Betar, `Collisionless Heating Driven by Vlasov Filamentation in a Counterstreaming Beams Configuration`, - `Phys. Rev. Lett. 131, 035101 (2023) `_ + `Physical Review Letters 131, 035101 (2023) `_ .. [Yang2023] @@ -337,31 +342,31 @@ Following is the distribution of these topics in the listed publications up to N W. Yao, A. Fazzini, S.N. Chen, K. Burdonov, J. Béard, M. Borghesi, A. Ciardi, M. Miceli, S. Orlando, X. Ribeyre, E. d'Humières and J. Fuchs, `Investigating particle acceleration dynamics in interpenetrating magnetized collisionless super-critical shocks`, - `J. Plasma Phys. 89, 915890101 (2023) `_ + `Journal of Plasma Physics 89, 915890101 (2023) `_ .. [Pak2023] T. 
Pak, M. Rezaei-Pandari, S. B. Kim, G. Lee, D. H. Wi, C. I. Hojbota, M. Mirzaie, H. Kim, J. H. Sung, S. K. Lee, C. Kang and K.-Y. Kim, `Multi-millijoule terahertz emission from laser-wakefield-accelerated electrons`, - `Light Sci Appl 12, 37 (2023) `_ + `Light: Science and Applications 12, 37 (2023) `_ .. [Istokskaia2023] V. Istokskaia, M. Tosca, L. Giuffrida, J. Psikal, F. Grepl, V. Kantarelou, S. Stancek, S. Di Siena, A. Hadjikyriacou, A. McIlvenny, Y. Levy, J. Huynh, M. Cimrman, P. Pleskunov, D. Nikitin, A. Choukourov, F. Belloni, A. Picciotto, S. Kar, M. Borghesi, A. Lucianetti, T. Mocek and D. Margarone, `A multi-MeV alpha particle source via proton-boron fusion driven by a 10-GW tabletop laser`, - `Commun Phys 6, 27 (2023) `_ + `Communications Physics 6, 27 (2023) `_ .. [Yoon2023] Y. D. Yoon, D. E. Wendel and G. S. Yun, `Equilibrium selection via current sheet relaxation and guide field amplification`, - `Nat Commun 14, 139 (2023) `_ + `Nature Communications 14, 139 (2023) `_ .. [Galbiati2023] M. Galbiati, A. Formenti, M. Grech and M. Passoni, `Numerical investigation of non-linear inverse Compton scattering in double-layer targets`, - `Front. Phys. 11, fphy.2023.1117543 (2023) `_ + `Frontiers in Physics 11, fphy.2023.1117543 (2023) `_ .. [Sakai2023] @@ -373,7 +378,7 @@ Following is the distribution of these topics in the listed publications up to N A. Golovanov, I. Yu. Kostyukov, A. Pukhov and V. Malka, `Energy-Conserving Theory of the Blowout Regime of Plasma Wakefield`, - `Phys. Rev. Lett. 130, 105001 (2023) `_ + `Physical Review Letters 130, 105001 (2023) `_ .. [Miethlinger2023] @@ -385,13 +390,13 @@ Following is the distribution of these topics in the listed publications up to N C. Zepter, A. Seidel, M. Zepf, M. C. Kaluza and A. Sävert, `Role of spatiotemporal couplings in stimulated Raman side scattering`, - `Phys. Rev. Research 5, L012023 (2023) `_ + `Physical Review Research 5, L012023 (2023) `_ .. [Marini2023] S. Marini, M. Grech, P. S. Kleij, M. Raynaud and C. Riconda, `Electron acceleration by laser plasma wedge interaction`, - `Phys. Rev. Research 5, 013115 (2023) `_ + `Physical Review Research 5, 013115 (2023) `_ .. [Blackman2022] @@ -451,7 +456,7 @@ Following is the distribution of these topics in the listed publications up to N D. Margarone, J. Bonvalet, L. Giuffrida, A. Morace, V. Kantarelou, M. Tosca, D. Raffestin, P. Nicolai, A. Picciotto, Y. Abe, Y. Arikawa, S. Fujioka, Y. Fukuda, Y. Kuramitsu, H. Habara and D. Batani, `In-Target Proton–Boron Nuclear Fusion Using a PW-Class Laser`, - `Appl. Sci. 12(3), 1444 (2022) `_ + `Appled Sciences 12(3), 1444 (2022) `_ .. [Kochetkov2022] @@ -463,13 +468,13 @@ Following is the distribution of these topics in the listed publications up to N A. Oudin, A. Debayle, C. Ruyer, D. Benisti, `Cross-beam energy transfer between spatially smoothed laser beams`, - `Phys. Plasmas 29, 112112 (2022) `_ + `Physics of Plasmas 29, 112112 (2022) `_ .. [Chen2022] Q. Chen, D. Maslarova, J. Wang, S. Li, and D. Umstadter, `Injection of electron beams into two laser wakefields and generation of electron rings`, - `Phys. Rev. E 106, 055202 (2022) `_ + `Physical Review E 106, 055202 (2022) `_ .. [Kumar2022b] @@ -481,7 +486,7 @@ Following is the distribution of these topics in the listed publications up to N S. Kumar, D. K. Singh and H. K. Malik, `Comparative study of ultrashort single-pulse and multi-pulse driven laser wakefield acceleration`, - `Laser Phys. Lett. 20, 026001 (2022) `_ + `Laser Physics Letters 20, 026001 (2022) `_ .. 
[Miloshevsky2022] @@ -505,25 +510,25 @@ Following is the distribution of these topics in the listed publications up to N I. Ouatu, B. T. Spiers, R. Aboushelbaya, Q. Feng, M. W. von der Leyen, R. W. Paddock, R. Timmis, C. Ticos, K. M. Krushelnick and P. A. Norreys, `Ionization states for the multipetawatt laser-QED regime`, - `Phys. Rev. E 106, 015205 (2022) `_ + `Physical Review E 106, 015205 (2022) `_ .. [Beth2022] A. Beth, H. Gunell, C. Simon Wedlund, C. Goetz, H. Nilsson and M. Hamrin, `First investigation of the diamagnetic cavity boundary layer with a 1D3V PIC simulation`, - `A&A 667, A143 (2022) `_ + `Astronomy & Astrophysics 667, A143 (2022) `_ .. [Guo2022] Y. Guo, X. Geng, L. Ji, B. Shen and R. Li, `Improving the accuracy of hard photon emission by sigmoid sampling of the quantum-electrodynamic table in particle-in-cell Monte Carlo simulations`, - `Phys. Rev. E 105, 025309 (2022) `_ + `Physical Review E 105, 025309 (2022) `_ .. [Pae2022] K. . Pae, C. M. Kim, V. B. Pathak, C.-M. Ryu and C. H. Nam, `Direct laser acceleration of electrons from a plasma mirror by an intense few-cycle Laguerre–Gaussian laser and its dependence on the carrier-envelope phase`, - `Plasma Phys. Control. Fusion 64, 055013 (2022) `_ + `Plasma Physics and Controlled Fusion 64, 055013 (2022) `_ .. [Zhang2022a] @@ -536,43 +541,43 @@ Following is the distribution of these topics in the listed publications up to N Q. Han, X. Geng, B. Shen, Z. Xu and L. Ji, `Ultra-fast polarization of a thin electron layer in the rotational standing-wave field driven by double ultra-intense laser pulses`, - `New J. Phys. 24, 063013 (2022) `_ + `New Journal of Physics 24, 063013 (2022) `_ .. [Gothel2022] I. Göthel, C. Bernert, M. Bussmann, M. Garten, T. Miethlinger, M. Rehwald, K. Zeil, T. Ziegler, T. E. Cowan, U. Schramm and T. Kluge, `Optimized laser ion acceleration at the relativistic critical density surface`, - `Plasma Phys. Control. Fusion 64, 044010 (2022) `_ + `Plasma Physics and Controlled Fusion 64, 044010 (2022) `_ .. [Fazzini2022] A. Fazzini, W. Yao, K. Burdonov, J. Béard, S. N. Chen, A. Ciardi, E. d’Humières, R. Diab, E. D. Filippov, S. Kisyov, V. Lelasseux, M. Miceli, Q. Moreno, S. Orlando, S. Pikuz, X. Ribeyre, M. Starodubtsev, R. Zemskov and J. Fuchs, `Particle energization in colliding subcritical collisionless shocks investigated in the laboratory`, - `A&A 665, A87 (2022) `_ + `Astronomy & Astrophysics 665, A87 (2022) `_ .. [Bykov2022] A. M. Bykov, S. M. Osipov and V. I. Romanskii, `Acceleration of Cosmic Rays to Energies above 1015 eV by Transrelativistic Shocks`, - `J. Exp. Theor. Phys. 134, 487-497 (2022) `_ + `Journal of Experimental and Theoretical Physics 134, 487-497 (2022) `_ .. [Sundstrom2022] A. Sundström, M. Grech, I. Pusztai and C. Riconda, `Stimulated-Raman-scattering amplification of attosecond XUV pulses with pulse-train pumps and application to local in-depth plasma-density measurement`, - `Phys. Rev. E 106, 045208 (2022) `_ + `Physical Review E 106, 045208 (2022) `_ .. [Krafft2022b] C. Krafft and P. Savoini, `Third and Fourth Harmonics of Electromagnetic Emissions by a Weak Beam in a Solar Wind Plasma with Random Density Fluctuations`, - `ApJL 934, L28 (2022) `_ + `The Astrophysical Journal Letters 934, L28 (2022) `_ .. [Krafft2022a] C. Krafft and P. Savoini, `Fundamental Electromagnetic Emissions by a Weak Electron Beam in Solar Wind Plasmas with Density Fluctuations`, - `ApJL 924, L24 (2022) `_ + `The Astrophysical Journal Letters 924, L24 (2022) `_ .. 
[Kong2022] @@ -584,7 +589,7 @@ Following is the distribution of these topics in the listed publications up to N C. Davidson, Z.-M. Sheng, T. Wilson and P. McKenna, `Theoretical and computational studies of the Weibel instability in several beam–plasma interaction configurations`, - `J. Plasma Phys. 88, 905880206 (2022) `_ + `Journal of Plasma Physics 88, 905880206 (2022) `_ .. [Glek2022] @@ -596,7 +601,7 @@ Following is the distribution of these topics in the listed publications up to N D. Umstadter `Controlled Injection of Electrons for Improved Performance of Laser-Wakefield Acceleration`, - `United States: N. p., (2022) `_ + `United States Department of Energy Technical Report (2022) `_ .. [Massimo2022] @@ -615,7 +620,7 @@ Following is the distribution of these topics in the listed publications up to N P. K. Singh, F.-Y. Li, C.-K. Huang, A. Moreau, R. Hollinger, A. Junghans, A. Favalli, C. Calvi, S. Wang, Y. Wang, H. Song, J. J. Rocca, R. E. Reinovsky and S. Palaniyappan, `Vacuum laser acceleration of super-ponderomotive electrons using relativistic transparency injection`, - `Nat Commun 13, 54 (2022) `_ + `Nature Communications 13, 54 (2022) `_ .. [Lobet2022] @@ -646,13 +651,13 @@ Following is the distribution of these topics in the listed publications up to N P. Tomassini, F. Massimo, L. Labate and L. A. Gizzi, `Accurate electron beam phase-space theory for ionization-injection schemes driven by laser pulses`, - `High Pow Laser Sci Eng 10, e15 (2021) `_ + `High Power Laser Science and Engineering 10, e15 (2021) `_ .. [Meinhold2021] T. A. Meinhold and N. Kumar, `Radiation pressure acceleration of protons from structured thin-foil targets`, - `J. Plasma Phys. 87, 905870607 (2021) `_ + `Journal of Plasma Physics 87, 905870607 (2021) `_ .. [Bonvalet2021b] @@ -664,13 +669,13 @@ Following is the distribution of these topics in the listed publications up to N Y. Shi, D. R. Blackman and A. Arefiev, `Electron acceleration using twisted laser wavefronts`, - `Plasma Phys. Control. Fusion 63, 125032 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 125032 (2021) `_ .. [Kumar2021] N. Kumar and B. Reville, `Nonthermal Particle Acceleration at Highly Oblique Nonrelativistic Shocks`, - `ApJL 921, L14 (2021) `_ + `The Astrophysical Journal Letters 921, L14 (2021) `_ .. [Ghaith2021] @@ -682,13 +687,13 @@ Following is the distribution of these topics in the listed publications up to N V. Horný and L. Veisz, `Generation of single attosecond relativistic electron bunch from intense laser interaction with a nanosphere`, - `Plasma Phys. Control. Fusion 63, 125025 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 125025 (2021) `_ .. [Krafft2021] C. Krafft and P. Savoini, `Second Harmonic Electromagnetic Emissions by an Electron Beam in Solar Wind Plasmas with Density Fluctuations`, - `ApJL 917, L23 (2021) `_ + `The Astrophysical Journal Letters 917, L23 (2021) `_ .. [Khalilzadeh2021c] @@ -712,7 +717,7 @@ Following is the distribution of these topics in the listed publications up to N Y. Shou, D. Wang, P. Wang, J. Liu, Z. Cao, Z. Mei, S. Xu, Z. Pan, D. Kong, G. Qi, Z. Liu, Y. Liang, Z. Peng, Y. Gao, S. Chen, J. Zhao, Y. Zhao, H. Xu, J. Zhao, Y. Wu, X. Yan and W. Ma, `High-efficiency generation of narrowband soft x rays from carbon nanotube foams irradiated by relativistic femtosecond lasers`, - `Opt. Lett. 46, 3969 (2021) `_ + `Optics Letters 46, 3969 (2021) `_ .. [Khalilzadeh2021b] @@ -724,67 +729,67 @@ Following is the distribution of these topics in the listed publications up to N H. 
Hosseinkhani, M. Pishdast, J. Yazdanpanah and S. A. Ghasemi, `Investigation of the classical and quantum radiation reaction effect on interaction of ultra high power laser with near critical plasma`, - `J. Nuclear Sci. Technol. 42, 27-35 (2021) `_ + `Journal of Nuclear Science, Engineering and Technology 42, 27-35 (2021) `_ .. [MercuriBaron2021] A. Mercuri-Baron, M. Grech, F. Niel, A. Grassi, M. Lobet, A. Di Piazza and C. Riconda, `Impact of the laser spatio-temporal shape on Breit–Wheeler pair production`, - `New J. Phys. 23, 085006 (2021) `_ + `New Journal of Physics 23, 085006 (2021) `_ .. [Peng2021] H. Peng, C. Riconda, S. Weber, C.T. Zhou and S.C. Ruan, `Frequency Conversion of Lasers in a Dynamic Plasma Grating`, - `Phys. Rev. Applied 15, 054053 (2021) `_ + `Physical Review Applied 15, 054053 (2021) `_ .. [Shi2021a] Y. Shi, D. Blackman, D. Stutman and A. Arefiev, `Generation of Ultrarelativistic Monoenergetic Electron Bunches via a Synergistic Interaction of Longitudinal Electric and Magnetic Fields of a Twisted Laser`, - `Phys. Rev. Lett. 126, 234801 (2021) `_ + `Physical Review Letters 126, 234801 (2021) `_ .. [Bonvalet2021a] J. Bonvalet, Ph. Nicolaï, D. Raffestin, E. D'humieres, D. Batani, V. Tikhonchuk, V. Kantarelou, L. Giuffrida, M. Tosca, G. Korn, A. Picciotto, A. Morace, Y. Abe, Y. Arikawa, S. Fujioka, Y. Fukuda, Y. Kuramitsu, H. Habara and D. Margarone, `Energetic α-particle sources produced through proton-boron reactions by high-energy high-intensity laser beams`, - `Phys. Rev. E 103, 053202 (2021) `_ + `Physical Review E 103, 053202 (2021) `_ .. [Shekhanov2021] S. A. Shekhanov and V. T. Tikhonchuk, `SRS-SBS competition and nonlinear laser energy absorption in a high temperature plasma`, - `Plasma Phys. Control. Fusion 63, 115016 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 115016 (2021) `_ .. [Psikal2021] J Psikal, `Laser-driven ion acceleration from near-critical Gaussian plasma density profile`, - `Plasma Phys. Control. Fusion 63, 064002 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 064002 (2021) `_ .. [Yoon2021b] Y. D. Yoon, G. S. Yun, D. E. Wendel and J. L. Burch, `Collisionless relaxation of a disequilibrated current sheet and implications for bifurcated structures`, - `Nat Commun 12, 3774 (2021) `_ + `Nature Communications 12, 3774 (2021) `_ .. [Lavorenti2021] F. Lavorenti, P. Henri, F. Califano, S. Aizawa and N. André, `Electron acceleration driven by the lower-hybrid-drift instability. An extended quasilinear model`, - `A&A 652, 202141049 (2021) `_ + `Astronomy & Astrophysics 652, 202141049 (2021) `_ .. [Golovanov2021] A A Golovanov, I Yu Kostyukov, L Reichwein, J Thomas and A Pukhov, `Excitation of strongly nonlinear plasma wakefield by electron bunches`, - `Plasma Phys. Control. Fusion 63, 085004 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 085004 (2021) `_ .. [Jirka2021] M. Jirka, P. Sasorov, S. S. Bulanov, G. Korn, B. Rus and S. V. Bulanov, `Reaching high laser intensity by a radiating electron`, - `Phys. Rev. A 103, 053114 (2021) `_ + `Physical Review A 103, 053114 (2021) `_ .. [Marques2021] @@ -814,7 +819,7 @@ Following is the distribution of these topics in the listed publications up to N G. Cantono, A. Permogorov, J. Ferri, E. Smetanina, A. Dmitriev, A. Persson, T. Fülöp and C.-G. Wahlström, `Laser-driven proton acceleration from ultrathin foils with nanoholes`, - `Sci Rep 11, 5006 (2021) `_ + `Scientific Reports 11, 5006 (2021) `_ .. 
[Perez2021] @@ -832,13 +837,13 @@ Following is the distribution of these topics in the listed publications up to N A. Sampath, X. Davoine, S. Corde, L. Gremillet, M. Gilljohann, M. Sangal, C. H. Keitel, R. Ariniello, J. Cary, H. Ekerfelt, C. Emma, F. Fiuza, H. Fujii, M. Hogan, C. Joshi, A. Knetsch, O. Kononenko, V. Lee, M. Litos, K. Marsh, Z. Nie, B. O’Shea, J. R. Peterson, P. San Miguel Claveria, D. Storey, Y. Wu, X. Xu, C. Zhang and M. Tamburini, `Extremely Dense Gamma-Ray Pulses in Electron Beam-Multifoil Collisions`, - `Phys. Rev. Lett. 126, 064801 (2021) `_ + `Physical Review Letters 126, 064801 (2021) `_ .. [Marini2021a] S. Marini, P. S. Kleij, F. Pisani, F. Amiranoff, M. Grech, A. Macchi, M. Raynaud and C. Riconda, `Ultrashort high energy electron bunches from tunable surface plasma waves driven with laser wavefront rotation`, - `Phys. Rev. E 103, L021201 (2021) `_ + `Physical Review E 103, L021201 (2021) `_ .. [Yao2021] @@ -850,14 +855,14 @@ Following is the distribution of these topics in the listed publications up to N E. G. Gelfer, A. M, Fedotov and S. Weber, `Radiation induced acceleration of ions in a laser irradiated transparent foil`, - `New J. Phys. 23, 095002 (2021) `_ + `New Journal of Physics 23, 095002 (2021) `_ `arXiv:1907.02621 `_ .. [Siminos2021] E. Siminos, I. Thiele and C. Olofsson, `Laser Wakefield Driven Generation of Isolated Carrier-Envelope-Phase Tunable Intense Subcycle Pulses`, - `Phys. Rev. Lett. 126, 044801 (2021) `_ + `Physical Review Letters 126, 044801 (2021) `_ `arXiv:1902.05014 `_ .. [Budriga2020] @@ -870,13 +875,13 @@ Following is the distribution of these topics in the listed publications up to N P. A. P. Nghiem, R. Assmann, A. Beck et al., `Toward a plasma-based accelerator at high beam energy with high beam charge and high beam quality`, - `Phys. Rev. Accel. Beams 23, 031301 (2020) `_ + `Physical Review Accelerators and Beams 23, 031301 (2020) `_ .. [Pisarczyk2020] T. Pisarczyk, M. Kalal, S. Yu. Gus'kov et al., `Hot electron retention in laser plasma created under terawatt subnanosecond irradiation of Cu targets`, - `Plasma Phys. Control. Fusion 62, 115020 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 115020 (2020) `_ .. [Pagano2020] @@ -894,25 +899,25 @@ Following is the distribution of these topics in the listed publications up to N H. Peng, C. Riconda, M. Grech, C.-T. Zhou and S. Weber, `Dynamical aspects of plasma gratings driven by a static ponderomotive potential`, - `Plasma Phys. Control. Fusion 62, 115015 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 115015 (2020) `_ .. [Glek2020] P. B. Glek, A. A. Voronin, V. Ya. Panchenko and A. M. Zheltikov, `Relativistic electron bunches locked to attosecond optical field waveforms: an attosecond light–matter bound state`, - `Laser Phys. Lett. 17 055401 (2020) `_ + `Laser Physics Letters 17 055401 (2020) `_ .. [Margarone2020] D. Margarone, A. Morace, J. Bonvalet et al., `Generation of α-Particle Beams With a Multi-kJ, Peta-Watt Class Laser System`, - `Front. Phys. 8, 343 (2020) `_ + `Frontiers in Physics 8, 343 (2020) `_ .. [Sinha2020] U. Sinha and N. Kumar, `Pair-beam propagation in a magnetized plasma for modeling the polarized radiation emission from gamma-ray bursts in laboratory astrophysics experiments`, - `Phys. Rev. E 101, 063204 (2020) `_ + `Physical Review E 101, 063204 (2020) `_ .. [Mitrofanov2020] @@ -924,81 +929,81 @@ Following is the distribution of these topics in the listed publications up to N B. T. Spiers, M. P. Hill, C. Brown, L. Ceurvorst, N. Ratan, A. F. Savin, P. 
Allan, E. Floyd, J. Fyrth, L. Hobbs, S. James, J. Luis, M. Ramsay, N. Sircombe, J. Skidmore, R. Aboushelbaya, M. W. Mayr, R. Paddock, R. H. W. Wang and P. A. Norreys, `Whole-beam self-focusing in fusion-relevant plasma`, - `Phil. Trans. R. Soc. A379, 20200159 `_ + `Philosophical Transactions of the Royal Society A379, 20200159 `_ .. [Derouillat2020] J. Derouillat and A. Beck, `Single Domain Multiple Decompositions for Particle-in-Cell simulations`, - `J. Phys.: Conf. Ser. 1596, 012052 (2020) `_ + `Journal of Physics: Conference Series 1596, 012052 (2020) `_ `arXiv:1912.04064 `_ .. [Zemzemi2020] I. Zemzemi, F. Massimo and A. Beck, `Azimuthal decomposition study of a realistic laser profile for efficient modeling of Laser WakeField Acceleration`, - `J. Phys.: Conf. Ser. 1596, 012055 (2020) `_ + `Journal of Physics: Conference Series 1596, 012055 (2020) `_ .. [Massimo2020b] F. Massimo, I. Zemzemi, A. Beck, J. Derouillat and A. Specka, `Efficient cylindrical envelope modeling for laser wakefield acceleration`, - `J. Phys.: Conf. Ser. 1596, 012054 (2020) `_ + `Journal of Physics: Conference Series 1596, 012054 (2020) `_ `arXiv:1912.04674 `_ .. [Massimo2020a] F. Massimo, A. Beck, J. Derouillat, I. Zemzemi and A. Specka, `Numerical modeling of laser tunneling ionization in particle-in-cell codes with a laser envelope model`, - `Phys. Rev. E 102, 033204 (2020) `_ + `Physical Review E 102, 033204 (2020) `_ `arXiv:2006.04433 `_ .. [Marcowith2020] A. Marcowith, G. Ferrand, M. Grech, Z. Meliani, I. Plotnikov and R. Walder, `Multi-scale simulations of particle acceleration in astrophysical systems`, - `Living Rev Comput Astrophys 6, 1 (2020) `_ + `Living Reviews in Computational Astrophysics 6, 1 (2020) `_ `arXiv:2002.09411 `_ .. [Dargent2020] J. Dargent, N. Aunai, B. Lavraud, S. Toledo‐Redondo and F. Califano, `Simulation of Plasmaspheric Plume Impact on Dayside Magnetic Reconnection`, - `Geophys. Res. Lett. 47, 2019GL086546 (2020) `_ + `Geophysical Research Letters 47, 2019GL086546 (2020) `_ `arXiv:2002.02243 `_ .. [Sundström2020b] A. Sundström, L. Gremillet, E. Siminos and I. Pusztai, `Collisional effects on the electrostatic shock dynamics in thin-foil targets driven by an ultraintense short pulse laser`, - `Plasma Phys. Control. Fusion 62, 085015 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 085015 (2020) `_ .. [Sundström2020a] A. Sundström, L. Gremillet, E. Siminos and I. Pusztai, `Fast collisional electron heating and relaxation in thin foils driven by a circularly polarized ultraintense short-pulse laser`, - `J. Plasma Phys. 86, 755860201 (2020) `_ + `Journal of Plasma Physics 86, 755860201 (2020) `_ `arXiv:1911.09562 `_ .. [Gelfer2020] E. G. Gelfer, A. M. Fedotov, O. Klimo and S. Weber, `Absorption and opacity threshold for a thin foil in a strong circularly polarized laser field`, - `Phys. Rev. E 101, 033204 (2020) `_ + `Physical Review E 101, 033204 (2020) `_ `arXiv:1906.05902 `_ .. [Ferri2020] J. Ferri, I. Thiele, E. Siminos, L. Gremillet, E. Smetanina, A. Dmitriev, G. Cantono, C.-G. Wahlström and T. Fülöp, `Enhancement of laser-driven ion acceleration in non-periodic nanostructured targets`, - `J. Plasma Phys. 86, 905860101 (2020) `_ + `Journal of Plasma Physics 86, 905860101 (2020) `_ `arXiv:1905.11131 `_ .. [Marques2019] J.-R. Marquès, L. Lancia, T. Gangolf, M. Blecher, S. Bolaños, J. Fuchs, O. Willi, F. Amiranoff, R. L. Berger, M. Chiaramello, S. Weber, and C. 
Riconda, `Joule-Level High-Efficiency Energy Transfer to Subpicosecond Laser Pulses by a Plasma-Based Amplifier`, - `Phys. Rev. X 9, 021008 (2019) `_ + `Physical Review X 9, 021008 (2019) `_ .. [Plotnikov2019] I. Plotnikov and L. Sironi, @@ -1021,39 +1026,39 @@ Following is the distribution of these topics in the listed publications up to N X. S. Geng, L. L. Ji, B. F. Shen et al., `Quantum reflection above the classical radiation-reaction barrier in the quantum electro-dynamics regime`, - `Commun. Phys. 2, 66 (2019) `_ + `Communications Physics 2, 66 (2019) `_ .. [Sinha2019] U. Sinha, C. H. Keitel, and N. Kumar, `Polarized Light from the Transportation of a Matter-Antimatter Beam in a Plasma`, - `Phys. Rev. Lett. 122, 204801 (2019) `_ + `Physical Review Letters 122, 204801 (2019) `_ .. [Malko2019] S. Malko, X. Vaisseau, F. Perez, D. Batani, A. Curcio, M. Ehret, J. Honrubia, K. Jakubowska, A. Morace, J. J. Santos and L. Volpe, `Enhanced relativistic-electron beam collimation using two consecutive laser pulses`, - `Sci Rep 9, 14061 (2019) `_ + `Scientific Reports 9, 14061 (2019) `_ .. [Peng2019] H. Peng, C. Riconda, M. Grech, J.-Q. Su and S. Weber, `Nonlinear dynamics of laser-generated ion-plasma gratings: A unified description`, - `Phys. Rev. E 100, 061201 (2019) `_ + `Physical Review E 100, 061201 (2019) `_ `arXiv:1911.03440 `_ .. [Fang2019] J. Fang, C.-Y. Lu, J.-W. Yan and H. Yu, `Early acceleration of electrons and protons at the nonrelativistic quasiparallel shocks with different obliquity angles`, - `Res. Astron. Astrophys. 19, 182 (2019) `_ + `Research in Astronomy and Astrophysics 19, 182 (2019) `_ `arXiv:1908.08170 `_ .. [Yoon2019b] Y. Yoon and P. M. Bellan, `Kinetic Verification of the Stochastic Ion Heating Mechanism in Collisionless Magnetic Reconnection`, - `ApJ 887, L29 (2019) `_ + `The Astrophysical Journal Letters 887, L29 (2019) `_ .. [Yoon2019a] @@ -1065,7 +1070,7 @@ Following is the distribution of these topics in the listed publications up to N F. Massimo, A. Beck, J. Derouillat, M. Grech, M. Lobet, F. Pérez, I. Zemzemi and A Specka, `Efficient start-to-end 3D envelope modeling for two-stage laser wakefield acceleration experiments`, - `Plasma Phys. Control. Fusion 61, 124001 (2019) `_ + `Plasma Physics and Controlled Fusion 61, 124001 (2019) `_ `arXiv:1912.04127 `_ .. [Beck2019] @@ -1079,14 +1084,14 @@ Following is the distribution of these topics in the listed publications up to N F. Pérez and M. Grech, `Oblique-incidence, arbitrary-profile wave injection for electromagnetic simulations`, - `Phys. Rev. E 99, 033307 (2019) `_ + `Physical Review E 99, 033307 (2019) `_ `arXiv:1809.04435 `_ .. [Thiele2019] I. Thiele, E. Siminos and T. Fülöp, `Electron Beam Driven Generation of Frequency-Tunable Isolated Relativistic Subcycle Pulses`, - `Phys. Rev. Lett. 122, 104803 (2019) `_ + `Physical Review Letters 122, 104803 (2019) `_ `arXiv:1806.04976 `_ .. [Golovanov2018] @@ -1099,19 +1104,19 @@ Following is the distribution of these topics in the listed publications up to N S. Toledo-Redondo, J. Dargent, N. Aunai, B. Lavraud, M. André, W. Li, B. Giles, P.-A. Lindvist, R. E. Ergun, C. T. Russel and J. L. Burch, `Perpendicular Current Reduction Caused by Cold Ions of Ionospheric Origin in Magnetic Reconnection at the Magnetopause: Particle-in-Cell Simulations and Spacecraft Observations`, - `Geophys. Res. Lett. 45, 10,033 (2018) `_ + `Geophysical Research Letters 45, 10,033 (2018) `_ .. [Gelfer2018] E. Gelfer, N. Elkina and A. 
Fedotov,
  `Unexpected impact of radiation friction: enhancing production of longitudinal plasma waves`,
-  `Sci. Rep. 8, 6478 (2018) `_
+  `Scientific Reports 8, 6478 (2018) `_

.. [Niel2018b]

  F. Niel, C. Riconda, F. Amiranoff, M. Lobet, J. Derouillat, F. Pérez, T. Vinci and M. Grech,
  `From quantum to classical modeling of radiation reaction: a focus on the radiation spectrum`,
-  `Plasma Phys. Control. Fusion 60, 094002 (2018) `_
+  `Plasma Physics and Controlled Fusion 60, 094002 (2018) `_
  `arXiv:1802.02927 `_

.. [Plotnikov2018]
@@ -1125,21 +1130,21 @@ Following is the distribution of these topics in the listed publications up to N
  F. Niel, C. Riconda, F. Amiranoff, R. Duclous and M. Grech,
  `From quantum to classical modeling of radiation reaction: A focus on stochasticity effects`,
-  `Phys. Rev. E 97, 043209 (2018) `_
+  `Physical Review E 97, 043209 (2018) `_
  `arXiv:1707.02618 `_

.. [Grassi2017b]

  A. Grassi, M. Grech, F. Amiranoff, A. Macchi and C. Riconda,
  `Radiation-pressure-driven ion Weibel instability and collisionless shocks`,
-  `Phys. Rev. E 96, 033204 (2017) `_
+  `Physical Review E 96, 033204 (2017) `_
  `arXiv:1705.05402 `_

.. [Fedeli2017]

  L. Fedeli, A. Formenti, L. Cialfi, A. Sgattoni, G. Cantono and M. Passoni,
  `Structured targets for advanced laser-driven sources`,
-  `Plasma Phys. Control. Fusion 60, 014013 (2017) `_
+  `Plasma Physics and Controlled Fusion 60, 014013 (2017) `_

.. [Golovanov2017]
@@ -1151,19 +1156,19 @@ Following is the distribution of these topics in the listed publications up to N
  J. Dargent, N. Aunai, B. Lavraud, S. Toledo-Redondo, M. A. Shay, P. A. Cassak and K. Malakit,
  `Kinetic simulation of asymmetric magnetic reconnection with cold ions`,
-  `J. Geophys. Res. Space Physics 122, 5290-5306 (2017) `_
+  `Journal of Geophysical Research: Space Physics 122, 5290-5306 (2017) `_

.. [Grassi2017a]

  A. Grassi, M. Grech, F. Amiranoff, F. Pegoraro, A. Macchi and C. Riconda,
  `Electron Weibel instability in relativistic counterstreaming plasmas with flow-aligned external magnetic fields`,
-  `Phys. Rev. E 95, 023203 (2017) `_
+  `Physical Review E 95, 023203 (2017) `_

.. [Dargent2016]

  J. Dargent, N. Aunai, G. Belmont, N. Dorville, B. Lavraud and M. Hesse,
  `Full particle-in-cell simulations of kinetic equilibria and the role of the initial current sheet on steady asymmetric magnetic reconnection`,
-  `J. Plasma Phys. 82, 905820305 (2016) `_
+  `Journal of Plasma Physics 82, 905820305 (2016) `_

.. [Chiaramello2016]
@@ -1175,10 +1180,10 @@ Following is the distribution of these topics in the listed publications up to N
  A. Beck, J.T. Frederiksen and J. Dérouillat,
  `Load management strategy for Particle-In-Cell simulations in high energy particle acceleration`,
-  `Nucl. Inst. Meth. in Phys. Res. A 829, 418-421 (2016) `_
+  `Nuclear Instruments and Methods in Physics Research A 829, 418-421 (2016) `_

.. [Lancia2016]

  L. Lancia, A. Giribono, L. Vassura, M. Chiaramello, C. Riconda, S. Weber, A. Castan, A. Chatelain, A. Frank, T. Gangolf, M. N. Quinn, J. Fuchs and J.-R. Marquès,
  `Signatures of the Self-Similar Regime of Strongly Coupled Stimulated Brillouin Scattering for Efficient Short Laser Pulse Amplification`,
-  `Phys. Rev. Lett. 
116, 075001 (2016) `_ + `Physical Review Letters 116, 075001 (2016) `_ From 097422756966fccdf0630bb0c80e5a01d7c319a2 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Tue, 21 May 2024 09:33:43 +0200 Subject: [PATCH 28/54] add publication --- doc/Sphinx/Overview/material.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 2d33d6aff..33184146f 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of May 2024, 187 papers have been published covering a broad range of topics: +As of May 2024, 188 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,12 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Salgado2024] + + F. C. Salgado, A. Kozan, D. Seipt, D. Hollatz, P. Hilz, M. Kaluza, A. Sävert, A. Seidel, D. Ullmann, Y. Zhao, and M. Zepf, + `All-optical source size and emittance measurements of laser-accelerated electron beams`, + `Physical Review Accelerators and Beams 27, 052803 (2024) `_ + .. [Ivanov2024] K. A. Ivanov, D. A. Gorlova, I. N. Tsymbalov, I. P. Tsygvintsev, S. A. Shulyapov, R. V. Volkov, and A. B. Savel’ev, From 443a62534c3c8bda2ba88a524f74faf82bfed2b5 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Wed, 22 May 2024 15:31:25 +0200 Subject: [PATCH 29/54] fix for AMD --- src/Particles/Particles.cpp | 2 +- src/Particles/Particles.h | 2 +- src/Particles/nvidiaParticles.cu | 115 +++++++++++++++++-------------- src/Particles/nvidiaParticles.h | 34 ++------- src/Species/Species.cpp | 5 +- src/Tools/gpu.h | 4 ++ 6 files changed, 75 insertions(+), 87 deletions(-) diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index d4eea30e9..8285762c6 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -1398,7 +1398,7 @@ int Particles::eraseLeavingParticles() return 0; } -void Particles::copyParticles( Particles* particles_to_inject ) +int Particles::addParticles( Particles* particles_to_inject ) { ERROR( "Device only feature, should not have come here! On CPU it's done in sortParticles." ); } diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index 91689ef3f..20b9c2ea6 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -485,7 +485,7 @@ class Particles // ----------------------------------------------------------------------------- //! Resize & Copy particles from particles_to_inject to the end of the vectors - virtual void copyParticles( Particles* particles_to_inject ); + virtual int addParticles( Particles* particles_to_inject ); //! Implementation of a somewhat efficient particle injection, sorting //! 
(including removing leaving particles) and binning for GPU if

diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu
index 617cb0851..a45a56cbb 100644
--- a/src/Particles/nvidiaParticles.cu
+++ b/src/Particles/nvidiaParticles.cu
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "Patch.h"

@@ -308,7 +309,7 @@ namespace detail {
         particle_container.resize( new_count );

         // Combine imported particles to main particles
-        particle_container.copyParticles( &particle_to_inject, initial_count );
+        particle_container.pasteParticles( &particle_to_inject, initial_count );

         // Compute keys of particles
         computeParticleClusterKey( particle_container, parameters, a_parent_patch );
@@ -924,52 +925,58 @@ void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pre
     if( nparts_to_copy ) {
         // Copy the particles to the destination
-        for( int ip = 0; ip < getNDoubleProp(); ip++ ) {
-            const auto in = getPtrDoubleProp( ip );
-            const auto out = dest->getPtrDoubleProp( ip );
-            thrust::copy_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, out, pred );
+        for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) {
+            const auto in = nvidia_double_prop_[ip]->begin();
+            const auto out = dest->nvidia_double_prop_[ip]->begin();
+            thrust::copy_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, out, pred );
         }
-        for( int ip = 0; ip < getNShortProp(); ip++ ) {
-            const auto in = getPtrShortProp( ip );
-            const auto out = dest->getPtrShortProp( ip );
-            thrust::copy_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, out, pred );
+        for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) {
+            const auto in = nvidia_short_prop_[ip]->begin();
+            const auto out = dest->nvidia_short_prop_[ip]->begin();
+            thrust::copy_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, out, pred );
         }
         if( tracked ) {
-            const auto in = getPtrId();
-            const auto out = dest->getPtrId();
-            thrust::copy_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, out, pred );
+            const auto in = nvidia_id_.begin();
+            const auto out = dest->nvidia_id_.begin();
+            thrust::copy_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, out, pred );
         }
-        cudaDeviceSynchronize();
+        const auto in = nvidia_cell_keys_.begin();
+        const auto out = dest->nvidia_cell_keys_.begin();
+        thrust::copy_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, out, pred );
+        SMILEI_ACCELERATOR_DEVICE_SYNC();
+        ::hipDeviceSynchronize();
     }
 }

-void nvidiaParticles::copyParticles( Particles* particles_to_inject )
+int nvidiaParticles::addParticles( Particles* particles_to_inject )
 {
     const auto nparts = gpu_nparts_;
     nvidiaParticles* to_inject = static_cast<nvidiaParticles*>( particles_to_inject );
     resize( nparts + to_inject->gpu_nparts_ );
-    copyParticles( to_inject, nparts );
+    pasteParticles( to_inject, nparts );
+    return to_inject->gpu_nparts_;
 }

-void nvidiaParticles::copyParticles( nvidiaParticles* particles_to_inject, size_t offset )
+void nvidiaParticles::pasteParticles( nvidiaParticles* particles_to_inject, size_t offset )
 {
     // Copy the particles to the destination
-    for( int ip = 0; ip < getNDoubleProp(); ip++ ) {
-        const auto in = particles_to_inject->getPtrDoubleProp( ip );
-        const auto out = getPtrDoubleProp( ip );
-        thrust::copy_n( thrust::cuda::par_nosync, in, particles_to_inject->gpu_nparts_, out + offset );
+    for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) {
+        const auto in = particles_to_inject->nvidia_double_prop_[ip]->begin();
+        const auto out = nvidia_double_prop_[ip]->begin();
+        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, particles_to_inject->gpu_nparts_, out + offset );
     }
-    for( int ip = 0; ip < getNShortProp(); ip++ ) {
-        const auto in = particles_to_inject->getPtrShortProp( ip );
-        const auto out = getPtrShortProp( ip );
-        thrust::copy_n( thrust::cuda::par_nosync, in, particles_to_inject->gpu_nparts_, out + offset );
+    for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) {
+        const auto in = particles_to_inject->nvidia_short_prop_[ip]->begin();
+        const auto out = nvidia_short_prop_[ip]->begin();
+        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, particles_to_inject->gpu_nparts_, out + offset );
     }
     if( tracked ) {
-        const auto in = particles_to_inject->getPtrId();
-        const auto out = getPtrId();
-        thrust::copy_n( thrust::cuda::par_nosync, in, particles_to_inject->gpu_nparts_, out + offset );
+        const auto in = particles_to_inject->nvidia_id_.begin();
+        const auto out = nvidia_id_.begin();
+        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, particles_to_inject->gpu_nparts_, out + offset );
     }
-    cudaDeviceSynchronize();
+    SMILEI_ACCELERATOR_DEVICE_SYNC();
+    ::hipDeviceSynchronize();
 }

 // -----------------------------------------------------------------------------
@@ -1016,20 +1023,21 @@ int nvidiaParticles::eraseParticlesByPredicate( Predicate pred )
     // Copy the particles to the destination
     // Using more memory, we could use the faster remove_copy_if
     // NOTE: remove_if is stable.
-    for( int ip = 0; ip < getNDoubleProp(); ip++ ) {
-        const auto in = getPtrDoubleProp( ip );
-        thrust::remove_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, pred );
+    for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) {
+        const auto in = nvidia_double_prop_[ip]->begin();
+        thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, pred );
     }
-    for( int ip = 0; ip < getNShortProp(); ip++ ) {
-        const auto in = getPtrShortProp( ip );
-        thrust::remove_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, pred );
+    for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) {
+        const auto in = nvidia_short_prop_[ip]->begin();
+        thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, pred );
     }
     if( tracked ) {
-        const auto in = getPtrId();
-        thrust::remove_if( thrust::cuda::par_nosync, in, in + gpu_nparts_, keys, pred );
+        const auto in = nvidia_id_.begin();
+        thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, pred );
     }
-    cudaDeviceSynchronize();
-
+    SMILEI_ACCELERATOR_DEVICE_SYNC();
+    ::hipDeviceSynchronize();
+
     return nparts_to_remove;
 }

@@ -1094,21 +1102,21 @@ void nvidiaParticles::sortParticleByKey()

     // Sort particles using thrust::gather, according to the sorting map
     thrust::device_vector<double> buffer( gpu_nparts_ );
-    for( int ip = 0; ip < getNDoubleProp(); ip++ ) {
-        thrust::gather( thrust::device, index.begin(), index.end(), getPtrDoubleProp( ip ), buffer.begin() );
-        swapDoubleProp( ip, buffer );
+    for( auto prop: nvidia_double_prop_ ) {
+        thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer.begin() );
+        prop->swap( buffer );
     }
     buffer.clear();

     thrust::device_vector<short> buffer_short( gpu_nparts_ );
-    for( int ip = 0; ip < getNShortProp(); ip++ ) {
-        thrust::gather( thrust::device, index.begin(), index.end(), getPtrShortProp( ip ), buffer_short.begin() );
-        swapShortProp( ip, buffer_short );
+    for( auto prop: nvidia_short_prop_ ) {
+        thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer_short.begin() );
+        prop->swap( buffer_short );
     }
     buffer_short.clear();

     if( tracked ) {
         thrust::device_vector<uint64_t> buffer_uint64( gpu_nparts_ );
-        thrust::gather( thrust::device, index.begin(), index.end(), getPtrId(), buffer_uint64.begin() );
-        swapId( buffer_uint64 );
+        thrust::gather( thrust::device, index.begin(), index.end(), nvidia_id_.begin(), buffer_uint64.begin() );
+        nvidia_id_.swap( buffer_uint64 );
         buffer_uint64.clear();
     }
 }
@@ -1123,16 +1131,17 @@ void nvidiaParticles::sortParticleByKey( nvidiaParticles& buffer )
     thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() );

     // Sort particles using thrust::gather, according to the sorting map
-    for( int ip = 0; ip < getNDoubleProp(); ip++ ) {
-        thrust::gather( thrust::cuda::par_nosync, index.begin(), index.end(), getPtrDoubleProp( ip ), buffer.getPtrDoubleProp( ip ) );
+    for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) {
+        thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_double_prop_[ip]->begin(), buffer.nvidia_double_prop_[ip]->begin() );
     }
-    for( int ip = 0; ip < getNShortProp(); ip++ ) {
-        thrust::gather( thrust::cuda::par_nosync, index.begin(), index.end(), getPtrShortProp( ip ), buffer.getPtrShortProp( ip ) );
+    for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) {
+        thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_short_prop_[ip]->begin(), buffer.nvidia_short_prop_[ip]->begin() );
     }
     if( tracked ) {
-        thrust::gather( thrust::cuda::par_nosync, index.begin(), index.end(), getPtrId(), buffer.getPtrId() );
+        thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_id_.begin(), buffer.nvidia_id_.begin() );
     }
-    cudaDeviceSynchronize();
+    SMILEI_ACCELERATOR_DEVICE_SYNC();
+    ::hipDeviceSynchronize();

     swap( buffer );
 }
@@ -1202,7 +1211,7 @@ void nvidiaParticles::naiveImportAndSortParticles( nvidiaParticles* particles_to
     // Inject newly arrived particles in particles_to_inject
     const size_t current_size = gpu_nparts_;
     resize( current_size + particles_to_inject->size() );
-    copyParticles( particles_to_inject, current_size );
+    pasteParticles( particles_to_inject, current_size );

     particles_to_inject->clear();
 }

diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h
index 906d3709e..1938da107 100644
--- a/src/Particles/nvidiaParticles.h
+++ b/src/Particles/nvidiaParticles.h
@@ -111,36 +111,12 @@ class nvidiaParticles : public Particles
     uint64_t * getPtrId() override {
         return thrust::raw_pointer_cast( nvidia_id_.data() );
     };
-
-    size_t getNDoubleProp() {
-        return nvidia_double_prop_.size();
-    };
-    size_t getNShortProp() {
-        return nvidia_short_prop_.size();
-    };
-
-    double * getPtrDoubleProp( int iprop ) {
-        return thrust::raw_pointer_cast( nvidia_double_prop_[iprop]->data() );
-    };
-    short * getPtrShortProp( int iprop ) {
-        return thrust::raw_pointer_cast( nvidia_short_prop_[iprop]->data() );
-    };
-
-    void swapDoubleProp( int iprop, thrust::device_vector<double> &new_vector ) {
-        nvidia_double_prop_[iprop]->swap( new_vector );
-    };
-    void swapShortProp( int iprop, thrust::device_vector<short> &new_vector ) {
-        nvidia_short_prop_[iprop]->swap( new_vector );
-    };
-    void swapId( thrust::device_vector<uint64_t> &new_vector ) {
-        nvidia_id_.swap( new_vector );
-    };
-
+
     void swap( nvidiaParticles & p ) {
-        for( int iprop = 0; iprop < getNDoubleProp(); iprop++ ) {
+        for( int iprop = 0; iprop < nvidia_double_prop_.size(); iprop++ ) {
             nvidia_double_prop_[iprop]->swap( *p.nvidia_double_prop_[iprop] );
         }
-        for( int iprop 
= 0; iprop < getNShortProp(); iprop++ ) { + for( int iprop = 0; iprop < nvidia_short_prop_.size(); iprop++ ) { nvidia_short_prop_[iprop]->swap( *p.nvidia_short_prop_[iprop] ); } if( tracked ) { @@ -157,10 +133,10 @@ class nvidiaParticles : public Particles void copyParticlesByPredicate( Particles* buffer, Predicate pred ); //! Resize & Copy particles from particles_to_inject to end of vectors - void copyParticles( Particles* particles_to_inject ) override; + int addParticles( Particles* particles_to_inject ) override; //! Copy particles from particles_to_inject to specific offset - void copyParticles( nvidiaParticles* particles_to_inject, size_t offset ); + void pasteParticles( nvidiaParticles* particles_to_inject, size_t offset ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device and returns the number of particle removed diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index 19b39c2ed..31e127876 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -2103,10 +2103,9 @@ void Species::importParticles( Params ¶ms, Patch *patch, Particles &source_p // Warning: the current GPU version does not handle tracked particles // Inject particles from source_particles - particles->copyParticles( &source_particles ); - particles->last_index.back() += source_particles.size(); + particles->last_index.back() += particles->addParticles( &source_particles ); particles->last_index[0] = particles->last_index.back(); - source_particles.clear(); + source_particles.resize( 0 ); #else // --------------------------------------------------- diff --git a/src/Tools/gpu.h b/src/Tools/gpu.h index 28a8c98da..172d9fddf 100644 --- a/src/Tools/gpu.h +++ b/src/Tools/gpu.h @@ -19,10 +19,14 @@ namespace smilei { #define SMILEI_ACCELERATOR_DECLARE_ROUTINE _Pragma( "omp declare target" ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END _Pragma( "omp end declare target" ) #define SMILEI_ACCELERATOR_ATOMIC _Pragma( "omp atomic update" ) + #define SMILEI_ACCELERATOR_ASYNC_POLYCY thrust::hip::par_nosync + #define SMILEI_ACCELERATOR_DEVICE_SYNC() hipDeviceSynchronize() #elif defined( SMILEI_OPENACC_MODE ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE _Pragma( "acc routine seq" ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END #define SMILEI_ACCELERATOR_ATOMIC _Pragma( "acc atomic" ) + #define SMILEI_ACCELERATOR_ASYNC_POLYCY thrust::cuda::par_nosync + #define SMILEI_ACCELERATOR_DEVICE_SYNC() cudaDeviceSynchronize() #else #define SMILEI_ACCELERATOR_DECLARE_ROUTINE #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END From 93ba8e1431b358bfe65631f39d2b05e2f326c50b Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Wed, 22 May 2024 15:54:24 +0200 Subject: [PATCH 30/54] typos --- src/Particles/nvidiaParticles.cu | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index a45a56cbb..c8e46afd2 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -944,7 +944,6 @@ void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pre const auto out = dest->nvidia_cell_keys_.begin(); thrust::copy_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, out, pred ); SMILEI_ACCELERATOR_DEVICE_SYNC(); - ::hipDeviceSynchronize(); } } @@ -976,7 +975,6 @@ void nvidiaParticles::pasteParticles( nvidiaParticles* particles_to_inject, size thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, particles_to_inject->gpu_nparts_, out + 
offset );
     }
     SMILEI_ACCELERATOR_DEVICE_SYNC();
-    ::hipDeviceSynchronize();
 }

 // -----------------------------------------------------------------------------
@@ -1036,7 +1034,6 @@ int nvidiaParticles::eraseParticlesByPredicate( Predicate pred )
         thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, pred );
     }
     SMILEI_ACCELERATOR_DEVICE_SYNC();
-    ::hipDeviceSynchronize();

     return nparts_to_remove;
 }
@@ -1141,7 +1138,6 @@ void nvidiaParticles::sortParticleByKey( nvidiaParticles& buffer )
         thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_id_.begin(), buffer.nvidia_id_.begin() );
     }
     SMILEI_ACCELERATOR_DEVICE_SYNC();
-    ::hipDeviceSynchronize();

     swap( buffer );
 }

From 9f362f3d3347d7ef58c903eed4dd67d3f4672eb2 Mon Sep 17 00:00:00 2001
From: Frederic Perez 
Date: Fri, 24 May 2024 12:37:17 +0200
Subject: [PATCH 31/54] Slightly faster GPU sort

---
 src/Particles/nvidiaParticles.cu | 109 ++++++++++++++++++-------------
 src/Particles/nvidiaParticles.h  |   6 +-
 2 files changed, 69 insertions(+), 46 deletions(-)

diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu
index c8e46afd2..42995603d 100644
--- a/src/Particles/nvidiaParticles.cu
+++ b/src/Particles/nvidiaParticles.cu
@@ -293,23 +293,34 @@ namespace detail {
                                       const Params&  parameters,
                                       const Patch&   a_parent_patch )
     {
-        // Remove out of bound particles
-        const auto erased_count = particle_container.eraseParticlesByPredicate( cellKeyBelow<0>() );
-
-        const auto initial_count = particle_container.deviceSize() - erased_count;
+        const auto initial_count = particle_container.deviceSize();
         const auto inject_count = particle_to_inject.deviceSize();
-        const auto new_count = initial_count + inject_count;
+
+        // Locate out-of-bounds particles in array "available_places"
+        const auto keys = particle_container.getPtrCellKeys();
+        const auto erased_count = thrust::count_if( thrust::device, keys, keys + initial_count, cellKeyBelow<0>() );
+        thrust::device_vector<int> available_places( erased_count );
+        thrust::copy_if( thrust::device,
+                         thrust::counting_iterator<int>{0},
+                         thrust::counting_iterator<int>{ (int) initial_count },
+                         keys,
+                         available_places.begin(),
+                         cellKeyBelow<0>() );

-        // Resize particles
-        // NOTE: We really want a non-initializing vector here!
-        // It's possible to give a custom allocator to thrust::device_vector.
-        // Create one with construct(<>) as a noop and derive from
-        // thrust::device_malloc_allocator. For now we do an explicit resize.
-        particle_container.softReserve( new_count );
-        particle_container.resize( new_count );
+        const auto new_count = initial_count + inject_count - erased_count;

-        // Combine imported particles to main particles
-        particle_container.pasteParticles( &particle_to_inject, initial_count );
+        // Copy the imported particles to available places
+        particle_to_inject.scatterParticles( particle_container, available_places );
+        // If there are more imported particles than places, copy the remaining imported particles at the end
+        if( inject_count >= erased_count ) {
+            particle_container.resize( new_count );
+            particle_container.pasteParticles( &particle_to_inject, initial_count, erased_count );
+        // If there are more places than imported particles, the remaining places should be filled
+        } else {
+            const auto last_filled = available_places[inject_count];
+            particle_container.eraseParticlesByPredicate( cellKeyBelow<0>(), last_filled );
+            particle_container.resize( new_count );
+        }

         // Compute keys of particles
         computeParticleClusterKey( particle_container, parameters, a_parent_patch );
@@ -319,17 +330,11 @@
         particle_to_inject.resize( new_count );

         // Sort particles using thrust::gather, according to the sorting map
+        // (particle_to_inject serves as a buffer)
         particle_container.sortParticleByKey( particle_to_inject );

         // Recompute bins
         computeBinIndex( particle_container );
-
-        // This free generates a lot of memory fragmentation. If we enable it we
-        // reduce significantly the memory usage over time but a memory spike
-        // will still be present. Unfortunately, this free generates soo much
-        // fragmentation (like the one above) that at some point the GPU memory
-        // allocator will fail!
-        // particle_to_inject.free();
     }

 template <typename Predicate>
 void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pred )
 {
-    // TODO(Etienne M): We are doing extra work. We could use something like
-    // std::partition to output the invalidated particles in buffer
-    // and keep the good ones. This would help us avoid the std::remove_if in
-    // the particle injection and sorting algorithm.
     // Count particles satisfying the predicate
     const auto keys = getPtrCellKeys();
     const int nparts_to_copy = thrust::count_if( thrust::device, keys, keys + gpu_nparts_, pred );
@@ -952,27 +952,29 @@ int nvidiaParticles::addParticles( Particles* particles_to_inject )
     const auto nparts = gpu_nparts_;
     nvidiaParticles* to_inject = static_cast<nvidiaParticles*>( particles_to_inject );
     resize( nparts + to_inject->gpu_nparts_ );
-    pasteParticles( to_inject, nparts );
+    pasteParticles( to_inject, nparts, 0 );
     return to_inject->gpu_nparts_;
 }

-void nvidiaParticles::pasteParticles( nvidiaParticles* particles_to_inject, size_t offset )
+void nvidiaParticles::pasteParticles( nvidiaParticles* particles_to_inject, size_t offset_in_output, size_t offset_in_input )
 {
+    const auto n = particles_to_inject->gpu_nparts_ - (int) offset_in_input;
+
     // Copy the particles to the destination
     for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) {
-        const auto in = particles_to_inject->nvidia_double_prop_[ip]->begin();
-        const auto out = nvidia_double_prop_[ip]->begin();
-        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, particles_to_inject->gpu_nparts_, out + offset );
+        const auto in = particles_to_inject->nvidia_double_prop_[ip]->begin() + offset_in_input;
+        const auto out = nvidia_double_prop_[ip]->begin() + offset_in_output;
+        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, n, out );
     }
     for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) {
-        const auto in = particles_to_inject->nvidia_short_prop_[ip]->begin();
-        const auto out = nvidia_short_prop_[ip]->begin();
-        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, particles_to_inject->gpu_nparts_, out + offset );
+        const auto in = particles_to_inject->nvidia_short_prop_[ip]->begin() + offset_in_input;
+        const auto out = nvidia_short_prop_[ip]->begin() + offset_in_output;
+        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, n, out );
     }
     if( tracked ) {
-        const auto in = particles_to_inject->nvidia_id_.begin();
-        const auto out = nvidia_id_.begin();
-        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, particles_to_inject->gpu_nparts_, out + offset );
+        const auto in = particles_to_inject->nvidia_id_.begin() + offset_in_input;
+        const auto out = nvidia_id_.begin() + offset_in_output;
+        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, n, out );
     }
     SMILEI_ACCELERATOR_DEVICE_SYNC();
 }

@@ -1006,32 +1008,32 @@ void nvidiaParticles::pasteParticles( nvidiaParticles* particles_to_inject, size
 // -----------------------------------------------------------------------------
 int nvidiaParticles::eraseLeavingParticles()
 {
-    const auto nremoved = eraseParticlesByPredicate( cellKeyBelow<0>() );
+    const auto nremoved = eraseParticlesByPredicate( cellKeyBelow<0>(), 0 );
     resize( gpu_nparts_ - nremoved );
     return nremoved;
 }

 //! "Erase" particles but does not resize the arrays!
 template <typename Predicate>
-int nvidiaParticles::eraseParticlesByPredicate( Predicate pred )
+int nvidiaParticles::eraseParticlesByPredicate( Predicate pred, size_t offset )
 {
     const auto keys = getPtrCellKeys();
-    const int nparts_to_remove = thrust::count_if( thrust::device, keys, keys + gpu_nparts_, pred );
+    const int nparts_to_remove = thrust::count_if( thrust::device, keys + offset, keys + gpu_nparts_, pred );

     // Copy the particles to the destination
     // Using more memory, we could use the faster remove_copy_if
     // NOTE: remove_if is stable.
for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) { const auto in = nvidia_double_prop_[ip]->begin(); - thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, pred ); + thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in + offset, in + gpu_nparts_, keys + offset, pred ); } for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) { const auto in = nvidia_short_prop_[ip]->begin(); - thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, pred ); + thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in + offset, in + gpu_nparts_, keys + offset, pred ); } if( tracked ) { const auto in = nvidia_id_.begin(); - thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, pred ); + thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in + offset, in + gpu_nparts_, keys + offset, pred ); } SMILEI_ACCELERATOR_DEVICE_SYNC(); @@ -1142,6 +1144,25 @@ void nvidiaParticles::sortParticleByKey( nvidiaParticles& buffer ) swap( buffer ); } + +void nvidiaParticles::scatterParticles( nvidiaParticles &dest, const thrust::device_vector &index ) +{ + const auto n = std::min( (int) index.size(), gpu_nparts_ ); + for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) { + const auto in = nvidia_double_prop_[ip]->begin(); + thrust::scatter( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + n, index.begin(), dest.nvidia_double_prop_[ip]->begin() ); + } + for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) { + const auto in = nvidia_short_prop_[ip]->begin(); + thrust::scatter( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + n, index.begin(), dest.nvidia_short_prop_[ip]->begin() ); + } + if( tracked ) { + const auto in = nvidia_id_.begin(); + thrust::scatter( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + n, index.begin(), dest.nvidia_id_.begin() ); + } + SMILEI_ACCELERATOR_DEVICE_SYNC(); +} + int nvidiaParticles::prepareBinIndex() { if( first_index.size() == 0 ) { @@ -1207,7 +1228,7 @@ void nvidiaParticles::naiveImportAndSortParticles( nvidiaParticles* particles_to // Inject newly arrived particles in particles_to_inject const size_t current_size = gpu_nparts_; resize( current_size + particles_to_inject->size() ); - pasteParticles( particles_to_inject, current_size ); + pasteParticles( particles_to_inject, current_size, 0 ); particles_to_inject->clear(); } diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index 1938da107..37b3fc18d 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -136,7 +136,7 @@ class nvidiaParticles : public Particles int addParticles( Particles* particles_to_inject ) override; //! Copy particles from particles_to_inject to specific offset - void pasteParticles( nvidiaParticles* particles_to_inject, size_t offset ); + void pasteParticles( nvidiaParticles* particles_to_inject, size_t offset_out, size_t offset_in ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device and returns the number of particle removed @@ -144,7 +144,7 @@ class nvidiaParticles : public Particles int eraseLeavingParticles() override; template - int eraseParticlesByPredicate( Predicate pred ); + int eraseParticlesByPredicate( Predicate pred, size_t offset ); // --------------------------------------------------------------------------------------------------------------------- //! Create n_additional_particles new particles at the end of vectors @@ -161,6 +161,8 @@ class nvidiaParticles : public Particles //! 
This version is asynchronous, but requires a buffer of equal size to be provided void sortParticleByKey( nvidiaParticles& buffer ); + void scatterParticles( nvidiaParticles &particles_to_import, const thrust::device_vector &index ); + protected: //! Redefine first_index and last_index according to the binning algorithm //! used on GPU. From 3074a9fab7ecbd37ce74b6207f544daf0e6c2d9f Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Fri, 24 May 2024 14:35:24 +0200 Subject: [PATCH 32/54] add publication --- doc/Sphinx/Overview/material.rst | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 33184146f..66ed26180 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of May 2024, 188 papers have been published covering a broad range of topics: +As of May 2024, 189 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,12 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Krafft2024b] + + C. Krafft, P. Savoini, and F. J. Polanco-Rodríguez, + `Mechanisms of Fundamental Electromagnetic Wave Radiation in the Solar Wind`, + `The Astrophysical Journal Letters 967, 2 (2024) `_ + .. [Salgado2024] F. C. Salgado, A. Kozan, D. Seipt, D. Hollatz, P. Hilz, M. Kaluza, A. Sävert, A. Seidel, D. Ullmann, Y. Zhao, and M. Zepf, @@ -92,7 +98,7 @@ Following is the distribution of these topics in the listed publications up to N `Control of autoresonant plasma beat-wave wakefield excitation`, `Physical Review Research 6, 013338 (2024) `_ -.. [Krafft2024] +.. [Krafft2024a] C. Krafft and P. Savoini, `Electrostatic Wave Decay in the Randomly Inhomogeneous Solar Wind`, @@ -532,7 +538,7 @@ Following is the distribution of these topics in the listed publications up to N .. [Pae2022] - K. . Pae, C. M. Kim, V. B. Pathak, C.-M. Ryu and C. H. Nam, + K. H. Pae, C. M. Kim, V. B. Pathak, C.-M. Ryu and C. H. Nam, `Direct laser acceleration of electrons from a plasma mirror by an intense few-cycle Laguerre–Gaussian laser and its dependence on the carrier-envelope phase`, `Plasma Physics and Controlled Fusion 64, 055013 (2022) `_ @@ -769,7 +775,7 @@ Following is the distribution of these topics in the listed publications up to N .. [Psikal2021] - J Psikal, + J. Psikal, `Laser-driven ion acceleration from near-critical Gaussian plasma density profile`, `Plasma Physics and Controlled Fusion 63, 064002 (2021) `_ @@ -787,7 +793,7 @@ Following is the distribution of these topics in the listed publications up to N .. [Golovanov2021] - A A Golovanov, I Yu Kostyukov, L Reichwein, J Thomas and A Pukhov, + A. A. Golovanov, I. Y. Kostyukov, L. Reichwein, J. Thomas and A. 
Pukhov, `Excitation of strongly nonlinear plasma wakefield by electron bunches`, `Plasma Physics and Controlled Fusion 63, 085004 (2021) `_ From 1045fd21bda84117a21c6118f01dd23d36e0ce73 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Mon, 27 May 2024 09:38:48 +0200 Subject: [PATCH 33/54] Various small fixes --- doc/Sphinx/Use/namelist.rst | 8 ++++++-- happi/_Diagnostics/TrackParticles.py | 15 ++++++++++----- happi/_Utils.py | 6 +++++- src/SmileiMPI/AsyncMPIbuffers.h | 2 +- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/doc/Sphinx/Use/namelist.rst b/doc/Sphinx/Use/namelist.rst index f7deebcae..6c5eaf2be 100755 --- a/doc/Sphinx/Use/namelist.rst +++ b/doc/Sphinx/Use/namelist.rst @@ -1148,6 +1148,9 @@ Each species has to be defined in a ``Species`` block:: :ref:`tracking `. The available fields are ``"Ex"``, ``"Ey"``, ``"Ez"``, ``"Bx"``, ``"By"`` and ``"Bz"``. + Note that magnetic field components, as they originate from the interpolator, + are shifted by half a timestep compared to those from the *Fields* diagnostics. + Additionally, the work done by each component of the electric field is available as ``"Wx"``, ``"Wy"`` and ``"Wz"``. Contrary to the other interpolated fields, these quantities are accumulated over time. @@ -2716,7 +2719,8 @@ or several points arranged in a 2-D or 3-D grid. * **In "AMcylindrical" geometry**, probes are defined with 3D Cartesian coordinates and cannot be separated per mode. Use Field diagnostics for cylindrical coordinates and information per mode. - + * **Probes rely on the particle interpolator to compute fields** so that the + magnetic field is shifted by half a timestep compared to that of *Fields* diagnostics. To add one probe diagnostic, include the block ``DiagProbe``:: @@ -3343,7 +3347,7 @@ for instance:: def my_filter(particles): return (particles.px>-1.)*(particles.px<1.) + (particles.pz>3.) -.. Warning:: The ``px``, ``py`` and ``pz`` quantities are not exactly the momenta. +.. Note:: The ``px``, ``py`` and ``pz`` quantities are not exactly the momenta. They are actually the velocities multiplied by the lorentz factor, i.e., :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z`. This is true only inside the ``filter`` function (not for the output of the diagnostic). 
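As an illustrative sketch only (assuming the usual Smilei normalization stated above, where ``px``, ``py`` and ``pz`` are :math:`\gamma v` in units of :math:`c` inside the ``filter``, so that :math:`\gamma = \sqrt{1 + p_x^2 + p_y^2 + p_z^2}` for a massive particle), a filter selecting particles by Lorentz factor could look like::

    def my_filter(particles):
        # px, py, pz are gamma*v in units of c here, so the Lorentz factor
        # of a massive particle follows directly from them
        gamma = (1. + particles.px**2 + particles.py**2 + particles.pz**2)**0.5
        # keep only particles above a hypothetical, arbitrary threshold
        return gamma > 5.
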
diff --git a/happi/_Diagnostics/TrackParticles.py b/happi/_Diagnostics/TrackParticles.py index 253bb2958..0825eb0f3 100755 --- a/happi/_Diagnostics/TrackParticles.py +++ b/happi/_Diagnostics/TrackParticles.py @@ -447,8 +447,9 @@ def _orderFiles( self, fileOrdered, chunksize, sort ): for k, name in self._short_properties_from_raw.items(): if k not in group: continue ordered = self._np.empty((nparticles_to_write, ), dtype=group[k].dtype) - if k == "id": ordered.fill(0) - else : ordered.fill(self._np.nan) + if k == "id" : ordered.fill(0) + elif k == "charge": ordered.fill(9999) + else : ordered.fill(self._np.nan) ordered[locs] = group[k][()][selectedIndices] f0[name].write_direct(ordered, dest_sel=self._np.s_[it,:]) @@ -461,8 +462,9 @@ def _orderFiles( self, fileOrdered, chunksize, sort ): for first_o, last_o, npart_o in ChunkedRange(nparticles_to_write, chunksize): for k, name in self._short_properties_from_raw.items(): if k not in group: continue - if k == "id": data[k].fill(0) - else : data[k].fill(self._np.nan) + if k == "id" : data[k].fill(0) + elif k == "charge": data[k].fill(9999) + else : data[k].fill(self._np.nan) # Loop chunks of the input for first_i, last_i, npart_i in ChunkedRange(nparticles, chunksize): # Obtain IDs @@ -538,7 +540,10 @@ def _generateRawData(self, times=None): data[it,:] -= self._XmovedForTime[time] else: data = self._readUnstructuredH5(self._h5items[axis], self.selectedParticles, first_time, last_time) - data[deadParticles] = self._np.nan + if data.dtype == float: + data[deadParticles] = self._np.nan + else: + data[deadParticles] = 9999 self._rawData[axis] = data if self._verbose: print("Process broken lines ...") diff --git a/happi/_Utils.py b/happi/_Utils.py index 9fd35a757..28dd028df 100755 --- a/happi/_Utils.py +++ b/happi/_Utils.py @@ -398,7 +398,11 @@ def __init__(self, operation, QuantityTranslator, ureg): raise Exception("Quantity "+q+" not understood") # Calculate the total units and its inverse locals().update(self.imports) - units = eval("".join(basic_op)).units + units = eval("".join(basic_op)) + if isinstance(units, (int, float)): + units = ureg.Quantity(1) # dimensionless + else: + units = units.units self.translated_units = units.format_babel(locale="en") # Make the operation string self.translated_operation = "".join(full_op) diff --git a/src/SmileiMPI/AsyncMPIbuffers.h b/src/SmileiMPI/AsyncMPIbuffers.h index 7b3cf1fcc..90ba02fb1 100755 --- a/src/SmileiMPI/AsyncMPIbuffers.h +++ b/src/SmileiMPI/AsyncMPIbuffers.h @@ -17,7 +17,7 @@ class AsyncMPIbuffers AsyncMPIbuffers(); ~AsyncMPIbuffers(); - virtual void allocate( unsigned int nDim_field ); + void allocate( unsigned int nDim_field ); void defineTags( Patch *patch, SmileiMPI *smpi, int tag ) ; From a48d556b6dcffb9042342fa53efec27c9f53f33b Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Mon, 27 May 2024 12:44:49 +0200 Subject: [PATCH 34/54] fix many warnings --- makefile | 8 +- scripts/compile_tools/machine/adastra | 1 - scripts/compile_tools/machine/ruche_gpu2 | 2 +- src/Checkpoint/Checkpoint.cpp | 2 +- src/Diagnostic/DiagnosticProbes.cpp | 2 +- src/Diagnostic/DiagnosticScalar.cpp | 22 +-- src/Diagnostic/DiagnosticTrack.cpp | 2 +- src/ElectroMagn/ElectroMagn.cpp | 14 +- src/ElectroMagn/ElectroMagn.h | 2 +- src/ElectroMagn/ElectroMagn1D.cpp | 2 +- src/ElectroMagn/ElectroMagn1D.h | 2 +- src/ElectroMagn/ElectroMagn2D.cpp | 32 ++-- src/ElectroMagn/ElectroMagn2D.h | 2 +- src/ElectroMagn/ElectroMagn3D.cpp | 32 ++-- src/ElectroMagn/ElectroMagn3D.h | 2 +- src/ElectroMagn/ElectroMagnAM.cpp | 6 +- 
src/ElectroMagn/ElectroMagnAM.h | 2 +- src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp | 16 +- src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp | 26 +-- .../MA_Solver1D_Friedman.cpp | 2 +- src/ElectroMagnSolver/MA_Solver2D_norm.cpp | 18 +- src/ElectroMagnSolver/MA_Solver3D_norm.cpp | 18 +- src/ElectroMagnSolver/MF_Solver2D_Yee.cpp | 18 +- src/ElectroMagnSolver/MF_Solver3D_Yee.cpp | 18 +- .../PML_Solver2D_Envelope.cpp | 8 +- .../PML_SolverAM_Envelope.cpp | 9 +- ...PML_SolverAM_EnvelopeReducedDispersion.cpp | 7 +- src/Field/Field.cpp | 6 +- src/Field/Field.h | 8 +- src/Field/Field1D.cpp | 2 +- src/Field/Field1D.h | 2 +- src/Field/Field2D.cpp | 26 +-- src/Field/Field2D.h | 2 +- src/Field/Field3D.cpp | 38 ++-- src/Field/Field3D.h | 2 +- src/Field/cField.h | 2 +- src/Field/cField1D.cpp | 2 +- src/Field/cField1D.h | 2 +- src/Field/cField2D.cpp | 2 +- src/Field/cField2D.h | 2 +- src/Field/cField3D.cpp | 2 +- src/Field/cField3D.h | 2 +- src/Interpolator/Interpolator2D2Order.cpp | 10 +- src/Interpolator/Interpolator3D2Order.cpp | 12 +- src/Interpolator/Interpolator3D2Order.h | 2 +- src/MovWindow/SimWindow.cpp | 4 +- .../MultiphotonBreitWheeler.cpp | 26 +-- .../MultiphotonBreitWheeler.h | 2 +- .../MultiphotonBreitWheelerTables.h | 4 +- src/Params/Params.cpp | 34 ++-- src/Params/Params.h | 2 +- src/ParticleBC/BoundaryConditionType.cpp | 24 +-- src/ParticleBC/PartBoundCond.h | 2 +- src/Particles/Particles.cpp | 6 +- src/Particles/ParticlesFactory.cpp | 4 +- src/Patch/Patch.cpp | 4 +- src/Patch/Patch.h | 2 +- src/Patch/SyncVectorPatch.cpp | 60 +++---- src/Patch/SyncVectorPatch.h | 30 ++-- src/Patch/VectorPatch.cpp | 64 ++++--- src/Patch/VectorPatch.h | 2 +- src/Projector/Projector2D2OrderGPU.cpp | 62 ++++--- src/Projector/Projector2D2OrderGPU.h | 30 ++-- src/Projector/Projector2D2OrderGPUKernel.cpp | 2 +- .../Projector2D2OrderGPUKernelCUDAHIP.cu | 4 +- .../Projector2D2OrderGPUKernelCUDAHIP.h | 2 +- src/Projector/Projector3D2OrderGPU.cpp | 169 ++++++++++-------- src/Projector/Projector3D2OrderGPU.cpp.backup | 32 ++-- src/Projector/Projector3D2OrderGPU.h | 30 ++-- src/Projector/Projector3D2OrderGPUKernel.cpp | 2 +- src/Projector/Projector3D2OrderGPUKernelAcc.h | 26 +-- .../Projector3D2OrderGPUKernelCUDAHIP.cu | 2 +- .../Projector3D2OrderGPUKernelCUDAHIP.h | 2 +- .../Projector3D2OrderGPUKernelNaive.h | 6 +- src/Projector/ProjectorAM2OrderV.cpp | 4 - src/Projector/ProjectorFactory.h | 4 +- src/Pusher/PusherBoris.cpp | 2 +- src/Pusher/PusherBorisNR.cpp | 2 +- src/Pusher/PusherHigueraCary.cpp | 2 +- src/Pusher/PusherPhoton.cpp | 2 +- src/Pusher/PusherPonderomotiveBoris.cpp | 2 +- src/Pusher/PusherPonderomotiveBorisBTIS3.cpp | 1 - .../PusherPonderomotivePositionBoris.cpp | 2 +- src/Pusher/PusherVay.cpp | 2 +- src/Radiation/RadiationCorrLandauLifshitz.cpp | 12 +- src/Radiation/RadiationLandauLifshitz.cpp | 12 +- src/Radiation/RadiationMonteCarlo.cpp | 30 ++-- src/Radiation/RadiationMonteCarlo.h | 2 +- src/Radiation/RadiationNiel.cpp | 32 ++-- src/Radiation/RadiationNiel.h | 2 +- src/Radiation/RadiationTables.h | 14 +- src/Radiation/RadiationTools.h | 14 +- src/Radiation/Table.h | 2 +- src/Smilei.cpp | 16 +- src/SmileiMPI/SmileiMPI.cpp | 14 +- src/SmileiMPI/SmileiMPI.h | 6 +- src/Species/Species.cpp | 63 ++++--- src/Species/Species.h | 6 +- src/Tools/Pragma.h | 2 +- src/Tools/gpu.cpp | 26 +-- src/Tools/gpu.h | 2 +- src/Tools/gpuRandom.h | 22 ++- src/Tools/userFunctions.h | 4 +- 103 files changed, 689 insertions(+), 653 deletions(-) diff --git a/makefile b/makefile index d06dfaccc..277a2237d 100755 --- a/makefile +++ 
b/makefile @@ -202,9 +202,9 @@ endif ifneq (,$(call parse_config,gpu_nvidia)) override config += noopenmp # Prevent openmp for nvidia - CXXFLAGS += -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE + CXXFLAGS += -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OACC GPU_COMPILER ?= nvcc - GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE $(DIRS:%=-I%) + GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OACC $(DIRS:%=-I%) GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS) GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu) GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o)) @@ -214,9 +214,9 @@ endif # AMD GPUs ifneq (,$(call parse_config,gpu_amd)) - CXXFLAGS += -DSMILEI_ACCELERATOR_MODE + CXXFLAGS += -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OMP GPU_COMPILER ?= $(CC) - GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE -std=c++14 $(DIRS:%=-I%) + GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OMP -std=c++14 $(DIRS:%=-I%) GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS) GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu) GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o)) diff --git a/scripts/compile_tools/machine/adastra b/scripts/compile_tools/machine/adastra index 7aab184ce..14c2a975a 100644 --- a/scripts/compile_tools/machine/adastra +++ b/scripts/compile_tools/machine/adastra @@ -85,7 +85,6 @@ ADASTRA_DEBUG_FLAGS := -g -ggdb $(ADASTRA_DEBUG_SANITIZER_FLAGS) -v # ifneq (,$(call parse_config,gpu_amd)) # When using OMP - ADASTRA_ACCELERATOR_GPU_OMP_DEFINE_FLAGS := -DSMILEI_ACCELERATOR_GPU_OMP=1 # ADASTRA_ACCELERATOR_GPU_TARGET := gfx908 # ADASTRA_ACCELERATOR_GPU_TARGET := gfx908:xnack- diff --git a/scripts/compile_tools/machine/ruche_gpu2 b/scripts/compile_tools/machine/ruche_gpu2 index a9406d60d..80cf09198 100644 --- a/scripts/compile_tools/machine/ruche_gpu2 +++ b/scripts/compile_tools/machine/ruche_gpu2 @@ -26,7 +26,7 @@ GPU_COMPILER_FLAGS += -arch=sm_80 #sm_89 # first compile completely with sm_80 t CXXFLAGS += -Minfo=accel # what is offloaded/copied # CXXFLAGS += -Minfo=all # very verbose output -# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE' +# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_ACCELERATOR_GPU_OACC' # CXXFLAGS += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP # GPU_COMPILER_FLAGS += -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ? # LDFLAGS += -mp=gpu diff --git a/src/Checkpoint/Checkpoint.cpp b/src/Checkpoint/Checkpoint.cpp index 13c3d28a5..943840cb9 100755 --- a/src/Checkpoint/Checkpoint.cpp +++ b/src/Checkpoint/Checkpoint.cpp @@ -233,7 +233,7 @@ void Checkpoint::dumpAll( VectorPatch &vecPatches, Region ®ion, unsigned int MESSAGE( " Checkpoint #" << num_dump << " at iteration " << itime << " dumped" ); #endif -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) MESSAGE( " Copying device data in main memory" ); // TODO(Etienne M): This may very well be redundant if we did a diagnostic // during the last iteration. 
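The renames above leave a two-level macro scheme: SMILEI_ACCELERATOR_GPU is set for any GPU build, while SMILEI_ACCELERATOR_GPU_OACC (OpenACC, NVIDIA path) and SMILEI_ACCELERATOR_GPU_OMP (OpenMP target, AMD path) select the offload backend. A minimal sketch of how a source file can branch on that scheme; the loop body is a placeholder, and the guard layout follows the hunks above:

    // One umbrella macro for "any GPU", one macro per offload backend.
    void axpy( double a, const double *x, double *y, int n )
    {
    #if defined( SMILEI_ACCELERATOR_GPU_OMP )
        #pragma omp target teams distribute parallel for is_device_ptr( x, y )
    #elif defined( SMILEI_ACCELERATOR_GPU_OACC )
        #pragma acc parallel deviceptr( x, y )
        #pragma acc loop gang worker vector
    #endif
        for( int i = 0; i < n; i++ ) {
            y[i] += a * x[i];   // serial on CPU builds, offloaded on GPU builds
        }
    }
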
Indeed, we copy everything from the device to diff --git a/src/Diagnostic/DiagnosticProbes.cpp b/src/Diagnostic/DiagnosticProbes.cpp index 5e79eecc9..e66c684e7 100755 --- a/src/Diagnostic/DiagnosticProbes.cpp +++ b/src/Diagnostic/DiagnosticProbes.cpp @@ -740,7 +740,7 @@ void DiagnosticProbes::run( SmileiMPI *smpi, VectorPatch &vecPatches, int itime, // Interpolate all usual fields on probe ("fake") particles of current patch unsigned int iPart_MPI = offset_in_MPI[ipatch]; unsigned int maxPart_MPI = offset_in_MPI[ipatch] + npart; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) smpi->resizeDeviceBuffers( ithread, nDim_particle, npart ); diff --git a/src/Diagnostic/DiagnosticScalar.cpp b/src/Diagnostic/DiagnosticScalar.cpp index fe88f47d9..9b8b17409 100755 --- a/src/Diagnostic/DiagnosticScalar.cpp +++ b/src/Diagnostic/DiagnosticScalar.cpp @@ -436,7 +436,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) const unsigned int nPart=vecSpecies[ispec]->getNbrOfParticles(); // number of particles -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) const double *const __restrict__ weight_ptr = vecSpecies[ispec]->particles->getPtrWeight(); const short *const __restrict__ charge_ptr = vecSpecies[ispec]->particles->getPtrCharge(); const double *const __restrict__ momentum_x = vecSpecies[ispec]->particles->getPtrMomentum(0); @@ -447,14 +447,14 @@ void DiagnosticScalar::compute( Patch *patch, int ) if( vecSpecies[ispec]->mass_ > 0 ) { // GPU mode -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target teams distribute parallel for \ map(tofrom: density) \ is_device_ptr(weight_ptr) \ reduction(+:density) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr) #pragma acc loop gang worker vector reduction(+:density) #endif @@ -468,7 +468,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) map(tofrom: charge) \ is_device_ptr( charge_ptr, weight_ptr) \ reduction(+:charge) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr, charge_ptr) #pragma acc loop gang worker vector reduction(+:charge) #endif @@ -484,7 +484,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) momentum_y /* [istart:particle_number] */, \ momentum_z /* [istart:particle_number] */) \ reduction(+:ener_tot) -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc parallel deviceptr(weight_ptr, \ momentum_x, \ momentum_y, \ @@ -525,14 +525,14 @@ void DiagnosticScalar::compute( Patch *patch, int ) } else if( vecSpecies[ispec]->mass_ == 0 ) { // GPU mode -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target teams distribute parallel for \ map(tofrom: density) \ is_device_ptr(weight_ptr) \ reduction(+:density) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr) #pragma acc loop gang worker vector reduction(+:density) #endif @@ -548,7 +548,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) momentum_y /* [istart:particle_number] */, \ momentum_z /* [istart:particle_number] */) \ reduction(+:ener_tot) -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc parallel deviceptr(weight_ptr, \ momentum_x, \ momentum_y, \ @@ -667,7 +667,7 @@ void 
DiagnosticScalar::compute( Patch *patch, int ) // total energy in current field double Uem = 0.; if( ! AM ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) Uem = field->norm2OnDevice( EMfields->istart, EMfields->bufsize ); #else Uem = field->norm2( EMfields->istart, EMfields->bufsize ); @@ -751,7 +751,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) j_max = iFieldStart[1]; k_max = iFieldStart[2]; -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) // We use scalar rather than arrays because omp target // sometime fails to pass them to the device const unsigned int ixstart = iFieldStart[0]; @@ -776,7 +776,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) map(tofrom: minval, maxval, i_min, i_max, j_min, j_max, k_min, k_max) \ map(to: ny, nz, ixstart, ixend, iystart, iyend, izstart, izend) //reduction(min:minval) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field_data) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(3) #endif diff --git a/src/Diagnostic/DiagnosticTrack.cpp b/src/Diagnostic/DiagnosticTrack.cpp index 16ac325e9..583caab94 100755 --- a/src/Diagnostic/DiagnosticTrack.cpp +++ b/src/Diagnostic/DiagnosticTrack.cpp @@ -188,7 +188,7 @@ void DiagnosticTrack::setIDs( Patch *patch ) for( unsigned int iPart=0; iPartvecSpecies[species_index_]->particles->id( iPart ) = ++latest_Id; } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) patch->vecSpecies[species_index_]->particles->initializeIDsOnDevice(); #endif } diff --git a/src/ElectroMagn/ElectroMagn.cpp b/src/ElectroMagn/ElectroMagn.cpp index 2c75bc6a4..02467ecd4 100755 --- a/src/ElectroMagn/ElectroMagn.cpp +++ b/src/ElectroMagn/ElectroMagn.cpp @@ -555,7 +555,7 @@ void ElectroMagn::applyAntenna( unsigned int iAntenna, double intensity ) //! Compute the total density and currents from species density and currents on Device //! This function is valid wathever the geometry // --------------------------------------------------------------------------------------------------------------------- -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) void ElectroMagn::computeTotalRhoJOnDevice() { @@ -577,7 +577,7 @@ void ElectroMagn::computeTotalRhoJOnDevice() double *const __restrict__ rhosp = rho_s[ispec] ? 
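Every scalar reduction above follows the same two-backend shape: the OpenMP path maps the accumulator tofrom and declares reduction(+:...) on a combined target construct, while the OpenACC path marks the particle arrays deviceptr and attaches the reduction to the loop. A condensed sketch of the charge sum, with pointer names taken from the hunks and nPart assumed in scope:

    double charge = 0.;
    #if defined( SMILEI_ACCELERATOR_GPU_OMP )
        #pragma omp target teams distribute parallel for \
            map( tofrom : charge )                       \
            is_device_ptr( charge_ptr, weight_ptr )      \
            reduction( + : charge )
    #elif defined( SMILEI_ACCELERATOR_GPU_OACC )
        #pragma acc parallel deviceptr( weight_ptr, charge_ptr )
        #pragma acc loop gang worker vector reduction( + : charge )
    #endif
    for( unsigned int ipart = 0; ipart < nPart; ipart++ ) {
        charge += weight_ptr[ipart] * ( double )charge_ptr[ipart];
    }
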
rho_s[ispec]->data() : nullptr; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( \ Jxp[0:Jx_size], \ Jyp[0:Jy_size], \ @@ -594,7 +594,7 @@ void ElectroMagn::computeTotalRhoJOnDevice() #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc loop gang worker vector #endif for( unsigned int i=0 ; idata(); // Magnetic field Bx^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBx = Bx_->size(); const int sizeofBy = By_->size(); const int sizeofBz = Bz_->size(); @@ -1229,10 +1229,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1241,7 +1241,7 @@ void ElectroMagn2D::centerMagneticFields() } // Magnetic field By^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(By2D[0:sizeofBy],By2D_m[0:sizeofBy]) #pragma acc loop gang worker #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -1249,10 +1249,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p + 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -1260,7 +1260,7 @@ void ElectroMagn2D::centerMagneticFields() } } // Magnetic field Bz^(d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(Bz2D[0:sizeofBz],Bz2D_m[0:sizeofBz]) #pragma acc loop gang worker #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -1268,10 +1268,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p + 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1282,7 +1282,7 @@ void ElectroMagn2D::centerMagneticFields() double *const By2D_oldBTIS3 = By_mBTIS3->data(); double *const Bz2D_oldBTIS3 = Bz_mBTIS3->data(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofByBTIS3 = By_mBTIS3->size(); #pragma acc parallel present(By2D_oldBTIS3[0:sizeofByBTIS3],By2D[0:sizeofBy]) #pragma acc loop gang @@ -1291,17 +1291,17 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p - 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { By2D_oldBTIS3[x * ny_p + y] = ( By2D[(x+1) * ny_p + y] + By2D_oldBTIS3[x * ny_p + y] ) * 0.5; } } -#if defined( SMILEI_OPENACC_MODE ) 
+#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBzBTIS3 = Bz_mBTIS3->size(); #pragma acc parallel present(Bz2D_oldBTIS3[0:sizeofBz],Bz2D[0:sizeofBz]) #pragma acc loop gang @@ -1310,10 +1310,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p - 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1392,7 +1392,7 @@ void ElectroMagn2D::computeTotalRhoJ() //END computeTotalRhoJ } -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! Method used to compute the total charge density and currents by summing over all species on Device // void ElectroMagn2D::computeTotalRhoJOnDevice() // { diff --git a/src/ElectroMagn/ElectroMagn2D.h b/src/ElectroMagn/ElectroMagn2D.h index aecb87ab8..d8cdfb031 100755 --- a/src/ElectroMagn/ElectroMagn2D.h +++ b/src/ElectroMagn/ElectroMagn2D.h @@ -115,7 +115,7 @@ class ElectroMagn2D : public ElectroMagn //! Method used to compute the total charge density and currents by summing over all species void computeTotalRhoJ() override; -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! Method used to compute the total charge density and currents by summing over all species on Device // void computeTotalRhoJOnDevice() override; // #endif diff --git a/src/ElectroMagn/ElectroMagn3D.cpp b/src/ElectroMagn/ElectroMagn3D.cpp index c8994d75c..41ba9cc58 100755 --- a/src/ElectroMagn/ElectroMagn3D.cpp +++ b/src/ElectroMagn/ElectroMagn3D.cpp @@ -4,7 +4,7 @@ #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -1207,7 +1207,7 @@ void ElectroMagn3D::centerMagneticFields() double *const __restrict__ Bz3D_m = Bz_m->data(); // Magnetic field Bx^(p,d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBx = Bx_->size(); const int sizeofBy = By_->size(); const int sizeofBz = Bz_->size(); @@ -1219,11 +1219,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; idata(); double *const __restrict__ BzmBTIS3 = Bz_mBTIS3->data(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofByBTIS3 = By_mBTIS3->size(); #pragma acc parallel present(By3D[0:sizeofBy],BymBTIS3[0:sizeofByBTIS3]) #pragma acc loop gang @@ -1305,11 +1305,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; isize(); #pragma acc parallel present(Bz3D[0:sizeofBz],BzmBTIS3[0:sizeofBzBTIS3]) #pragma acc loop gang @@ -1332,11 +1332,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; icopyFrom( Br_[imode] ); if (input[2] && copy[2]) Bt_m[imode]->copyFrom( Bt_[imode] ); } - ElectroMagnAM *emAM = static_cast( patch->EMfields ); + // ElectroMagnAM *emAM = static_cast( patch->EMfields ); //emAM->compute_B_m_fromEB(); } @@ -1900,7 +1900,7 @@ void ElectroMagnAM::compute_B_m_fromEB() { const unsigned int nl_p = dimPrim[0]; const unsigned int nl_d = dimDual[0]; - const unsigned int nr_p = dimPrim[1]; + // const unsigned int nr_p = dimPrim[1]; const unsigned int nr_d = dimDual[1]; 
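All the centerMagneticFields hunks apply one pragma ladder: OpenACC takes parallel present(...) with gang/worker on the outer loop and loop vector on the inner one, OpenMP target takes a collapsed teams distribute parallel for, and CPU builds keep omp simd on the inner loop. A reduced 2D sketch with names from the hunks; the arrays are assumed already present on the device:

    #if defined( SMILEI_ACCELERATOR_GPU_OACC )
        #pragma acc parallel present( Bx2D[0:sizeofBx], Bx2D_m[0:sizeofBx] )
        #pragma acc loop gang worker
    #elif defined( SMILEI_ACCELERATOR_GPU_OMP )
        #pragma omp target
        #pragma omp teams distribute parallel for collapse( 2 )
    #endif
    for( unsigned int x = 0; x < nx_p; ++x ) {
    #ifdef SMILEI_ACCELERATOR_GPU_OACC
        #pragma acc loop vector
    #endif
    #if !defined( SMILEI_ACCELERATOR_GPU )
        #pragma omp simd
    #endif
        for( unsigned int y = 0; y < ny_d; ++y ) {
            // time-centering: average old and new B at each node
            Bx2D_m[x * ny_d + y] = ( Bx2D[x * ny_d + y] + Bx2D_m[x * ny_d + y] ) * 0.5;
        }
    }
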
const unsigned int Nmodes = El_.size(); diff --git a/src/ElectroMagn/ElectroMagnAM.h b/src/ElectroMagn/ElectroMagnAM.h index 979581b4c..cd3063113 100755 --- a/src/ElectroMagn/ElectroMagnAM.h +++ b/src/ElectroMagn/ElectroMagnAM.h @@ -157,7 +157,7 @@ class ElectroMagnAM : public ElectroMagn void computeTotalRhoJ() override; -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! Method used to compute the total charge density and currents by summing over all species on Device // void computeTotalRhoJOnDevice() override ; // #endif diff --git a/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp index 42ce8c381..2d257cbd5 100755 --- a/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp @@ -68,9 +68,9 @@ ElectroMagnBC2D_SM::ElectroMagnBC2D_SM( Params ¶ms, Patch *patch, unsigned i ElectroMagnBC2D_SM::~ElectroMagnBC2D_SM() { - for (int i=0 ; inumber_of_points_; const int sizeofE1 = E[1]->number_of_points_; const int sizeofE2 = E[2]->number_of_points_; @@ -182,7 +182,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( db1, b1_size ); if( axis0_ == 0 ) { // for By^(d,p) -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -199,7 +199,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * + B_ext1[j]; } } else { // for Bx^(p,d) -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -234,7 +234,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * // for Bz^(d,d) if( axis0_ == 0 ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E1[0:sizeofE1],B2[0:sizeofB2],B_ext2[0:B_ext_size2],db2[0:b2_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -247,7 +247,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * } } else { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E0[0:sizeofE0],B2[0:sizeofB2],B_ext2[0:B_ext_size2],db2[0:b2_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) diff --git a/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp index 3ae113e60..ba4e61b28 100755 --- a/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp @@ -186,7 +186,7 @@ void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * const int isBoundary2min = patch->isBoundary( axis2_, 0 ); const int isBoundary2max = patch->isBoundary( axis2_, 1 ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC const int sizeofE0 = E[axis0_]->number_of_points_; const int sizeofE1 = E[axis1_]->number_of_points_; const int sizeofE2 = E[axis2_]->number_of_points_; @@ -217,7 +217,7 @@ void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * // B1 if( axis0_ == 0 ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef 
SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -225,7 +225,7 @@ void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int j=isBoundary1min; j( fields->Ex_ ); Field1D *Ey1D = static_cast( fields->Ey_ ); Field1D *Ez1D = static_cast( fields->Ez_ ); - Field1D *Bx1D = static_cast( fields->Bx_ ); + // Field1D *Bx1D = static_cast( fields->Bx_ ); Field1D *By1D = static_cast( fields->By_ ); Field1D *Bz1D = static_cast( fields->Bz_ ); Field1D *Jx1D = static_cast( fields->Jx_ ); diff --git a/src/ElectroMagnSolver/MA_Solver2D_norm.cpp b/src/ElectroMagnSolver/MA_Solver2D_norm.cpp index d12e021c1..4cd0d7d7c 100755 --- a/src/ElectroMagnSolver/MA_Solver2D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver2D_norm.cpp @@ -37,7 +37,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) // double sumJz = 0; // Electric field Ex^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -52,10 +52,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_d; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -64,7 +64,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) } // Electric field Ey^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Ey2D[0:sizeofEy], Jy2D[0:sizeofEy], Bz2D[0:sizeofBz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -72,10 +72,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -84,7 +84,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) } // Electric field Ez^(p,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Ez2D[0:sizeofEz], Jz2D[0:sizeofEz], Bx2D[0:sizeofBx], By2D[0:sizeofBy] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -92,10 +92,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { diff --git a/src/ElectroMagnSolver/MA_Solver3D_norm.cpp b/src/ElectroMagnSolver/MA_Solver3D_norm.cpp index 9b2a089cc..7ffea26c0 100755 --- a/src/ElectroMagnSolver/MA_Solver3D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver3D_norm.cpp 
@@ -35,7 +35,7 @@ void MA_Solver3D_norm::operator()( ElectroMagn *fields ) const unsigned int nz_d = fields->dimDual[2]; // Electric field Ex^(d,p,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -50,11 +50,11 @@ void MA_Solver3D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; iBz_->data(); // [x * ny_d + y] : dual in x,y primal in z // Magnetic field Bx^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -48,10 +48,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 1; y < ny_d - 1; ++y ) { @@ -59,7 +59,7 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) } } // Magnetic field By^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( By2D[0:sizeofBy], Ez2D[0:sizeofEz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -67,10 +67,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 1; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -79,7 +79,7 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) } // Magnetic field Bz^(d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Bz2D[0:sizeofBy], Ex2D[0:sizeofEx], Ey2D[0:sizeofEz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -87,10 +87,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 1; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 1; y < ny_d - 1; ++y ) { diff --git a/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp b/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp index 5930af3e1..f70159699 100755 --- a/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp +++ b/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp @@ -34,7 +34,7 @@ void MF_Solver3D_Yee::operator()( ElectroMagn *fields ) const double * __restrict__ Ez3D = isEFilterApplied ? 
fields->filter_->Ez_[0]->data() : fields->Ez_->data(); // Magnetic field Bx^(p,d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -49,11 +49,11 @@ void MF_Solver3D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; i dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dx) ; - std::complex dA_over_dx = dA_over_dx_fdtd - + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + // + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dx*dx) ; std::complex d2A_over_dx2 = d2A_over_dx2_fdtd @@ -590,8 +590,8 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // ---- // dA/dx = dA/dx + ik0 A std::complex dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dx) ; - std::complex dA_over_dx = dA_over_dx_fdtd - + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + // + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dx*dx) ; std::complex d2A_over_dx2 = d2A_over_dx2_fdtd diff --git a/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp b/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp index 7e4e740c7..d8c65645a 100644 --- a/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp +++ b/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp @@ -395,7 +395,6 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, double k0 = 1.; // laser wavenumber std::complex source_term_x ; std::complex source_term_y ; - double mpml_ratio = 0.00; if (iDim == 0) { for( unsigned int k=0 ; k<1 ; k++ ) { @@ -405,7 +404,7 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = ( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = ( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl) ; @@ -494,7 +493,7 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dl*dl) ; @@ -635,8 +634,8 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, for( unsigned int 
j=solvermin ; j < solvermax ; j++ ) { // y loop // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = ( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd - + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + // + i1*k0*( *G_n_pml )( i, j ) ; // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = ( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl) ; std::complex d2G_over_dx2 = d2G_over_dx2_fdtd diff --git a/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp b/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp index 771f12e37..c2a5c4087 100644 --- a/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp +++ b/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp @@ -400,7 +400,6 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en double k0 = 1.; // laser wavenumber std::complex source_term_x ; std::complex source_term_y ; - double mpml_ratio = 0.00; if (iDim == 0) { for( unsigned int k=0 ; k<1 ; k++ ) { @@ -410,7 +409,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = (1.+delta)*( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *G_n_pml )( i+2, j )-( *G_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = (1.+delta)*( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *G_n_pml )( i-2, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+2, j ) )/(4.*dl*dl) ; @@ -490,7 +489,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en for( unsigned int i=solvermin ; i dA_over_dx_fdtd = (1.+delta)*( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *A_n_pml )( i+2, j )-( *A_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2A_over_dx2_fdtd = (1.+delta)*( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *A_n_pml )( i-2, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+2, j ) )/(4.*dl*dl) ; @@ -591,7 +590,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en for( unsigned int i=2 ; i dG_over_dx_fdtd = (1.+delta)*( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *G_n_pml )( i+2, j )-( *G_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = (1.+delta)*( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( 
i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *G_n_pml )( i-2, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+2, j ) )/(4.*dl*dl) ; diff --git a/src/Field/Field.cpp b/src/Field/Field.cpp index 19c820d1d..0d8427f1e 100644 --- a/src/Field/Field.cpp +++ b/src/Field/Field.cpp @@ -5,14 +5,14 @@ void Field::put_to( double val ) { SMILEI_ASSERT( data_ != nullptr ); -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) const bool is_hostptr_mapped_on_device = smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( data_ ); #endif // NVCC's OpenACC needs that redundant pointer value double* an_other_data_pointer = data_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) // Test if data exists on GPU, put_to can be used on CPU and GPU during a simulation #pragma acc parallel present( an_other_data_pointer [0:size()] ) if( is_hostptr_mapped_on_device ) #pragma acc loop gang worker vector @@ -25,7 +25,7 @@ void Field::put_to( double val ) } } -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! copy the field array from Host to Device void Field::copyFromHostToDevice() { diff --git a/src/Field/Field.h b/src/Field/Field.h index 669106245..563705ab1 100755 --- a/src/Field/Field.h +++ b/src/Field/Field.h @@ -188,7 +188,7 @@ class Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! Compute the norm2OnDevice of the field virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; #endif @@ -234,7 +234,7 @@ class Field return sum; } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) inline double __attribute__((always_inline)) normOnDevice() { @@ -245,7 +245,7 @@ class Field #pragma omp target teams distribute parallel for \ map(tofrom: sum) map(to: number_of_points_) \ reduction(+:sum) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field) //deviceptr( data_ ) #pragma acc loop gang worker vector reduction(+:sum) #endif @@ -279,7 +279,7 @@ class Field virtual void extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) = 0; virtual void inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) = 0; -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! copy the field from Host to Device void copyFromHostToDevice(); diff --git a/src/Field/Field1D.cpp b/src/Field/Field1D.cpp index d0fa18b2f..59f085f81 100755 --- a/src/Field/Field1D.cpp +++ b/src/Field/Field1D.cpp @@ -190,7 +190,7 @@ double Field1D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field1D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/Field1D.h b/src/Field/Field1D.h index 0ff09cd1e..228cc586f 100755 --- a/src/Field/Field1D.h +++ b/src/Field/Field1D.h @@ -92,7 +92,7 @@ class Field1D : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! 
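Field::put_to above attaches an if() clause to the OpenACC region so the same routine runs on the host when the buffer was never mapped, and on the device when it was. A minimal sketch of that conditional-offload idiom as a free function; the mapping query is the one used above:

    void put_to( double *data, int n, double val )
    {
    #if defined( SMILEI_ACCELERATOR_GPU_OACC )
        const bool on_device =
            smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( data );
        // Offload only if the pointer is mapped; otherwise the loop runs on the host.
        #pragma acc parallel present( data[0:n] ) if( on_device )
        #pragma acc loop gang worker vector
    #endif
        for( int i = 0; i < n; i++ ) {
            data[i] = val;
        }
    }
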
Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/Field2D.cpp b/src/Field/Field2D.cpp index a089a0d45..94051fed6 100755 --- a/src/Field/Field2D.cpp +++ b/src/Field/Field2D.cpp @@ -71,7 +71,7 @@ Field2D::~Field2D() for (int iside=0 ; iside<(int)(sendFields_.size()) ; iside++ ) { if ( sendFields_[iside] != NULL ) { -#if defined ( SMILEI_ACCELERATOR_MODE ) +#if defined ( SMILEI_ACCELERATOR_GPU ) if ( sendFields_[iside]->isOnDevice() ) { sendFields_[iside]->deleteOnDevice(); @@ -220,7 +220,7 @@ double Field2D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { @@ -247,7 +247,7 @@ double Field2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3 map(to: ny, idxlocalstart[0], idxlocalstart[1], iystart, iyend) \ /* is_device_ptr( data_ )*/ \ reduction(+:nrj) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(2) reduction(+:nrj) #endif @@ -333,7 +333,7 @@ void Field2D::create_sub_fields( int iDim, int iNeighbor, int ghost_size ) sendFields_[iDim*2+iNeighbor] = new Field2D(size); recvFields_[iDim*2+iNeighbor] = new Field2D(size); -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if( ( name[0] == 'B' ) || ( name[0] == 'J' || name[0] == 'R' ) ) { sendFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); recvFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); @@ -341,7 +341,7 @@ void Field2D::create_sub_fields( int iDim, int iNeighbor, int ghost_size ) #endif } else if ( ghost_size != (int)(sendFields_[iDim*2+iNeighbor]->dims_[iDim]) ) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) ERROR( "To Do GPU : envelope" ); #endif delete sendFields_[iDim*2+iNeighbor]; @@ -381,7 +381,7 @@ void Field2D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( should_manipulate_gpu_memory ) #pragma omp teams distribute parallel for collapse( 2 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "B") ); @@ -389,7 +389,7 @@ void Field2D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); const int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "B" ); @@ -437,7 +437,7 @@ void Field2D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); const int fSize = number_of_points_; bool fieldName( ((name.substr(0,1) == "J") || (name.substr(0,1) == "R") ) && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub )); @@ -486,7 +486,7 @@ void Field2D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); int fSize = number_of_points_; bool fieldName( name.substr(0,1) == 
"J" || name.substr(0,1) == "R"); @@ -535,7 +535,7 @@ void Field2D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -81,7 +81,7 @@ Field3D::~Field3D() for( unsigned int iside=0 ; isideisOnDevice() ) { @@ -102,7 +102,9 @@ Field3D::~Field3D() } } if( data_!=NULL ) { +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete (data_[0:number_of_points_]) if (acc_deviceptr(data_) != NULL) +#endif delete [] data_; for( unsigned int i=0; idata_3D[i]; @@ -248,7 +250,7 @@ double Field3D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } // Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { double nrj( 0. ); @@ -277,7 +279,7 @@ double Field3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3 map(to: ny, nz, ixstart, ixend, iystart, iyend, izstart, izend) \ /*is_device_ptr( data_ ) */ \ reduction(+:nrj) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field[0:number_of_points_]) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(3) reduction(+:nrj) #endif @@ -405,7 +407,7 @@ void Field3D::create_sub_fields ( int iDim, int iNeighbor, int ghost_size ) sendFields_[iDim*2+iNeighbor] = new Field3D(size); recvFields_[iDim*2+iNeighbor] = new Field3D(size); -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if( ( name[0] == 'B' ) || ( name[0] == 'J' || name[0] == 'R' ) ) { @@ -427,7 +429,7 @@ void Field3D::create_sub_fields ( int iDim, int iNeighbor, int ghost_size ) } else if( ghost_size != (int) sendFields_[iDim*2+iNeighbor]->dims_[iDim] ) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) ERROR( "To Do GPU : envelope" ); #endif delete sendFields_[iDim*2+iNeighbor]; @@ -463,7 +465,7 @@ void Field3D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( is_the_right_field ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "B") ); @@ -471,11 +473,11 @@ void Field3D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -514,7 +516,7 @@ void Field3D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) map( tofrom \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); const int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "B" ); @@ -522,11 +524,11 @@ void Field3D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) #pragma acc 
loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -566,7 +568,7 @@ void Field3D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) map( to \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "J") || (name.substr(0,1) == "R")); @@ -575,11 +577,11 @@ void Field3D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -618,7 +620,7 @@ void Field3D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) map( tofrom \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "J" || name.substr(0,1) == "R"); @@ -627,11 +629,11 @@ void Field3D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { diff --git a/src/Field/Field3D.h b/src/Field/Field3D.h index cc9524790..9f9ce4c9a 100755 --- a/src/Field/Field3D.h +++ b/src/Field/Field3D.h @@ -100,7 +100,7 @@ class Field3D : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField.h b/src/Field/cField.h index c37aa9514..d76de6ed7 100755 --- a/src/Field/cField.h +++ b/src/Field/cField.h @@ -63,7 +63,7 @@ class cField : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override = 0; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; #endif diff --git a/src/Field/cField1D.cpp b/src/Field/cField1D.cpp index 77b0c2685..6a79da95a 100755 --- a/src/Field/cField1D.cpp +++ b/src/Field/cField1D.cpp @@ -191,7 +191,7 @@ double cField1D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! 
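The norm2OnDevice implementations above express the field-energy sum as a collapsed triple-loop reduction on both backends. A trimmed sketch over full extents (the real code restricts the loops to istart/bufsize); field is assumed already resident on the device:

    double norm2_on_device( const double *field,
                            unsigned int nx, unsigned int ny, unsigned int nz )
    {
        double nrj = 0.;
    #if defined( SMILEI_ACCELERATOR_GPU_OMP )
        #pragma omp target teams distribute parallel for collapse( 3 ) \
            map( tofrom : nrj ) reduction( + : nrj )
    #elif defined( SMILEI_ACCELERATOR_GPU_OACC )
        #pragma acc parallel present( field[0:nx * ny * nz] )
        #pragma acc loop gang worker vector collapse( 3 ) reduction( + : nrj )
    #endif
        for( unsigned int i = 0; i < nx; i++ ) {
            for( unsigned int j = 0; j < ny; j++ ) {
                for( unsigned int k = 0; k < nz; k++ ) {
                    const double v = field[( i * ny + j ) * nz + k];
                    nrj += v * v;
                }
            }
        }
        return nrj;
    }
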
Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField1D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField1D.h b/src/Field/cField1D.h index 43f2030e3..27b15bfc1 100755 --- a/src/Field/cField1D.h +++ b/src/Field/cField1D.h @@ -94,7 +94,7 @@ class cField1D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField2D.cpp b/src/Field/cField2D.cpp index e1ca5560a..57ff6ea81 100755 --- a/src/Field/cField2D.cpp +++ b/src/Field/cField2D.cpp @@ -219,7 +219,7 @@ double cField2D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField2D.h b/src/Field/cField2D.h index d447d4f2e..26ee995c9 100755 --- a/src/Field/cField2D.h +++ b/src/Field/cField2D.h @@ -84,7 +84,7 @@ class cField2D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField3D.cpp b/src/Field/cField3D.cpp index 84510f401..f4249e134 100755 --- a/src/Field/cField3D.cpp +++ b/src/Field/cField3D.cpp @@ -218,7 +218,7 @@ double cField3D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField3D.h b/src/Field/cField3D.h index a81f293fc..0db1f6835 100755 --- a/src/Field/cField3D.h +++ b/src/Field/cField3D.h @@ -84,7 +84,7 @@ class cField3D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! 
Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Interpolator/Interpolator2D2Order.cpp b/src/Interpolator/Interpolator2D2Order.cpp index 0254294f5..795ab996d 100755 --- a/src/Interpolator/Interpolator2D2Order.cpp +++ b/src/Interpolator/Interpolator2D2Order.cpp @@ -180,7 +180,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, const double *const __restrict__ By2D = static_cast( EMfields->By_m )->data(); const double *const __restrict__ Bz2D = static_cast( EMfields->Bz_m )->data(); -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) const int sizeofEx = EMfields->Ex_->size(); const int sizeofEy = EMfields->Ey_->size(); const int sizeofEz = EMfields->Ez_->size(); @@ -207,7 +207,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, position_x /* [first_index:npart_range_size] */, \ position_y /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index; @@ -260,7 +260,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, delta[1*nparts+ipart] = delta_p[1]; } - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } else{ // with B-TIS3 interpolation @@ -276,7 +276,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, position_x /* [first_index:npart_range_size] */, \ position_y /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index; @@ -337,7 +337,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, delta[1*nparts+ipart] = delta_p[1]; } // end ipart loop - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } // end with B-TIS interpolation diff --git a/src/Interpolator/Interpolator3D2Order.cpp b/src/Interpolator/Interpolator3D2Order.cpp index 9e594f20b..f40239836 100755 --- a/src/Interpolator/Interpolator3D2Order.cpp +++ b/src/Interpolator/Interpolator3D2Order.cpp @@ -185,8 +185,6 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part int *const __restrict__ iold = smpi->dynamics_iold[ithread].data(); double *const __restrict__ delta = smpi->dynamics_deltaold[ithread].data(); - unsigned int buffer_size = smpi->dynamics_Epart[ithread].size(); - const double *const __restrict__ position_x = particles.getPtrPosition( 0 ); const double *const __restrict__ position_y = particles.getPtrPosition( 1 ); const double *const __restrict__ position_z = particles.getPtrPosition( 2 ); @@ -198,7 +196,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part const double *const __restrict__ By3D = EMfields->By_m->data_; const double *const __restrict__ Bz3D = EMfields->Bz_m->data_; -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) const int sizeofEx = EMfields->Ex_->size(); const int sizeofEy = EMfields->Ey_->size(); const 
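The interpolator wrappers above give *this a device lifetime scoped to the call: enter data create plus update device before the particle loop, exit data delete after it. A skeleton of that pattern; interpolate() stands in for the real member work and is hypothetical:

    void Interp::fieldsWrapper( double *Epart, const double *position_x,
                                int first_index, int last_index )
    {
    #if defined( SMILEI_ACCELERATOR_GPU_OACC )
        #pragma acc enter data create( this )   // allocate a device copy of the object
        #pragma acc update device( this )       // push current member values
        #pragma acc parallel deviceptr( Epart, position_x )
        #pragma acc loop gang worker vector
    #endif
        for( int ipart = first_index; ipart < last_index; ipart++ ) {
            Epart[ipart] = interpolate( position_x[ipart] ); // runs on device under OpenACC
        }
    #if defined( SMILEI_ACCELERATOR_GPU_OACC )
        #pragma acc exit data delete( this )    // release the device copy
    #endif
    }
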
int sizeofEz = EMfields->Ez_->size(); @@ -224,7 +222,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part position_y /* [first_index:npart_range_size] */, \ position_z /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 2 * nparts ) - first_index; @@ -282,7 +280,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part delta[1*nparts+ipart] = delta_p[1]; delta[2*nparts+ipart] = delta_p[2]; } - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } else { // with B-TIS3 interpolation @@ -302,7 +300,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part position_y /* [first_index:npart_range_size] */, \ position_z /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 2 * nparts ) - first_index; @@ -368,7 +366,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part delta[ipart+0*nparts] = delta_p[0]; delta[ipart+1*nparts] = delta_p[1]; delta[ipart+2*nparts] = delta_p[2]; - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } // end ipart loop diff --git a/src/Interpolator/Interpolator3D2Order.h b/src/Interpolator/Interpolator3D2Order.h index 52f0335a0..1fa07438d 100755 --- a/src/Interpolator/Interpolator3D2Order.h +++ b/src/Interpolator/Interpolator3D2Order.h @@ -59,7 +59,7 @@ class Interpolator3D2Order : public Interpolator3D int idx, int idy, int idz, - int nx, + int /*nx*/, int ny, int nz ) { diff --git a/src/MovWindow/SimWindow.cpp b/src/MovWindow/SimWindow.cpp index 6dbb5da57..4ee9781c7 100755 --- a/src/MovWindow/SimWindow.cpp +++ b/src/MovWindow/SimWindow.cpp @@ -383,7 +383,7 @@ void SimWindow::shift( VectorPatch &vecPatches, SmileiMPI *smpi, Params ¶ms, } // end loop nSpecies -#if defined ( SMILEI_ACCELERATOR_MODE ) +#if defined ( SMILEI_ACCELERATOR_GPU ) if( params.gpu_computing ) { for( auto spec: mypatch->vecSpecies ) { spec->allocateParticlesOnDevice(); @@ -398,7 +398,7 @@ void SimWindow::shift( VectorPatch &vecPatches, SmileiMPI *smpi, Params ¶ms, } // end test patch_particle_created[ithread][j] -#if defined ( SMILEI_ACCELERATOR_MODE ) +#if defined ( SMILEI_ACCELERATOR_GPU ) // if ( params.gpu_computing ) { // Initializes only field data structures, particle data structure are initialized separately mypatch->allocateAndCopyFieldsOnDevice(); diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp index 6f7b9e0df..8136f36ff 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp @@ -10,7 +10,7 @@ #include "MultiphotonBreitWheeler.h" #include "Species.h" -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #define __HIP_PLATFORM_NVCC__ #define __HIP_PLATFORM_NVIDIA__ #include "gpuRandom.h" @@ -248,7 +248,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, double *const __restrict__ pair1_chi = 
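Beyond the macro renames, the "fix many warnings" hunks silence -Wunused in three recurring ways: deleting a dead local (buffer_size above), commenting a declaration out where it still documents intent (the PML dA_over_dx terms, mpml_ratio), and anonymizing an unused parameter (int /*nx*/). A compact sketch of the three idioms, with illustrative names:

    // 1) Unused parameter: drop the name, keep it as a comment for readers.
    static int rowOffset( int i, int /*nx*/, int ny )
    {
        return i * ny;
    }

    void example( int i, int ny )
    {
        // 2) Dead local: simply delete it.
        // unsigned int buffer_size = ...;      // removed in the hunks above

        // 3) Declaration kept as documentation: comment it out in place.
        // double mpml_ratio = 0.00;            // term no longer used by the solver

        rowOffset( i, 0, ny );
    }
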
diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp
index 6f7b9e0df..8136f36ff 100755
--- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp
+++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp
@@ -10,7 +10,7 @@
 #include "MultiphotonBreitWheeler.h"
 #include "Species.h"
 
-#if defined(SMILEI_OPENACC_MODE)
+#if defined(SMILEI_ACCELERATOR_GPU_OACC)
 #define __HIP_PLATFORM_NVCC__
 #define __HIP_PLATFORM_NVIDIA__
 #include "gpuRandom.h"
@@ -248,7 +248,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
     double *const __restrict__ pair1_chi = new_pair[1]->has_quantum_parameter ? new_pair[1]->getPtrChi() : nullptr;
     double *const __restrict__ pair1_tau = new_pair[1]->has_Monte_Carlo_process ? new_pair[1]->getPtrTau() : nullptr;
 
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
     // Parameters for random generator
     unsigned long long seed;
     unsigned long long seq;
@@ -325,7 +325,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
                                Ex[ipart-ipart_ref], Ey[ipart-ipart_ref], Ez[ipart-ipart_ref],
                                Bx[ipart-ipart_ref], By[ipart-ipart_ref], Bz[ipart-ipart_ref] );
 
-#ifndef SMILEI_OPENACC_MODE
+#ifndef SMILEI_ACCELERATOR_GPU_OACC
         }
 
@@ -349,7 +349,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
             while( tau[ipart] <= epsilon_tau_ ) {
                 //tau[ipart] = -log( 1.-Rand::uniform() );
-#ifndef SMILEI_OPENACC_MODE
+#ifndef SMILEI_ACCELERATOR_GPU_OACC
                 tau[ipart] = -std::log( 1.-rand_->uniform() );
 #else
@@ -406,7 +406,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
             double pair_chi[2];
 
             // Draw random number in [0,1[
-#ifndef SMILEI_OPENACC_MODE
+#ifndef SMILEI_ACCELERATOR_GPU_OACC
             const double random_number = rand_->uniform();
 #else
             seed_curand_2 = (int) (ipart + 1)*(initial_seed_2 + 1); //Seed for linear generator
@@ -431,7 +431,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
             SMILEI_UNUSED( ibin );
             // Creation of new electrons in the temporary array new_pair[0]
             new_pair[0]->createParticles( mBW_pair_creation_sampling_[0] );
-#ifndef SMILEI_OPENACC_MODE
+#ifndef SMILEI_ACCELERATOR_GPU_OACC
             // Final size
             int nparticles = new_pair[0]->size();
 
@@ -442,7 +442,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
 #endif
 
             // For all new particles
-#ifndef SMILEI_OPENACC_MODE
+#ifndef SMILEI_ACCELERATOR_GPU_OACC
             #pragma omp simd
 #endif
             for( int ipair=i_pair_start; ipair < i_pair_start+mBW_pair_creation_sampling_[0]; ipair++ ) {
@@ -466,7 +466,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
                 } // + new_pair[k].momentum(i,ipair)*remaining_dt*inv_gamma;
 
-#ifndef SMILEI_OPENACC_MODE
+#ifndef SMILEI_ACCELERATOR_GPU_OACC
                 // Old positions
                 if( particles.keepOldPositions() ) {
                     pair0_position_old_x[ipair]=position_x[ipart] ;
@@ -494,7 +494,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
             // Create particle for the second pair species
             new_pair[1]->createParticles( mBW_pair_creation_sampling_[1] );
-#ifndef SMILEI_OPENACC_MODE
+#ifndef SMILEI_ACCELERATOR_GPU_OACC
             // Final size
             nparticles = new_pair[1]->size();
 
@@ -505,7 +505,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
 #endif
 
             // For all new particles
-#ifndef SMILEI_OPENACC_MODE
+#ifndef SMILEI_ACCELERATOR_GPU_OACC
             #pragma omp simd
 #endif
             for( auto ipair=i_pair_start; ipair < i_pair_start + mBW_pair_creation_sampling_[1]; ipair++ ) {
@@ -530,7 +530,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
                 } // + new_pair[k].momentum(i,ipair)*remaining_dt*inv_gamma;
 
-#ifndef SMILEI_OPENACC_MODE
+#ifndef SMILEI_ACCELERATOR_GPU_OACC
                 // Old positions
                 if( particles.keepOldPositions() ) {
                     pair1_position_old_x[ipair]=position_x[ipart] ;
@@ -629,7 +629,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles,
             }
         } // end ipart loop
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
     }
 #endif
 }
@@ -795,7 +795,7 @@ void MultiphotonBreitWheeler::removeDecayedPhotonsWithoutBinCompression(
             if( ipart < last_photon_index ) {
                 // The last existing photon comes to the position of
                 // the deleted photon
-#ifndef SMILEI_OPENACC_MODE
+#ifndef SMILEI_ACCELERATOR_GPU_OACC
                 particles.overwriteParticle( last_photon_index, ipart );
 #else
 #endif
diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h
index 6e14a37f3..71315d79a 100755
--- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h
+++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h
@@ -115,7 +115,7 @@ class MultiphotonBreitWheeler
 //! \param bmin Pointer toward the first particle index of the bin in the Particles object
 //! \param bmax Pointer toward the last particle index of the bin in the Particles object
 //! \param ithread Thread index
-//#ifdef SMILEI_OPENACC_MODE
+//#ifdef SMILEI_ACCELERATOR_GPU_OACC
 //    #pragma acc routine seq
 //#endif
     void removeDecayedPhotonsWithoutBinCompression(
diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h b/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h
index 4f7f1ce72..9bef108b6 100755
--- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h
+++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h
@@ -54,7 +54,7 @@ class MultiphotonBreitWheelerTables
     //! the multiphoton Breit-Wheeler pair creation
     //! \param photon_chi photon quantum parameter
     //! \param[out] pair_chi quantum parameters of the pair
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
     #pragma acc routine seq
 #endif
     void computePairQuantumParameter( const double photon_chi,
@@ -71,7 +71,7 @@ class MultiphotonBreitWheelerTables
     //! \param photon_chi photon quantum parameter
     //! \param gamma photon normalized energy
     // -----------------------------------------------------------------------------
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
     #pragma acc routine seq
 #endif
     double computeBreitWheelerPairProductionRate(
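The `#ifndef SMILEI_ACCELERATOR_GPU_OACC` branches above keep two random-number paths: the host draws from the shared `rand_` object, while the device derives a per-particle seed so threads never contend for generator state. The sampling itself is plain inversion of the exponential optical-depth law; a self-contained sketch, assuming a standard `<random>` engine stands in for the GPU generator (names are illustrative, not Smilei API):

```c++
#include <cmath>
#include <random>

// Draw an optical depth tau ~ Exp(1) by inverting the CDF, retrying until it
// exceeds a small epsilon, as the Monte Carlo event loop above does.
double drawOpticalDepth( std::mt19937_64 &engine, double epsilon_tau )
{
    std::uniform_real_distribution<double> uniform( 0.0, 1.0 ); // in [0,1)
    double tau = 0.0;
    while( tau <= epsilon_tau ) {
        tau = -std::log( 1.0 - uniform( engine ) ); // inverse CDF of Exp(1)
    }
    return tau;
}
```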
diff --git a/src/Params/Params.cpp b/src/Params/Params.cpp
index 803cdf9e5..b1fafcb09 100755
--- a/src/Params/Params.cpp
+++ b/src/Params/Params.cpp
@@ -837,7 +837,7 @@ Params::Params( SmileiMPI *smpi, std::vector<std::string> namelistsFiles ) :
     PyTools::extract( "gpu_computing", gpu_computing, "Main" );
     if( gpu_computing ) {
-#if( defined( SMILEI_OPENACC_MODE ) && defined( _OPENACC ) ) || defined( SMILEI_ACCELERATOR_GPU_OMP )
+#if( defined( SMILEI_ACCELERATOR_GPU_OACC ) && defined( _OPENACC ) ) || defined( SMILEI_ACCELERATOR_GPU_OMP )
         // If compiled for GPU and asking for GPU
         MESSAGE( 1, "Smilei will run on GPU devices" );
 #else
@@ -1055,21 +1055,21 @@ Params::Params( SmileiMPI *smpi, std::vector<std::string> namelistsFiles ) :
             // Extract the list of profiles and verify their content
             PyObject *p = PyTools::extract_py( "_profiles", "Laser", i_laser );
             vector<PyObject *> profiles;
-            vector<int> profiles_n = {1, 2};
             if( ! PyTools::py2pyvector( p, profiles ) ) {
                 ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile must be a list of 2 profiles", LINK_NAMELIST + std::string("#lasers") );
             }
             Py_DECREF( p );
-            if( profiles.size()!=2 ) {
+            if( profiles.size() != 2 ) {
                 ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile needs 2 profiles.", LINK_NAMELIST + std::string("#lasers") );
             }
-            if( profiles[1] == Py_None ) {
-                profiles .pop_back();
-                profiles_n.pop_back();
-            }
-            if( profiles[0] == Py_None ) {
-                profiles .erase( profiles .begin() );
-                profiles_n.erase( profiles_n.begin() );
+            vector<int> profiles_n;
+            for( unsigned int i = 0; i < 2; i++ ) {
+                if( profiles[i] == Py_None ) {
+                    Py_DECREF( profiles[i] );
+                    profiles.erase( profiles.begin() );
+                } else {
+                    profiles_n.push_back( i );
+                }
             }
             if( profiles.size() == 0 ) {
                 ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile cannot be [None, None]", LINK_NAMELIST + std::string("#lasers") );
@@ -1124,7 +1124,11 @@ Params::Params( SmileiMPI *smpi, std::vector<std::string> namelistsFiles ) :
                     propagateX( profiles, profiles_n, offset, file, keep_n_strongest_modes, angle_z );
                 }
             }
-
+
+            for( auto p: profiles ) {
+                Py_DECREF( p );
+            }
+
             n_laser_offset ++;
         }
     }
@@ -1227,7 +1231,7 @@ void Params::compute()
     // Set cluster_width_ if not set by the user
     if( cluster_width_ == -1 ) {
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
         cluster_width_ = patch_size_[0];
         // On GPU, don't do the CPU automatic cluster_width computation, only one
         // bin is expected.
@@ -1276,7 +1280,7 @@ void Params::compute()
     // Verify that cluster_width_ divides patch_size_[0] or patch_size_[n] in GPU mode
-#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC )
     const int kClusterWidth = getGPUClusterWidth();
 
     if( kClusterWidth < 0 ) {
@@ -1886,7 +1890,7 @@ string Params::speciesField( string field_name )
     return "";
 }
 
-#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC )
 bool Params::isGPUParticleBinningAvailable() const
 {
@@ -1903,7 +1907,7 @@ bool Params::isGPUParticleBinningAvailable() const
 #endif
 
-#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC )
 int Params::getGPUClusterWidth() const
 {
diff --git a/src/Params/Params.h b/src/Params/Params.h
index e2b0603e6..32bf63a37 100755
--- a/src/Params/Params.h
+++ b/src/Params/Params.h
@@ -386,7 +386,7 @@ class Params
     //!
     bool isGPUParticleBinningAvailable() const;
 
-#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC )
     //! Given dimension_id in [0, 3), return for dimension_id == :
     //!     1: the 1D value (not implemented)
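The LaserOffset rework above also tightens CPython reference counting: the `Py_None` entries that get filtered out are now `Py_DECREF`'d at the point of removal, and the profiles that were kept are released once `propagateX` is done with them. The underlying discipline, sketched under the assumption that the container owns one reference per element (function name illustrative):

```c++
#include <Python.h>
#include <vector>

// Every PyObject* extracted from a Python list carries one owned reference;
// each must be released exactly once, whether discarded early or used first.
void releaseProfiles( std::vector<PyObject *> &profiles )
{
    for( PyObject *p : profiles ) {
        Py_DECREF( p ); // give back the reference taken at extraction time
    }
    profiles.clear();
}
```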
diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp
index 5a55d74b2..304656eca 100755
--- a/src/ParticleBC/BoundaryConditionType.cpp
+++ b/src/ParticleBC/BoundaryConditionType.cpp
@@ -18,7 +18,7 @@ void internal_inf( Species *species, int imin, int imax, int direction, double l
     energy_change = 0.;     // no energy loss during exchange
     const double* const position = species->particles->getPtrPosition( direction );
     int* const cell_keys = species->particles->getPtrCellKeys();
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
     #pragma acc parallel deviceptr(position,cell_keys)
     #pragma acc loop gang worker vector
#elif defined( SMILEI_ACCELERATOR_GPU_OMP )
@@ -40,7 +40,7 @@ void internal_sup( Species *species, int imin, int imax, int direction, double l
     energy_change = 0.;     // no energy loss during exchange
     const double* const position = species->particles->getPtrPosition( direction );
     int* const cell_keys = species->particles->getPtrCellKeys();
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
     #pragma acc parallel deviceptr(position,cell_keys)
     #pragma acc loop gang worker vector
#elif defined( SMILEI_ACCELERATOR_GPU_OMP )
@@ -92,7 +92,7 @@ void reflect_particle_inf( Species *species, int imin, int imax, int direction,
     energy_change = 0.;     // no energy loss during reflection
     double* position = species->particles->getPtrPosition(direction);
     double* momentum = species->particles->getPtrMomentum(direction);
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
     #pragma acc parallel deviceptr(position,momentum)
     #pragma acc loop gang worker vector
#elif defined( SMILEI_ACCELERATOR_GPU_OMP )
@@ -112,7 +112,7 @@ void reflect_particle_sup( Species *species, int imin, int imax, int direction,
     energy_change = 0.;     // no energy loss during reflection
     double* position = species->particles->getPtrPosition(direction);
     double* momentum = species->particles->getPtrMomentum(direction);
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
     #pragma acc parallel deviceptr(position,momentum)
     #pragma acc loop gang worker vector
#elif defined( SMILEI_ACCELERATOR_GPU_OMP )
@@ -189,9 +189,9 @@ void remove_particle_inf( Species* species,
                           int imin, int imax,
                           int direction,
                           double limit_inf,
-                          double dt,
-                          std::vector<double>& invgf,
-                          Random* rand,
+                          double /*dt*/,
+                          std::vector<double>& /*invgf*/,
+                          Random* /*rand*/,
                           double& energy_change )
 {
@@ -210,7 +210,7 @@ void remove_particle_inf( Species* species,
                                            : change_in_energy )
     #pragma omp teams distribute parallel for reduction( + \
                                            : change_in_energy )
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight,charge,cell_keys)
     #pragma acc loop gang worker vector reduction(+ : change_in_energy)
 #else
@@ -235,9 +235,9 @@ void remove_particle_sup( Species* species,
                           int imin, int imax,
                           int direction,
                           double limit_sup,
-                          double dt,
-                          std::vector<double>& invgf,
-                          Random* rand,
+                          double /*dt*/,
+                          std::vector<double>& /*invgf*/,
+                          Random* /*rand*/,
                           double& energy_change )
 {
@@ -256,7 +256,7 @@ void remove_particle_sup( Species* species,
                                            : change_in_energy )
     #pragma omp teams distribute parallel for reduction( + \
                                            : change_in_energy )
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight,charge,cell_keys)
     #pragma acc loop gang worker vector reduction(+ : change_in_energy)
 #else
diff --git a/src/ParticleBC/PartBoundCond.h b/src/ParticleBC/PartBoundCond.h
index 47ab7e235..7afd6ca9c 100755
--- a/src/ParticleBC/PartBoundCond.h
+++ b/src/ParticleBC/PartBoundCond.h
@@ -44,7 +44,7 @@ class PartBoundCond
         } else {
             int *const cell_keys = species->particles->getPtrCellKeys();
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
             #pragma acc parallel deviceptr( cell_keys )
             #pragma acc loop gang worker vector
#elif defined( SMILEI_ACCELERATOR_GPU_OMP )
diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp
index 34eaeb161..30c685155 100755
--- a/src/Particles/Particles.cpp
+++ b/src/Particles/Particles.cpp
@@ -1311,7 +1311,7 @@ void Particles::copyLeavingParticlesToBuffers( const vector<bool> copy, const ve
     // where direction goes from 0 to 6 and tells which way the particle escapes.
     // If the cell_key is -1, the particle must be destroyed so it is not extracted.
 
-#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC )
 
     // GPU
 
@@ -1398,13 +1398,13 @@ int Particles::eraseLeavingParticles()
     return 0;
 }
 
-int Particles::injectParticles( Particles *particles_to_inject )
+int Particles::injectParticles( Particles */*particles_to_inject*/ )
 {
     ERROR( "Device only feature, should not have come here! On CPU it's done in sortParticles." );
     return 0;
 }
 
-void Particles::importAndSortParticles( Particles *particles_to_inject )
+void Particles::importAndSortParticles( Particles */*particles_to_inject*/ )
 {
     ERROR( "Device only feature, should not have come here! On CPU it's done in sortParticles." );
 }
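The comment in `copyLeavingParticlesToBuffers` states the convention this patch leans on: a negative `cell_key` encodes the particle's fate, `-1` meaning destruction and more negative values tagging an exit direction in [0, 6]. A minimal sketch of such a decoder, under the stated assumption that exits are stored as `-2 - direction` (the exact offset lives in the boundary-condition kernels, not here):

```c++
// Decode a cell key: key >= 0 keeps the particle, -1 destroys it, and
// anything below -1 maps to an exchange-buffer direction. The -2 offset
// is an assumption for illustration, not a quote of the Smilei encoding.
inline int exitDirectionFromKey( int cell_key )
{
    if( cell_key >= -1 ) {
        return -1;            // kept or destroyed: no exchange buffer
    }
    return -2 - cell_key;     // -2 -> 0, -3 -> 1, ..., -8 -> 6
}
```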
diff --git a/src/Particles/ParticlesFactory.cpp b/src/Particles/ParticlesFactory.cpp
index 00f51bbb0..34e9a3a83 100755
--- a/src/Particles/ParticlesFactory.cpp
+++ b/src/Particles/ParticlesFactory.cpp
@@ -7,7 +7,7 @@
 // -----------------------------------------------------------------------------
 #include "ParticlesFactory.h"
 
-#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP )
 extern "C" void* CreateGPUParticles( const void* parameters, const void* a_parent_patch );
 #endif
 
@@ -22,7 +22,7 @@ Particles* ParticlesFactory::create( const Params& parameters,
     // We export a C interface to avoid potential ABI problems
     // that could occur when using two different compilers (e.g., one to
     // compile cuda/hip and another one for the host code).
-#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP )
     particles = static_cast<Particles *>( CreateGPUParticles( &parameters, &a_parent_patch ) );
 #else
     SMILEI_UNUSED( a_parent_patch );
diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp
index 8fa4022aa..ca76c6ece 100755
--- a/src/Patch/Patch.cpp
+++ b/src/Patch/Patch.cpp
@@ -445,7 +445,7 @@ void Patch::setLocationAndAllocateFields( Params &params, DomainDecomposition *d
 Patch::~Patch()
 {
-#ifdef SMILEI_ACCELERATOR_MODE
+#ifdef SMILEI_ACCELERATOR_GPU
     deleteFieldsOnDevice();
 #endif
 
@@ -1153,7 +1153,7 @@ void Patch::computePoynting()
         }
     }
 }
-#ifdef SMILEI_ACCELERATOR_MODE
+#ifdef SMILEI_ACCELERATOR_GPU
 
 // ---------------------------------------------------------------------------------------------------------------------
 // Allocate data on device
diff --git a/src/Patch/Patch.h b/src/Patch/Patch.h
index ff5a76a5c..8d06d21c2 100755
--- a/src/Patch/Patch.h
+++ b/src/Patch/Patch.h
@@ -194,7 +194,7 @@ class Patch
     //! delete Particles included in the index of particles to exchange. Assumes indexes are sorted.
     void cleanupSentParticles( int ispec, std::vector<int> *indexes_of_particles_to_exchange );
 
-#ifdef SMILEI_ACCELERATOR_MODE
+#ifdef SMILEI_ACCELERATOR_GPU
 
     //! Allocate and copy all the field grids on device
     void allocateAndCopyFieldsOnDevice();
diff --git a/src/Patch/SyncVectorPatch.cpp b/src/Patch/SyncVectorPatch.cpp
index 675529113..7f2cd183e 100755
--- a/src/Patch/SyncVectorPatch.cpp
+++ b/src/Patch/SyncVectorPatch.cpp
@@ -2,7 +2,7 @@
 #include "SyncVectorPatch.h"
 
 #include <vector>
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
 #include <openacc.h>
 #endif
 #include "Params.h"
@@ -269,7 +269,7 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
                 vecPatches.densitiesMPIx[ifield             ]->extract_fields_sum( 0, iNeighbor, oversize[0] );
                 vecPatches.densitiesMPIx[ifield+nPatchMPIx  ]->extract_fields_sum( 0, iNeighbor, oversize[0] );
                 vecPatches.densitiesMPIx[ifield+2*nPatchMPIx]->extract_fields_sum( 0, iNeighbor, oversize[0] );
-// #ifdef SMILEI_OPENACC_MODE
+// #ifdef SMILEI_ACCELERATOR_GPU_OACC
 //                 Field* field = vecPatches.densitiesMPIx[ifield ];
 //                 double* Jx = field->sendFields_[iNeighbor]->data_;
 //                 int sizeofJx = field->sendFields_[iNeighbor]->size();
@@ -291,7 +291,7 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
     // iDim = 0, local
     const int nFieldLocalx = vecPatches.densitiesLocalx.size() / 3;
 
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
     // At initialization, we may get a CPU buffer that needs to be handled on the host.
     const bool is_memory_on_device = vecPatches.densitiesLocalx.size() > 0 &&
                                      smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocalx[0]->data() );
@@ -324,9 +324,9 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
             pt2 = &( vecPatches.densitiesLocalx[ifield]->data_[0] );
             //Sum 2 ==> 1
 
-            const int last = gsp[0] * ny_ * nz_;
+            const unsigned int last = gsp[0] * ny_ * nz_;
 
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
             int ptsize = vecPatches.densitiesLocalx[ifield]->size();
             int nspace0 = size[0];
             #pragma acc parallel if ( is_memory_on_device) present(pt1[0-nspace0*ny_*nz_:ptsize],pt2[0:ptsize])
@@ -358,7 +358,7 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
             vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIx[ifield+2*nPatchMPIx], 0 ); // Jz
             for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) {
                 if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, ( iNeighbor+1 )%2 ) ) {
-// #ifdef SMILEI_OPENACC_MODE
+// #ifdef SMILEI_ACCELERATOR_GPU_OACC
 //                     Field* field = vecPatches.densitiesMPIx[ifield ];
 //                     double* Jx = field->recvFields_[(iNeighbor+1)%2]->data_;
 //                     int sizeofJx = field->recvFields_[(iNeighbor+1)%2]->size();
@@ -402,7 +402,7 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
                 vecPatches.densitiesMPIy[ifield             ]->extract_fields_sum( 1, iNeighbor, oversize[1] );
                 vecPatches.densitiesMPIy[ifield+nPatchMPIy  ]->extract_fields_sum( 1, iNeighbor, oversize[1] );
                 vecPatches.densitiesMPIy[ifield+2*nPatchMPIy]->extract_fields_sum( 1, iNeighbor, oversize[1] );
-// #ifdef SMILEI_OPENACC_MODE
+// #ifdef SMILEI_ACCELERATOR_GPU_OACC
 //                 Field* field = vecPatches.densitiesMPIy[ifield ];
 //                 double* Jx = field->sendFields_[iNeighbor+2]->data_;
 //                 int sizeofJx = field->sendFields_[iNeighbor+2]->size();
@@ -424,7 +424,7 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
     // iDim = 1,
     const int nFieldLocaly = vecPatches.densitiesLocaly.size() / 3;
 
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
     const bool is_memory_on_device = vecPatches.densitiesLocaly.size() > 0 &&
                                      smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocaly[0]->data() );
 #endif
@@ -457,11 +457,11 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
             pt1 = &( fields[vecPatches( ipatch )->neighbor_[1][0]-h0+icomp*nPatches]->data_[size[1]*nz_] );
             pt2 = &( vecPatches.densitiesLocaly[ifield]->data_[0] );
 
-            const int outer_last   = nx_ * ny_ * nz_;
-            const int outer_stride = ny_ * nz_;
-            const int inner_last   = gsp[1] * nz_;
+            const unsigned int outer_last   = nx_ * ny_ * nz_;
+            const unsigned int outer_stride = ny_ * nz_;
+            const unsigned int inner_last   = gsp[1] * nz_;
 
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
             int ptsize = vecPatches.densitiesLocaly[ifield]->size();
             int blabla = size[1];
             #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla*nz_:ptsize],pt2[0:ptsize])
@@ -496,7 +496,7 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
             vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIy[ifield+2*nPatchMPIy], 1 ); // Jz
             for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) {
                 if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, ( iNeighbor+1 )%2 ) ) {
-// #ifdef SMILEI_OPENACC_MODE
+// #ifdef SMILEI_ACCELERATOR_GPU_OACC
 //                     Field* field = vecPatches.densitiesMPIy[ifield ];
 //                     double* Jx = field->recvFields_[(iNeighbor+1)%2+2]->data_;
 //                     int sizeofJx = field->recvFields_[(iNeighbor+1)%2+2]->size();
@@ -538,7 +538,7 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
                 vecPatches.densitiesMPIz[ifield             ]->extract_fields_sum( 2, iNeighbor, oversize[2] );
                 vecPatches.densitiesMPIz[ifield+nPatchMPIz  ]->extract_fields_sum( 2, iNeighbor, oversize[2] );
                 vecPatches.densitiesMPIz[ifield+2*nPatchMPIz]->extract_fields_sum( 2, iNeighbor, oversize[2] );
-// #ifdef SMILEI_OPENACC_MODE
+// #ifdef SMILEI_ACCELERATOR_GPU_OACC
 //                 Field* field = vecPatches.densitiesMPIz[ifield ];
 //                 double* Jx = field->sendFields_[iNeighbor+4]->data_;
 //                 int sizeofJx = field->sendFields_[iNeighbor+4]->size();
@@ -560,7 +560,7 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
     // iDim = 2 local
     const int nFieldLocalz = vecPatches.densitiesLocalz.size() / 3;
 
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
    const bool is_memory_on_device = vecPatches.densitiesLocalz.size() > 0 &&
                                     smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocalz[0]->data() );
 #endif
@@ -594,11 +594,11 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
             pt1 = &( fields[vecPatches( ipatch )->neighbor_[2][0]-h0+icomp*nPatches]->data_[size[2]] );
             pt2 = &( vecPatches.densitiesLocalz[ifield]->data_[0] );
 
-            const int outer_last   = nx_ * ny_ * nz_;
-            const int outer_stride = nz_;
-            const int inner_last   = gsp[2];
+            const unsigned int outer_last   = nx_ * ny_ * nz_;
+            const unsigned int outer_stride = nz_;
+            const unsigned int inner_last   = gsp[2];
 
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
             int ptsize = vecPatches.densitiesLocalz[ifield]->size();
             int blabla = size[2];
             #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla:ptsize],pt2[0:ptsize])
@@ -630,7 +630,7 @@ void SyncVectorPatch::sumAllComponents( std::vector<Field *> &fields, VectorPatc
             vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIz[ifield+2*nPatchMPIz], 2 ); // Jz
             for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) {
                 if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, ( iNeighbor+1 )%2 ) ) {
-// #ifdef SMILEI_OPENACC_MODE
+// #ifdef SMILEI_ACCELERATOR_GPU_OACC
 //                     Field* field = vecPatches.densitiesMPIz[ifield ];
 //                     double* Jx = field->recvFields_[(iNeighbor+1)%2+4]->data_;
 //                     int sizeofJx = field->recvFields_[(iNeighbor+1)%2+4]->size();
@@ -797,7 +797,7 @@ void SyncVectorPatch::exchangeE( Params &, VectorPatch &vecPatches, int imode, S
     SyncVectorPatch::finalizeExchangeAlongAllDirections( vecPatches.listEt_[imode], vecPatches );
 }
 
-void SyncVectorPatch::exchangeBmBTIS3( Params &params, VectorPatch &vecPatches, int imode, SmileiMPI *smpi )
+void SyncVectorPatch::exchangeBmBTIS3( Params &/*params*/, VectorPatch &vecPatches, int imode, SmileiMPI *smpi )
 {
     SyncVectorPatch::exchangeAlongAllDirections<complex<double>,cField>( vecPatches.listBr_mBTIS3[imode], vecPatches, smpi );
     SyncVectorPatch::finalizeExchangeAlongAllDirections( vecPatches.listBr_mBTIS3[imode], vecPatches );
@@ -881,7 +881,7 @@ void SyncVectorPatch::exchangeEnvEx( Params &params, VectorPatch &vecPatches, Sm
     }
 }
 
-void SyncVectorPatch::exchangeBmBTIS3( Params &params, VectorPatch &vecPatches, SmileiMPI *smpi )
+void SyncVectorPatch::exchangeBmBTIS3( Params &/*params*/, VectorPatch &vecPatches, SmileiMPI *smpi )
 {
     // exchange BmBTIS3 in Cartesian geometries
     // exchange ByBTIS3
@@ -1487,7 +1487,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongX( std::vector<Field *> &fields,
                 vecPatches.B_MPIx[ifield      ]->extract_fields_exch( 0, iNeighbor, oversize );
                 vecPatches.B_MPIx[ifield+nMPIx]->create_sub_fields  ( 0, iNeighbor, oversize );
                 vecPatches.B_MPIx[ifield+nMPIx]->extract_fields_exch( 0, iNeighbor, oversize );
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
                 Field* field = vecPatches.B_MPIx[ifield ];
                 double* By = field->sendFields_[iNeighbor]->data_;
                 int sizeofBy = field->sendFields_[iNeighbor]->size();
@@ -1580,7 +1580,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongX( VectorPatch &vecPatch
             vecPatches( ipatch )->finalizeExchange( vecPatches.B_MPIx[ifield+nMPIx], 0 ); // Bz
             for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) {
                 if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, ( iNeighbor+1 )%2 ) ) {
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
                     Field* field = vecPatches.B_MPIx[ifield ];
                     double* By = field->recvFields_[(iNeighbor+1)%2]->data_;
                     int sizeofBy = field->recvFields_[(iNeighbor+1)%2]->size();
@@ -1623,7 +1623,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongY( std::vector<Field *> &fields,
                 vecPatches.B1_MPIy[ifield      ]->extract_fields_exch( 1, iNeighbor, oversize );
                 vecPatches.B1_MPIy[ifield+nMPIy]->create_sub_fields  ( 1, iNeighbor, oversize );
                 vecPatches.B1_MPIy[ifield+nMPIy]->extract_fields_exch( 1, iNeighbor, oversize );
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
                 Field* field = vecPatches.B1_MPIy[ifield ];
                 double* Bx = field->sendFields_[iNeighbor+2]->data_;
                 int sizeofBx = field->sendFields_[iNeighbor+2]->size();
@@ -1671,7 +1671,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongY( std::vector<Field *> &fields,
         if( vecPatches( ipatch )->MPI_me_ == vecPatches( ipatch )->MPI_neighbor_[1][0] ) {
             pt1 = &( fields[vecPatches( ipatch )->neighbor_[1][0]-h0+icomp*nPatches]->data_[size*nz_] );
             pt2 = &( vecPatches.B1_localy[ifield]->data_[0] );
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
             int ptsize = vecPatches.B1_localy[ifield]->size();
             #pragma acc parallel present(pt1[0-size*nz_:ptsize],pt2[0:ptsize])
             #pragma acc loop gang worker vector
@@ -1711,7 +1711,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongY( VectorPatch &vecPatch
             vecPatches( ipatch )->finalizeExchange( vecPatches.B1_MPIy[ifield+nMPIy], 1 ); // Bz
             for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) {
                 if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, ( iNeighbor+1 )%2 ) ) {
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
                     Field* field = vecPatches.B1_MPIy[ifield ];
                     double* Bx = field->recvFields_[(iNeighbor+1)%2+2]->data_;
                     int sizeofBx = field->recvFields_[(iNeighbor+1)%2+2]->size();
@@ -1754,7 +1754,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongZ( std::vector<Field *> fields,
                 vecPatches.B2_MPIz[ifield      ]->extract_fields_exch( 2, iNeighbor, oversize );
                 vecPatches.B2_MPIz[ifield+nMPIz]->create_sub_fields  ( 2, iNeighbor, oversize );
                 vecPatches.B2_MPIz[ifield+nMPIz]->extract_fields_exch( 2, iNeighbor, oversize );
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
                 Field* field = vecPatches.B2_MPIz[ifield ];
                 double* Bx = field->sendFields_[iNeighbor+4]->data_;
                 int sizeofBx = field->sendFields_[iNeighbor+4]->size();
@@ -1799,7 +1799,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongZ( std::vector<Field *> fields,
         if( vecPatches( ipatch )->MPI_me_ == vecPatches( ipatch )->MPI_neighbor_[2][0] ) {
             pt1 = &( fields[vecPatches( ipatch )->neighbor_[2][0]-h0+icomp*nPatches]->data_[size] );
             pt2 = &( vecPatches.B2_localz[ifield]->data_[0] );
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
             int ptsize = vecPatches.B2_localz[ifield]->size();
             #pragma acc parallel present(pt1[0-size:ptsize],pt2[0:ptsize])
             #pragma acc loop gang worker vector
@@ -1839,7 +1839,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongZ( VectorPatch &vecPatch
             vecPatches( ipatch )->finalizeExchange( vecPatches.B2_MPIz[ifield+nMPIz], 2 ); // By
             for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) {
                 if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, ( iNeighbor+1 )%2 ) ) {
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
                     Field* field = vecPatches.B2_MPIz[ifield ];
                     double* Bx = field->recvFields_[(iNeighbor+1)%2+4]->data_;
                     int sizeofBx = field->recvFields_[(iNeighbor+1)%2+4]->size();
diff --git a/src/Patch/SyncVectorPatch.h b/src/Patch/SyncVectorPatch.h
index 0322c1283..07435cd49 100755
--- a/src/Patch/SyncVectorPatch.h
+++ b/src/Patch/SyncVectorPatch.h
@@ -73,7 +73,7 @@ public :
                 if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, iNeighbor ) ) {
                     fields[ifield]->create_sub_fields ( 0, iNeighbor, 2*oversize[0]+1+fields[ifield]->isDual_[0] );
                     fields[ifield]->extract_fields_sum( 0, iNeighbor, oversize[0] );
-// #ifdef SMILEI_OPENACC_MODE
+// #ifdef SMILEI_ACCELERATOR_GPU_OACC
 //                     double * pointer = fields[ifield]->sendFields_[iNeighbor]->data_;
 //                     int size = fields[ifield]->size();
 // #endif
@@ -87,7 +87,7 @@ public :
 
         // iDim = 0, local
 
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
         // At initialization, we may get a CPU buffer that needs to be handled on the host.
         const bool is_memory_on_device = fields.size() > 0 &&
                                          smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() );
@@ -123,7 +123,7 @@ public :
 
                 const unsigned int last = gsp[0] * ny_ * nz_;
 
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
                 int ptsize = fields[ifield]->size();
                 int nspace0 = size[0];
                 #pragma acc parallel if ( is_memory_on_device) present(pt1[0-nspace0*ny_*nz_:ptsize],pt2[0:ptsize])
@@ -177,7 +177,7 @@ public :
                 if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, iNeighbor ) ) {
                     fields[ifield]->create_sub_fields ( 1, iNeighbor, 2*oversize[1]+1+fields[ifield]->isDual_[1] );
                     fields[ifield]->extract_fields_sum( 1, iNeighbor, oversize[1] );
-// #ifdef SMILEI_OPENACC_MODE
+// #ifdef SMILEI_ACCELERATOR_GPU_OACC
 //                     double* pointer = fields[ifield]->recvFields_[(iNeighbor+1)%2]->data_;
 //                     int size = fields[ifield]->recvFields_[(iNeighbor+1)%2]->size();
 //                     //#pragma acc update device( Jx[0:sizeofJx], Jy[0:sizeofJy], Jz[0:sizeofJz] )
@@ -192,7 +192,7 @@ public :
 
         // iDim = 1, local
 
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
         const bool is_memory_on_device = fields.size() > 0 &&
                                          smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() );
 #endif
@@ -220,11 +220,11 @@ public :
                 pt1 = &( *field1 )( size[1]*nz_ );
                 pt2 = &( *field2 )( 0 );
 
-                const int outer_last   = nx_ * ny_ * nz_;
-                const int outer_stride = ny_ * nz_;
-                const int inner_last   = gsp[1] * nz_;
+                const unsigned int outer_last   = nx_ * ny_ * nz_;
+                const unsigned int outer_stride = ny_ * nz_;
+                const unsigned int inner_last   = gsp[1] * nz_;
 
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
                 int ptsize = fields[ifield]->size();
                 int blabla = size[1];
                 #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla*nz_:ptsize],pt2[0:ptsize])
@@ -282,7 +282,7 @@ public :
                 if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, iNeighbor ) ) {
                     fields[ifield]->create_sub_fields ( 2, iNeighbor, 2*oversize[2]+1+fields[ifield]->isDual_[2] );
                     fields[ifield]->extract_fields_sum( 2, iNeighbor, oversize[2] );
-// #ifdef SMILEI_OPENACC_MODE
+// #ifdef SMILEI_ACCELERATOR_GPU_OACC
 //                     double* pointer = fields[ifield]->recvFields_[(iNeighbor+1)%2+2]->data_;
 //                     int size = fields[ifield]->recvFields_[(iNeighbor+1)%2+2]->size();
 // #endif
@@ -293,7 +293,7 @@ public :
 
         // iDim = 2 local
 
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
         const bool is_memory_on_device = fields.size() > 0 &&
                                          smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() );
 #endif
@@ -321,11 +321,11 @@ public :
                 pt1 = &( *field1 )( size[2] );
                 pt2 = &( *field2 )( 0 );
 
-                const int outer_last   = nx_ * ny_ * nz_;
-                const int outer_stride = nz_;
-                const int inner_last   = gsp[2];
+                const unsigned int outer_last   = nx_ * ny_ * nz_;
+                const unsigned int outer_stride = nz_;
+                const unsigned int inner_last   = gsp[2];
 
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
                 int ptsize = fields[ifield]->size();
                 int blabla = size[2];
                 #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla:ptsize],pt2[0:ptsize])
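The `is_memory_on_device` flag combined with `#pragma acc parallel if(...)` lets a single loop serve both host-resident and device-resident buffers: when the condition is false the region simply executes on the host. A minimal sketch of the idiom, assuming an OpenACC compiler (function and variable names are illustrative):

```c++
// Conditional offload: one loop body, two execution targets.
// If on_device is false, OpenACC falls back to host execution.
void scaleBuffer( double *pt, int n, bool on_device )
{
    #pragma acc parallel if( on_device ) present( pt[0:n] )
    #pragma acc loop gang worker vector
    for( int i = 0; i < n; i++ ) {
        pt[i] *= 2.0; // illustrative body
    }
}
```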
diff --git a/src/Patch/VectorPatch.cpp b/src/Patch/VectorPatch.cpp
index 65d68f28c..42f4dd3d8 100755
--- a/src/Patch/VectorPatch.cpp
+++ b/src/Patch/VectorPatch.cpp
@@ -301,7 +301,7 @@ void VectorPatch::reconfiguration( Params &params, Timers &timers, int itime )
 // ---------------------------------------------------------------------------------------------------------------------
 void VectorPatch::initialParticleSorting( Params &params )
 {
-#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE)
+#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC)
     // Initially I wanted to control the GPU particle sorting/bin initialization
     // here. In the end it was put in initializeDataOnDevice which is more
     // meaningful.
@@ -853,7 +853,7 @@ void VectorPatch::sumDensities( Params &params, double time_dual, Timers &timers
     #pragma omp for schedule(static)
     for( unsigned int ipatch=0 ; ipatch<this->size() ; ipatch++ ) {
         // Per species in global, Attention if output -> Sync / per species fields
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
         // At itime == 0, data is still located on the Host
         if (itime == 0) {
             ( *this )( ipatch )->EMfields->computeTotalRhoJ();
@@ -1269,7 +1269,7 @@ void VectorPatch::closeAllDiags( SmileiMPI *smpi )
 // ---------------------------------------------------------------------------------------------------------------------
 void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int itime, Timers &timers, SimWindow *simWindow )
 {
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
     bool data_on_cpu_updated = false;
 #endif
 
@@ -1277,7 +1277,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int
     timers.diags.restart();
 
     // Determine which data is required from the device
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
     bool need_particles = false;
     bool need_fields    = false;
 
@@ -1346,7 +1346,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int
     for( unsigned int idiag = 0 ; idiag < globalDiags.size() ; idiag++ ) {
         diag_timers_[idiag]->restart();
 
-// #if defined( SMILEI_ACCELERATOR_MODE)
+// #if defined( SMILEI_ACCELERATOR_GPU)
 //         if( globalDiags[idiag]->timeSelection->theTimeIsNow( itime ) &&
 //             !data_on_cpu_updated &&
 //             ( itime > 0 ) ) {
@@ -1462,7 +1462,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int
     for( unsigned int idiag = 0 ; idiag < localDiags.size() ; idiag++ ) {
         diag_timers_[globalDiags.size()+idiag]->restart();
 
-// #if defined( SMILEI_ACCELERATOR_MODE )
+// #if defined( SMILEI_ACCELERATOR_GPU )
 //         if( localDiags[idiag]->timeSelection->theTimeIsNow( itime ) &&
 //             !data_on_cpu_updated &&
 //             ( itime > 0 ) ) {
@@ -1496,7 +1496,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int
         for( unsigned int ipatch=0 ; ipatch<size() ; ipatch++ ) {
             ( *this )( ipatch )->EMfields->restartRhoJs();
-#if defined (SMILEI_ACCELERATOR_MODE)
+#if defined (SMILEI_ACCELERATOR_GPU)
             // Delete species current and rho grids from device
             for( unsigned int ispec = 0; ispec < ( *this )( ipatch )->vecSpecies.size(); ispec++ ) {
                 ( *this )( ipatch )->vecSpecies[ispec]->Species::deleteSpeciesCurrentAndChargeOnDevice(ispec, ( *this )( ipatch )->EMfields);
@@ -4402,7 +4402,7 @@ void VectorPatch::moveWindow(
     // Bring all particles and field grids to the Host (except species grids)
     // This part can be optimized by copying only the patch to be destructed
-#if defined( SMILEI_ACCELERATOR_MODE)
+#if defined( SMILEI_ACCELERATOR_GPU)
     if( simWindow->isMoving( time_dual ) || itime == simWindow->getAdditionalShiftsIteration() ) {
         copyParticlesFromDeviceToHost();
         copyFieldsFromDeviceToHost();
@@ -4412,10 +4412,11 @@ void VectorPatch::moveWindow(
 
     simWindow->shift( (*this), smpi, params, itime, time_dual, region );
 
-    if (itime == simWindow->getAdditionalShiftsIteration() ) {
+    if( itime == (int) simWindow->getAdditionalShiftsIteration() ) {
         int adjust = simWindow->isMoving(time_dual)?0:1;
-        for (unsigned int n=0;n < simWindow->getNumberOfAdditionalShifts()-adjust; n++)
+        for( unsigned int n=0; n < simWindow->getNumberOfAdditionalShifts()-adjust; n++ ) {
             simWindow->shift( (*this), smpi, params, itime, time_dual, region );
+        }
     }
 
     // Copy all Fields and Particles to the device
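The `moveWindow` hunk above casts the unsigned return value of `getAdditionalShiftsIteration()` to `int` before comparing it with the signed `itime`, instead of letting `itime` be implicitly promoted to unsigned. An illustrative stand-alone example of why the direction of the conversion matters (values and names are not from the patch):

```c++
#include <cstdio>

int main()
{
    int      itime  = -1;  // e.g. "no shift scheduled yet"
    unsigned target = 0u;  // unsigned getter result

    // Promoted comparison: -1 becomes a huge unsigned value, so this is false.
    std::printf( "unsigned compare: %s\n", itime < target ? "true" : "false" );
    // Casting the unsigned side keeps the comparison in signed arithmetic.
    std::printf( "signed compare:   %s\n", itime < (int) target ? "true" : "false" );
    return 0;
}
```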
@@ -4423,7 +4424,7 @@ void VectorPatch::moveWindow(
 
     // let's try initialising like we do at the start:
-/*#if defined( SMILEI_ACCELERATOR_MODE )
+/*#if defined( SMILEI_ACCELERATOR_GPU )
     // Allocate particle and field arrays
     // Also copy particle array content on device
     vecPatches.allocateDataOnDevice( params, &smpi,
@@ -4434,7 +4435,7 @@ void VectorPatch::moveWindow(
 #endif*/
 
     // does not do anything?
-    /*#if defined( SMILEI_ACCELERATOR_MODE)
+    /*#if defined( SMILEI_ACCELERATOR_GPU)
     if( simWindow->isMoving( time_dual ) || itime == simWindow->getAdditionalShiftsIteration() ) {
         copyFieldsFromHostToDevice();
         copyParticlesFromHostToDevice();
@@ -4609,13 +4610,12 @@ void VectorPatch::initNewEnvelope( Params & )
 } // END initNewEnvelope
 
+#if defined( SMILEI_ACCELERATOR_GPU )
 void VectorPatch::allocateDataOnDevice(Params &params,
                                        SmileiMPI *smpi,
                                        RadiationTables *radiation_tables,
                                        MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables)
 {
-
-#if defined( SMILEI_ACCELERATOR_MODE )
     // TODO(Etienne M): FREE. If we have load balancing or other patch
     // creation/destruction available (which is not the case on GPU ATM),
     // we should be taking care of freeing this GPU memory.
@@ -4681,17 +4681,24 @@ void VectorPatch::allocateDataOnDevice(Params &params,
         smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( min_particle_chi_table, min_particle_chi_size );
         smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( xi_table, xi_table_size );
     }
+}
 #else
+void VectorPatch::allocateDataOnDevice(Params &,
+                                       SmileiMPI *,
+                                       RadiationTables *,
+                                       MultiphotonBreitWheelerTables *)
+{
     ERROR( "GPU related code should not be reached in CPU mode!" );
-#endif
 }
+#endif
+
 //! Clean data allocated on device
+#if defined( SMILEI_ACCELERATOR_GPU )
 void VectorPatch::cleanDataOnDevice( Params &params, SmileiMPI *smpi,
                                      RadiationTables *radiation_tables,
                                      MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables)
 {
-#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP )
 
     const int npatches = this->size();
 
@@ -4801,12 +4808,17 @@ void VectorPatch::cleanDataOnDevice( Params &params, SmileiMPI *smpi,
         smilei::tools::gpu::HostDeviceMemoryManagement::DeviceFree( xi_table, xi_table_size );
     }
+}
 #else
+void VectorPatch::cleanDataOnDevice( Params &, SmileiMPI *,
+                                     RadiationTables *,
+                                     MultiphotonBreitWheelerTables *)
+{
     ERROR( "GPU related code should not be reached in CPU mode!" );
-#endif
 }
+#endif
 
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
 
 //! Field Synchronization from the GPU (Device) to the CPU
 //! This function updates the data on the host from the data located on the device
@@ -4846,9 +4858,7 @@ void VectorPatch::copyFieldsFromHostToDevice()
     }
 }
 
-#endif
-#if defined( SMILEI_ACCELERATOR_MODE)
 
 //! Sync all fields from device to host
 void
 VectorPatch::copyFieldsFromDeviceToHost()
@@ -4861,10 +4871,6 @@ VectorPatch::copyFieldsFromDeviceToHost()
     }
 }
 
-#endif
-
-
-#if defined( SMILEI_ACCELERATOR_MODE)
 
 //! Copy all species particles from Host to devices
 void VectorPatch::copyParticlesFromHostToDevice()
@@ -4876,9 +4882,6 @@ void VectorPatch::copyParticlesFromHostToDevice()
             }
         }
     }
 }
-#endif
-
-#if defined( SMILEI_ACCELERATOR_MODE)
 
 //! copy all patch Particles from device to Host
 void
@@ -4891,9 +4894,7 @@ VectorPatch::copyParticlesFromDeviceToHost()
     for( int ipatch = 0; ipatch < npatches; ipatch++ ) {
         for( unsigned int ispec = 0; ispec < ( *this )( ipatch )->vecSpecies.size(); ispec++ ) {
             species( ipatch, ispec )->particles->copyFromDeviceToHost();
-#if defined ( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_MODE )
             species( ipatch, ispec )->particles->setHostBinIndex();
-#endif
             // std::cerr
             //     << "ipatch: " << ipatch
             //     << " ispec: " << ispec
@@ -4906,9 +4907,6 @@ VectorPatch::copyParticlesFromDeviceToHost()
         }
     }
 }
-#endif
-
-#if defined( SMILEI_ACCELERATOR_MODE)
 
 //! Sync all fields from device to host
 void VectorPatch::copySpeciesFieldsFromDeviceToHost()
@@ -4988,7 +4986,7 @@ void VectorPatch::dynamicsWithoutTasks( Params &params,
 
             if( spec->isProj( time_dual, simWindow ) || diag_flag ) {
 
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
                 if (diag_flag) {
                     spec->Species::prepareSpeciesCurrentAndChargeOnDevice(
                         ispec,
diff --git a/src/Patch/VectorPatch.h b/src/Patch/VectorPatch.h
index be5a37d21..051d78276 100755
--- a/src/Patch/VectorPatch.h
+++ b/src/Patch/VectorPatch.h
@@ -510,7 +510,7 @@ public :
                              RadiationTables * radiation_tables,
                              MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables );
 
-#if defined( SMILEI_ACCELERATOR_MODE)
+#if defined( SMILEI_ACCELERATOR_GPU)
 
     //! Field Synchronization from the GPU (Device) to the host (CPU)
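The `allocateDataOnDevice`/`cleanDataOnDevice` hunks above move the `#if` guard from inside the function body to around the whole definition, so each build compiles exactly one definition: GPU builds get the real body, CPU builds a stub with unnamed parameters that aborts. A minimal sketch of the split (the function name is illustrative, not from the patch; `ERROR` is the project's own macro):

```c++
#if defined( SMILEI_ACCELERATOR_GPU )
void uploadGrids( double *data, int n )
{
    // ... device allocation and host-to-device copy ...
}
#else
void uploadGrids( double *, int ) // parameters unnamed: stub never uses them
{
    ERROR( "GPU related code should not be reached in CPU mode!" );
}
#endif
```

Compared with a guard inside the body, this keeps the unused-parameter warnings away in the CPU build and makes the preprocessor structure visible at the function boundary.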
diff --git a/src/Projector/Projector2D2OrderGPU.cpp b/src/Projector/Projector2D2OrderGPU.cpp
index cfe20eb7d..c669cc209 100755
--- a/src/Projector/Projector2D2OrderGPU.cpp
+++ b/src/Projector/Projector2D2OrderGPU.cpp
@@ -26,7 +26,7 @@ Projector2D2OrderGPU::Projector2D2OrderGPU( Params &parameters, Patch *a_patch )
     dts2 = dt / 2.0;
     dts4 = dts2 / 2.0;
 
-#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_GPU_OACC )
     // When sorting is disabled, these values are invalid (-1) and the HIP
     // implementation can't be used.
     x_dimension_bin_count_ = parameters.getGPUBinCount( 1 );
@@ -41,7 +41,7 @@ Projector2D2OrderGPU::~Projector2D2OrderGPU()
     // EMPTY
 }
 
-#if defined( SMILEI_ACCELERATOR_MODE ) //SMILEI_ACCELERATOR_GPU_OMP )
+#if defined( SMILEI_ACCELERATOR_GPU ) //SMILEI_ACCELERATOR_GPU_OMP )
 extern "C" void
 currentDepositionKernel2DOnDevice( double *__restrict__ Jx,
@@ -109,6 +109,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy
     /// Project global current densities (EMfields->Jx_/Jy_/Jz_)
     ///
     /* inline */ void
+#if defined( SMILEI_ACCELERATOR_GPU )//SMILEI_ACCELERATOR_GPU_OMP )
     currents( double *__restrict__ Jx,
              double *__restrict__ Jy,
             double *__restrict__ Jz,
@@ -132,7 +133,6 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy
              double,
              int not_spectral )
    {
-#if defined( SMILEI_ACCELERATOR_MODE )//SMILEI_ACCELERATOR_GPU_OMP )
        currentDepositionKernel2DOnDevice( Jx,
                                           Jy,
                                            Jz,
@@ -159,15 +159,22 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy
                                            j_domain_begin, nprimy,
                                            not_spectral );
+    }
 #else
+    currents( double *__restrict__ , double *__restrict__ , double *__restrict__ , int, int, int,
+              Particles &, unsigned int , unsigned int ,const double *__restrict__ ,
+              const int *__restrict__ , const double *__restrict__ , double , double , double ,
+              double , double , int , int , int , double, int )
+    {
         SMILEI_ASSERT( false );
-#endif
     }
+#endif
 
     /// Like currents(), project the particle current on the grid (Jx_/Jy_/Jz_)
     /// but also compute global current densities rho used for diagFields timestep
     ///
     /* inline */ void
+#if defined( SMILEI_ACCELERATOR_GPU )//SMILEI_ACCELERATOR_GPU_OMP )
     currentsAndDensity( double *__restrict__ Jx,
                         double *__restrict__ Jy,
                         double *__restrict__ Jz,
@@ -193,7 +200,6 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy
                         double,
                         int not_spectral )
     {
-#if defined( SMILEI_ACCELERATOR_MODE )//SMILEI_ACCELERATOR_GPU_OMP )
         currentAndDensityDepositionKernelOnDevice( Jx,
                                                    Jy,
                                                    Jz,
@@ -222,10 +228,16 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy
                                                    j_domain_begin, nprimy,
                                                    not_spectral );
+    }
 #else
+    currentsAndDensity( double *__restrict__ , double *__restrict__ , double *__restrict__ , double *__restrict__ ,
+                        int , int , int , int , Particles &, unsigned int , unsigned int ,
+                        const double *__restrict__ , const int *__restrict__ , const double *__restrict__ ,
+                        double , double , double , double , double , int , int , int , double, int )
+    {
         SMILEI_ASSERT( false );
-#endif
     }
+#endif
 
 } // namespace
 
@@ -233,7 +245,7 @@ void Projector2D2OrderGPU::basic( double      *rhoj,
                                   Particles   &particles,
                                   unsigned int ipart,
                                   unsigned int type,
-                                  int          bin_shift )
+                                  int          /*bin_shift*/ )
 {
     // Warning : this function is used for frozen species only. It is assumed that position = position_old !!!
@@ -306,12 +318,12 @@ void Projector2D2OrderGPU::basic( double      *rhoj,
     }
 }
 
-void Projector2D2OrderGPU::ionizationCurrents( Field      *Jx,
-                                               Field      *Jy,
-                                               Field      *Jz,
-                                               Particles  &particles,
-                                               int         ipart,
-                                               LocalFields Jion )
+void Projector2D2OrderGPU::ionizationCurrents( Field      */*Jx*/,
+                                               Field      */*Jy*/,
+                                               Field      */*Jz*/,
+                                               Particles  &/*particles*/,
+                                               int         /*ipart*/,
+                                               LocalFields /*Jion */)
 {
     ERROR( "Projector2D2OrderGPU::ionizationCurrents(): Not implemented !" );
 }
@@ -325,8 +337,8 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields,
                                                       bool         diag_flag,
                                                       bool         is_spectral,
                                                       int          ispec,
-                                                      int          icell,
-                                                      int          ipart_ref )
+                                                      int          /*icell*/,
+                                                      int          /*ipart_ref */)
 {
     std::vector<int>    &iold  = smpi->dynamics_iold[ithread];
     std::vector<double> &delta = smpi->dynamics_deltaold[ithread];
@@ -425,20 +437,20 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields,
     }
 }
 
-void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields,
-                                           Particles   &particles,
-                                           double       species_mass,
-                                           SmileiMPI   *smpi,
-                                           int          istart,
-                                           int          iend,
-                                           int          ithread,
-                                           int          icell,
-                                           int          ipart_ref )
+void Projector2D2OrderGPU::susceptibility( ElectroMagn */*EMfields*/,
+                                           Particles   &/*particles*/,
+                                           double       /*species_mass*/,
+                                           SmileiMPI   */*smpi*/,
+                                           int          /*istart*/,
+                                           int          /*iend*/,
+                                           int          /*ithread*/,
+                                           int          /*icell*/,
+                                           int          /*ipart_ref */)
 {
     ERROR( "Projector2D2OrderGPU::susceptibility(): Not implemented !" );
 }
 
-//#if defined( SMILEI_ACCELERATOR_MODE )
+//#if defined( SMILEI_ACCELERATOR_GPU )
 ////! Project global current densities (EMfields->Jx_/Jy_/Jz_)
 ////!
 //extern "C" void
diff --git a/src/Projector/Projector2D2OrderGPU.h b/src/Projector/Projector2D2OrderGPU.h
index 9a799f9b5..ecdd4959d 100755
--- a/src/Projector/Projector2D2OrderGPU.h
+++ b/src/Projector/Projector2D2OrderGPU.h
@@ -46,21 +46,21 @@ class Projector2D2OrderGPU : public Projector2D
                            int ipart_ref = 0 ) override;
 
     //!Wrapper for task-based implementation of Smilei
-    void currentsAndDensityWrapperOnBuffers( double *b_Jx,
-                                             double *b_Jy,
-                                             double *b_Jz,
-                                             double *b_rho,
-                                             int bin_width,
-                                             Particles &particles,
-                                             SmileiMPI *smpi,
-                                             int istart,
-                                             int iend,
-                                             int ithread,
-                                             bool diag_flag,
-                                             bool is_spectral,
-                                             int ispec,
-                                             int icell = 0,
-                                             int ipart_ref = 0 ) override {};
+    void currentsAndDensityWrapperOnBuffers( double * /*b_Jx*/,
+                                             double * /*b_Jy*/,
+                                             double * /*b_Jz*/,
+                                             double * /*b_rho*/,
+                                             int /*bin_width*/,
+                                             Particles &/*particles*/,
+                                             SmileiMPI */*smpi*/,
+                                             int /*istart*/,
+                                             int /*iend*/,
+                                             int /*ithread*/,
+                                             bool /*diag_flag*/,
+                                             bool /*is_spectral*/,
+                                             int /*ispec*/,
+                                             int /*icell*/ = 0,
+                                             int /*ipart_ref*/ = 0 ) override {};
 
     /// Project susceptibility, used as source term in envelope equation
     ///
diff --git a/src/Projector/Projector2D2OrderGPUKernel.cpp b/src/Projector/Projector2D2OrderGPUKernel.cpp
index 8f38f52fe..e2ec56495 100644
--- a/src/Projector/Projector2D2OrderGPUKernel.cpp
+++ b/src/Projector/Projector2D2OrderGPUKernel.cpp
@@ -1,4 +1,4 @@
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
 
 #include "Projector2D2OrderGPUKernelCUDAHIP.h"
 #include <cmath>
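The `extern "C"` declarations renamed above implement the bridge described in ParticlesFactory.cpp: a C signature carries no C++ name mangling, so the host translation units and the CUDA/HIP-compiled kernels can be built by different compilers without ABI mismatches. A minimal sketch of the pattern (the kernel name and arguments are illustrative, not from the patch):

```c++
// Host side: declare the entry point with C linkage.
extern "C" void depositOnDevice( double *grid, int size );

// Device-code translation unit (compiled by nvcc/hipcc): define it with the
// same C linkage, then launch the real kernel from inside.
extern "C" void depositOnDevice( double *grid, int size )
{
    // ... configure and launch the GPU kernel over grid[0:size] ...
}
```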
diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu
index 666a409f4..55082b793 100644
--- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu
+++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu
@@ -81,7 +81,7 @@
 //                                device_particle_charge /* [0:particle_count] */, \
 //                                device_particle_weight /* [0:particle_count] */ )
 //     #pragma omp teams thread_limit( 64 ) distribute parallel for
-// #elif defined( SMILEI_OPENACC_MODE )
+// #elif defined( SMILEI_ACCELERATOR_GPU_OACC )
 //     #pragma acc parallel \
 //         deviceptr( device_particle_position_x, \
 //                    device_particle_position_y, \
@@ -264,7 +264,7 @@
 //                                device_particle_charge /* [0:particle_count] */, \
 //                                device_particle_weight /* [0:particle_count] */ )
 //     #pragma omp teams thread_limit( 64 ) distribute parallel for
-// #elif defined( SMILEI_OPENACC_MODE )
+// #elif defined( SMILEI_ACCELERATOR_GPU_OACC )
 //     #pragma acc parallel \
 //         deviceptr( device_particle_position_x, \
 //                    device_particle_position_y, \
diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h
index d607a4ab4..a21f757db 100644
--- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h
+++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h
@@ -4,7 +4,7 @@
 #define Projector2D2OrderGPUKernelCUDAHIP_H
 
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
 
 #if defined( __HIP__ )
     #include <hip/hip_runtime.h>
diff --git a/src/Projector/Projector3D2OrderGPU.cpp b/src/Projector/Projector3D2OrderGPU.cpp
index 39342b204..62ec54141 100755
--- a/src/Projector/Projector3D2OrderGPU.cpp
+++ b/src/Projector/Projector3D2OrderGPU.cpp
@@ -30,13 +30,13 @@ Projector3D2OrderGPU::Projector3D2OrderGPU( Params &parameters, Patch *a_patch )
     dts2 = dt / 2.0;
     dts4 = dts2 / 2.0;
 
-#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_GPU_OACC )
     // When sorting is disabled, these values are invalid (-1) and the HIP
     // implementation can't be used.
     x_dimension_bin_count_ = parameters.getGPUBinCount( 1 );
     y_dimension_bin_count_ = parameters.getGPUBinCount( 2 );
     z_dimension_bin_count_ = parameters.getGPUBinCount( 3 );
-//#elif defined( SMILEI_OPENACC_MODE )
+//#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
 //    x_dimension_bin_count_ = 1;
 //    y_dimension_bin_count_ = 1;
 //    z_dimension_bin_count_ = 1;
@@ -50,7 +50,7 @@ Projector3D2OrderGPU::~Projector3D2OrderGPU()
     // EMPTY
 }
 
-#if defined( SMILEI_ACCELERATOR_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU )
 extern "C" void
 currentDeposition3DOnDevice( double *__restrict__ Jx,
                              double *__restrict__ Jy,
@@ -122,6 +122,8 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy
     /// Project global current densities (EMfields->Jx_/Jy_/Jz_)
     ///
     /* inline */ void
+
+#if defined( SMILEI_ACCELERATOR_GPU )
     currents( double *__restrict__ Jx,
               double *__restrict__ Jy,
               double *__restrict__ Jz,
@@ -150,72 +152,77 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy
               double,
               int not_spectral )
     {
-#if defined( SMILEI_ACCELERATOR_MODE )
         currentDeposition3DOnDevice( Jx,
-                                         Jy,
-                                         Jz,
-                                         Jx_size,
-                                         Jy_size,
-                                         Jz_size,
-                                         particles.getPtrPosition( 0 ),
-                                         particles.getPtrPosition( 1 ),
-                                         particles.getPtrPosition( 2 ),
-                                         particles.getPtrCharge(),
-                                         particles.getPtrWeight(),
-                                         particles.last_index.data(),
-                                         x_dimension_bin_count,
-                                         y_dimension_bin_count,
-                                         z_dimension_bin_count,
-                                         invgf_,
-                                         iold_,
-                                         deltaold_,
-                                         particles.deviceSize(),
-                                         inv_cell_volume,
-                                         dx_inv,
-                                         dy_inv,
-                                         dz_inv,
-                                         dx_ov_dt,
-                                         dy_ov_dt,
-                                         dz_ov_dt,
-                                         i_domain_begin,
-                                         j_domain_begin,
-                                         k_domain_begin,
-                                         nprimy, nprimz,
-                                         not_spectral );
+                                     Jy,
+                                     Jz,
+                                     Jx_size,
+                                     Jy_size,
+                                     Jz_size,
+                                     particles.getPtrPosition( 0 ),
+                                     particles.getPtrPosition( 1 ),
+                                     particles.getPtrPosition( 2 ),
+                                     particles.getPtrCharge(),
+                                     particles.getPtrWeight(),
+                                     particles.last_index.data(),
+                                     x_dimension_bin_count,
+                                     y_dimension_bin_count,
+                                     z_dimension_bin_count,
+                                     invgf_,
+                                     iold_,
+                                     deltaold_,
+                                     particles.deviceSize(),
+                                     inv_cell_volume,
+                                     dx_inv,
+                                     dy_inv,
+                                     dz_inv,
+                                     dx_ov_dt,
+                                     dy_ov_dt,
+                                     dz_ov_dt,
+                                     i_domain_begin,
+                                     j_domain_begin,
+                                     k_domain_begin,
+                                     nprimy, nprimz,
+                                     not_spectral );
+    }
 #else
+    currents( double *__restrict__ , double *__restrict__ , double *__restrict__ , int, int, int,
+              Particles &, unsigned int , unsigned int , unsigned int , const double *__restrict__ ,
+              const int *__restrict__ , const double *__restrict__ , double , double , double , double ,
+              double , double , double , int , int , int , int , int , double, int )
+    {
         SMILEI_ASSERT( false );
-#endif
     }
+#endif
 
     //! Project density
     /* inline */ void
+#if defined( SMILEI_ACCELERATOR_GPU )
     density(
-            double *__restrict__ rho,
-            int rho_size,
-            Particles &particles,
-            unsigned int x_dimension_bin_count,
-            unsigned int y_dimension_bin_count,
-            unsigned int z_dimension_bin_count,
-            const double *__restrict__ invgf_,
-            const int *__restrict__ iold_,
-            const double *__restrict__ deltaold_,
-            double inv_cell_volume,
-            double dx_inv,
-            double dy_inv,
-            double dz_inv,
-            double dx_ov_dt,
-            double dy_ov_dt,
-            double dz_ov_dt,
-            int i_domain_begin,
-            int j_domain_begin,
-            int k_domain_begin,
-            int nprimy,
-            int nprimz,
-            double,
-            int not_spectral )
+        double *__restrict__ rho,
+        int rho_size,
+        Particles &particles,
+        unsigned int x_dimension_bin_count,
+        unsigned int y_dimension_bin_count,
+        unsigned int z_dimension_bin_count,
+        const double *__restrict__ invgf_,
+        const int *__restrict__ iold_,
+        const double *__restrict__ deltaold_,
+        double inv_cell_volume,
+        double dx_inv,
+        double dy_inv,
+        double dz_inv,
+        double dx_ov_dt,
+        double dy_ov_dt,
+        double dz_ov_dt,
+        int i_domain_begin,
+        int j_domain_begin,
+        int k_domain_begin,
+        int nprimy,
+        int nprimz,
+        double,
+        int not_spectral )
     {
-#if defined( SMILEI_ACCELERATOR_MODE )
         densityDeposition3DOnDevice( rho,
                                      rho_size,
@@ -244,10 +251,16 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy
                                      k_domain_begin,
                                      nprimy, nprimz,
                                      not_spectral );
+    }
 #else
+    density( double *__restrict__ , int , Particles &, unsigned int , unsigned int , unsigned int ,
+             const double *__restrict__ , const int *__restrict__ , const double *__restrict__ ,
+             double , double , double , double , double , double , double ,
+             int, int, int, int, int, double, int )
+    {
         SMILEI_ASSERT( false );
-#endif
     }
+#endif
 
 } // namespace
 
@@ -255,7 +268,7 @@ void Projector3D2OrderGPU::basic( double      *rhoj,
                                   Particles   &particles,
                                   unsigned int ipart,
                                   unsigned int type,
-                                  int          bin_shift )
+                                  int          /*bin_shift*/ )
 {
 
@@ -347,12 +360,12 @@ void Projector3D2OrderGPU::basic( double      *rhoj,
     }
 }
 
-void Projector3D2OrderGPU::ionizationCurrents( Field      *Jx,
-                                               Field      *Jy,
-                                               Field      *Jz,
-                                               Particles  &particles,
-                                               int         ipart,
-                                               LocalFields Jion )
+void Projector3D2OrderGPU::ionizationCurrents( Field      */*Jx*/,
+                                               Field      */*Jy*/,
+                                               Field      */*Jz*/,
+                                               Particles  &/*particles*/,
+                                               int         /*ipart*/,
+                                               LocalFields /*Jion */)
 {
     ERROR( "Projector3D2OrderGPU::ionizationCurrents(): Not implemented !" );
 }
@@ -366,8 +379,8 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields,
                                                       bool         diag_flag,
                                                       bool         is_spectral,
                                                       int          ispec,
-                                                      int          icell,
-                                                      int          ipart_ref )
+                                                      int          /*icell*/,
+                                                      int          /*ipart_ref*/ )
 {
 
     if( is_spectral ) {
@@ -463,15 +476,15 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields,
     //std::cerr << sum << " " << sum2 << " " << sum_Jxs << " " << sum_Jx << std::endl;
 }
 
-void Projector3D2OrderGPU::susceptibility( ElectroMagn *EMfields,
-                                           Particles   &particles,
-                                           double       species_mass,
-                                           SmileiMPI   *smpi,
-                                           int          istart,
-                                           int          iend,
-                                           int          ithread,
-                                           int          icell,
-                                           int          ipart_ref )
+void Projector3D2OrderGPU::susceptibility( ElectroMagn */*EMfields*/,
+                                           Particles   &/*particles*/,
+                                           double       /*species_mass*/,
+                                           SmileiMPI   */*smpi*/,
+                                           int          /*istart*/,
+                                           int          /*iend*/,
+                                           int          /*ithread*/,
+                                           int          /*icell*/,
+                                           int          /*ipart_ref */)
 {
     ERROR( "Projector3D2OrderGPU::susceptibility(): Not implemented !" );
); } diff --git a/src/Projector/Projector3D2OrderGPU.cpp.backup b/src/Projector/Projector3D2OrderGPU.cpp.backup index 39ce7a4a5..761e6ae31 100755 --- a/src/Projector/Projector3D2OrderGPU.cpp.backup +++ b/src/Projector/Projector3D2OrderGPU.cpp.backup @@ -2,7 +2,7 @@ #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #include #endif @@ -136,7 +136,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx0 [0:kTmpArraySize], \ @@ -262,7 +262,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSx [0:kTmpArraySize], sumX [0:kTmpArraySize] ) // #pragma acc parallel deviceptr( DSx, sumX ) @@ -287,7 +287,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jx [0:Jx_size], \ Sy0 [0:kTmpArraySize], \ @@ -310,7 +310,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double crx_p = dx_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex0 = iold[ipart+0*packsize]*yz_size0+iold[ipart+1*packsize]*z_size0+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -326,7 +326,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jx [ jdx ] += val; @@ -339,7 +339,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSy [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -365,7 +365,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jy [0:Jy_size], \ Sx0 [0:kTmpArraySize], \ @@ -388,7 +388,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double cry_p = dy_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex1 = iold[ipart+0*packsize]*yz_size1+iold[ipart+1*packsize]*z_size1+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -404,7 +404,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jy [ jdx ] += val; 
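// Editorial sketch (not Smilei code): every deposition hunk in this file renames
// the same guard around an atomic scatter-add. Only the pragma changes with the
// backend; the deposition statement stays identical. The names depositOne, grid,
// idx and val below are hypothetical:
static inline void depositOne( double *grid, int idx, double val )
{
#if defined( SMILEI_ACCELERATOR_GPU_OMP )
    #pragma omp atomic update
#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
    #pragma acc atomic
#endif
    grid[ idx ] += val; // atomic: several particles may target the same cell concurrently
}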
@@ -417,7 +417,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSz [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -443,7 +443,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jz [0:Jz_size], \ Sx0 [0:kTmpArraySize], \ @@ -466,7 +466,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double crz_p = dz_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex2 = iold[ipart+0*packsize]*yz_size2+iold[ipart+1*packsize]*z_size2+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=1 ; k<5 ; k++ ) { @@ -482,7 +482,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jz[ jdx ] += val; @@ -498,7 +498,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ rho [0:rho_size], \ Sx1 [0:kTmpArraySize], \ @@ -523,7 +523,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( int jdx = idx + k; #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif rho[ jdx ] += charge_weight * Sx1[ipart_pack+i*packsize]*Sy1[ipart_pack+j*packsize]*Sz1[ipart_pack+k*packsize]; diff --git a/src/Projector/Projector3D2OrderGPU.h b/src/Projector/Projector3D2OrderGPU.h index 2fac2402e..c76bf48a1 100755 --- a/src/Projector/Projector3D2OrderGPU.h +++ b/src/Projector/Projector3D2OrderGPU.h @@ -46,21 +46,21 @@ class Projector3D2OrderGPU : public Projector3D int ipart_ref = 0 ) override; //!Wrapper for task-based implementation of Smilei - void currentsAndDensityWrapperOnBuffers( double *b_Jx, - double *b_Jy, - double *b_Jz, - double *b_rho, - int bin_width, - Particles &particles, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - bool diag_flag, - bool is_spectral, - int ispec, - int icell = 0, - int ipart_ref = 0 ) override {}; + void currentsAndDensityWrapperOnBuffers( double * /*b_Jx*/, + double * /*b_Jy*/, + double * /*b_Jz*/, + double * /*b_rho*/, + int /*bin_width*/, + Particles &/*particles*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + bool /*diag_flag*/, + bool /*is_spectral*/, + int /*ispec*/, + int /*icell*/ = 0, + int /*ipart_ref*/ = 0 ) override {}; /// Project susceptibility, used as source term in envelope equation /// diff --git a/src/Projector/Projector3D2OrderGPUKernel.cpp b/src/Projector/Projector3D2OrderGPUKernel.cpp index f77a4fda3..5d9f88b5d 100644 --- a/src/Projector/Projector3D2OrderGPUKernel.cpp +++ b/src/Projector/Projector3D2OrderGPUKernel.cpp @@ -5,7 +5,7 @@ // issues (!). 
-#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Simple switch to jump between the reference (omp) implementation and the //! hip one. diff --git a/src/Projector/Projector3D2OrderGPUKernelAcc.h b/src/Projector/Projector3D2OrderGPUKernelAcc.h index 9cf3b224d..43bff1cce 100644 --- a/src/Projector/Projector3D2OrderGPUKernelAcc.h +++ b/src/Projector/Projector3D2OrderGPUKernelAcc.h @@ -1,6 +1,6 @@ //! Optimized Acc projection (from Julien Derouillat) -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include #include "Tools.h" @@ -110,7 +110,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx0 [0:kTmpArraySize], \ @@ -236,7 +236,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSx [0:kTmpArraySize], sumX [0:kTmpArraySize] ) // #pragma acc parallel deviceptr( DSx, sumX ) @@ -261,7 +261,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jx [0:Jx_size], \ Sy0 [0:kTmpArraySize], \ @@ -284,7 +284,7 @@ namespace acc { const double crx_p = dx_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex0 = iold[ipart+0*packsize]*yz_size0+iold[ipart+1*packsize]*z_size0+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -309,7 +309,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSy [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -335,7 +335,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jy [0:Jy_size], \ Sx0 [0:kTmpArraySize], \ @@ -358,7 +358,7 @@ namespace acc { const double cry_p = dy_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex1 = iold[ipart+0*packsize]*yz_size1+iold[ipart+1*packsize]*z_size1+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -383,7 +383,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSz [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -409,7 +409,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( 
SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jz [0:Jz_size], \ Sx0 [0:kTmpArraySize], \ @@ -432,7 +432,7 @@ namespace acc { const double crz_p = dz_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex2 = iold[ipart+0*packsize]*yz_size2+iold[ipart+1*packsize]*z_size2+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=1 ; k<5 ; k++ ) { @@ -536,7 +536,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx1 [0:kTmpArraySize], \ @@ -630,7 +630,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ rho [0:rho_size], \ Sx1 [0:kTmpArraySize], \ diff --git a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu index 195a02667..dd8d1e61d 100644 --- a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu +++ b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu @@ -1,6 +1,6 @@ //! HIP CUDA implementation -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //#include "Projector3D2OrderGPUKernelCUDAHIP.h" diff --git a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h index 94368f4dd..1b78b1252 100644 --- a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h @@ -4,7 +4,7 @@ #define Projector3D2OrderGPUKernelCUDAHIP_H -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #if defined( __HIP__ ) #include diff --git a/src/Projector/Projector3D2OrderGPUKernelNaive.h b/src/Projector/Projector3D2OrderGPUKernelNaive.h index b6cfac080..a261af40b 100644 --- a/src/Projector/Projector3D2OrderGPUKernelNaive.h +++ b/src/Projector/Projector3D2OrderGPUKernelNaive.h @@ -1,6 +1,6 @@ //! 
Naive ACC/OMP implementation -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include #include "Tools.h" @@ -66,7 +66,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Jx[0:Jx_size], \ @@ -344,7 +344,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ rho[0:rho_size] \ diff --git a/src/Projector/ProjectorAM2OrderV.cpp b/src/Projector/ProjectorAM2OrderV.cpp index b222aa4ee..890d37332 100755 --- a/src/Projector/ProjectorAM2OrderV.cpp +++ b/src/Projector/ProjectorAM2OrderV.cpp @@ -673,10 +673,6 @@ void ProjectorAM2OrderV::susceptibility( ElectroMagn *EMfields, Particles &parti double charge_weight[8] __attribute__( ( aligned( 64 ) ) ); // double r_bar[8] __attribute__( ( aligned( 64 ) ) ); - //double *invR_local = &(invR_[jpom2]); - // double *invRd_local = &(invRd_[jpom2]); - - double *invR_local = &(invR_[jpom2]); // Pointer for GPU and vectorization on ARM processors double * __restrict__ position_x = particles.getPtrPosition(0); double * __restrict__ position_y = particles.getPtrPosition(1); diff --git a/src/Projector/ProjectorFactory.h b/src/Projector/ProjectorFactory.h index db8c39e1f..278739301 100755 --- a/src/Projector/ProjectorFactory.h +++ b/src/Projector/ProjectorFactory.h @@ -42,7 +42,7 @@ class ProjectorFactory // --------------- else if( ( params.geometry == "2Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { if( !vectorization ) { - #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) + #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) Proj = new Projector2D2OrderGPU( params, patch ); #else Proj = new Projector2D2Order( params, patch ); @@ -64,7 +64,7 @@ class ProjectorFactory // --------------- else if( ( params.geometry == "3Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { if( !vectorization ) { - #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) + #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) Proj = new Projector3D2OrderGPU( params, patch ); #else Proj = new Projector3D2Order( params, patch ); diff --git a/src/Pusher/PusherBoris.cpp b/src/Pusher/PusherBoris.cpp index 536def7a9..8f70a6cc3 100755 --- a/src/Pusher/PusherBoris.cpp +++ b/src/Pusher/PusherBoris.cpp @@ -57,7 +57,7 @@ void PusherBoris::operator()( Particles &particles, SmileiMPI *smpi, int istart, position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherBorisNR.cpp b/src/Pusher/PusherBorisNR.cpp index 84f072e1f..df4a3277b 100755 --- a/src/Pusher/PusherBorisNR.cpp +++ b/src/Pusher/PusherBorisNR.cpp @@ -57,7 +57,7 @@ void PusherBorisNR::operator()( Particles &particles, SmileiMPI *smpi, int istar 
position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherHigueraCary.cpp b/src/Pusher/PusherHigueraCary.cpp index 2ab234ae1..c85189fff 100755 --- a/src/Pusher/PusherHigueraCary.cpp +++ b/src/Pusher/PusherHigueraCary.cpp @@ -68,7 +68,7 @@ void PusherHigueraCary::operator()( Particles &particles, SmileiMPI *smpi, int i position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherPhoton.cpp b/src/Pusher/PusherPhoton.cpp index a94a521e3..5feb7823d 100755 --- a/src/Pusher/PusherPhoton.cpp +++ b/src/Pusher/PusherPhoton.cpp @@ -53,7 +53,7 @@ void PusherPhoton::operator()( Particles &particles, SmileiMPI *smpi, position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_ref; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherPonderomotiveBoris.cpp b/src/Pusher/PusherPonderomotiveBoris.cpp index 41afa42e6..9d151dabb 100755 --- a/src/Pusher/PusherPonderomotiveBoris.cpp +++ b/src/Pusher/PusherPonderomotiveBoris.cpp @@ -55,7 +55,7 @@ void PusherPonderomotiveBoris::operator()( Particles &particles, SmileiMPI *smpi const double *const __restrict__ GradPhiz = &( ( *GradPhipart )[2*nparts] ); //double *inv_gamma_ponderomotive = &( ( *dynamics_inv_gamma_ponderomotive )[0*nparts] ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; diff --git a/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp b/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp index 379f41763..a32f359cb 100644 --- a/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp +++ b/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp @@ -31,7 +31,6 @@ void PusherPonderomotiveBorisBTIS3::operator()( Particles &particles, SmileiMPI double charge_over_mass_dts2, charge_sq_over_mass_sq_dts4; double umx, umy, umz, upx, upy, upz; double alpha; - double TxTy, TyTz, TzTx; double pxsm, pysm, pzsm; //double one_ov_gamma_ponderomotive; diff --git a/src/Pusher/PusherPonderomotivePositionBoris.cpp b/src/Pusher/PusherPonderomotivePositionBoris.cpp index 16a4e6c69..9b9bea639 100755 --- a/src/Pusher/PusherPonderomotivePositionBoris.cpp +++ b/src/Pusher/PusherPonderomotivePositionBoris.cpp @@ -52,7 +52,7 @@ void PusherPonderomotivePositionBoris::operator()( Particles &particles, SmileiM const double *const __restrict__ GradPhi_my = &( ( *GradPhi_mpart )[1*nparts] ); const double *const __restrict__ GradPhi_mz = &( ( *GradPhi_mpart )[2*nparts] ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; diff --git a/src/Pusher/PusherVay.cpp b/src/Pusher/PusherVay.cpp index c1ba76693..83debaae4 100755 --- a/src/Pusher/PusherVay.cpp +++ b/src/Pusher/PusherVay.cpp @@ -67,7 +67,7 @@ void PusherVay::operator()( Particles &particles, SmileiMPI *smpi, int istart, i position_y /* [istart:particle_number] */, \ position_z 
/* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Radiation/RadiationCorrLandauLifshitz.cpp b/src/Radiation/RadiationCorrLandauLifshitz.cpp index 16c7b01fe..ebb0e54dd 100755 --- a/src/Radiation/RadiationCorrLandauLifshitz.cpp +++ b/src/Radiation/RadiationCorrLandauLifshitz.cpp @@ -96,7 +96,7 @@ void RadiationCorrLandauLifshitz::operator()( // cumulative Radiated energy from istart to iend double radiated_energy_loc = 0; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Local vector to store the radiated energy double * rad_norm_energy = new double [iend-istart]; // double * rad_norm_energy = (double*) aligned_alloc(64, (iend-istart)*sizeof(double)); @@ -112,7 +112,7 @@ void RadiationCorrLandauLifshitz::operator()( // Computation // NVIDIA GPUs - #if defined (SMILEI_OPENACC_MODE) + #if defined (SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_ref; const int np = iend-istart; #pragma acc parallel \ @@ -185,7 +185,7 @@ void RadiationCorrLandauLifshitz::operator()( // _______________________________________________________________ // Computation of the thread radiated energy -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Exact energy loss due to the radiation rad_norm_energy[ipart-istart] = gamma - std::sqrt( 1.0 @@ -210,7 +210,7 @@ void RadiationCorrLandauLifshitz::operator()( // _______________________________________________________________ // Update of the quantum parameter -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd for( int ipart=istart ; ipart #include -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #define __HIP_PLATFORM_NVCC__ #define __HIP_PLATFORM_NVIDIA__ #include "gpuRandom.h" @@ -103,7 +103,7 @@ void RadiationMonteCarlo::operator()( // Temporary double parameter double temp; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC unsigned long long seed; // Parameters for CUDA generator unsigned long long seq; unsigned long long offset; @@ -152,7 +152,7 @@ void RadiationMonteCarlo::operator()( // Number of photons int nphotons; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC int nphotons_start; #endif @@ -160,7 +160,7 @@ void RadiationMonteCarlo::operator()( const double photon_buffer_size_per_particle = radiation_photon_sampling_ * max_photon_emissions_; if (photons) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We reserve a large number of potential photons on device since we can't reallocate nphotons_start = photons->deviceSize(); //static_cast(photons)->deviceReserve( nphotons + (iend - istart) * photon_buffer_size_per_particle ); @@ -199,13 +199,13 @@ void RadiationMonteCarlo::operator()( double *const __restrict__ photon_tau = photons ? (photons->has_Monte_Carlo_process ? photons->getPtrTau() : nullptr) : nullptr; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Cell keys as a mask int *const __restrict__ photon_cell_keys = photons ? 
photons->getPtrCellKeys() : nullptr; #endif // Table properties ---------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Size of tables // int size_of_Table_integfochi = RadiationTables.integfochi_.size_particle_chi_; // int size_of_Table_min_photon_chi = RadiationTables.xi_.size_particle_chi_; @@ -221,7 +221,7 @@ void RadiationMonteCarlo::operator()( // _______________________________________________________________ // Computation -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Management of the data on GPU though this data region int np = iend-istart; @@ -342,7 +342,7 @@ void RadiationMonteCarlo::operator()( // New final optical depth to reach for emision while( tau[ipart] <= epsilon_tau_ ) { //tau[ipart] = -log( 1.-Rand::uniform() ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC tau[ipart] = -std::log( 1.-rand_->uniform() ); #else seed_curand_1 = (int) (ipart+1)*(initial_seed_1+1); //Seed for linear generator @@ -385,7 +385,7 @@ void RadiationMonteCarlo::operator()( // Draw random number in [0,1[ - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC random_number = rand_->uniform(); #else seed_curand_2 = (int) (ipart + 1)*(initial_seed_2 + 1); //Seed for linear generator @@ -433,7 +433,7 @@ void RadiationMonteCarlo::operator()( && ( i_photon_emission < max_photon_emissions_)) { // CPU implementation (non-threaded implementation) -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Creation of new photons in the temporary array photons photons->createParticles( radiation_photon_sampling_ ); @@ -611,14 +611,14 @@ void RadiationMonteCarlo::operator()( } // end while } // end for -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc parallel #endif //if (photons) std::cerr << photons->deviceSize() << std::endl; // Remove extra space to save memory -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (photons) { photons->shrinkToFit( true ); } @@ -631,7 +631,7 @@ void RadiationMonteCarlo::operator()( // ____________________________________________________ // Update of the quantum parameter chi -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; @@ -660,11 +660,11 @@ void RadiationMonteCarlo::operator()( } - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc parallel #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc data #endif diff --git a/src/Radiation/RadiationMonteCarlo.h b/src/Radiation/RadiationMonteCarlo.h index 34b8c31db..4e84f169d 100755 --- a/src/Radiation/RadiationMonteCarlo.h +++ b/src/Radiation/RadiationMonteCarlo.h @@ -16,7 +16,7 @@ #include "Radiation.h" #include "userFunctions.h" -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include // This is wrong. Dont include nvidiaParticles, it may cause problem! // See particle factory. 
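The RadiationMonteCarlo hunks above keep one sampling rule identical on every backend: a particle's next emission is scheduled by drawing an optical depth from an exponential law, tau = -log(1 - u) with u uniform in [0,1), redrawing while tau stays at or below epsilon_tau_. A minimal self-contained sketch of that step, with a hypothetical uniform01() standing in for rand_->uniform() (CPU) or curand_uniform() (GPU):

#include <cmath>
#include <random>

// Hypothetical stand-in for Smilei's per-backend generators; fixed seed, sketch only.
static double uniform01()
{
    static std::mt19937_64 gen( 0xDEADBEEF );
    static std::uniform_real_distribution<double> u( 0.0, 1.0 );
    return u( gen );
}

// Draw the optical depth to accumulate before the next emission event,
// mirroring the while( tau[ipart] <= epsilon_tau_ ) loop above.
double drawEmissionOpticalDepth( double epsilon_tau )
{
    double tau = 0.0;
    while( tau <= epsilon_tau ) {
        tau = -std::log( 1.0 - uniform01() ); // exponential law, unit mean
    }
    return tau;
}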
diff --git a/src/Radiation/RadiationNiel.cpp b/src/Radiation/RadiationNiel.cpp index 6e61f3759..dff292df4 100755 --- a/src/Radiation/RadiationNiel.cpp +++ b/src/Radiation/RadiationNiel.cpp @@ -127,7 +127,7 @@ void RadiationNiel::operator()( double radiated_energy_loc = 0; // Parameters for linear alleatory number generator - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC // Initialize initial seed for linear generator double initial_seed = rand_->uniform(); @@ -144,7 +144,7 @@ void RadiationNiel::operator()( //double t0 = MPI_Wtime(); // 1) Vectorized computation of gamma and the particle quantum parameter -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else @@ -190,12 +190,12 @@ void RadiationNiel::operator()( Ex[ipart-ipart_ref], Ey[ipart-ipart_ref], Ez[ipart-ipart_ref], Bx[ipart-ipart_ref], By[ipart-ipart_ref], Bz[ipart-ipart_ref] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC } //finish cycle #endif //double t1 = MPI_Wtime(); - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC if( particle_chi[ipart] > minimum_chi_continuous ) { seed_curand = (int) (ipart+1)*(initial_seed+1); //Seed for linear generator @@ -297,7 +297,7 @@ void RadiationNiel::operator()( if( niel_computation_method == 0 ) { - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC for( ipart=istart ; ipart minimum_chi_continuous ) { @@ -310,7 +310,7 @@ void RadiationNiel::operator()( diffusion[ipart-istart] = std::sqrt( factor_classical_radiated_power*gamma[ipart-ipart_ref]*temp )*random_numbers[ipart-istart]; - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC } } #endif @@ -318,7 +318,7 @@ void RadiationNiel::operator()( // Using the fit at order 5 (vectorized) else if( niel_computation_method == 1 ) { - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd private(temp) for( ipart=istart ; ipart #endif diff --git a/src/Radiation/RadiationTables.h b/src/Radiation/RadiationTables.h index bc5003966..77bcac8e2 100755 --- a/src/Radiation/RadiationTables.h +++ b/src/Radiation/RadiationTables.h @@ -58,7 +58,7 @@ class RadiationTables //! param[in] particle_chi particle quantum parameter //! param[in] particle_gamma particle Lorentz factor //! param[in] integfochi_table table of the discretized integrated f/chi function for Photon production yield computation -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computePhotonProductionYield( const double particle_chi, @@ -77,7 +77,7 @@ class RadiationTables //! \param[in] xi //! \param[in] table_min_photon_chi //! \param[in] table_xi -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computeRandomPhotonChiWithInterpolation( double particle_chi, @@ -95,7 +95,7 @@ class RadiationTables //! from the computed table niel_.table //! \param particle_chi particle quantum parameter -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double getHNielFromTable( double particle_chi, double * tableNiel); @@ -116,7 +116,7 @@ class RadiationTables //! \param particle_chi particle quantum parameter //! \param dt time step //#pragma omp declare simd -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getRidgersCorrectedRadiatedEnergy( const double particle_chi, @@ -138,7 +138,7 @@ class RadiationTables //! 
Get of the classical continuous radiated energy during dt //! \param particle_chi particle quantum parameter //! \param dt time step -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getClassicalRadiatedEnergy( double particle_chi, double dt ) @@ -148,7 +148,7 @@ class RadiationTables //! Return the minimum_chi_discontinuous_ value //! Under this value, no discontinuous radiation reaction -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getMinimumChiDiscontinuous() @@ -158,7 +158,7 @@ class RadiationTables //! Return the minimum_chi_continuous_ value //! Under this value, no continuous radiation reaction -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getMinimumChiContinuous() diff --git a/src/Radiation/RadiationTools.h b/src/Radiation/RadiationTools.h index 33cb5f501..1746c894e 100644 --- a/src/Radiation/RadiationTools.h +++ b/src/Radiation/RadiationTools.h @@ -32,7 +32,7 @@ class RadiationTools { //! Valid between particle_chi in 1E-3 and 1E1 //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitOrder10(double particle_chi) @@ -62,7 +62,7 @@ class RadiationTools { //! Valid between particle_chi in 1E-3 and 1E1 //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitOrder5(double particle_chi) @@ -86,7 +86,7 @@ class RadiationTools { //! Ridgers et al., ArXiv 1708.04511 (2017) //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitRidgers(double particle_chi) @@ -104,7 +104,7 @@ class RadiationTools { //! approximation formulae //! \param particle_chi particle quantum parameter //#pragma omp declare simd -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeGRidgers(double particle_chi) @@ -117,7 +117,7 @@ class RadiationTools { //! Return f1(nu) = Int_nu^\infty K_{5/3}(y) dy //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeF1Nu(double nu) @@ -155,7 +155,7 @@ class RadiationTools { //! Return f2(nu) = BesselK_{2/3}(nu) //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeF2Nu(double nu) @@ -194,7 +194,7 @@ class RadiationTools { //! 
= Int_nu^\infty K_{5/3}(y) dy + cst * BesselK_{2/3}(nu) //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeBesselPartsRadiatedPower(double nu, double cst) diff --git a/src/Radiation/Table.h b/src/Radiation/Table.h index 8b74aeeaa..a028d4df3 100644 --- a/src/Radiation/Table.h +++ b/src/Radiation/Table.h @@ -45,7 +45,7 @@ class Table void compute_parameters(); //! get value using linear interpolation at position x -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double get(double x); diff --git a/src/Smilei.cpp b/src/Smilei.cpp index eae1993d9..81ba6c258 100755 --- a/src/Smilei.cpp +++ b/src/Smilei.cpp @@ -20,7 +20,7 @@ #include #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -44,7 +44,7 @@ using namespace std; // MAIN CODE // --------------------------------------------------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #ifdef _OPENACC void initialization_openacc() { @@ -80,7 +80,7 @@ int main( int argc, char *argv[] ) // ------------------------- // Create the OpenACC environment -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC initialization_openacc(); #endif @@ -248,7 +248,7 @@ int main( int argc, char *argv[] ) checkpoint.restartAll( vecPatches, region, &smpi, params ); -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) // CPU only, its too early to sort on GPU vecPatches.initialParticleSorting( params ); #endif @@ -271,7 +271,7 @@ int main( int argc, char *argv[] ) PatchesFactory::createVector( vecPatches, params, &smpi, openPMD, &radiation_tables_, 0 ); -#if !(defined( SMILEI_ACCELERATOR_MODE )) +#if !(defined( SMILEI_ACCELERATOR_GPU )) // CPU only, its too early to sort on GPU vecPatches.initialParticleSorting( params ); #endif @@ -407,7 +407,7 @@ int main( int argc, char *argv[] ) } } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) TITLE( "GPU allocation and copy of the fields and particles" ); // Allocate particle and field arrays // Also copy particle array content on device @@ -685,7 +685,7 @@ int main( int argc, char *argv[] ) } //End omp parallel region if( params.has_load_balancing && params.load_balancing_time_selection->theTimeIsNow( itime ) ) { -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // ERROR( "Load balancing not tested on GPU !" 
); // #endif count_dlb++; @@ -777,7 +777,7 @@ int main( int argc, char *argv[] ) region.clean(); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) vecPatches.cleanDataOnDevice( params, &smpi, &radiation_tables_, &multiphoton_Breit_Wheeler_tables_ ); #endif diff --git a/src/SmileiMPI/SmileiMPI.cpp b/src/SmileiMPI/SmileiMPI.cpp index 4fe93fd03..88e03c864 100755 --- a/src/SmileiMPI/SmileiMPI.cpp +++ b/src/SmileiMPI/SmileiMPI.cpp @@ -763,7 +763,7 @@ void SmileiMPI::isend_species( Patch *patch, int to, int &irequest, int tag, Par irequest ++; } -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) // For the particles for( unsigned int ispec=0; ispec &requests, int tag, bool send_xmax_bc ) { -// #if defined (SMILEI_ACCELERATOR_MODE) +// #if defined (SMILEI_ACCELERATOR_GPU) // isendOnDevice( EM->Ex_, to, tag+irequest, requests[irequest] ); // irequest++; @@ -1745,7 +1745,7 @@ int SmileiMPI::recv_PML(ElectroMagn *EM, Tpml embc, int bcId, int from, int tag void SmileiMPI::recv( ElectroMagn *EM, int from, int &tag, bool recv_xmin_bc ) { -// #if defined (SMILEI_ACCELERATOR_MODE) +// #if defined (SMILEI_ACCELERATOR_GPU) // recvOnDevice( EM->Ex_, from, tag ); // tag++; @@ -2121,7 +2121,7 @@ void SmileiMPI::isend( Field *field, int to, int tag, MPI_Request &request ) } // End isend ( Field ) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) //! Sends the whole Field Device to Device (assuming MPI enables it) void SmileiMPI::isendOnDevice( Field *field, int to, int tag, MPI_Request &request ) { @@ -2194,7 +2194,7 @@ void SmileiMPI::recv( Field *field, int from, int tag ) } // End recv ( Field ) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void SmileiMPI::recvOnDevice( Field *field, int from, int tag ) { @@ -2524,7 +2524,7 @@ void SmileiMPI::eraseBufferParticleTrail( const int ndim, const int istart, cons } -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) template static inline void diff --git a/src/SmileiMPI/SmileiMPI.h b/src/SmileiMPI/SmileiMPI.h index 13cacc416..2785921de 100755 --- a/src/SmileiMPI/SmileiMPI.h +++ b/src/SmileiMPI/SmileiMPI.h @@ -103,7 +103,7 @@ class SmileiMPI //! Sends the whole Field void isend( Field *field, int to, int tag, MPI_Request &request ); //! Sends the whole Field Device to Device (assuming MPI enables it) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void isendOnDevice( Field *field, int to, int tag, MPI_Request &request ); #endif @@ -114,7 +114,7 @@ class SmileiMPI //! Receives the whole Field void recv( Field *field, int from, int tag); //! Receives the whole Field Device to Device (assuming MPI enables it) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void recvOnDevice( Field *field, int from, int tag); #endif @@ -248,7 +248,7 @@ class SmileiMPI //! Erase Particles from istart ot the end in the buffers of thread ithread void eraseBufferParticleTrail( const int ndim, const int istart, const int ithread, bool isAM = false ); -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) //! Map CPU buffers onto the GPU to at least accommodate particle_count //! particles. This method tries to reduce the number of //! 
allocation/deallocation which produces a lot of fragmentation on some diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index 65358f555..089e25f27 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -500,7 +500,7 @@ Species::~Species() } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Prepare the species Current and Rho grids on Device void Species::prepareSpeciesCurrentAndChargeOnDevice( @@ -540,7 +540,7 @@ Species::prepareSpeciesCurrentAndChargeOnDevice( } -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Jx_s[0:Jx_size], \ Jy_s[0:Jy_size], \ Jz_s[0:Jz_size], \ @@ -551,7 +551,7 @@ Species::prepareSpeciesCurrentAndChargeOnDevice( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc loop gang worker vector #endif for( unsigned int i=0 ; icopyFromHostToDevice(); } -#endif // end if SMILEI_ACCELERATOR_MODE +#endif // end if SMILEI_ACCELERATOR_GPU // --------------------------------------------------------------------------------------------------------------------- //! Method calculating the Particle dynamics (interpolation, pusher, projection and more) @@ -700,7 +700,7 @@ void Species::dynamics( double time_dual, if( time_dual>time_frozen_ || Ionize) { // moving particle // Prepare temporary buffers for this iteration -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) smpi->resizeDeviceBuffers( ithread, nDim_field, particles->numberOfParticles() ); @@ -713,7 +713,7 @@ void Species::dynamics( double time_dual, patch->startFineTimer(mBW_timer_id_); -#if defined( SMILEI_OPENACC_MODE) +#if defined( SMILEI_ACCELERATOR_GPU_OACC) static_cast(mBW_pair_particles_[0])->deviceResize( particles->deviceSize() * Multiphoton_Breit_Wheeler_process->getPairCreationSampling(0) ); static_cast(mBW_pair_particles_[0])->resetCellKeys(); static_cast(mBW_pair_particles_[1])->deviceResize( particles->deviceSize() * Multiphoton_Breit_Wheeler_process->getPairCreationSampling(1) ); @@ -726,7 +726,7 @@ void Species::dynamics( double time_dual, patch->stopFineTimer(mBW_timer_id_); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // Make sure some bin preconditions are respected SMILEI_ASSERT( particles->first_index.size() == 1 ); SMILEI_ASSERT( particles->last_index.size() >= 1 ); @@ -832,7 +832,7 @@ void Species::dynamics( double time_dual, // Compression of the bins if necessary if( Multiphoton_Breit_Wheeler_process ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC removeTaggedParticles(smpi, &particles->first_index[0], &particles->last_index[0], @@ -1690,14 +1690,14 @@ void Species::dynamicsImportParticles( double time_dual, Params &params, Patch * // Radiation losses if( Radiate && photon_species_ ) { // If creation of macro-photon, we add them to photon_species -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We first erase empty slots in the buffer of photons // radiation_photons_->cell_keys is used as a mask static_cast(radiated_photons_)->eraseLeavingParticles(); #endif photon_species_->importParticles( params, patch, *radiated_photons_, localDiags, time_dual ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We explicitely clear the device Particles static_cast(radiated_photons_)->deviceClear(); #endif @@ -1709,7 +1709,7 @@ void Species::dynamicsImportParticles( double time_dual, Params &params, Patch *
// Addition of the electron-positron particles for( int k=0; k<2; k++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We first erase empty slots in the buffer of photons // radiation_photons_->cell_keys is used as a mask static_cast(mBW_pair_particles_[k])->eraseLeavingParticles(); @@ -1717,7 +1717,7 @@ void Species::dynamicsImportParticles( double time_dual, Params &params, Patch * mBW_pair_species_[k]->importParticles( params, patch, *mBW_pair_particles_[k], localDiags, time_dual ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We explicitely clear the device Particles static_cast(mBW_pair_particles_[k])->deviceClear(); #endif @@ -1771,7 +1771,7 @@ void Species::computeCharge( ElectroMagn *EMfields, bool old /*=false*/ ) void Species::sortParticles( Params &params ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) // ----------------------------- // GPU version @@ -2096,7 +2096,7 @@ void Species::countSortParticles( Params &params ) // Move all particles from another species to this one void Species::importParticles( Params &params, Patch *patch, Particles &source_particles, vector &localDiags, double time_dual, Ionization *I ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) // --------------------------------------------------- // GPU version // Warning: the GPU version does not handle bin and sorting @@ -2207,7 +2207,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { const int nparts = smpi->dynamics_Epart[ithread].size()/3; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC double *const __restrict__ weight = particles->getPtrWeight(); @@ -2246,7 +2246,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { const int nbin = particles->numberOfBins(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel \ present(Ex[0:nparts],Ey[0:nparts],Ez[0:nparts], \ Bx[0:nparts], By[0:nparts], Bz[0:nparts], \ @@ -2291,7 +2291,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { if (copy_particle_number>0) { -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC particles->overwriteParticle(copy_first_index, particles->last_index[ibin], copy_particle_number, compute_cell_keys ); #else for (auto ipart = 0 ; ipart < copy_particle_number ; ipart ++) { @@ -2346,7 +2346,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { } } -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (thetaold) { for( unsigned int ipart = 0 ; ipart < copy_particle_number ; ipart ++ ) { thetaold[copy_first_index + ipart] = thetaold[particles->last_index[ibin] + ipart]; @@ -2384,7 +2384,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { } } -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end parallel region #endif @@ -2418,7 +2418,7 @@ void Species::removeTaggedParticlesPerBin( // Weight shortcut double *const __restrict__ weight = particles->getPtrWeight(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC double *const __restrict__ position_x = particles->getPtrPosition( 0 ); double *const __restrict__ position_y = nDim_particle > 1 ?
particles->getPtrPosition( 1 ) : nullptr; double *const __restrict__ position_z = nDim_particle > 2 ? particles->getPtrPosition( 2 ) : nullptr; @@ -2436,7 +2436,7 @@ void Species::removeTaggedParticlesPerBin( // Total number of bins / cells const int nbin = particles->numberOfBins(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel \ present(Epart[0:nparts*3],\ Bpart[0:nparts*3], \ @@ -2478,7 +2478,7 @@ void Species::removeTaggedParticlesPerBin( if( ipart < last_photon_index ) { // The last existing photon comes to the position of // the deleted photon -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC particles->overwriteParticle( last_photon_index, ipart, compute_cell_keys ); #else weight[ipart] = weight[last_photon_index]; @@ -2512,7 +2512,7 @@ void Species::removeTaggedParticlesPerBin( } gamma[ipart] = gamma[0*nparts+last_photon_index]; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (thetaold) { thetaold[0*nparts+ipart] = thetaold[0*nparts+last_photon_index]; } @@ -2539,13 +2539,14 @@ void Species::removeTaggedParticlesPerBin( } // if last_index[ibin] > first_index[ibin] } // end loop over the bins -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end parallel region #endif } //! This method removes particles with a negative weight //! when a single bin is used +#ifdef SMILEI_ACCELERATOR_GPU_OACC void Species::removeTaggedParticles( SmileiMPI *smpi, int *const first_index, @@ -2554,8 +2555,6 @@ void Species::removeTaggedParticles( bool compute_cell_keys) { -#ifdef SMILEI_OPENACC_MODE - unsigned int new_n_parts = 0; unsigned int nb_deleted = 0; @@ -2623,7 +2622,7 @@ void Species::removeTaggedParticles( // that will not be erased // Backward loop over the tagged particles to fill holes in the photon particle array (at the bin level only) -//#ifdef SMILEI_OPENACC_MODE +//#ifdef SMILEI_ACCELERATOR_GPU_OACC // #pragma acc loop seq //#endif for( int ipart=last_moving_index-1 ; ipart>=*first_index; ipart-- ) { @@ -2700,9 +2699,9 @@ void Species::removeTaggedParticles( } } // if nparts > 0 +} #endif -} // ------------------------------------------------ // Set position when using restart & moving window diff --git a/src/Species/Species.h b/src/Species/Species.h index 83a2bab9d..d4af3bf9d 100755 --- a/src/Species/Species.h +++ b/src/Species/Species.h @@ -6,7 +6,7 @@ // #include "PyTools.h" #include "Particles.h" -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include "nvidiaParticles.h" #endif #include "Params.h" @@ -382,7 +382,7 @@ class Species return particles->capacity(); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) void allocateParticlesOnDevice(); @@ -566,12 +566,14 @@ class Species //! This method removes particles with a negative weight //! when a single bin is used +#ifdef SMILEI_ACCELERATOR_GPU_OACC void removeTaggedParticles( SmileiMPI *smpi, int *const first_index, int *const last_index, int ithread, bool compute_cell_keys = false); +#endif //! 
Moving window boundary conditions managment void disableXmax(); diff --git a/src/Tools/Pragma.h b/src/Tools/Pragma.h index b1a81cdae..0fb5e1e9d 100644 --- a/src/Tools/Pragma.h +++ b/src/Tools/Pragma.h @@ -31,7 +31,7 @@ #if defined ( SMILEI_ACCELERATOR_GPU_OMP ) #define ATOMIC(mode) \ _Pragma( TOSTRING(omp atomic mode)) -#elif defined ( SMILEI_OPENACC_MODE ) +#elif defined ( SMILEI_ACCELERATOR_GPU_OACC ) #define ATOMIC(mode) \ _Pragma( TOSTRING(acc atomic mode)) #endif diff --git a/src/Tools/gpu.cpp b/src/Tools/gpu.cpp index 7ce000e03..497786096 100644 --- a/src/Tools/gpu.cpp +++ b/src/Tools/gpu.cpp @@ -1,6 +1,6 @@ #include "gpu.h" -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) && defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) && defined( SMILEI_ACCELERATOR_GPU_OACC ) #error "You can not enable both OpenACC and OpenMP GPU support" #endif @@ -29,7 +29,7 @@ #else #error "Asking for OpenMP support without enabling compiler support for OpenMP" #endif -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #if defined( _OPENACC ) #include #else @@ -46,11 +46,12 @@ namespace smilei { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target enter data map( alloc \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc enter data create( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -61,11 +62,12 @@ namespace smilei { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target enter data map( to \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc enter data copyin( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -75,11 +77,12 @@ namespace smilei { const unsigned char* byte_array = static_cast( a_host_pointer ); #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target update to( byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc update device( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -89,11 +92,12 @@ namespace smilei { unsigned char* byte_array = static_cast( a_host_pointer ); #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target update from( byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc update host( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -104,11 +108,12 @@ namespace smilei { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target exit data map( from \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc exit data copyout( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -119,11 +124,12 @@ namespace smilei { #if 
defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target exit data map( delete \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc exit data delete( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -154,7 +160,7 @@ namespace smilei { SMILEI_ASSERT( a_device_pointer != nullptr ); return const_cast( a_device_pointer ); -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) //return const_cast( ::acc_deviceptr( a_host_pointer ) ); return ::acc_deviceptr( const_cast(a_host_pointer) ) ; #else @@ -171,7 +177,7 @@ namespace smilei { a_count * an_object_size, 0, 0, device_num, device_num ) != 0 ) { ERROR( "omp_target_memcpy failed" ); } -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // It seems that the interface of ::acc_memcpy_device does not accept ptr to array of const type ! // https://www.openacc.org/sites/default/files/inline-files/OpenACC.2.7.pdf // void acc_memcpy_device( d_void* dest, d_void* src, size_t bytes ); diff --git a/src/Tools/gpu.h b/src/Tools/gpu.h index 28a8c98da..bc6552986 100644 --- a/src/Tools/gpu.h +++ b/src/Tools/gpu.h @@ -19,7 +19,7 @@ namespace smilei { #define SMILEI_ACCELERATOR_DECLARE_ROUTINE _Pragma( "omp declare target" ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END _Pragma( "omp end declare target" ) #define SMILEI_ACCELERATOR_ATOMIC _Pragma( "omp atomic update" ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE _Pragma( "acc routine seq" ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END #define SMILEI_ACCELERATOR_ATOMIC _Pragma( "acc atomic" ) diff --git a/src/Tools/gpuRandom.h b/src/Tools/gpuRandom.h index 916a7b8f8..bdb9aca59 100644 --- a/src/Tools/gpuRandom.h +++ b/src/Tools/gpuRandom.h @@ -1,7 +1,7 @@ #ifndef GPU_RANDOM #define GPU_RANDOM -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) // #include #include "curand_kernel.h" #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -29,7 +29,7 @@ namespace smilei { { protected: using State = -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) ::curandState_t; #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) // TODO @@ -42,7 +42,7 @@ namespace smilei { public: Random() -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) : a_state_{ 0xDEADBEEFU } #else @@ -53,26 +53,36 @@ namespace smilei { } // Initialization +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) void init( unsigned long long seed, unsigned long long seq, unsigned long long offset ) { -#if defined( SMILEI_OPENACC_MODE ) // Cuda generator initialization ::curand_init( seed, seq, offset, &a_state_ ); + } #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + void init( unsigned long long seed, + unsigned long long , + unsigned long long ) + { // Hip generator initialization // ::hiprand_init( seed, seq, offset, &state ); a_state_ = State{ static_cast( seed ) }; + } #else + void init( unsigned long long seed, + unsigned long long , + unsigned long long ) + { a_state_ = State{ static_cast( seed ) }; -#endif } +#endif // Initialization double uniform() { -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) return ::curand_uniform( &a_state_ ); #elif defined( 
SMILEI_ACCELERATOR_GPU_OMP ) // TODO diff --git a/src/Tools/userFunctions.h b/src/Tools/userFunctions.h index 63753fb20..d9525723d 100755 --- a/src/Tools/userFunctions.h +++ b/src/Tools/userFunctions.h @@ -1,5 +1,5 @@ -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -36,7 +36,7 @@ class userFunctions //! \param array array in which to find the value //! \param elem element to be found //! \param nb_elem number of elements -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif template From da51604bac5a2590f4e04df582ff9d38d223c08a Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Mon, 27 May 2024 14:59:52 +0200 Subject: [PATCH 35/54] update ci --- .gitlab-ci.yml | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f50bfd819..cf3208df7 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,8 +15,7 @@ install: stage: install only: - develop - - particle_exchange - + script: # Force workdir cleaning in case of retried - echo "CI_PIPELINE_ID = " $CI_PIPELINE_ID @@ -34,8 +33,7 @@ compile_default: stage: compile_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -46,8 +44,7 @@ runQuick: stage: run_quick only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -58,8 +55,7 @@ run1D: stage: run_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -71,8 +67,7 @@ run2D: stage: run_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -86,8 +81,7 @@ run3D: stage: run_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -102,8 +96,7 @@ runAM: stage: run_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -115,8 +108,7 @@ runCollisions: stage: run_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation From 2d0474ae533225023478e40ef0a757537d665b7c Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Tue, 28 May 2024 10:49:09 +0200 Subject: [PATCH 36/54] test CI From 83ee20d1a895da93ce092af8627bb009f65f63ab Mon Sep 17 00:00:00 2001 From: Arnaud Beck Date: Tue, 28 May 2024 11:43:27 +0200 Subject: [PATCH 37/54] retest CI --- .gitlab-ci.yml | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cf3208df7..e2efed6dc 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -8,8 +8,8 @@ stages: - compile_debug - compile_no_mpi_threadmultiple - compile_no_openmp - - compile_omptasks - - run_omptasks +# - compile_omptasks +# - run_omptasks install: stage: install @@ -164,21 +164,21 @@ compile_no_openmp: - make clean - python validation/validation.py -k noopenmp -c -v -compile_omptasks: - stage: compile_omptasks - only: - - develop - - script: - - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei - - make clean - - python validation/validation.py -k omptasks -c -v - -run_omptasks: - stage: run_omptasks - only: - - develop - - script: - - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation - - python 
validation.py -k omptasks -b "tst2d_tasks_01_radiation_pressure_acc.py" -m 4 -o 4 -n 1 -v +#compile_omptasks: +# stage: compile_omptasks +# only: +# - develop +# +# script: +# - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei +# - make clean +# - python validation/validation.py -k omptasks -c -v +# +#run_omptasks: +# stage: run_omptasks +# only: +# - develop +# +# script: +# - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation +# - python validation.py -k omptasks -b "tst2d_tasks_01_radiation_pressure_acc.py" -m 4 -o 4 -n 1 -v From b9754d7101c874e68359bb636916cb08f24520e9 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Fri, 31 May 2024 18:08:35 +0200 Subject: [PATCH 38/54] support matplotlib 3.9 --- doc/Sphinx/Use/namelist.rst | 32 +++++++++++++++++++------------- happi/_Utils.py | 5 ++++- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/doc/Sphinx/Use/namelist.rst b/doc/Sphinx/Use/namelist.rst index 6c5eaf2be..a07f19005 100755 --- a/doc/Sphinx/Use/namelist.rst +++ b/doc/Sphinx/Use/namelist.rst @@ -3347,19 +3347,20 @@ for instance:: def my_filter(particles): return (particles.px>-1.)*(particles.px<1.) + (particles.pz>3.) -.. Note:: The ``px``, ``py`` and ``pz`` quantities are not exactly the momenta. - They are actually the velocities multiplied by the lorentz factor, i.e., - :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z`. This is true only - inside the ``filter`` function (not for the output of the diagnostic). - -.. Note:: The ``id`` attribute contains the :doc:`particles identification number`. - This number is set to 0 at the beginning of the simulation. **Only after particles have - passed the filter**, they acquire a positive ``id``. - -.. Note:: For advanced filtration, Smilei provides the quantity ``Main.iteration``, - accessible within the ``filter`` function. Its value is always equal to the current - iteration number of the PIC loop. The current time of the simulation is thus - ``Main.iteration * Main.timestep``. +.. Note:: + + * In the ``filter`` function only, the ``px``, ``py`` and ``pz`` quantities + are not exactly the momenta. + They are actually the velocities multiplied by the lorentz factor, i.e., + :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z`. + This is *not* true for the output of the diagnostic. + * The ``id`` attribute contains the :doc:`particles identification number`. + This number is set to 0 at the beginning of the simulation. **Only after particles have + passed the filter**, they acquire a positive ``id``. + * For advanced filtration, Smilei provides the quantity ``Main.iteration``, + accessible within the ``filter`` function. Its value is always equal to the current + iteration number of the PIC loop. The current time of the simulation is thus + ``Main.iteration * Main.timestep``. .. py:data:: attributes @@ -3372,6 +3373,11 @@ for instance:: (``"chi"``, only for species with radiation losses) or the fields interpolated at their positions (``"Ex"``, ``"Ey"``, ``"Ez"``, ``"Bx"``, ``"By"``, ``"Bz"``). +.. Note:: Here, interpolated fields are normally computed after the Maxwell solver. + They may thus differ by half a timestep from those computed at the middle of the + timestep to push particles. When exact values are needed, use the option + :py:data:`keep_interpolated_fields`. + ---- .. 
rst-class:: experimental diff --git a/happi/_Utils.py b/happi/_Utils.py index 28dd028df..070046786 100755 --- a/happi/_Utils.py +++ b/happi/_Utils.py @@ -42,7 +42,10 @@ def updateMatplotLibColormaps(): if "smilei" in matplotlib.pyplot.colormaps(): return def register(name, d): cmap = matplotlib.colors.LinearSegmentedColormap(name, d, N=256, gamma=1.0) - matplotlib.pyplot.register_cmap(cmap=cmap) + try: + matplotlib.pyplot.register_cmap(cmap=cmap) + except Exception as e: + matplotlib.colormaps.register(cmap) register(u"smilei", { 'red' :((0., 0., 0.), (0.0625 , 0.091, 0.091), (0.09375, 0.118, 0.118), (0.125 , 0.127, 0.127), (0.1875 , 0.135, 0.135), (0.21875, 0.125, 0.125), (0.28125, 0.034, 0.034), (0.3125 , 0.010, 0.010), (0.34375, 0.009, 0.009), (0.4375 , 0.049, 0.049), (0.46875, 0.057, 0.057), (0.5 , 0.058, 0.058), (0.59375, 0.031, 0.031), (0.625 , 0.028, 0.028), (0.65625, 0.047, 0.047), (0.71875, 0.143, 0.143), (0.78125, 0.294, 0.294), (0.84375, 0.519, 0.519), (0.90625, 0.664, 0.664), (0.9375 , 0.760, 0.760), (0.96875, 0.880, 0.880), (1., 1., 1. )), 'green':((0., 0., 0.), (0.21875, 0.228, 0.228), (0.78125, 0.827, 0.827), (0.8125 , 0.852, 0.852), (0.84375, 0.869, 0.869), (0.9375 , 0.937, 0.937), (0.96875, 0.967, 0.967), (1. , 1. , 1. )), From d5eadb44ad81b974c52b9d61a9473903c57f33a8 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Fri, 31 May 2024 18:32:03 +0200 Subject: [PATCH 39/54] Fix recent commit for laser offset --- src/Params/Params.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Params/Params.cpp b/src/Params/Params.cpp index b1fafcb09..69973d104 100755 --- a/src/Params/Params.cpp +++ b/src/Params/Params.cpp @@ -1063,19 +1063,18 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile needs 2 profiles.", LINK_NAMELIST + std::string("#lasers") ); } vector profiles_n; + vector profiles_kept; for( unsigned int i = 0; i < 2; i++ ) { - if( profiles[i] == Py_None ) { - Py_DECREF( profiles[i] ); - profiles.erase( profiles.begin() ); - } else { - profiles_n.push_back( i ); + if( profiles[i] != Py_None ) { + profiles_kept.push_back( profiles[i] ); + profiles_n.push_back( i + 1 ); } } - if( profiles.size() == 0 ) { + if( profiles_kept.size() == 0 ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile cannot be [None, None]", LINK_NAMELIST + std::string("#lasers") ); } - for( unsigned int i=0; i namelistsFiles ) : // Make the propagation happen and write out the file if( ! 
smpi->test_mode ) { - propagateX( profiles, profiles_n, offset, file, keep_n_strongest_modes, angle_z ); + propagateX( profiles_kept, profiles_n, offset, file, keep_n_strongest_modes, angle_z ); } } From 4642c1b79eb8dabb306c5d252d434cd23e9fb931 Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Sun, 2 Jun 2024 01:14:50 +0200 Subject: [PATCH 40/54] Fix: Adapting new 1D GPU implementation to the change in macro names --- src/ElectroMagn/ElectroMagn1D.cpp | 8 ++++---- src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp | 4 ++-- src/ElectroMagnSolver/MA_Solver1D_norm.cpp | 8 ++++---- src/ElectroMagnSolver/MF_Solver1D_Yee.cpp | 4 ++-- src/Field/Field1D.cpp | 14 +++++++------- src/Interpolator/Interpolator1D2Order.cpp | 10 +++++----- src/Projector/Projector1D2OrderGPU.cpp | 10 +++++----- src/Projector/Projector1D2OrderGPU.h | 2 +- src/Projector/Projector1D2OrderGPUKernelCUDAHIP.cu | 4 ++-- src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h | 2 +- src/Projector/Projector2D2OrderGPU.cpp | 2 +- src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h | 1 - src/Projector/ProjectorFactory.h | 6 +++--- 13 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/ElectroMagn/ElectroMagn1D.cpp b/src/ElectroMagn/ElectroMagn1D.cpp index 4891912fb..d90c6ee2e 100755 --- a/src/ElectroMagn/ElectroMagn1D.cpp +++ b/src/ElectroMagn/ElectroMagn1D.cpp @@ -570,7 +570,7 @@ void ElectroMagn1D::centerMagneticFields() // for Bx^(p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBx = Bx_->size(); const int sizeofBy = By_->size(); const int sizeofBz = Bz_->size(); @@ -586,7 +586,7 @@ void ElectroMagn1D::centerMagneticFields() } // for By^(d) & Bz^(d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(Bz1D[0:sizeofBz],Bz1D_m[0:sizeofBz],By1D[0:sizeofBy],By1D_m[0:sizeofBy]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -601,7 +601,7 @@ void ElectroMagn1D::centerMagneticFields() if (use_BTIS3){ double *const By1D_oldBTIS3 = By_mBTIS3->data(); double *const Bz1D_oldBTIS3 = Bz_mBTIS3->data(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofByBTIS3 = By_mBTIS3->size(); const int sizeofBzBTIS3 = Bz_mBTIS3->size(); #pragma acc parallel present(By1D_oldBTIS3[0:sizeofByBTIS3],By1D[0:sizeofBy],Bz1D_oldBTIS3[0:sizeofBzBTIS3],Bz1D[0:sizeofBz]) @@ -610,7 +610,7 @@ void ElectroMagn1D::centerMagneticFields() #pragma omp target #pragma omp teams distribute parallel for #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int i=0 ; igetAmplitude1( pos, time_dual, 0, 0 ); } -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC const int sizeofE1 = E[1]->number_of_points_; const int sizeofE2 = E[2]->number_of_points_; const int sizeofB1 = B[1]->number_of_points_; @@ -102,7 +102,7 @@ void ElectroMagnBC1D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * #endif // Apply Silver-Mueller EM boundary condition at x=xmin or xmax -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E1[0:sizeofE1],E2[0:sizeofE2],B1[0:sizeofB1],B2[0:sizeofB2]) #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target diff --git a/src/ElectroMagnSolver/MA_Solver1D_norm.cpp b/src/ElectroMagnSolver/MA_Solver1D_norm.cpp index 4ef123b2d..a5ceccabe 100755 --- a/src/ElectroMagnSolver/MA_Solver1D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver1D_norm.cpp @@ 
-32,7 +32,7 @@ void MA_Solver1D_norm::operator()( ElectroMagn *fields ) // Solve Maxwell-Ampere // -------------------- // Calculate the electrostatic field ex on the dual grid -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -45,7 +45,7 @@ void MA_Solver1D_norm::operator()( ElectroMagn *fields ) #pragma omp target #pragma omp teams distribute parallel for #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int ix=0 ; ixEy_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; const int sizeofBy = fields->By_->number_of_points_; @@ -43,7 +43,7 @@ void MF_Solver1D_Yee::operator()( ElectroMagn *fields ) #pragma omp target #pragma omp teams distribute parallel for #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int ix=1 ; ixallocateAndCopyFromHostToDevice(); recvFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); @@ -279,7 +279,7 @@ void Field1D::create_sub_fields ( int iDim, int iNeighbor, int ghost_size ) #endif } else if( ghost_size != (int) sendFields_[iDim*2+iNeighbor]->dims_[iDim] ) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU ) ERROR( "To Do GPU : envelope" ); #endif delete sendFields_[iDim*2+iNeighbor]; @@ -313,7 +313,7 @@ void Field1D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) const unsigned field_last = ix + NX - 1; #pragma omp target if( should_manipulate_gpu_memory ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "B") ); @@ -348,7 +348,7 @@ void Field1D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( should_manipulate_gpu_memory ) \ map( tofrom : field [field_first:field_last - field_first] ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); const int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "B" ); @@ -384,7 +384,7 @@ void Field1D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( should_manipulate_gpu_memory ) \ map( to : field [field_first:field_last - field_first] ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( ((name.substr(0,1) == "J") || (name.substr(0,1) == "R") ) && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub )); @@ -419,7 +419,7 @@ void Field1D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( should_manipulate_gpu_memory ) \ map( tofrom : field [field_first:field_last - field_first] ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); int fSize = number_of_points_; bool fieldName( 
name.substr(0,1) == "J" || name.substr(0,1) == "R"); diff --git a/src/Interpolator/Interpolator1D2Order.cpp b/src/Interpolator/Interpolator1D2Order.cpp index dd245bfd1..0af89f3e6 100755 --- a/src/Interpolator/Interpolator1D2Order.cpp +++ b/src/Interpolator/Interpolator1D2Order.cpp @@ -143,7 +143,7 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, const double *const __restrict__ By1D = static_cast( EMfields->By_m )->data(); const double *const __restrict__ Bz1D = static_cast( EMfields->Bz_m )->data(); -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) const int sizeofEx = EMfields->Ex_->size(); const int sizeofEy = EMfields->Ey_->size(); const int sizeofEz = EMfields->Ez_->size(); @@ -163,7 +163,7 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target map( to : i_domain_begin_) is_device_ptr (position_x) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 0 * nparts ) - first_index; @@ -210,7 +210,7 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, delta[0*nparts+ipart] = delta_p[0]; } // end ipart loop - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif @@ -222,7 +222,7 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target map( to : i_domain_begin_) is_device_ptr ( position_x) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index; @@ -286,7 +286,7 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, delta[0*nparts+ipart] = delta_p[0]; } // end ipart loop - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } // end with B-TIS interpolation diff --git a/src/Projector/Projector1D2OrderGPU.cpp b/src/Projector/Projector1D2OrderGPU.cpp index c63223885..19493ef8d 100755 --- a/src/Projector/Projector1D2OrderGPU.cpp +++ b/src/Projector/Projector1D2OrderGPU.cpp @@ -1,6 +1,6 @@ -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include "Projector1D2OrderGPUKernelCUDAHIP.h" #include #include "Tools.h" @@ -23,7 +23,7 @@ Projector1D2OrderGPU::Projector1D2OrderGPU( Params ¶meters, Patch *a_patch ) not_spectral_ = !parameters.is_pxr; dts2_ = parameters.timestep / 2.0; dts4_ = dts2_ / 2.0; -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) x_dimension_bin_count_ = parameters.getGPUBinCount( 1 ); #else ERROR( "Only usable in GPU mode! " ); @@ -33,7 +33,7 @@ Projector1D2OrderGPU::Projector1D2OrderGPU( Params ¶meters, Patch *a_patch ) Projector1D2OrderGPU::~Projector1D2OrderGPU() { } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Project global current densities (EMfields->Jx_/Jy_/Jz_) @@ -216,7 +216,7 @@ void Projector1D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, // Does not compute Rho ! 
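(Throughout this series the umbrella macro SMILEI_ACCELERATOR_MODE becomes SMILEI_ACCELERATOR_GPU, and the OpenACC-specific SMILEI_OPENACC_MODE becomes SMILEI_ACCELERATOR_GPU_OACC, as in the hunk just below. Judging from conditionals replaced elsewhere in the series, where "defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC )" collapses to "defined( SMILEI_ACCELERATOR_GPU )", the intended relationship between the macros is presumably the following. This is a sketch for orientation only; the real definitions come from the build system, which is not part of this excerpt:

    // Sketch only (assumed, not taken from this patch).
    // Backend-specific macros, at most one defined at build time:
    //   SMILEI_ACCELERATOR_GPU_OMP   OpenMP target offload
    //   SMILEI_ACCELERATOR_GPU_OACC  OpenACC (formerly SMILEI_OPENACC_MODE)
    #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC )
        // Umbrella macro (formerly SMILEI_ACCELERATOR_MODE): any GPU backend.
        #define SMILEI_ACCELERATOR_GPU
    #endif

Backend-agnostic code, like the kernel dispatch below, then tests only the umbrella macro, while code that must distinguish OpenMP-target from OpenACC keeps testing the specific ones.)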
-#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) currentAndDensityDepositionKernel1DOnDevice( b_Jx,b_Jy,b_Jz,b_rho, Jx_size, Jy_size, Jz_size, rho_size, @@ -245,7 +245,7 @@ void Projector1D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, } else{ -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) currentDepositionKernel1DOnDevice(Jx_, Jy_, Jz_, EMfields->Jx_->size(), EMfields->Jy_->size(), EMfields->Jz_->size(), particles.getPtrPosition( 0 ), diff --git a/src/Projector/Projector1D2OrderGPU.h b/src/Projector/Projector1D2OrderGPU.h index 7ce78af1e..f35e8e4ee 100755 --- a/src/Projector/Projector1D2OrderGPU.h +++ b/src/Projector/Projector1D2OrderGPU.h @@ -64,7 +64,7 @@ class Projector1D2OrderGPU : public Projector1D int ispec, int icell = 0, int ipart_ref = 0 ) override {}; -/*#if defined( SMILEI_ACCELERATOR_MODE ) +/*#if defined( SMILEI_ACCELERATOR_GPU ) extern "C" void currentDepositionKernel1DOnDevice( double *__restrict__ Jx, diff --git a/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.cu index df3c3dbbc..0a77a63db 100755 --- a/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.cu +++ b/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.cu @@ -68,7 +68,7 @@ // device_particle_charge /* [0:particle_count] */, \ // device_particle_weight /* [0:particle_count] */ ) // #pragma omp teams thread_limit( 64 ) distribute parallel for -// #elif defined( SMILEI_OPENACC_MODE ) +// #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // #pragma acc parallel \ // deviceptr( device_particle_position_x, \ // device_particle_momentum_y, \ @@ -217,7 +217,7 @@ // device_particle_charge /* [0:particle_count] */, \ // device_particle_weight /* [0:particle_count] */ ) // #pragma omp teams thread_limit( 64 ) distribute parallel for -// #elif defined( SMILEI_OPENACC_MODE ) +// #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // #pragma acc parallel \ // deviceptr( device_particle_position_x, \ // device_particle_momentum_y, \ diff --git a/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h index 37cabb963..f5e64e408 100755 --- a/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h @@ -3,7 +3,7 @@ #ifndef Projector1D2OrderGPUKernelCUDAHIP_H #define Projector1D2OrderGPUKernelCUDAHIP_H -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #if defined( __HIP__ ) #include diff --git a/src/Projector/Projector2D2OrderGPU.cpp b/src/Projector/Projector2D2OrderGPU.cpp index 119556fb6..a91a29dde 100755 --- a/src/Projector/Projector2D2OrderGPU.cpp +++ b/src/Projector/Projector2D2OrderGPU.cpp @@ -200,7 +200,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy double, int not_spectral ) { - currentAndDensityDepositionKernelOnDevice( Jx, + currentAndDensityDepositionKernel2DOnDevice( Jx, Jy, Jz, rho, diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h index 6e5fec26c..d789796ab 100755 --- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h @@ -3,7 +3,6 @@ #ifndef Projector2D2OrderGPUKernelCUDAHIP_H #define Projector2D2OrderGPUKernelCUDAHIP_H - #if defined( SMILEI_ACCELERATOR_GPU ) #if defined( __HIP__ ) diff --git a/src/Projector/ProjectorFactory.h b/src/Projector/ProjectorFactory.h index 2d6a94f90..5b1f50e37 100755 --- 
a/src/Projector/ProjectorFactory.h +++ b/src/Projector/ProjectorFactory.h @@ -34,7 +34,7 @@ class ProjectorFactory // 1Dcartesian simulation // --------------- if( ( params.geometry == "1Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { - #if defined( SMILEI_ACCELERATOR_MODE ) + #if defined( SMILEI_ACCELERATOR_GPU ) Proj = new Projector1D2OrderGPU( params, patch ); #else Proj = new Projector1D2Order( params, patch ); @@ -47,7 +47,7 @@ class ProjectorFactory // --------------- else if( ( params.geometry == "2Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { if( !vectorization ) { - #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) + #if defined( SMILEI_ACCELERATOR_GPU ) Proj = new Projector2D2OrderGPU( params, patch ); #else Proj = new Projector2D2Order( params, patch ); @@ -69,7 +69,7 @@ class ProjectorFactory // --------------- else if( ( params.geometry == "3Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { if( !vectorization ) { - #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) + #if defined( SMILEI_ACCELERATOR_GPU ) Proj = new Projector3D2OrderGPU( params, patch ); #else Proj = new Projector3D2Order( params, patch ); From 7323b5bd44c31bc5cfb865a725e4e7648ceb5483 Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Mon, 3 Jun 2024 13:31:24 +0200 Subject: [PATCH 41/54] Fixing error in 1D MA solver introduced with its GPU implementation --- src/ElectroMagnSolver/MA_Solver1D_norm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ElectroMagnSolver/MA_Solver1D_norm.cpp b/src/ElectroMagnSolver/MA_Solver1D_norm.cpp index a5ceccabe..803ffc6cb 100755 --- a/src/ElectroMagnSolver/MA_Solver1D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver1D_norm.cpp @@ -65,8 +65,8 @@ void MA_Solver1D_norm::operator()( ElectroMagn *fields ) #pragma omp simd #endif for( unsigned int ix=0 ; ix Date: Mon, 3 Jun 2024 18:45:16 +0200 Subject: [PATCH 42/54] Fixing typo in 1D SM BC --- src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp index be9945707..ff767bc12 100755 --- a/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp @@ -111,7 +111,7 @@ void ElectroMagnBC1D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * //( *By1D )( iB_ ) = -sign_*Alpha_*( *Ez1D )( iE_ ) + Beta_*( ( *By1D )( iB_old_ )-By_val_ ) + Gamma_*by + By_val_; //( *Bz1D )( iB_ ) = sign_*Alpha_*( *Ey1D )( iE_ ) + Beta_*( ( *Bz1D )( iB_old_ )-Bz_val_ ) + Gamma_*bz + Bz_val_; B1[ iB_ ] = -sign_ * Alpha_ * E2[iE_] + Beta_ * ( B1[iB_old_] - By_val_) + Gamma_ * by + By_val_; - B2[ iB_ ] = -sign_ * Alpha_ * E1[iE_] + Beta_ * ( B2[iB_old_] - Bz_val_) + Gamma_ * bz + Bz_val_; + B2[ iB_ ] = sign_ * Alpha_ * E1[iE_] + Beta_ * ( B2[iB_old_] - Bz_val_) + Gamma_ * bz + Bz_val_; } } From 5fa52043ae845a09e21408108dda8323c01e482a Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Tue, 4 Jun 2024 12:06:14 +0200 Subject: [PATCH 43/54] reversed some changes in the betis part in interpolation 1D order 2 - to be updated --- src/Interpolator/Interpolator1D2Order.cpp | 170 +++++++++++++++------- src/Interpolator/Interpolator1D2Order.h | 40 ----- 2 files changed, 120 insertions(+), 90 deletions(-) diff --git a/src/Interpolator/Interpolator1D2Order.cpp b/src/Interpolator/Interpolator1D2Order.cpp index 0af89f3e6..b21954702 
100755 --- a/src/Interpolator/Interpolator1D2Order.cpp +++ b/src/Interpolator/Interpolator1D2Order.cpp @@ -22,17 +22,16 @@ Interpolator1D2Order::Interpolator1D2Order( Params ¶ms, Patch *patch ) : Int void Interpolator1D2Order::fields( ElectroMagn *EMfields, Particles &particles, int ipart, int nparts, double *ELoc, double *BLoc ) { // Static cast of the electromagnetic fields - Field1D *Ex1D = static_cast( EMfields->Ex_ ); - Field1D *Ey1D = static_cast( EMfields->Ey_ ); - Field1D *Ez1D = static_cast( EMfields->Ez_ ); - Field1D *Bx1D_m = static_cast( EMfields->Bx_m ); - Field1D *By1D_m = static_cast( EMfields->By_m ); - Field1D *Bz1D_m = static_cast( EMfields->Bz_m ); - + Field1D *Ex1D = static_cast( EMfields->Ex_ ); + Field1D *Ey1D = static_cast( EMfields->Ey_ ); + Field1D *Ez1D = static_cast( EMfields->Ez_ ); + Field1D *Bx1D = static_cast( EMfields->Bx_m ); + Field1D *By1D = static_cast( EMfields->By_m ); + Field1D *Bz1D = static_cast( EMfields->Bz_m ); // Particle position (in units of the spatial-step) - double xjn = particles.position( 0, ipart ) * dx_inv_; + double xpn = particles.position( 0, ipart ) * dx_inv_; // Calculate coeffs - coeffs( xjn ); + /*coeffs( xjn ); // Interpolate the fields from the Dual grid : Ex, By, Bz *( ELoc+0*nparts ) = compute( coeffd_, Ex1D, id_ ); @@ -42,7 +41,27 @@ void Interpolator1D2Order::fields( ElectroMagn *EMfields, Particles &particles, // Interpolate the fields from the Primal grid : Ey, Ez, Bx *( ELoc+1*nparts ) = compute( coeffp_, Ey1D, ip_ ); *( ELoc+2*nparts ) = compute( coeffp_, Ez1D, ip_ ); - *( BLoc+0*nparts ) = compute( coeffp_, Bx1D_m, ip_ ); + *( BLoc+0*nparts ) = compute( coeffp_, Bx1D_m, ip_ );*/ + + int idx_p[1], idx_d[1]; + double delta_p[1]; + double coeffxp[3]; + double coeffxd[3]; + + coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); + + // Interpolation of Ex^(d) + ELoc[0*nparts+ipart] = compute( &coeffxd[0], Ex1D, idx_d[0] ); + // Interpolation of Ey^(p) + ELoc[1*nparts+ipart] = compute( &coeffxp[0], Ey1D, idx_p[0] ); + // Interpolation of Ez^(p) + ELoc[2*nparts+ipart] = compute( &coeffxp[0], Ez1D, idx_p[0] ); + // Interpolation of Bx^(p) + BLoc[0*nparts+ipart] = compute( &coeffxp[0], Bx1D, idx_p[0] ); + // Interpolation of By^(d) + BLoc[1*nparts+ipart] = compute( &coeffxd[0], By1D, idx_d[0] ); + // Interpolation of Bz^(d) + BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] ); }//END Interpolator1D2Order @@ -63,9 +82,9 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & Field1D *Ex1D = static_cast( EMfields->Ex_ ); Field1D *Ey1D = static_cast( EMfields->Ey_ ); Field1D *Ez1D = static_cast( EMfields->Ez_ ); - Field1D *Bx1D_m = static_cast( EMfields->Bx_m ); - Field1D *By1D_m = static_cast( EMfields->By_m ); - Field1D *Bz1D_m = static_cast( EMfields->Bz_m ); + Field1D *Bx1D = static_cast( EMfields->Bx_m ); + Field1D *By1D = static_cast( EMfields->By_m ); + Field1D *Bz1D = static_cast( EMfields->Bz_m ); Field1D *Jx1D = static_cast( EMfields->Jx_ ); Field1D *Jy1D = static_cast( EMfields->Jy_ ); Field1D *Jz1D = static_cast( EMfields->Jz_ ); @@ -78,12 +97,13 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & } // Particle position (in units of the spatial-step) - double xjn = particles.position( 0, ipart )*dx_inv_; + double xpn = particles.position( 0, ipart )*dx_inv_; // Calculate coeffs - coeffs( xjn ); + //coeffs( xjn ); int nparts( particles.numberOfParticles() ); + /* // Interpolate the fields from the Dual grid : Ex, By, Bz *( ELoc+0*nparts ) = 
compute( coeffd_, Ex1D, id_ ); *( BLoc+1*nparts ) = compute( coeffd_, By1D_m, id_ ); @@ -92,10 +112,46 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & // Interpolate the fields from the Primal grid : Ey, Ez, Bx *( ELoc+1*nparts ) = compute( coeffp_, Ey1D, ip_ ); *( ELoc+2*nparts ) = compute( coeffp_, Ez1D, ip_ ); - *( BLoc+0*nparts ) = compute( coeffp_, Bx1D_m, ip_ ); + *( BLoc+0*nparts ) = compute( coeffp_, Bx1D_m, ip_ );*/ + + int idx_p[1], idx_d[1]; + double delta_p[1]; + double coeffxp[3]; + double coeffxd[3]; + + coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); + + // Interpolation of Ex^(d) + ELoc[0*nparts+ipart] = compute( &coeffxd[0], Ex1D, idx_d[0] ); + // Interpolation of Ey^(p) + ELoc[1*nparts+ipart] = compute( &coeffxp[0], Ey1D, idx_p[0] ); + // Interpolation of Ez^(p) + ELoc[2*nparts+ipart] = compute( &coeffxp[0], Ez1D, idx_p[0] ); + // Interpolation of Bx^(p) + BLoc[0*nparts+ipart] = compute( &coeffxp[0], Bx1D, idx_p[0] ); + // Interpolation of By^(d) + BLoc[1*nparts+ipart] = compute( &coeffxd[0], By1D, idx_d[0] ); + // Interpolation of Bz^(d) + BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] ); + + // Interpolation of Jx^(d,p) + JLoc->x = compute( &coeffxd[1], Jx1D, idx_d[0] ); + // Interpolation of Jy^(p,d) + JLoc->y = compute( &coeffxp[1], Jy1D, idx_p[0] ); + // Interpolation of Jz^(p,p) + JLoc->z = compute( &coeffxp[1], Jz1D, idx_p[0] ); + // Interpolation of Rho^(p,p) + ( *RhoLoc ) = compute( &coeffxp[1], Rho1D, idx_p[0]); + + if (smpi->use_BTIS3){ + // Interpolation of ByBTIS3^(p,p) + *( BLocyBTIS3+0*nparts ) = compute( &coeffxp[1], By1DBTIS3, idx_p[0]); + // Interpolation of BzBTIS3^(p,d) + *( BLoczBTIS3+0*nparts ) = compute( &coeffxp[1], Bz1DBTIS3, idx_p[0]); + } // Interpolate the fields from the Primal grid : Jy, Jz, Rho - JLoc->y = compute( coeffp_, Jy1D, ip_ ); + /*JLoc->y = compute( coeffp_, Jy1D, ip_ ); JLoc->z = compute( coeffp_, Jz1D, ip_ ); ( *RhoLoc ) = compute( coeffp_, Rho1D, ip_ ); @@ -105,7 +161,7 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & if (smpi->use_BTIS3){ *( BLocyBTIS3+0*nparts ) = compute( &coeffp_[1], By1DBTIS3, ip_ ); *( BLoczBTIS3+0*nparts ) = compute( &coeffp_[1], Bz1DBTIS3, ip_ ); - } + }*/ } @@ -113,12 +169,20 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & void Interpolator1D2Order::oneField( Field **field, Particles &particles, int *istart, int *iend, double *FieldLoc, double *, double *, double * ) { Field1D *F = static_cast( *field ); - double *coeff = F->isDual( 0 ) ? coeffd_ : coeffp_; - int *i = F->isDual( 0 ) ? &id_ : &ip_; + + int idx_p[1], idx_d[1]; + double delta_p[1]; + double coeffxp[3]; + double coeffxd[3]; + + double *coeff = F->isDual( 0 ) ? &coeffxd[1] : &coeffxp[1];//coeffd_ : coeffp_; + int *i = F->isDual( 0 ) ? 
&idx_d[0] : &idx_p[0]; //&id_ : &ip_; for( int ipart=*istart ; ipart<*iend; ipart++ ) { - double xjn = particles.position( 0, ipart )*dx_inv_; - coeffs( xjn ); + double xpn = particles.position( 0, ipart )*dx_inv_; + + coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); + //coeffs( xpn ); FieldLoc[ipart] = compute( coeff, F, *i ); } } @@ -214,18 +278,26 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, #pragma acc exit data delete(this) #endif - } else { // with B-TIS3 interpolation - double *const __restrict__ BypartBTIS3 = smpi->dynamics_Bpart_yBTIS3[ithread].data(); - double *const __restrict__ BzpartBTIS3 = smpi->dynamics_Bpart_zBTIS3[ithread].data(); + }else { + + double *BypartBTIS3 = &( smpi->dynamics_Bpart_yBTIS3[ithread][0] ); + double *BzpartBTIS3 = &( smpi->dynamics_Bpart_zBTIS3[ithread][0] ); //*/ + //double *const __restrict__ BypartBTIS3 = smpi->dynamics_Bpart_yBTIS3[ithread].data(); + //double *const __restrict__ BzpartBTIS3 = smpi->dynamics_Bpart_zBTIS3[ithread].data(); + const double *const __restrict__ By1D_mBTIS3 = static_cast( EMfields->By_mBTIS3 )->data(); const double *const __restrict__ Bz1D_mBTIS3 = static_cast( EMfields->Bz_mBTIS3 )->data(); + + //double *const __restrict__ ELoc = smpi->dynamics_Epart[ithread].data(); + + #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target map( to : i_domain_begin_) is_device_ptr ( position_x) #pragma omp teams distribute parallel for #elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) - size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index; + size_t interpolation_range_size = ( last_index + 0 * nparts ) - first_index; #pragma acc parallel present(ELoc [first_index:interpolation_range_size],\ BLoc [first_index:interpolation_range_size],\ BypartBTIS3 [first_index:interpolation_range_size],\ @@ -243,17 +315,11 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, deviceptr(position_x) \ copyin(d_inv_) #pragma acc loop gang worker vector -#endif - - // would it be possile to just use another #pragma acc parallel present( - // for By1D_mBTIS3 [0:sizeofEz],\ Bz1D_mBTIS3 [0:sizeofEy])\ BypartBTIS3 [first_index:interpolation_range_size],\ - BzpartBTIS3 [first_index:interpolation_range_size],\ - // ? 
- +#endif //*/ for (int ipart=*istart; ipart < *iend; ipart++){ // Normalized particle position - double xpn = position_x[ipart] * dx_inv_;//particles.position( 0, ipart )*dx_inv_; + double xpn = particles.position( 0, ipart )*dx_inv_; // Calculate coeffs int idx_p[1], idx_d[1]; @@ -276,19 +342,21 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, // Interpolation of Bz^(d) BLoc[2*nparts+ipart] = compute( coeffxd, Bz1D, idx_d[0] ); // Interpolation of ByBTIS3^(p) - BypartBTIS3[0*nparts+ipart ] = compute( coeffxp, By1D_mBTIS3, idx_p[0] ); + *( BypartBTIS3+0*nparts ) = compute( coeffxp, By1D_mBTIS3, idx_p[0] ); + // Interpolation of BzBTIS3^(p) + *( BzpartBTIS3+0*nparts ) = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] ); + // Interpolation of ByBTIS3^(p) + //BypartBTIS3[0*nparts+ipart ] = compute( coeffxp, By1D_mBTIS3, idx_p[0] ); // Interpolation of BzBTIS3^(p) - BzpartBTIS3[0*nparts+ipart ] = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] ); - + //BzpartBTIS3[0*nparts+ipart ] = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] ); //Buffering of iol and delta iold[0*nparts+ipart] = idx_p[0]; delta[0*nparts+ipart] = delta_p[0]; - } // end ipart loop #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) - #endif + #endif } // end with B-TIS interpolation } @@ -492,43 +560,45 @@ void Interpolator1D2Order::envelopeAndSusceptibility( ElectroMagn *EMfields, Par // Normalized particle position double xpn = particles.position( 0, ipart )*dx_inv_; + // Calculate coeffs + double coeffxp[3]; + // Indexes of the central nodes - ip_ = round( xpn ); + int ip = round( xpn ); // Declaration and calculation of the coefficient for interpolation double deltax, delta2; - - deltax = xpn - ( double )ip_; + deltax = xpn - ( double )ip; delta2 = deltax*deltax; - coeffp_[0] = 0.5 * ( delta2-deltax+0.25 ); - coeffp_[1] = 0.75 - delta2; - coeffp_[2] = 0.5 * ( delta2+deltax+0.25 ); + coeffxp[0] = 0.5 * ( delta2-deltax+0.25 ); + coeffxp[1] = 0.75 - delta2; + coeffxp[2] = 0.5 * ( delta2+deltax+0.25 ); //!\todo CHECK if this is correct for both primal & dual grids !!! 
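(A quick invariant worth keeping in mind while reviewing these coefficient refactorings: the three quadratic shape factors computed just above always sum to exactly 1, since 0.5*(d*d - d + 0.25) + (0.75 - d*d) + 0.5*(d*d + d + 0.25) = 1 for any d. A self-contained check, standalone code rather than Smilei code:

    // Standalone sanity check of the 2nd-order shape factors used above.
    #include <cassert>
    #include <cmath>

    int main()
    {
        double xpn = 10.3;                    // normalized particle position
        int    ip  = (int)std::round( xpn );  // central primal node: 10
        double d   = xpn - (double)ip;        // distance to that node: 0.3
        double d2  = d * d;
        double c0  = 0.5 * ( d2 - d + 0.25 ); // 0.02
        double c1  = 0.75 - d2;               // 0.66
        double c2  = 0.5 * ( d2 + d + 0.25 ); // 0.32
        assert( std::fabs( c0 + c1 + c2 - 1.0 ) < 1e-12 );
        return 0;
    }

The dual-grid weights obey the same identity, since they use the same polynomial evaluated half a cell away.)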
// First index for summation - ip_ = ip_ - i_domain_begin_; + ip = ip - i_domain_begin_; // ------------------------- // Interpolation of Env_A_abs_^(p) // ------------------------- - *( Env_A_abs_Loc ) = compute( coeffp_, Env_A_abs_1D, ip_ ); //compute( &coeffp_[1], Env_A_abs_1D, ip_ ); + *( Env_A_abs_Loc ) = compute( coeffxp, Env_A_abs_1D, ip ); //compute( &coeffp_[1], Env_A_abs_1D, ip_ ); // ------------------------- // Interpolation of Env_Chi_^(p) // ------------------------- - *( Env_Chi_Loc ) = compute( coeffp_, Env_Chi_1D, ip_ ); //compute( &coeffp_[1], Env_Chi_1D, ip_ ); + *( Env_Chi_Loc ) = compute( coeffxp, Env_Chi_1D, ip ); //compute( &coeffp_[1], Env_Chi_1D, ip_ ); // ------------------------- // Interpolation of Env_E_abs_^(p) // ------------------------- - *( Env_E_abs_Loc ) = compute( coeffp_, Env_E_abs_1D, ip_ ); // compute( &coeffp_[1], Env_E_abs_1D, ip_ ); + *( Env_E_abs_Loc ) = compute( coeffxp, Env_E_abs_1D, ip ); // compute( &coeffp_[1], Env_E_abs_1D, ip_ ); // ------------------------- // Interpolation of Env_Ex_abs_^(p) // ------------------------- - *( Env_Ex_abs_Loc ) = compute( coeffp_, Env_Ex_abs_1D, ip_ ); // compute( &coeffp_[1], Env_Ex_abs_1D, ip_ ); + *( Env_Ex_abs_Loc ) = compute( coeffxp, Env_Ex_abs_1D, ip ); // compute( &coeffp_[1], Env_Ex_abs_1D, ip_ ); } // END Interpolator1D2Order diff --git a/src/Interpolator/Interpolator1D2Order.h b/src/Interpolator/Interpolator1D2Order.h index 9a1b2a9e4..44e6651d4 100755 --- a/src/Interpolator/Interpolator1D2Order.h +++ b/src/Interpolator/Interpolator1D2Order.h @@ -48,34 +48,6 @@ class Interpolator1D2Order final : public Interpolator1D void envelopeFieldForIonization( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, int ipart_ref = 0 ) override final; private: - inline void __attribute__((always_inline)) coeffs( double xjn ) - { - double xjmxi2; - - // Dual - id_ = std::round( xjn + 0.5 ); // index of the central point - xjmxi = xjn - static_cast(id_) + 0.5; // normalized distance to the central node - xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the central node - - // 2nd order interpolation on 3 nodes - coeffd_[0] = 0.5 * ( xjmxi2-xjmxi + 0.25 ); - coeffd_[1] = ( 0.75 - xjmxi2 ); - coeffd_[2] = 0.5 * ( xjmxi2+xjmxi + 0.25 ); - - id_ -= i_domain_begin_; - - // Primal - ip_ = std::round( xjn ); // index of the central point - xjmxi = xjn - static_cast(ip_); // normalized distance to the central node - xjmxi2 = xjmxi * xjmxi; // square of the normalized distance to the central node - - // 2nd order interpolation on 3 nodes - coeffp_[0] = 0.5 * ( xjmxi2 - xjmxi + 0.25 ); - coeffp_[1] = ( 0.75 - xjmxi2 ); - coeffp_[2] = 0.5 * ( xjmxi2 + xjmxi + 0.25 ); - - ip_ -= i_domain_begin_; - } // 2nd order interpolation on 3 nodes SMILEI_ACCELERATOR_DECLARE_ROUTINE @@ -109,18 +81,6 @@ class Interpolator1D2Order final : public Interpolator1D } SMILEI_ACCELERATOR_DECLARE_ROUTINE_END - // Last prim index computed - int ip_; - // Last dual index computed - int id_; - // Last delta computed - double xjmxi; - // Interpolation coefficient on Prim grid - double coeffp_[3]; - // Interpolation coefficient on Dual grid - double coeffd_[3]; - - };//END class #endif From 04564b0819fc841d6165b816aee05889d7cf2be5 Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Tue, 4 Jun 2024 13:00:57 +0200 Subject: [PATCH 44/54] fixing previous commit --- src/Interpolator/Interpolator1D2Order.cpp | 123 ++++++++-------------- 1 file changed, 44 insertions(+), 79 deletions(-) diff --git 
a/src/Interpolator/Interpolator1D2Order.cpp b/src/Interpolator/Interpolator1D2Order.cpp index b21954702..cc0a62cff 100755 --- a/src/Interpolator/Interpolator1D2Order.cpp +++ b/src/Interpolator/Interpolator1D2Order.cpp @@ -8,7 +8,6 @@ #include "Particles.h" #include "LaserEnvelope.h" - using namespace std; Interpolator1D2Order::Interpolator1D2Order( Params ¶ms, Patch *patch ) : Interpolator1D( patch ) @@ -30,18 +29,6 @@ void Interpolator1D2Order::fields( ElectroMagn *EMfields, Particles &particles, Field1D *Bz1D = static_cast( EMfields->Bz_m ); // Particle position (in units of the spatial-step) double xpn = particles.position( 0, ipart ) * dx_inv_; - // Calculate coeffs - /*coeffs( xjn ); - - // Interpolate the fields from the Dual grid : Ex, By, Bz - *( ELoc+0*nparts ) = compute( coeffd_, Ex1D, id_ ); - *( BLoc+1*nparts ) = compute( coeffd_, By1D_m, id_ ); - *( BLoc+2*nparts ) = compute( coeffd_, Bz1D_m, id_ ); - - // Interpolate the fields from the Primal grid : Ey, Ez, Bx - *( ELoc+1*nparts ) = compute( coeffp_, Ey1D, ip_ ); - *( ELoc+2*nparts ) = compute( coeffp_, Ez1D, ip_ ); - *( BLoc+0*nparts ) = compute( coeffp_, Bx1D_m, ip_ );*/ int idx_p[1], idx_d[1]; double delta_p[1]; @@ -61,7 +48,18 @@ void Interpolator1D2Order::fields( ElectroMagn *EMfields, Particles &particles, // Interpolation of By^(d) BLoc[1*nparts+ipart] = compute( &coeffxd[0], By1D, idx_d[0] ); // Interpolation of Bz^(d) - BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] ); + BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] );//*/ + +/* + // Interpolate the fields from the Dual grid : Ex, By, Bz + *( ELoc+0*nparts ) = compute( coeffxd, Ex1D, idx_d[0] ); + *( BLoc+1*nparts ) = compute( coeffxd, By1D, idx_d[0] ); + *( BLoc+2*nparts ) = compute( coeffxd, Bz1D, idx_d[0] ); + + // Interpolate the fields from the Primal grid : Ey, Ez, Bx + *( ELoc+1*nparts ) = compute( coeffxp, Ey1D, idx_p[0] ); + *( ELoc+2*nparts ) = compute( coeffxp, Ez1D, idx_p[0] ); + *( BLoc+0*nparts ) = compute( coeffxp, Bx1D, idx_p[0] );*/ }//END Interpolator1D2Order @@ -79,16 +77,16 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & } // Static cast of the electromagnetic fields - Field1D *Ex1D = static_cast( EMfields->Ex_ ); - Field1D *Ey1D = static_cast( EMfields->Ey_ ); - Field1D *Ez1D = static_cast( EMfields->Ez_ ); - Field1D *Bx1D = static_cast( EMfields->Bx_m ); - Field1D *By1D = static_cast( EMfields->By_m ); - Field1D *Bz1D = static_cast( EMfields->Bz_m ); - Field1D *Jx1D = static_cast( EMfields->Jx_ ); - Field1D *Jy1D = static_cast( EMfields->Jy_ ); - Field1D *Jz1D = static_cast( EMfields->Jz_ ); - Field1D *Rho1D = static_cast( EMfields->rho_ ); + Field1D *Ex1D = static_cast( EMfields->Ex_ ); + Field1D *Ey1D = static_cast( EMfields->Ey_ ); + Field1D *Ez1D = static_cast( EMfields->Ez_ ); + Field1D *Bx1D = static_cast( EMfields->Bx_m ); + Field1D *By1D = static_cast( EMfields->By_m ); + Field1D *Bz1D = static_cast( EMfields->Bz_m ); + Field1D *Jx1D = static_cast( EMfields->Jx_ ); + Field1D *Jy1D = static_cast( EMfields->Jy_ ); + Field1D *Jz1D = static_cast( EMfields->Jz_ ); + Field1D *Rho1D = static_cast( EMfields->rho_ ); Field1D *By1DBTIS3; Field1D *Bz1DBTIS3; if (smpi->use_BTIS3){ @@ -99,69 +97,40 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & // Particle position (in units of the spatial-step) double xpn = particles.position( 0, ipart )*dx_inv_; // Calculate coeffs - //coeffs( xjn ); + int idx_p[1], idx_d[1]; + double delta_p[1]; + double 
coeffxp[3]; + double coeffxd[3]; + + coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); int nparts( particles.numberOfParticles() ); - /* // Interpolate the fields from the Dual grid : Ex, By, Bz - *( ELoc+0*nparts ) = compute( coeffd_, Ex1D, id_ ); - *( BLoc+1*nparts ) = compute( coeffd_, By1D_m, id_ ); - *( BLoc+2*nparts ) = compute( coeffd_, Bz1D_m, id_ ); + *( ELoc+0*nparts ) = compute( coeffxd, Ex1D, idx_d[0] ); + *( BLoc+1*nparts ) = compute( coeffxd, By1D, idx_d[0] ); + *( BLoc+2*nparts ) = compute( coeffxd, Bz1D, idx_d[0] ); // Interpolate the fields from the Primal grid : Ey, Ez, Bx - *( ELoc+1*nparts ) = compute( coeffp_, Ey1D, ip_ ); - *( ELoc+2*nparts ) = compute( coeffp_, Ez1D, ip_ ); - *( BLoc+0*nparts ) = compute( coeffp_, Bx1D_m, ip_ );*/ + *( ELoc+1*nparts ) = compute( coeffxp, Ey1D, idx_p[0] ); + *( ELoc+2*nparts ) = compute( coeffxp, Ez1D, idx_p[0] ); + *( BLoc+0*nparts ) = compute( coeffxp, Bx1D, idx_p[0] );//*/ - int idx_p[1], idx_d[1]; - double delta_p[1]; - double coeffxp[3]; - double coeffxd[3]; - - coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); + // Interpolate the fields from the Primal grid : Jy, Jz, Rho + JLoc->y = compute( coeffxp, Jy1D, idx_p[0] ); + JLoc->z = compute( coeffxp, Jz1D, idx_p[0] ); + ( *RhoLoc ) = compute( coeffxp, Rho1D, idx_p[0] ); - // Interpolation of Ex^(d) - ELoc[0*nparts+ipart] = compute( &coeffxd[0], Ex1D, idx_d[0] ); - // Interpolation of Ey^(p) - ELoc[1*nparts+ipart] = compute( &coeffxp[0], Ey1D, idx_p[0] ); - // Interpolation of Ez^(p) - ELoc[2*nparts+ipart] = compute( &coeffxp[0], Ez1D, idx_p[0] ); - // Interpolation of Bx^(p) - BLoc[0*nparts+ipart] = compute( &coeffxp[0], Bx1D, idx_p[0] ); - // Interpolation of By^(d) - BLoc[1*nparts+ipart] = compute( &coeffxd[0], By1D, idx_d[0] ); - // Interpolation of Bz^(d) - BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] ); - - // Interpolation of Jx^(d,p) - JLoc->x = compute( &coeffxd[1], Jx1D, idx_d[0] ); - // Interpolation of Jy^(p,d) - JLoc->y = compute( &coeffxp[1], Jy1D, idx_p[0] ); - // Interpolation of Jz^(p,p) - JLoc->z = compute( &coeffxp[1], Jz1D, idx_p[0] ); - // Interpolation of Rho^(p,p) - ( *RhoLoc ) = compute( &coeffxp[1], Rho1D, idx_p[0]); + // Interpolate the fields from the Dual grid : Jx + JLoc->x = compute( coeffxd, Jx1D, idx_d[0] ); if (smpi->use_BTIS3){ // Interpolation of ByBTIS3^(p,p) - *( BLocyBTIS3+0*nparts ) = compute( &coeffxp[1], By1DBTIS3, idx_p[0]); + *( BLocyBTIS3+0*nparts ) = compute( &coeffxp[0], By1DBTIS3, idx_p[0]); // Interpolation of BzBTIS3^(p,d) - *( BLoczBTIS3+0*nparts ) = compute( &coeffxp[1], Bz1DBTIS3, idx_p[0]); + *( BLoczBTIS3+0*nparts ) = compute( &coeffxp[0], Bz1DBTIS3, idx_p[0]); } - // Interpolate the fields from the Primal grid : Jy, Jz, Rho - /*JLoc->y = compute( coeffp_, Jy1D, ip_ ); - JLoc->z = compute( coeffp_, Jz1D, ip_ ); - ( *RhoLoc ) = compute( coeffp_, Rho1D, ip_ ); - - // Interpolate the fields from the Dual grid : Jx - JLoc->x = compute( coeffd_, Jx1D, id_ ); - - if (smpi->use_BTIS3){ - *( BLocyBTIS3+0*nparts ) = compute( &coeffp_[1], By1DBTIS3, ip_ ); - *( BLoczBTIS3+0*nparts ) = compute( &coeffp_[1], Bz1DBTIS3, ip_ ); - }*/ } @@ -169,13 +138,11 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & void Interpolator1D2Order::oneField( Field **field, Particles &particles, int *istart, int *iend, double *FieldLoc, double *, double *, double * ) { Field1D *F = static_cast( *field ); - int idx_p[1], idx_d[1]; double delta_p[1]; double coeffxp[3]; double coeffxd[3]; - - double 
*coeff = F->isDual( 0 ) ? &coeffxd[1] : &coeffxp[1];//coeffd_ : coeffp_; + double *coeff = F->isDual( 0 ) ? coeffxd : coeffxp; int *i = F->isDual( 0 ) ? &idx_d[0] : &idx_p[0]; //&id_ : &ip_; for( int ipart=*istart ; ipart<*iend; ipart++ ) { @@ -288,9 +255,7 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, const double *const __restrict__ By1D_mBTIS3 = static_cast( EMfields->By_mBTIS3 )->data(); const double *const __restrict__ Bz1D_mBTIS3 = static_cast( EMfields->Bz_mBTIS3 )->data(); - //double *const __restrict__ ELoc = smpi->dynamics_Epart[ithread].data(); - - +/* #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target map( to : i_domain_begin_) is_device_ptr ( position_x) #pragma omp teams distribute parallel for From 7b237fe2a78b573a324fd72f16fdce3d498f8f81 Mon Sep 17 00:00:00 2001 From: Arnaud Beck Date: Tue, 4 Jun 2024 16:16:45 +0200 Subject: [PATCH 45/54] Update partners --- doc/Sphinx/Overview/partners.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/Sphinx/Overview/partners.rst b/doc/Sphinx/Overview/partners.rst index 69b87e746..87d9d978a 100755 --- a/doc/Sphinx/Overview/partners.rst +++ b/doc/Sphinx/Overview/partners.rst @@ -52,11 +52,11 @@ Partners | | `Maison de la Simulation `_ (MdlS), USR 3441 | | | | + +---------------------------------------------------------------------------------------------------------+ -| | * `Olga Abramkina `_ | -| | * `Julien Dérouillat `_ | +| | * `Olga Abramkina `_ (Developer) | +| | * `Julien Dérouillat `_ (Cofounder) | | | * `Haithem Kallala `_ | -| | * `Mathieu Lobet `_ | -| | * `Charles Prouveur `_ | +| | * `Mathieu Lobet `_ (Developer) | +| | * `Charles Prouveur `_ (Architect) | | | | +------------+---------------------------------------------------------------------------------------------------------+ @@ -67,11 +67,11 @@ Partners | | `Laboratoire pour l'Utilisation des Lasers Intenses `_ (LULI), UMR 7605 | | | | + +-------------------------------------------------------------------------------------------------------------+ -| | * `Mickael Grech `_ | -| | * `Tommaso Vinci `_ | +| | * `Mickael Grech `_ (Founder) | +| | * `Tommaso Vinci `_ (Developer) | | | * `Marco Chiaramello `_ | | | * `Anna Grassi `_ | -| | * `Frédéric Pérez `_ | +| | * `Frédéric Pérez `_ (Community manager, Developer) | | | * `Caterina Riconda `_ | | | | +------------+-------------------------------------------------------------------------------------------------------------+ @@ -83,9 +83,9 @@ Partners | | `Laboratoire Leprince-Ringuet `_ (LLR), UMR 7638 | + +---------------------------------------------------------------------------------------------------------+ | | | -| | * `Arnaud Beck `_ | +| | * `Arnaud Beck `_ (Project Coordinator, Cofounder, Developer) | | | * `Imen Zemzemi `_ | -| | * `Guillaume Bouchard `_ | +| | * `Guillaume Bouchard `_ (Developer) | +------------+---------------------------------------------------------------------------------------------------------+ .. rst-class:: noborder @@ -95,7 +95,7 @@ Partners | | `Laboratoire de Physique des Gaz et des Plasmas `_ (LPGP), UMR 8578 | + +----------------------------------------------------------------------------------------------------------------------+ | | | -| | * `Francesco Massimo `_ | +| | * `Francesco Massimo `_ (Developer) | +------------+----------------------------------------------------------------------------------------------------------------------+ .. 
rst-class:: noborder @@ -105,7 +105,7 @@ Partners | | `Institut du developpement et des ressources en informatique scientifique `_ (IDRIS), UPS 851 | + +----------------------------------------------------------------------------------------------------------------------+ | | | -| | * `Olga Abramkina `_ | +| | * `Olga Abramkina `_ (Developer) | | | * `Marie Flé `_ | +------------+----------------------------------------------------------------------------------------------------------------------+ From 59ce0096400de28d9dbe18732cf52467a8b71cfe Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Tue, 4 Jun 2024 16:44:10 +0200 Subject: [PATCH 46/54] fixes for sphinx >= 5 --- doc/Sphinx/smilei_theme/layout.html | 2 +- doc/Sphinx/smilei_theme/static/smilei_theme.css_t | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/smilei_theme/layout.html b/doc/Sphinx/smilei_theme/layout.html index 592f7e532..1bed82e81 100755 --- a/doc/Sphinx/smilei_theme/layout.html +++ b/doc/Sphinx/smilei_theme/layout.html @@ -97,7 +97,7 @@
diff --git a/doc/Sphinx/smilei_theme/static/smilei_theme.css_t b/doc/Sphinx/smilei_theme/static/smilei_theme.css_t index fdf918810..4de1a7428 100755 --- a/doc/Sphinx/smilei_theme/static/smilei_theme.css_t +++ b/doc/Sphinx/smilei_theme/static/smilei_theme.css_t @@ -172,6 +172,10 @@ a:hover { text-decoration: underline; } +a:visited { + color:{{ theme_main_color_bold }}; color: var(--main_bold); +} + div.body h1, div.body h2, div.body h3, @@ -402,6 +406,14 @@ table.footnote td { padding: 0.3em 0.5em; } +table.noborder { + width: 100%; +} + +table.noborder tr:first-child td:first-child { + width: 7em; +} + table.noborder, table.noborder td { border:0 !important; } From 51edda886db7b25ce74ff8c87915c2406920bce1 Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Tue, 4 Jun 2024 17:34:03 +0200 Subject: [PATCH 47/54] cleaning --- src/Interpolator/Interpolator1D2Order.cpp | 75 ++++++----------------- 1 file changed, 19 insertions(+), 56 deletions(-) diff --git a/src/Interpolator/Interpolator1D2Order.cpp b/src/Interpolator/Interpolator1D2Order.cpp index cc0a62cff..0d192afff 100755 --- a/src/Interpolator/Interpolator1D2Order.cpp +++ b/src/Interpolator/Interpolator1D2Order.cpp @@ -27,9 +27,10 @@ void Interpolator1D2Order::fields( ElectroMagn *EMfields, Particles &particles, Field1D *Bx1D = static_cast( EMfields->Bx_m ); Field1D *By1D = static_cast( EMfields->By_m ); Field1D *Bz1D = static_cast( EMfields->Bz_m ); + // Particle position (in units of the spatial-step) double xpn = particles.position( 0, ipart ) * dx_inv_; - + // Calculate coeffs int idx_p[1], idx_d[1]; double delta_p[1]; double coeffxp[3]; @@ -48,18 +49,7 @@ void Interpolator1D2Order::fields( ElectroMagn *EMfields, Particles &particles, // Interpolation of By^(d) BLoc[1*nparts+ipart] = compute( &coeffxd[0], By1D, idx_d[0] ); // Interpolation of Bz^(d) - BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] );//*/ - -/* - // Interpolate the fields from the Dual grid : Ex, By, Bz - *( ELoc+0*nparts ) = compute( coeffxd, Ex1D, idx_d[0] ); - *( BLoc+1*nparts ) = compute( coeffxd, By1D, idx_d[0] ); - *( BLoc+2*nparts ) = compute( coeffxd, Bz1D, idx_d[0] ); - - // Interpolate the fields from the Primal grid : Ey, Ez, Bx - *( ELoc+1*nparts ) = compute( coeffxp, Ey1D, idx_p[0] ); - *( ELoc+2*nparts ) = compute( coeffxp, Ez1D, idx_p[0] ); - *( BLoc+0*nparts ) = compute( coeffxp, Bx1D, idx_p[0] );*/ + BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] ); }//END Interpolator1D2Order @@ -143,11 +133,10 @@ void Interpolator1D2Order::oneField( Field **field, Particles &particles, int *i double coeffxp[3]; double coeffxd[3]; double *coeff = F->isDual( 0 ) ? coeffxd : coeffxp; - int *i = F->isDual( 0 ) ? &idx_d[0] : &idx_p[0]; //&id_ : &ip_; + int *i = F->isDual( 0 ) ? 
&idx_d[0] : &idx_p[0]; //&id_ : &ip_; for( int ipart=*istart ; ipart<*iend; ipart++ ) { double xpn = particles.position( 0, ipart )*dx_inv_; - coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); //coeffs( xpn ); FieldLoc[ipart] = compute( coeff, F, *i ); @@ -491,9 +480,8 @@ void Interpolator1D2Order::timeCenteredEnvelope( ElectroMagn *EMfields, Particle int idx_p[1], idx_d[1]; double delta_p[1]; double coeffxp[3]; - double coeffxd[3]; - coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); + coeffs( xpn, idx_p, NULL, coeffxp, NULL, delta_p ); // Interpolation of Phi^(p) *( PHI_mpart+0*nparts+ipart ) = compute( coeffxp, Phi_m1D, idx_d[0] ); @@ -525,50 +513,37 @@ void Interpolator1D2Order::envelopeAndSusceptibility( ElectroMagn *EMfields, Par // Normalized particle position double xpn = particles.position( 0, ipart )*dx_inv_; - // Calculate coeffs - double coeffxp[3]; - // Indexes of the central nodes - int ip = round( xpn ); - - // Declaration and calculation of the coefficient for interpolation - double deltax, delta2; - - deltax = xpn - ( double )ip; - delta2 = deltax*deltax; - coeffxp[0] = 0.5 * ( delta2-deltax+0.25 ); - coeffxp[1] = 0.75 - delta2; - coeffxp[2] = 0.5 * ( delta2+deltax+0.25 ); - - - //!\todo CHECK if this is correct for both primal & dual grids !!! - // First index for summation - ip = ip - i_domain_begin_; + int idx_p[1]; + double delta_p[1]; + double coeffxp[3]; + coeffs( xpn, idx_p, NULL, coeffxp, NULL, delta_p ); // ------------------------- // Interpolation of Env_A_abs_^(p) // ------------------------- - *( Env_A_abs_Loc ) = compute( coeffxp, Env_A_abs_1D, ip ); //compute( &coeffp_[1], Env_A_abs_1D, ip_ ); + *( Env_A_abs_Loc ) = compute( coeffxp, Env_A_abs_1D, idx_p[0] ); // ------------------------- // Interpolation of Env_Chi_^(p) // ------------------------- - *( Env_Chi_Loc ) = compute( coeffxp, Env_Chi_1D, ip ); //compute( &coeffp_[1], Env_Chi_1D, ip_ ); + *( Env_Chi_Loc ) = compute( coeffxp, Env_Chi_1D, idx_p[0] ); // ------------------------- // Interpolation of Env_E_abs_^(p) // ------------------------- - *( Env_E_abs_Loc ) = compute( coeffxp, Env_E_abs_1D, ip ); // compute( &coeffp_[1], Env_E_abs_1D, ip_ ); + *( Env_E_abs_Loc ) = compute( coeffxp, Env_E_abs_1D, idx_p[0] ); // ------------------------- // Interpolation of Env_Ex_abs_^(p) // ------------------------- - *( Env_Ex_abs_Loc ) = compute( coeffxp, Env_Ex_abs_1D, ip ); // compute( &coeffp_[1], Env_Ex_abs_1D, ip_ ); + *( Env_Ex_abs_Loc ) = compute( coeffxp, Env_Ex_abs_1D, idx_p[0] ); } // END Interpolator1D2Order void Interpolator1D2Order::envelopeFieldForIonization( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, int ) { + // Static cast of the envelope fields Field1D *Env_Eabs = static_cast( EMfields->Env_E_abs_ ); @@ -577,32 +552,20 @@ void Interpolator1D2Order::envelopeFieldForIonization( ElectroMagn *EMfields, Pa //Loop on bin particles for( int ipart=*istart ; ipart<*iend; ipart++ ) { - int idx_p[1]; - double delta_p[1]; - double coeffxp[3]; - // Normalized particle position double xpn = particles.position( 0, ipart )*dx_inv_; - double delta2; - - // Primal - idx_p[0] = round( xpn ); // index of the central point - delta_p[0] = xpn -( double )idx_p[0]; // normalized distance to the central node - delta2 = pow( delta_p[0], 2 ); // square of the normalized distance to the central node - - // 2nd order interpolation on 3 nodes - coeffxp[0] = 0.5 * ( delta2-delta_p[0]+0.25 ); - coeffxp[1] = ( 0.75-delta2 ); - coeffxp[2] = 0.5 * ( 
delta2+delta_p[0]+0.25 ); - - idx_p[0] -= i_domain_begin_; + int idx_p[1]; + double delta_p[1]; + double coeffxp[3]; + coeffs( xpn, idx_p, NULL, coeffxp, NULL, delta_p ); // --------------------------------- // Interpolation of Env_E_abs^(p) // --------------------------------- ( *Env_Eabs_part )[ipart] = compute( coeffxp, Env_Eabs, idx_p[0] ); + // In 1D the Env_Ex_abs field is always zero } From 13641d46e0f7055818e6560a66ef94f92ca6fd68 Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Tue, 4 Jun 2024 18:06:02 +0200 Subject: [PATCH 48/54] small change --- src/Interpolator/Interpolator1D2Order.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpolator/Interpolator1D2Order.cpp b/src/Interpolator/Interpolator1D2Order.cpp index 0d192afff..3625e98a0 100755 --- a/src/Interpolator/Interpolator1D2Order.cpp +++ b/src/Interpolator/Interpolator1D2Order.cpp @@ -132,7 +132,7 @@ void Interpolator1D2Order::oneField( Field **field, Particles &particles, int *i double delta_p[1]; double coeffxp[3]; double coeffxd[3]; - double *coeff = F->isDual( 0 ) ? coeffxd : coeffxp; + double *coeff = F->isDual( 0 ) ? &coeffxd[0] : &coeffxp[0]; int *i = F->isDual( 0 ) ? &idx_d[0] : &idx_p[0]; //&id_ : &ip_; for( int ipart=*istart ; ipart<*iend; ipart++ ) { From b7c9d9df1b8c9080efaed80784b64b00120d21be Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Wed, 5 Jun 2024 09:57:20 +0200 Subject: [PATCH 49/54] took out st1d_24_cir_plane_wave_BTIS3.py for further investigations --- benchmarks/tst1d_24_cir_plane_wave_BTIS3.py | 95 ------------- src/Interpolator/Interpolator1D2Order.cpp | 144 +++++++++++++------- src/Interpolator/Interpolator1D4Order.h | 86 +++--------- 3 files changed, 114 insertions(+), 211 deletions(-) delete mode 100755 benchmarks/tst1d_24_cir_plane_wave_BTIS3.py diff --git a/benchmarks/tst1d_24_cir_plane_wave_BTIS3.py b/benchmarks/tst1d_24_cir_plane_wave_BTIS3.py deleted file mode 100755 index cab778662..000000000 --- a/benchmarks/tst1d_24_cir_plane_wave_BTIS3.py +++ /dev/null @@ -1,95 +0,0 @@ -# _____________________________________________________________________________ -# -# Electron trajectory in a plane wave -# with a Gaussian temporal profile. -# -# Validation in the relativist regime -# -# _____________________________________________________________________________ - -import math - -# _____________________________________________________________________________ -# Main parameters - -l0 = 2.0*math.pi # laser wavelength -t0 = l0 # optical cicle -Lx = 80*l0 - -n0 = 1e-8 # particle density - -Tsim = 150.*t0 # duration of the simulation -resx = 64. 
# nb of cells in one laser wavelength - -dx = l0/resx # space step -dt = 0.95 * dx # timestep (0.95 x CFL) - -a0 = 5 -start = 0 # Laser start -fwhm = 10*t0 # Gaussian time fwhm -duration = 90*t0 # Laser duration -center = duration*0.5 # Laser profile center - -pusher = "borisBTIS3" - -# Density profile for inital location of the particles -def n0_(x): - if (dxy = compute( coeffxp, Jy1D, idx_p[0] ); - JLoc->z = compute( coeffxp, Jz1D, idx_p[0] ); + JLoc->y = compute( coeffxp, Jy1D, idx_p[0] ); + JLoc->z = compute( coeffxp, Jz1D, idx_p[0] ); ( *RhoLoc ) = compute( coeffxp, Rho1D, idx_p[0] ); // Interpolate the fields from the Dual grid : Jx JLoc->x = compute( coeffxd, Jx1D, idx_d[0] ); if (smpi->use_BTIS3){ - // Interpolation of ByBTIS3^(p,p) - *( BLocyBTIS3+0*nparts ) = compute( &coeffxp[0], By1DBTIS3, idx_p[0]); - // Interpolation of BzBTIS3^(p,d) - *( BLoczBTIS3+0*nparts ) = compute( &coeffxp[0], Bz1DBTIS3, idx_p[0]); + *( BLocyBTIS3+0*nparts ) = compute( coeffxp, By1DBTIS3, idx_p[0] ); + *( BLoczBTIS3+0*nparts ) = compute( coeffxp, Bz1DBTIS3, idx_p[0] ); } - - } // Interpolator on another field than the basic ones @@ -132,13 +152,12 @@ void Interpolator1D2Order::oneField( Field **field, Particles &particles, int *i double delta_p[1]; double coeffxp[3]; double coeffxd[3]; - double *coeff = F->isDual( 0 ) ? &coeffxd[0] : &coeffxp[0]; - int *i = F->isDual( 0 ) ? &idx_d[0] : &idx_p[0]; //&id_ : &ip_; + double *coeff = F->isDual( 0 ) ? coeffxd : coeffxp; + int *i = F->isDual( 0 ) ? &idx_d[0] : &idx_p[0]; for( int ipart=*istart ; ipart<*iend; ipart++ ) { double xpn = particles.position( 0, ipart )*dx_inv_; coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); - //coeffs( xpn ); FieldLoc[ipart] = compute( coeff, F, *i ); } } @@ -389,13 +408,13 @@ void Interpolator1D2Order::fieldsAndEnvelope( ElectroMagn *EMfields, Particles & // Interpolation of Bz^(d) *( Bpart+2*nparts+ipart ) = compute( coeffxd, Bz1D, idx_d[0] ); // Interpolation of Phi^(p) - *( PHIpart+0*nparts+ipart ) = compute( coeffxp, Phi1D, idx_d[0] ); + *( PHIpart+0*nparts+ipart ) = compute( coeffxp, Phi1D, idx_p[0] ); // Interpolation of GradPhix^(p) - *( GradPHIpart+0*nparts+ipart ) = compute( coeffxp, GradPhix1D, idx_d[0] ); + *( GradPHIpart+0*nparts+ipart ) = compute( coeffxp, GradPhix1D, idx_p[0] ); // Interpolation of GradPhiy^(p) - *( GradPHIpart+1*nparts+ipart ) = compute( coeffxp, GradPhiy1D, idx_d[0] ); + *( GradPHIpart+1*nparts+ipart ) = compute( coeffxp, GradPhiy1D, idx_p[0] ); // Interpolation of GradPhiz^(p) - *( GradPHIpart+2*nparts+ipart ) = compute( coeffxp, GradPhiz1D, idx_d[0] ); + *( GradPHIpart+2*nparts+ipart ) = compute( coeffxp, GradPhiz1D, idx_p[0] ); //Buffering of iol and delta *( iold+0*nparts+ipart) = idx_p[0]; @@ -433,17 +452,17 @@ void Interpolator1D2Order::fieldsAndEnvelope( ElectroMagn *EMfields, Particles & // Interpolation of Bz^(d) *( Bpart+2*nparts+ipart ) = compute( coeffxd, Bz1D, idx_d[0] ); // Interpolation of ByBTIS3^(p) - *( BypartBTIS3+0*nparts ) = compute( coeffxp, By1D_mBTIS3, idx_p[0] ); + *( BypartBTIS3+0*nparts) = compute( coeffxp, By1D_mBTIS3, idx_p[0] ); // Interpolation of BzBTIS3^(p) - *( BzpartBTIS3+0*nparts ) = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] ); + *( BzpartBTIS3+0*nparts) = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] ); // Interpolation of Phi^(p) - *( PHIpart+0*nparts+ipart ) = compute( coeffxp, Phi1D, idx_d[0] ); + *( PHIpart+0*nparts+ipart ) = compute( coeffxp, Phi1D, idx_p[0] ); // Interpolation of GradPhix^(p) - *( GradPHIpart+0*nparts+ipart ) = compute( coeffxp, 
GradPhix1D, idx_d[0] ); + *( GradPHIpart+0*nparts+ipart ) = compute( coeffxp, GradPhix1D, idx_p[0] ); // Interpolation of GradPhiy^(p) - *( GradPHIpart+1*nparts+ipart ) = compute( coeffxp, GradPhiy1D, idx_d[0] ); + *( GradPHIpart+1*nparts+ipart ) = compute( coeffxp, GradPhiy1D, idx_p[0] ); // Interpolation of GradPhiz^(p) - *( GradPHIpart+2*nparts+ipart ) = compute( coeffxp, GradPhiz1D, idx_d[0] ); + *( GradPHIpart+2*nparts+ipart ) = compute( coeffxp, GradPhiz1D, idx_p[0] ); //Buffering of iol and delta *( iold+0*nparts+ipart) = idx_p[0]; @@ -480,23 +499,22 @@ void Interpolator1D2Order::timeCenteredEnvelope( ElectroMagn *EMfields, Particle int idx_p[1], idx_d[1]; double delta_p[1]; double coeffxp[3]; + double coeffxd[3]; - coeffs( xpn, idx_p, NULL, coeffxp, NULL, delta_p ); + coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); // Interpolation of Phi^(p) - *( PHI_mpart+0*nparts+ipart ) = compute( coeffxp, Phi_m1D, idx_d[0] ); + *( PHI_mpart+0*nparts+ipart ) = compute( coeffxp, Phi_m1D, idx_p[0] ); // Interpolation of GradPhix^(p) - *( GradPHI_mpart+0*nparts+ipart ) = compute( coeffxp, GradPhix_m1D, idx_d[0] ); + *( GradPHI_mpart+0*nparts+ipart ) = compute( coeffxp, GradPhix_m1D, idx_p[0] ); // Interpolation of GradPhiy^(p) - *( GradPHI_mpart+1*nparts+ipart ) = compute( coeffxp, GradPhiy_m1D, idx_d[0] ); + *( GradPHI_mpart+1*nparts+ipart ) = compute( coeffxp, GradPhiy_m1D, idx_p[0] ); // Interpolation of GradPhiz^(p) - *( GradPHI_mpart+2*nparts+ipart ) = compute( coeffxp, GradPhiz_m1D, idx_d[0] ); + *( GradPHI_mpart+2*nparts+ipart ) = compute( coeffxp, GradPhiz_m1D, idx_p[0] ); //Buffering of iol and delta *( iold+ipart+0*nparts) = idx_p[0]; *( delta+ipart+0*nparts) = delta_p[0]; - - } } // END Interpolator1D2Order @@ -513,37 +531,51 @@ void Interpolator1D2Order::envelopeAndSusceptibility( ElectroMagn *EMfields, Par // Normalized particle position double xpn = particles.position( 0, ipart )*dx_inv_; - // Indexes of the central nodes - int idx_p[1]; - double delta_p[1]; + // Calculate coeffs double coeffxp[3]; - coeffs( xpn, idx_p, NULL, coeffxp, NULL, delta_p ); + + // Indexes of the central nodes + int ip = round( xpn ); + + // Declaration and calculation of the coefficient for interpolation + double deltax, delta2; + + deltax = xpn - ( double )ip; + delta2 = deltax*deltax; + coeffxp[0] = 0.5 * ( delta2-deltax+0.25 ); + coeffxp[1] = 0.75 - delta2; + coeffxp[2] = 0.5 * ( delta2+deltax+0.25 ); + + + //!\todo CHECK if this is correct for both primal & dual grids !!! 
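// The three weights just above are the 2nd-order (quadratic B-spline) shape
// functions S(-1) = 0.5*(delta^2 - delta + 0.25), S(0) = 0.75 - delta^2 and
// S(+1) = 0.5*(delta^2 + delta + 0.25), taken at the normalized distance
// delta in [-1/2, 1/2] from the central node. They sum to 1 for any delta,
// so a constant field is interpolated exactly. A minimal standalone check of
// that property (illustrative sketch, not part of the Smilei sources):

#include <cassert>
#include <cmath>

int main()
{
    for( double delta = -0.5; delta <= 0.5; delta += 0.05 ) {
        const double d2 = delta * delta;
        const double w[3] = { 0.5 * ( d2 - delta + 0.25 ),  // left node
                              0.75 - d2,                     // central node
                              0.5 * ( d2 + delta + 0.25 ) }; // right node
        assert( std::abs( w[0] + w[1] + w[2] - 1.0 ) < 1e-12 ); // partition of unity
    }
    return 0;
}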
+ // First index for summation + ip = ip - i_domain_begin_; // ------------------------- // Interpolation of Env_A_abs_^(p) // ------------------------- - *( Env_A_abs_Loc ) = compute( coeffxp, Env_A_abs_1D, idx_p[0] ); + *( Env_A_abs_Loc ) = compute( coeffxp, Env_A_abs_1D, ip ); //compute( &coeffp_[1], Env_A_abs_1D, ip_ ); // ------------------------- // Interpolation of Env_Chi_^(p) // ------------------------- - *( Env_Chi_Loc ) = compute( coeffxp, Env_Chi_1D, idx_p[0] ); + *( Env_Chi_Loc ) = compute( coeffxp, Env_Chi_1D, ip ); //compute( &coeffp_[1], Env_Chi_1D, ip_ ); // ------------------------- // Interpolation of Env_E_abs_^(p) // ------------------------- - *( Env_E_abs_Loc ) = compute( coeffxp, Env_E_abs_1D, idx_p[0] ); + *( Env_E_abs_Loc ) = compute( coeffxp, Env_E_abs_1D, ip ); // compute( &coeffp_[1], Env_E_abs_1D, ip_ ); // ------------------------- // Interpolation of Env_Ex_abs_^(p) // ------------------------- - *( Env_Ex_abs_Loc ) = compute( coeffxp, Env_Ex_abs_1D, idx_p[0] ); + *( Env_Ex_abs_Loc ) = compute( coeffxp, Env_Ex_abs_1D, ip ); // compute( &coeffp_[1], Env_Ex_abs_1D, ip_ ); + } // END Interpolator1D2Order void Interpolator1D2Order::envelopeFieldForIonization( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, int ) { - // Static cast of the envelope fields Field1D *Env_Eabs = static_cast( EMfields->Env_E_abs_ ); @@ -552,20 +584,32 @@ void Interpolator1D2Order::envelopeFieldForIonization( ElectroMagn *EMfields, Pa //Loop on bin particles for( int ipart=*istart ; ipart<*iend; ipart++ ) { - // Normalized particle position - double xpn = particles.position( 0, ipart )*dx_inv_; - int idx_p[1]; double delta_p[1]; double coeffxp[3]; - coeffs( xpn, idx_p, NULL, coeffxp, NULL, delta_p ); + + // Normalized particle position + double xpn = particles.position( 0, ipart )*dx_inv_; + + double delta2; + + // Primal + idx_p[0] = round( xpn ); // index of the central point + delta_p[0] = xpn -( double )idx_p[0]; // normalized distance to the central node + delta2 = pow( delta_p[0], 2 ); // square of the normalized distance to the central node + + // 2nd order interpolation on 3 nodes + coeffxp[0] = 0.5 * ( delta2-delta_p[0]+0.25 ); + coeffxp[1] = ( 0.75-delta2 ); + coeffxp[2] = 0.5 * ( delta2+delta_p[0]+0.25 ); + + idx_p[0] -= i_domain_begin_; // --------------------------------- // Interpolation of Env_E_abs^(p) // --------------------------------- ( *Env_Eabs_part )[ipart] = compute( coeffxp, Env_Eabs, idx_p[0] ); - // In 1D the Env_Ex_abs field is always zero } diff --git a/src/Interpolator/Interpolator1D4Order.h b/src/Interpolator/Interpolator1D4Order.h index 7bca2b949..0e8831091 100755 --- a/src/Interpolator/Interpolator1D4Order.h +++ b/src/Interpolator/Interpolator1D4Order.h @@ -33,64 +33,12 @@ class Interpolator1D4Order final : public Interpolator1D void envelopeAndSusceptibility( ElectroMagn *EMfields, Particles &particles, int ipart, double *Env_A_abs_Loc, double *Env_Chi_Loc, double *Env_E_abs_Loc, double *Env_Ex_abs_Loc ) override final; private: - inline void __attribute__((always_inline)) coeffs( double xjn ) - { - double xjmxi2, xjmxi3, xjmxi4; - - // Dual - id_ = round( xjn+0.5 ); // index of the central point - xjmxi = xjn -( double )id_+0.5; // normalized distance to the central node - xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the central node - xjmxi3 = xjmxi2*xjmxi; // cube of the normalized distance to the central node - xjmxi4 = xjmxi3*xjmxi; // 4th power of the normalized distance to the 
central node - - // coefficients for the 4th order interpolation on 5 nodes - coeffd_[0] = dble_1_ov_384 - dble_1_ov_48 * xjmxi + dble_1_ov_16 * xjmxi2 - dble_1_ov_12 * xjmxi3 + dble_1_ov_24 * xjmxi4; - coeffd_[1] = dble_19_ov_96 - dble_11_ov_24 * xjmxi + dble_1_ov_4 * xjmxi2 + dble_1_ov_6 * xjmxi3 - dble_1_ov_6 * xjmxi4; - coeffd_[2] = dble_115_ov_192 - dble_5_ov_8 * xjmxi2 + dble_1_ov_4 * xjmxi4; - coeffd_[3] = dble_19_ov_96 + dble_11_ov_24 * xjmxi + dble_1_ov_4 * xjmxi2 - dble_1_ov_6 * xjmxi3 - dble_1_ov_6 * xjmxi4; - coeffd_[4] = dble_1_ov_384 + dble_1_ov_48 * xjmxi + dble_1_ov_16 * xjmxi2 + dble_1_ov_12 * xjmxi3 + dble_1_ov_24 * xjmxi4; - - id_ -= i_domain_begin_; - - // Primal - ip_ = round( xjn ); // index of the central point - xjmxi = xjn -( double )ip_; // normalized distance to the central node - xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the central node - xjmxi3 = xjmxi2*xjmxi; // cube of the normalized distance to the central node - xjmxi4 = xjmxi3*xjmxi; // 4th power of the normalized distance to the central node - - // coefficients for the 4th order interpolation on 5 nodes - coeffp_[0] = dble_1_ov_384 - dble_1_ov_48 * xjmxi + dble_1_ov_16 * xjmxi2 - dble_1_ov_12 * xjmxi3 + dble_1_ov_24 * xjmxi4; - coeffp_[1] = dble_19_ov_96 - dble_11_ov_24 * xjmxi + dble_1_ov_4 * xjmxi2 + dble_1_ov_6 * xjmxi3 - dble_1_ov_6 * xjmxi4; - coeffp_[2] = dble_115_ov_192 - dble_5_ov_8 * xjmxi2 + dble_1_ov_4 * xjmxi4; - coeffp_[3] = dble_19_ov_96 + dble_11_ov_24 * xjmxi + dble_1_ov_4 * xjmxi2 - dble_1_ov_6 * xjmxi3 - dble_1_ov_6 * xjmxi4; - coeffp_[4] = dble_1_ov_384 + dble_1_ov_48 * xjmxi + dble_1_ov_16 * xjmxi2 + dble_1_ov_12 * xjmxi3 + dble_1_ov_24 * xjmxi4; - - ip_ -= i_domain_begin_; - } - inline void coeffs( double xpn, int* idx_p, int* idx_d, double *coeffxp, double *coeffxd, double* delta_p ) { double delta, delta2, delta3, delta4 ; - - // Dual - idx_d[0] = round( xpn+0.5 ); // index of the central point - delta = xpn -( double )idx_d[0]+0.5; // normalized distance to the central node - delta2 = delta*delta; // square of the normalized distance to the central node - delta3 = delta2*delta; // cube of the normalized distance to the central node - delta4 = delta3*delta; // 4th power of the normalized distance to the central node - - // coefficients for the 4th order interpolation on 5 nodes - coeffxd[0] = dble_1_ov_384 - dble_1_ov_48 * delta + dble_1_ov_16 * delta2 - dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; - coeffxd[1] = dble_19_ov_96 - dble_11_ov_24 * delta + dble_1_ov_4 * delta2 + dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; - coeffxd[2] = dble_115_ov_192 - dble_5_ov_8 * delta2 + dble_1_ov_4 * delta4; - coeffxd[3] = dble_19_ov_96 + dble_11_ov_24 * delta + dble_1_ov_4 * delta2 - dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; - coeffxd[4] = dble_1_ov_384 + dble_1_ov_48 * delta + dble_1_ov_16 * delta2 + dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; - - idx_d[0] -= i_domain_begin_; - + + // Primal idx_p[0] = round( xpn ); // index of the central point delta_p[0] = xpn -( double )idx_p[0]; // normalized distance to the central node @@ -106,6 +54,24 @@ class Interpolator1D4Order final : public Interpolator1D coeffxp[4] = dble_1_ov_384 + dble_1_ov_48 * delta_p[0] + dble_1_ov_16 * delta2 + dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; idx_p[0] -= i_domain_begin_; + + if(idx_d){ + // Dual + idx_d[0] = round( xpn+0.5 ); // index of the central point + delta = xpn -( double )idx_d[0]+0.5; // normalized distance to the central node + delta2 = delta*delta; // square of the 
normalized distance to the central node + delta3 = delta2*delta; // cube of the normalized distance to the central node + delta4 = delta3*delta; // 4th power of the normalized distance to the central node + + // coefficients for the 4th order interpolation on 5 nodes + coeffxd[0] = dble_1_ov_384 - dble_1_ov_48 * delta + dble_1_ov_16 * delta2 - dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; + coeffxd[1] = dble_19_ov_96 - dble_11_ov_24 * delta + dble_1_ov_4 * delta2 + dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; + coeffxd[2] = dble_115_ov_192 - dble_5_ov_8 * delta2 + dble_1_ov_4 * delta4; + coeffxd[3] = dble_19_ov_96 + dble_11_ov_24 * delta + dble_1_ov_4 * delta2 - dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; + coeffxd[4] = dble_1_ov_384 + dble_1_ov_48 * delta + dble_1_ov_16 * delta2 + dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; + + idx_d[0] -= i_domain_begin_; + } } double dble_1_ov_384 ; @@ -120,18 +86,6 @@ class Interpolator1D4Order final : public Interpolator1D double dble_115_ov_192 ; double dble_5_ov_8 ; - // Last prim index computed - int ip_; - // Last dual index computed - int id_; - // Last delta computed - double xjmxi; - // Interpolation coefficient on Prim grid - double coeffp_[5]; - // Interpolation coefficient on Dual grid - double coeffd_[5]; - - };//END class #endif From e41df5ea46947b5a4dd5f8b7a649b9a0ee3c186c Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Wed, 5 Jun 2024 14:49:27 +0200 Subject: [PATCH 50/54] Persistent buffers for GPU sorting --- src/Particles/nvidiaParticles.cu | 349 ++++++++++++------------------- src/Particles/nvidiaParticles.h | 36 +--- 2 files changed, 145 insertions(+), 240 deletions(-) diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index 42995603d..f84ed3463 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -313,27 +313,23 @@ namespace detail { particle_to_inject.scatterParticles( particle_container, available_places ); // If there are more imported particles than places, copy the remaining imported particles at the end if( inject_count >= erased_count ) { - particle_container.resize( new_count ); + particle_container.deviceResize( new_count ); particle_container.pasteParticles( &particle_to_inject, initial_count, erased_count ); // If there are more places than imported particles, the remaining places should be filled } else { const auto last_filled = available_places[inject_count]; particle_container.eraseParticlesByPredicate( cellKeyBelow<0>(), last_filled ); - particle_container.resize( new_count ); + particle_container.deviceResize( new_count ); } + particle_to_inject.deviceFree(); // Compute keys of particles computeParticleClusterKey( particle_container, parameters, a_parent_patch ); - // Use particle_to_inject as a buffer - particle_to_inject.softReserve( new_count ); - particle_to_inject.resize( new_count ); + // Sort particles by keys + particle_container.sortParticleByKey(); - // Sort particles using thrust::gather, according to the sorting map - // (particle_to_inject serves as a buffer) - particle_container.sortParticleByKey( particle_to_inject ); - - // Recompute bins + // Recompute bin locations computeBinIndex( particle_container ); } @@ -533,13 +529,7 @@ nvidiaParticles::~nvidiaParticles() { } } -void nvidiaParticles::resizeDimensions( unsigned int nDim ) -{ - nvidia_position_.resize( nDim ); - nvidia_momentum_.resize( 3 ); -} - -void nvidiaParticles::softReserve( unsigned int particle_count, float growth_factor ) +void nvidiaParticles::deviceReserve( 
unsigned int particle_count, float growth_factor ) { if( particle_count <= deviceCapacity() ) { // Dont reserve, for now we have enough capacity. @@ -548,23 +538,12 @@ void nvidiaParticles::softReserve( unsigned int particle_count, float growth_fac const unsigned int new_capacity = static_cast( particle_count * growth_factor ); - for( unsigned int idim = 0; idim < nvidia_position_.size(); idim++ ) { - nvidia_position_[idim].reserve( new_capacity ); - } - - for( unsigned int idim = 0; idim < 3; idim++ ) { - nvidia_momentum_[idim].reserve( new_capacity ); - } - - nvidia_weight_.reserve( new_capacity ); - nvidia_charge_.reserve( new_capacity ); - - if( has_quantum_parameter ) { - nvidia_chi_.reserve( new_capacity ); + for( auto prop: nvidia_double_prop_) { + prop->reserve( new_capacity ); } - if( has_Monte_Carlo_process ) { - nvidia_tau_.reserve( new_capacity ); + for( auto prop: nvidia_short_prop_ ) { + prop->reserve( new_capacity ); } if( tracked ) { @@ -572,138 +551,72 @@ void nvidiaParticles::softReserve( unsigned int particle_count, float growth_fac } nvidia_cell_keys_.reserve( new_capacity ); -} - -void nvidiaParticles::reserve( unsigned int particle_count ) -{ - for( unsigned int idim = 0; idim < nvidia_position_.size(); idim++ ) { - nvidia_position_[idim].reserve( particle_count ); - } - - for( unsigned int idim = 0; idim < 3; idim++ ) { - nvidia_momentum_[idim].reserve( particle_count ); - } - - nvidia_weight_.reserve( particle_count ); - nvidia_charge_.reserve( particle_count ); - if( has_quantum_parameter ) { - nvidia_chi_.reserve( particle_count ); + for( auto &v: double_buffers_ ) { + v.reserve( new_capacity ); } - - if( has_Monte_Carlo_process ) { - nvidia_tau_.reserve( particle_count ); + for( auto &v: short_buffers_ ) { + v.reserve( new_capacity ); } - - if( tracked ) { - nvidia_id_.reserve( particle_count ); + for( auto &v: uint64_buffers_ ) { + v.reserve( new_capacity ); } - nvidia_cell_keys_.reserve( particle_count ); } -void nvidiaParticles::resize( unsigned int particle_count ) +void nvidiaParticles::deviceFree() { - - // TODO(Etienne M): Use non-initializing vector/allocator (dont pay the cost - // of what you dont use) ? 
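// deviceReserve() above grows the device allocation only when the requested
// count exceeds the current capacity, and then over-allocates by
// growth_factor (1.3 by default) so that a sequence of slightly larger
// requests does not trigger a reallocation every time. The same policy on a
// host vector, as a sketch (hypothetical helper, not the Smilei API):

#include <cstddef>
#include <vector>

template <typename T>
void soft_reserve( std::vector<T> &v, std::size_t count, float growth_factor = 1.3f )
{
    if( count <= v.capacity() ) {
        return; // enough capacity already: do not touch the allocation
    }
    v.reserve( static_cast<std::size_t>( count * growth_factor ) );
}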
-
-    for( int idim = 0; idim < nvidia_position_.size(); idim++ ) {
-        nvidia_position_[idim].resize( particle_count );
-    }
-
-    for( int idim = 0; idim < 3; idim++ ) {
-        nvidia_momentum_[idim].resize( particle_count );
+    for( auto prop: nvidia_double_prop_) {
+        thrust::device_vector<double>().swap( *prop );
     }
 
-    nvidia_weight_.resize( particle_count );
-    nvidia_charge_.resize( particle_count );
-
-    if( has_quantum_parameter ) {
-        nvidia_chi_.resize( particle_count );
-    }
-
-    if( has_Monte_Carlo_process ) {
-        nvidia_tau_.resize( particle_count );
+    for( auto prop: nvidia_short_prop_ ) {
+        thrust::device_vector<short>().swap( *prop );
     }
 
     if( tracked ) {
-        nvidia_id_.resize( particle_count );
-    }
-
-    nvidia_cell_keys_.resize( particle_count );
-
-    gpu_nparts_ = particle_count;
-}
-
-void nvidiaParticles::free()
-{
-    for( auto& a_vector : nvidia_position_ ) {
-        thrust::device_vector<double> a_dummy_vector{};
-        std::swap( a_vector, a_dummy_vector );
+        thrust::device_vector<uint64_t>().swap( nvidia_id_ );
     }
 
-    for( auto& a_vector : nvidia_momentum_ ) {
-        thrust::device_vector<double> a_dummy_vector{};
-        std::swap( a_vector, a_dummy_vector );
-    }
+    thrust::device_vector<int>().swap( nvidia_cell_keys_ );
 
-    {
-        thrust::device_vector<double> a_dummy_vector{};
-        std::swap( nvidia_weight_, a_dummy_vector );
+    for( auto &v: double_buffers_ ) {
+        thrust::device_vector<double>().swap( v );
     }
-
-    {
-        thrust::device_vector<short> a_dummy_vector{};
-        std::swap( nvidia_charge_, a_dummy_vector );
+    for( auto &v: short_buffers_ ) {
+        thrust::device_vector<short>().swap( v );
     }
-
-    if( has_quantum_parameter ) {
-        thrust::device_vector<double> a_dummy_vector{};
-        std::swap( nvidia_chi_, a_dummy_vector );
-    }
-
-    if( has_Monte_Carlo_process ) {
-        thrust::device_vector<double> a_dummy_vector{};
-        std::swap( nvidia_tau_, a_dummy_vector );
-    }
-
-    if( tracked ) {
-        thrust::device_vector<uint64_t> a_dummy_vector{};
-        std::swap( nvidia_id_, a_dummy_vector );
-    }
-
-    {
-        thrust::device_vector<int> a_dummy_vector{};
-        std::swap( nvidia_cell_keys_, a_dummy_vector );
+    for( auto &v: uint64_buffers_ ) {
+        thrust::device_vector<uint64_t>().swap( v );
     }
 
     gpu_nparts_ = 0;
 }
 
-// ---------------------------------------------------------------------------------------------------------------------
-//! Resize particle vectors
-// ---------------------------------------------------------------------------------------------------------------------
 void nvidiaParticles::deviceResize( unsigned int new_size )
 {
-    for( unsigned int iprop=0 ; iprop<nvidia_double_prop_.size() ; iprop++ ) {
-        nvidia_double_prop_[iprop]->resize( new_size );
+    for( auto prop: nvidia_double_prop_ ) {
+        prop->resize( new_size );
     }
 
-    for( unsigned int iprop=0 ; iprop<nvidia_short_prop_.size() ; iprop++ ) {
-        nvidia_short_prop_[iprop]->resize( new_size );
+    for( auto prop: nvidia_short_prop_ ) {
+        prop->resize( new_size );
     }
-    //
-    // for( unsigned int iprop=0 ; iprop<nvidia_uint64_prop_.size() ; iprop++ ) {
-    //     nvidia_uint64_prop_[iprop]->resize( new_size );
-    // }
 
     if( tracked ) {
         nvidia_id_.resize( new_size );
     }
 
     nvidia_cell_keys_.resize( new_size );
 
+    for( auto &v: double_buffers_ ) {
+        v.resize( new_size );
+    }
+    for( auto &v: short_buffers_ ) {
+        v.resize( new_size );
+    }
+    for( auto &v: uint64_buffers_ ) {
+        v.resize( new_size );
+    }
+
     gpu_nparts_ = new_size;
 }
 
 void nvidiaParticles::deviceClear()
 {
-    for( unsigned int iprop = 0; iprop < nvidia_double_prop_.size(); iprop++ ) {
-        nvidia_double_prop_[iprop]->clear();
+    for( auto prop: nvidia_double_prop_ ) {
+        prop->clear();
     }
 
-    for( unsigned int iprop = 0; iprop < nvidia_short_prop_.size(); iprop++ ) {
-        nvidia_short_prop_[iprop]->clear();
+    for( auto prop: nvidia_short_prop_ ) {
+        prop->clear();
     }
 
     // TODO(Etienne M): Clear cell keys too ?
-    if (tracked) {
+    if( tracked ) {
         nvidia_id_.clear();
     }
 
+    for( auto &v: double_buffers_ ) {
+        v.clear();
+    }
+    for( auto &v: short_buffers_ ) {
+        v.clear();
+    }
+    for( auto &v: uint64_buffers_ ) {
+        v.clear();
+    }
+
     gpu_nparts_ = 0;
 }
 
@@ -748,23 +671,18 @@ void nvidiaParticles::initializeDataOnDevice()
     // The world shall end if we call this function multiple times
     SMILEI_ASSERT( nvidia_double_prop_.empty() );
 
-    const auto kPositionDimension = Position.size();
-    // We sure that we have as many say, position dimension as the base class. 
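// The code below relies on the same structure-of-arrays layout as the host
// Particles class: each named property vector is also registered in
// nvidia_double_prop_ or nvidia_short_prop_, so operations over "all
// properties" (resize, clear, swap, gather) are single loops instead of one
// statement per member. A reduced sketch of that pattern (hypothetical
// names, not the Smilei classes):

#include <cstddef>
#include <vector>

struct ParticleArrays {
    std::vector<double> x, px, weight;              // named storage
    std::vector<std::vector<double>*> double_props; // registry pointing at the members above

    ParticleArrays() : double_props{ &x, &px, &weight } {}

    void resizeAll( std::size_t n )
    {
        for( auto *prop : double_props ) {
            prop->resize( n ); // one loop covers every registered property
        }
    }
};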
- resizeDimensions( kPositionDimension ); + nvidia_position_.resize( Position.size() ); + nvidia_momentum_.resize( 3 ); // Initialize the list of pointers - - for( unsigned int i = 0; i < kPositionDimension; i++ ) { - nvidia_double_prop_.push_back( &nvidia_position_[i] ); + for( auto &pos: nvidia_position_ ) { + nvidia_double_prop_.push_back( &pos ); } - - for( unsigned int i = 0; i < 3; i++ ) { - nvidia_double_prop_.push_back( &nvidia_momentum_[i] ); + for( auto &mom: nvidia_momentum_ ) { + nvidia_double_prop_.push_back( &mom ); } - nvidia_double_prop_.push_back( &nvidia_weight_ ); - nvidia_short_prop_.push_back( &nvidia_charge_ ); // Quantum parameter (for QED effects): @@ -781,9 +699,9 @@ void nvidiaParticles::initializeDataOnDevice() nvidia_double_prop_.push_back( &nvidia_tau_ ); } - const auto kHostParticleCount = Position[0].size(); + const auto hostParticleCount = Position[0].size(); - if( kHostParticleCount == 0 ) { + if( hostParticleCount == 0 ) { // Should we reserve some space ? // reserve( 100 ); } else { @@ -805,6 +723,12 @@ void nvidiaParticles::initializeDataOnDevice() // setHostBinIndex(); } else { + // Allocate buffers that are necessary for sorting particles with binning + double_buffers_.resize( nvidia_double_prop_.size() ); + short_buffers_ .resize( nvidia_short_prop_ .size() ); + uint64_buffers_.resize( 2 ); + deviceResize( gpu_nparts_ ); // resizes the buffers + // At this point, a copy of the host particles and last_index is on the // device and we know we support the space dimension. @@ -832,7 +756,7 @@ void nvidiaParticles::initializeIDsOnDevice() // ------------------------------------------------------------------------------------------------- void nvidiaParticles::copyFromHostToDevice() { - resize( Position[0].size() ); + deviceResize( Position[0].size() ); for( int idim = 0; idim < Position.size(); idim++ ) { thrust::copy( Position[idim].begin(), Position[idim].end(), nvidia_position_[idim].begin() ); @@ -841,7 +765,6 @@ void nvidiaParticles::copyFromHostToDevice() for( int idim = 0; idim < Momentum.size(); idim++ ) { thrust::copy( Momentum[idim].begin(), Momentum[idim].end(), nvidia_momentum_[idim].begin() ); } - thrust::copy( Weight.begin(), Weight.end(), nvidia_weight_.begin() ); thrust::copy( Charge.begin(), Charge.end(), nvidia_charge_.begin() ); @@ -921,7 +844,7 @@ void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pre // Resize destination buffer (copy_if does not resize) nvidiaParticles* const dest = static_cast( buffer ); - dest->resize( nparts_to_copy ); + dest->deviceResize( nparts_to_copy ); if( nparts_to_copy ) { // Copy the particles to the destination @@ -951,7 +874,7 @@ int nvidiaParticles::addParticles( Particles* particles_to_inject ) { const auto nparts = gpu_nparts_; nvidiaParticles* to_inject = static_cast( particles_to_inject ); - resize( nparts + to_inject->gpu_nparts_ ); + deviceResize( nparts + to_inject->gpu_nparts_ ); pasteParticles( to_inject, nparts, 0 ); return to_inject->gpu_nparts_; } @@ -1009,7 +932,7 @@ void nvidiaParticles::pasteParticles( nvidiaParticles* particles_to_inject, size int nvidiaParticles::eraseLeavingParticles() { const auto nremoved = eraseParticlesByPredicate( cellKeyBelow<0>(), 0 ); - resize( gpu_nparts_ - nremoved ); + deviceResize( gpu_nparts_ - nremoved ); return nremoved; } @@ -1023,12 +946,12 @@ int nvidiaParticles::eraseParticlesByPredicate( Predicate pred, size_t offset ) // Copy the particles to the destination // Using more memory, we could use the faster remove_copy_if // 
NOTE: remove_if is stable. - for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) { - const auto in = nvidia_double_prop_[ip]->begin(); + for( auto prop: nvidia_double_prop_ ) { + const auto in = prop->begin(); thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in + offset, in + gpu_nparts_, keys + offset, pred ); } - for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) { - const auto in = nvidia_short_prop_[ip]->begin(); + for( auto prop: nvidia_short_prop_ ) { + const auto in = prop->begin(); thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in + offset, in + gpu_nparts_, keys + offset, pred ); } if( tracked ) { @@ -1049,29 +972,22 @@ void nvidiaParticles::createParticles( int n_additional_particles ) { int n_particles = gpu_nparts_; int new_size = n_particles + n_additional_particles; - for( unsigned int iprop=0 ; ipropbegin() + n_particles, prop->begin() + new_size, 0); } - - for( unsigned int iprop=0 ; ipropbegin() + n_particles, prop->begin() + new_size, 0); } - - // for( unsigned int iprop=0 ; iprop index( gpu_nparts_ ); - thrust::sequence( thrust::device, index.begin(), index.end() ); - thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() ); - - // Sort particles using thrust::gather, according to the sorting map - thrust::device_vector buffer( gpu_nparts_ ); - for( auto prop: nvidia_double_prop_ ) { - thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer.begin() ); - prop->swap( buffer ); - } - buffer.clear(); - thrust::device_vector buffer_short( gpu_nparts_ ); - for( auto prop: nvidia_short_prop_ ) { - thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer_short.begin() ); - prop->swap( buffer_short ); - } - buffer_short.clear(); - if( tracked ) { - thrust::device_vector buffer_uint64( gpu_nparts_ ); - thrust::gather( thrust::device, index.begin(), index.end(), nvidia_id_.begin(), buffer_uint64.begin() ); - nvidia_id_.swap( buffer_uint64 ); - buffer_uint64.clear(); - } -} +// //! Sort by cell_keys_ +// //! This version synchronizes for every vector, but uses less buffers +// void nvidiaParticles::sortParticleByKey() +// { +// // Make a sorting map using the cell keys (like numpy.argsort) +// thrust::device_vector index( gpu_nparts_ ); +// thrust::sequence( thrust::device, index.begin(), index.end() ); +// thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() ); +// +// // Sort particles using thrust::gather, according to the sorting map +// thrust::device_vector buffer( gpu_nparts_ ); +// for( auto prop: nvidia_double_prop_ ) { +// thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer.begin() ); +// prop->swap( buffer ); +// } +// buffer.clear(); +// thrust::device_vector buffer_short( gpu_nparts_ ); +// for( auto prop: nvidia_short_prop_ ) { +// thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer_short.begin() ); +// prop->swap( buffer_short ); +// } +// buffer_short.clear(); +// if( tracked ) { +// thrust::device_vector buffer_uint64( gpu_nparts_ ); +// thrust::gather( thrust::device, index.begin(), index.end(), nvidia_id_.begin(), buffer_uint64.begin() ); +// nvidia_id_.swap( buffer_uint64 ); +// buffer_uint64.clear(); +// } +// } //! Sort by cell_keys_ //! 
This version is asynchronous, but requires a buffer of equal size to be provided -void nvidiaParticles::sortParticleByKey( nvidiaParticles& buffer ) +void nvidiaParticles::sortParticleByKey() { // Make a sorting map using the cell keys (like numpy.argsort) - thrust::device_vector index( gpu_nparts_ ); + thrust::device_vector & index = uint64_buffers_[1]; thrust::sequence( thrust::device, index.begin(), index.end() ); thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() ); // Sort particles using thrust::gather, according to the sorting map for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) { - thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_double_prop_[ip]->begin(), buffer.nvidia_double_prop_[ip]->begin() ); + thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_double_prop_[ip]->begin(), double_buffers_[ip].begin() ); } for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) { - thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_short_prop_[ip]->begin(), buffer.nvidia_short_prop_[ip]->begin() ); + thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_short_prop_[ip]->begin(), short_buffers_[ip].begin() ); } if( tracked ) { - thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_id_.begin(), buffer.nvidia_id_.begin() ); + thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_id_.begin(), uint64_buffers_[0].begin() ); } SMILEI_ACCELERATOR_DEVICE_SYNC(); - swap( buffer ); + // Swap properties with their buffer + for( int iprop = 0; iprop < nvidia_double_prop_.size(); iprop++ ) { + nvidia_double_prop_[iprop]->swap( double_buffers_[iprop] ); + } + for( int iprop = 0; iprop < nvidia_short_prop_.size(); iprop++ ) { + nvidia_short_prop_[iprop]->swap( short_buffers_[iprop] ); + } + if( tracked ) { + nvidia_id_.swap( uint64_buffers_[0] ); + } } @@ -1227,7 +1152,7 @@ void nvidiaParticles::naiveImportAndSortParticles( nvidiaParticles* particles_to // Inject newly arrived particles in particles_to_inject const size_t current_size = gpu_nparts_; - resize( current_size + particles_to_inject->size() ); + deviceResize( current_size + particles_to_inject->size() ); pasteParticles( particles_to_inject, current_size, 0 ); particles_to_inject->clear(); } diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index 37b3fc18d..19c20b70d 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -34,33 +34,20 @@ class nvidiaParticles : public Particles //! Destructor for nvidiaParticles ~nvidiaParticles(); - //! Allocate the right amount of position and momentum dimensions - void resizeDimensions( unsigned int nDim ); - //! Reserve space for (particle_count * growth_factor) particles only if //! particle_count >= deviceCapacity(). Must be called after //! allocateDimensions() - void softReserve( unsigned int particle_count, float growth_factor = 1.3F ); - - //! Reserve space for particle_count particles. Must be called after - //! allocateDimensions() - void reserve( unsigned int particle_count ); - - //! Allocate particle_count particles. Must be called after - //! allocateDimensions() - //! Set the size (deviceSize) of nvidiaParticles to particle_count. - //! - void resize( unsigned int particle_count ); + void deviceReserve( unsigned int particle_count, float growth_factor = 1.3F ); //! 
Assures that the memory held by the nvidia_[position|momentum|weight|
    //! charge|chi|tau|cell_keys]_ is freed. This is not something you can
    //! achieve via a naive resize.
    //! The pointers in nvidia_[double|short]_prop_ are not invalidated.
    //!
-    void free();
+    void deviceFree();
 
     //! Resize Particle vectors on device
-    void deviceResize(unsigned int new_size);
+    void deviceResize( unsigned int new_size );
 
     //! Remove all particles
     void deviceClear();
@@ -112,18 +99,6 @@ class nvidiaParticles : public Particles
         return thrust::raw_pointer_cast( nvidia_id_.data() );
     };
 
-    void swap( nvidiaParticles & p ) {
-        for( int iprop = 0; iprop < nvidia_double_prop_.size(); iprop++ ) {
-            nvidia_double_prop_[iprop]->swap( *p.nvidia_double_prop_[iprop] );
-        }
-        for( int iprop = 0; iprop < nvidia_short_prop_.size(); iprop++ ) {
-            nvidia_short_prop_[iprop]->swap( *p.nvidia_short_prop_[iprop] );
-        }
-        if( tracked ) {
-            nvidia_id_.swap( p.nvidia_id_ );
-        }
-    }
-
     // -----------------------------------------------------------------------------
     //! Move leaving particles to the buffers
     // -----------------------------------------------------------------------------
@@ -253,6 +228,11 @@ class nvidiaParticles : public Particles
     //! List of short* arrays
     std::vector<thrust::device_vector<short>*> nvidia_short_prop_;
 
+    //! Buffers for sorting particles
+    std::vector<thrust::device_vector<double>> double_buffers_;
+    std::vector<thrust::device_vector<short>> short_buffers_;
+    std::vector<thrust::device_vector<uint64_t>> uint64_buffers_;
+
     const Params* parameters_;
     //! We are interested in having the patch coordinates. This allows us to
     //! compute a bin index relative to the patch which in turns, makes the bin

From d204c5b205cdb4d75206102e19d56e39e2d10a2a Mon Sep 17 00:00:00 2001
From: Francesco Massimo 
Date: Wed, 5 Jun 2024 16:10:32 +0200
Subject: [PATCH 51/54] correct B-TIS3 implementation in 1D

---
 src/ElectroMagn/ElectroMagn1D.cpp         | 4 ++--
 src/Interpolator/Interpolator1D2Order.cpp | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/ElectroMagn/ElectroMagn1D.cpp b/src/ElectroMagn/ElectroMagn1D.cpp
index d90c6ee2e..a444e19f5 100755
--- a/src/ElectroMagn/ElectroMagn1D.cpp
+++ b/src/ElectroMagn/ElectroMagn1D.cpp
@@ -615,9 +615,9 @@ void ElectroMagn1D::centerMagneticFields()
 #endif
         for( unsigned int i=0 ; i Date: Fri, 7 Jun 2024 11:32:24 +0200
Subject: [PATCH 52/54] revert persistent buffers until more complete analysis

---
 src/Particles/nvidiaParticles.cu | 136 +++++++++++--------------------
 src/Particles/nvidiaParticles.h  |   5 --
 2 files changed, 46 insertions(+), 95 deletions(-)

diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu
index f84ed3463..6bc9387b4 100644
--- a/src/Particles/nvidiaParticles.cu
+++ b/src/Particles/nvidiaParticles.cu
@@ -321,13 +321,15 @@ namespace detail {
             particle_container.eraseParticlesByPredicate( cellKeyBelow<0>(), last_filled );
             particle_container.deviceResize( new_count );
         }
-        particle_to_inject.deviceFree();
 
         // Compute keys of particles
        computeParticleClusterKey( particle_container, parameters, a_parent_patch );
 
-        // Sort particles by keys
-        particle_container.sortParticleByKey();
+        // Sort particles by keys
+        // using particle_to_inject as a buffer (it is swapped with particle_container after sorting)
+        particle_to_inject.deviceReserve( new_count ); // reserve a bit more memory for the final arrays
+        particle_to_inject.deviceResize( new_count );
+        particle_container.sortParticleByKey( particle_to_inject );
 
         // Recompute bin locations
         computeBinIndex( particle_container );
@@ -538,7 +540,7 @@ void nvidiaParticles::deviceReserve( unsigned int 
particle_count, float growth_f const unsigned int new_capacity = static_cast( particle_count * growth_factor ); - for( auto prop: nvidia_double_prop_) { + for( auto prop: nvidia_double_prop_ ) { prop->reserve( new_capacity ); } @@ -551,22 +553,11 @@ void nvidiaParticles::deviceReserve( unsigned int particle_count, float growth_f } nvidia_cell_keys_.reserve( new_capacity ); - - for( auto &v: double_buffers_ ) { - v.reserve( new_capacity ); - } - for( auto &v: short_buffers_ ) { - v.reserve( new_capacity ); - } - for( auto &v: uint64_buffers_ ) { - v.reserve( new_capacity ); - } - } void nvidiaParticles::deviceFree() { - for( auto prop: nvidia_double_prop_) { + for( auto prop: nvidia_double_prop_ ) { thrust::device_vector().swap( *prop ); } @@ -580,16 +571,6 @@ void nvidiaParticles::deviceFree() thrust::device_vector().swap( nvidia_cell_keys_ ); - for( auto &v: double_buffers_ ) { - thrust::device_vector().swap( v ); - } - for( auto &v: short_buffers_ ) { - thrust::device_vector().swap( v ); - } - for( auto &v: uint64_buffers_ ) { - thrust::device_vector().swap( v ); - } - gpu_nparts_ = 0; } @@ -606,18 +587,9 @@ void nvidiaParticles::deviceResize( unsigned int new_size ) if( tracked ) { nvidia_id_.resize( new_size ); } - + nvidia_cell_keys_.resize( new_size ); - for( auto &v: double_buffers_ ) { - v.resize( new_size ); - } - for( auto &v: short_buffers_ ) { - v.resize( new_size ); - } - for( auto &v: uint64_buffers_ ) { - v.resize( new_size ); - } - + gpu_nparts_ = new_size; } @@ -640,16 +612,6 @@ void nvidiaParticles::deviceClear() if( tracked ) { nvidia_id_.clear(); } - - for( auto &v: double_buffers_ ) { - v.clear(); - } - for( auto &v: short_buffers_ ) { - v.clear(); - } - for( auto &v: uint64_buffers_ ) { - v.clear(); - } gpu_nparts_ = 0; } @@ -722,12 +684,6 @@ void nvidiaParticles::initializeDataOnDevice() // setHostBinIndex(); } else { - - // Allocate buffers that are necessary for sorting particles with binning - double_buffers_.resize( nvidia_double_prop_.size() ); - short_buffers_ .resize( nvidia_short_prop_ .size() ); - uint64_buffers_.resize( 2 ); - deviceResize( gpu_nparts_ ); // resizes the buffers // At this point, a copy of the host particles and last_index is on the // device and we know we support the space dimension. @@ -1006,66 +962,66 @@ void nvidiaParticles::importAndSortParticles( Particles* particles_to_inject ) setHostBinIndex(); } -// //! Sort by cell_keys_ -// //! 
This version synchronizes for every vector, but uses less buffers -// void nvidiaParticles::sortParticleByKey() -// { -// // Make a sorting map using the cell keys (like numpy.argsort) -// thrust::device_vector index( gpu_nparts_ ); -// thrust::sequence( thrust::device, index.begin(), index.end() ); -// thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() ); -// -// // Sort particles using thrust::gather, according to the sorting map -// thrust::device_vector buffer( gpu_nparts_ ); -// for( auto prop: nvidia_double_prop_ ) { -// thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer.begin() ); -// prop->swap( buffer ); -// } -// buffer.clear(); -// thrust::device_vector buffer_short( gpu_nparts_ ); -// for( auto prop: nvidia_short_prop_ ) { -// thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer_short.begin() ); -// prop->swap( buffer_short ); -// } -// buffer_short.clear(); -// if( tracked ) { -// thrust::device_vector buffer_uint64( gpu_nparts_ ); -// thrust::gather( thrust::device, index.begin(), index.end(), nvidia_id_.begin(), buffer_uint64.begin() ); -// nvidia_id_.swap( buffer_uint64 ); -// buffer_uint64.clear(); -// } -// } +//! Sort by cell_keys_ +//! This version synchronizes for every vector, but uses less buffers +void nvidiaParticles::sortParticleByKey() +{ + // Make a sorting map using the cell keys (like numpy.argsort) + thrust::device_vector index( gpu_nparts_ ); + thrust::sequence( thrust::device, index.begin(), index.end() ); + thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() ); + + // Sort particles using thrust::gather, according to the sorting map + thrust::device_vector buffer( gpu_nparts_ ); + for( auto prop: nvidia_double_prop_ ) { + thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer.begin() ); + prop->swap( buffer ); + } + buffer.clear(); + thrust::device_vector buffer_short( gpu_nparts_ ); + for( auto prop: nvidia_short_prop_ ) { + thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer_short.begin() ); + prop->swap( buffer_short ); + } + buffer_short.clear(); + if( tracked ) { + thrust::device_vector buffer_uint64( gpu_nparts_ ); + thrust::gather( thrust::device, index.begin(), index.end(), nvidia_id_.begin(), buffer_uint64.begin() ); + nvidia_id_.swap( buffer_uint64 ); + buffer_uint64.clear(); + } +} //! Sort by cell_keys_ //! 
This version is asynchronous, but requires a buffer of equal size to be provided -void nvidiaParticles::sortParticleByKey() +void nvidiaParticles::sortParticleByKey( nvidiaParticles &buffer ) { // Make a sorting map using the cell keys (like numpy.argsort) - thrust::device_vector & index = uint64_buffers_[1]; + thrust::device_vector index( gpu_nparts_ ); thrust::sequence( thrust::device, index.begin(), index.end() ); thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() ); // Sort particles using thrust::gather, according to the sorting map for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) { - thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_double_prop_[ip]->begin(), double_buffers_[ip].begin() ); + thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_double_prop_[ip]->begin(), buffer.nvidia_double_prop_[ip]->begin() ); } for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) { - thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_short_prop_[ip]->begin(), short_buffers_[ip].begin() ); + thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_short_prop_[ip]->begin(), buffer.nvidia_short_prop_[ip]->begin() ); } if( tracked ) { - thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_id_.begin(), uint64_buffers_[0].begin() ); + thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_id_.begin(), buffer.nvidia_id_.begin() ); } SMILEI_ACCELERATOR_DEVICE_SYNC(); // Swap properties with their buffer for( int iprop = 0; iprop < nvidia_double_prop_.size(); iprop++ ) { - nvidia_double_prop_[iprop]->swap( double_buffers_[iprop] ); + nvidia_double_prop_[iprop]->swap( *buffer.nvidia_double_prop_[iprop] ); } for( int iprop = 0; iprop < nvidia_short_prop_.size(); iprop++ ) { - nvidia_short_prop_[iprop]->swap( short_buffers_[iprop] ); + nvidia_short_prop_[iprop]->swap( *buffer.nvidia_short_prop_[iprop] ); } if( tracked ) { - nvidia_id_.swap( uint64_buffers_[0] ); + nvidia_id_.swap( buffer.nvidia_id_ ); } } diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index 19c20b70d..a02edffc8 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -228,11 +228,6 @@ class nvidiaParticles : public Particles //! List of short* arrays std::vector*> nvidia_short_prop_; - //! Buffers for sorting particles - std::vector> double_buffers_; - std::vector> short_buffers_; - std::vector> uint64_buffers_; - const Params* parameters_; //! We are interested in having the patch coordinates. This allows us to //! compute a bin index relative to the patch which in turns, makes the bin From b033879e56499264b6104f0d3bd3f4d1f5b5021f Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Mon, 24 Jun 2024 11:19:53 +0000 Subject: [PATCH 53/54] Adding new publications --- doc/Sphinx/Overview/material.rst | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 66ed26180..5712bfeae 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). 
-As of May 2024, 189 papers have been published covering a broad range of topics: +As of May 2024, at least 192 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,25 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. + +.. [Sikorski2024] + + P. Sikorski, A. G. R. Thomas, S. S. Bulanov, M. Zepf and D. Seipt, + `Novel signatures of radiation reaction in electron–laser sidescattering`, + `New Journal of Physics 26 063011 (2024) `_ + +.. [Ivanov2024b] + + K. A. Ivanov, S. A. Shulyapov, D. A. Gorlova, I. P. Tsygvintsev, M. S. Krivokorytov, I. N. Tsymbalov, R. V. Volkov and A. B. Savelev, + `Laser-accelerated MeV-scale collimated electron bunch from a near-critical plasma of a liquid jet target`, + `Laser Physics Letters 21, 7 (2024) `_ + +.. [Malik2024] + + H. K. Malik, S. Kumar, and D. K. Singh, + `Effect of trapezoidal plasma density region in bubble wakefield acceleration`, + `Physica Scripta 99, 075601 (2024) `_ + .. [Krafft2024b] C. Krafft, P. Savoini, and F. J. Polanco-Rodríguez, @@ -62,7 +81,7 @@ Following is the distribution of these topics in the listed publications up to N `All-optical source size and emittance measurements of laser-accelerated electron beams`, `Physical Review Accelerators and Beams 27, 052803 (2024) `_ -.. [Ivanov2024] +.. [Ivanov2024a] K. A. Ivanov, D. A. Gorlova, I. N. Tsymbalov, I. P. Tsygvintsev, S. A. Shulyapov, R. V. Volkov, and A. B. Savel’ev, `Laser-driven pointed acceleration of electrons with preformed plasma lens`, From 438d43d8ab727d02dd8f4402c50393343e871b99 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Mon, 24 Jun 2024 13:32:13 +0200 Subject: [PATCH 54/54] prepare v5.1 --- doc/Sphinx/Overview/releases.rst | 58 ++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/doc/Sphinx/Overview/releases.rst b/doc/Sphinx/Overview/releases.rst index 5c3e9d046..e271b32c5 100755 --- a/doc/Sphinx/Overview/releases.rst +++ b/doc/Sphinx/Overview/releases.rst @@ -16,18 +16,43 @@ Get Smilei You can find older, `unsupported versions here `_ +.. +.. ---- + +.. .. _latestVersion: + +.. Changes made in the repository (not released) +.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ---- -.. _latestVersion: +Ongoing projects +^^^^^^^^^^^^^^^^ + +* Already available, but experimental: + + * Particle merging + * Nuclear reactions + * Perfectly Matched Layers + * NewParticles diagnostic + +* In preparation: + + * Spectral solvers + + +---- -Changes made in the repository (not released) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Release 5.1 +^^^^^^^^^^^^^^^^^^^^^ -* GPU: +* **GPU**: + * ``1Dcartesian`` geometry now available. * Compilation simplified and better documented. + * Improved performance of particle sorting. -* Features: +* **Features**: * Relativistic field initialization now supports multiple species and both direction propagations. * Added the argument ``phase_offset`` in laser definitions such as ``LaserGaussian2D``. @@ -37,7 +62,7 @@ Changes made in the repository (not released) * Deprecated ``smilei_rand_max``. * New namelist variables ``smilei_omp_threads`` and ``smilei_total_cores``. 
-* Happi: +* **Happi**: * In ``Scalar``, it is now possible to make an operation on scalars such as ``"Uelm+Ukin"``. * The list of available scalars can be obtained from ``getScalars()``. @@ -46,11 +71,11 @@ Changes made in the repository (not released) * Changed coordinate reference for 2D probe in 3D or AM geometry (zero is the box origin projected orthogonally on the probe plane). -* Documentation: +* **Documentation**: * Dark theme (click the switch on the bottom left, or set browser preferences). -* Bug fixes: +* **Bug fixes** : * ``dump_minutes`` often failed to write some checkpoint files. * ``"auto"`` limits in ``ParticleBinning`` could fail with only one side on ``"auto"``. @@ -58,23 +83,6 @@ Changes made in the repository (not released) ---- -Projects -^^^^^^^^^^^^^^^^ - -* Already available, but experimental: - - * Particle merging - * Nuclear reactions - * Perfectly Matched Layers - * NewParticles diagnostic - -* In preparation: - - * Spectral solvers - - ----- - Release 5.0 ^^^^^^^^^^^^^^^^^^^^^