diff --git a/.gitignore b/.gitignore index 4f85ed2da..cf8f3579b 100755 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,4 @@ exec_script.sh.* .DS_Store tables/* *.stats* +.venv* diff --git a/benchmarks/collisions/ionization_multiple.py b/benchmarks/collisions/ionization_multiple.py index 66b2f75bf..6313a72dd 100755 --- a/benchmarks/collisions/ionization_multiple.py +++ b/benchmarks/collisions/ionization_multiple.py @@ -22,7 +22,7 @@ timestep2 = int(np.double(S2.namelist.Main.timestep)) D += [ - S2.ParticleBinning(0,sum={"ekin":[0,1]}, linestyle="", marker=".", color=color ) + S2.ParticleBinning(0,sum={"ekin":[0,1]}, linestyle="", marker=".", color=color, label=elm ) ] diff --git a/benchmarks/tst1d_25_bsi_ionisation.py b/benchmarks/tst1d_25_bsi_ionisation.py new file mode 100755 index 000000000..0a784974e --- /dev/null +++ b/benchmarks/tst1d_25_bsi_ionisation.py @@ -0,0 +1,115 @@ +# ---------------------------------------------------------------------------------------- +# SIMULATION PARAMETERS FOR THE PIC-CODE SMILEI +# ---------------------------------------------------------------------------------------- + +import math + +l0 = 2.0 * math.pi # wavelength in normalized units +t0 = l0 # optical cycle in normalized units +rest = 6000.0 # nb of timestep in 1 optical cycle +resx = 4000.0 # nb cells in 1 wavelength +Lsim = 0.01 * l0 # simulation length +Tsim = 0.2 * t0 # duration of the simulation + + +Main( + geometry="1Dcartesian", + interpolation_order=2, + cell_length=[l0 / resx], + grid_length=[Lsim], + number_of_patches=[4], + timestep=t0 / rest, + simulation_time=Tsim, + EM_boundary_conditions=[["silver-muller"]], + reference_angular_frequency_SI=6 * math.pi * 1e14, +) + +Species( + name="hydrogen", + ionization_model="barrier_suppression", + ionization_electrons="electron", + atomic_number=1, + position_initialization="regular", + momentum_initialization="cold", + particles_per_cell=40, + mass=1836.0 * 1000.0, + charge=0.0, + number_density=0.1, + boundary_conditions=[ + ["remove", "remove"], + ], +) + +Species( + name="carbon", + ionization_model="barrier_suppression", + ionization_electrons="electron", + atomic_number=6, + position_initialization="regular", + momentum_initialization="cold", + particles_per_cell=40, + mass=1836.0 * 1000.0, + charge=0.0, + number_density=0.1, + boundary_conditions=[ + ["remove", "remove"], + ], +) + +Species( + name="electron", + position_initialization="regular", + momentum_initialization="cold", + particles_per_cell=0, + mass=1.0, + charge=-1.0, + charge_density=0.0, + boundary_conditions=[ + ["remove", "remove"], + ], + keep_interpolated_fields=["Ex", "Ey", "Ez", "Wx", "Wy", "Wz"], +) + + +def By(t): + return 1e-7 * math.sin(t) + + +def Bz(t): + return 0.1 * math.sin(t) + + +Laser( + box_side="xmin", + space_time_profile=[By, Bz], +) + +DiagScalar(every=20) + +DiagFields(every=20, time_average=1, fields=["Ex", "Ey", "Ez"]) + +DiagParticleBinning( + deposited_quantity="weight", + every=20, + species=["hydrogen"], + axes=[["charge", -0.5, 1.5, 2]], +) + +DiagParticleBinning( + deposited_quantity="weight", + every=20, + species=["carbon"], + axes=[["charge", -0.5, 6.5, 7]], +) + +DiagTrackParticles( + species="electron", + every=[1, 1000, 30], + attributes=["x", "px", "py", "pz", "w", "Wy"], +) + +DiagNewParticles( + species="electron", + every=100, + attributes=["x", "py", "w", "q"], +) diff --git a/benchmarks/tst1d_26_TL_ionisation.py b/benchmarks/tst1d_26_TL_ionisation.py new file mode 100755 index 000000000..32326a81b --- /dev/null +++ b/benchmarks/tst1d_26_TL_ionisation.py @@ -0,0 +1,115 @@ +# ---------------------------------------------------------------------------------------- +# SIMULATION PARAMETERS FOR THE PIC-CODE SMILEI +# ---------------------------------------------------------------------------------------- + +import math + +l0 = 2.0 * math.pi # wavelength in normalized units +t0 = l0 # optical cycle in normalized units +rest = 6000.0 # nb of timestep in 1 optical cycle +resx = 4000.0 # nb cells in 1 wavelength +Lsim = 0.01 * l0 # simulation length +Tsim = 0.2 * t0 # duration of the simulation + + +Main( + geometry="1Dcartesian", + interpolation_order=2, + cell_length=[l0 / resx], + grid_length=[Lsim], + number_of_patches=[4], + timestep=t0 / rest, + simulation_time=Tsim, + EM_boundary_conditions=[["silver-muller"]], + reference_angular_frequency_SI=6 * math.pi * 1e14, +) + +Species( + name="hydrogen", + ionization_model="Tong_Lin", + ionization_electrons="electron", + atomic_number=1, + position_initialization="regular", + momentum_initialization="cold", + particles_per_cell=40, + mass=1836.0 * 1000.0, + charge=0.0, + number_density=0.1, + boundary_conditions=[ + ["remove", "remove"], + ], +) + +Species( + name="carbon", + ionization_model="Tong_Lin", + ionization_electrons="electron", + atomic_number=6, + position_initialization="regular", + momentum_initialization="cold", + particles_per_cell=40, + mass=1836.0 * 1000.0, + charge=0.0, + number_density=0.1, + boundary_conditions=[ + ["remove", "remove"], + ], +) + +Species( + name="electron", + position_initialization="regular", + momentum_initialization="cold", + particles_per_cell=0, + mass=1.0, + charge=-1.0, + charge_density=0.0, + boundary_conditions=[ + ["remove", "remove"], + ], + keep_interpolated_fields=["Ex", "Ey", "Ez", "Wx", "Wy", "Wz"], +) + + +def By(t): + return 1e-7 * math.sin(t) + + +def Bz(t): + return 0.1 * math.sin(t) + + +Laser( + box_side="xmin", + space_time_profile=[By, Bz], +) + +DiagScalar(every=20) + +DiagFields(every=20, time_average=1, fields=["Ex", "Ey", "Ez"]) + +DiagParticleBinning( + deposited_quantity="weight", + every=20, + species=["hydrogen"], + axes=[["charge", -0.5, 1.5, 2]], +) + +DiagParticleBinning( + deposited_quantity="weight", + every=20, + species=["carbon"], + axes=[["charge", -0.5, 6.5, 7]], +) + +DiagTrackParticles( + species="electron", + every=[1, 1000, 30], + attributes=["x", "px", "py", "pz", "w", "Wy"], +) + +DiagNewParticles( + species="electron", + every=100, + attributes=["x", "py", "w", "q"], +) diff --git a/benchmarks/tst1d_27_fullPPT_ionisation.py b/benchmarks/tst1d_27_fullPPT_ionisation.py new file mode 100755 index 000000000..7f165e8fa --- /dev/null +++ b/benchmarks/tst1d_27_fullPPT_ionisation.py @@ -0,0 +1,125 @@ +# ---------------------------------------------------------------------------------------- +# SIMULATION PARAMETERS FOR THE PIC-CODE SMILEI +# ---------------------------------------------------------------------------------------- + +import math +l0 = 2.0*math.pi # wavelength in normalized units +t0 = l0 # optical cycle in normalized units +rest = 6000.0 # nb of timestep in 1 optical cycle +resx = 4000.0 # nb cells in 1 wavelength +Lsim = 0.01*l0 # simulation length +Tsim = 0.2*t0 # duration of the simulation + + +Main( + geometry = "1Dcartesian", + + interpolation_order = 2, + + cell_length = [l0/resx], + grid_length = [Lsim], + + number_of_patches = [ 4 ], + + timestep = t0/rest, + simulation_time = Tsim, + + EM_boundary_conditions = [ ['silver-muller'] ], + + reference_angular_frequency_SI = 6*math.pi*1e14, + +) + +Species( + name = 'hydrogen', + ionization_model = 'tunnel_full_PPT', + ionization_electrons = 'electron', + atomic_number = 1, + position_initialization = 'regular', + momentum_initialization = 'cold', + particles_per_cell = 40, + mass = 1836.0*1000., + charge = 0.0, + number_density = 0.1, + boundary_conditions = [ + ["remove", "remove"], + ], +) + +Species( + name = 'carbon', + ionization_model = 'tunnel_full_PPT', + ionization_electrons = 'electron', + atomic_number = 6, + position_initialization = 'regular', + momentum_initialization = 'cold', + particles_per_cell = 40, + mass = 1836.0*1000., + charge = 0.0, + number_density = 0.1, + boundary_conditions = [ + ["remove", "remove"], + ], +) + +Species( + name = 'electron', + position_initialization = 'regular', + momentum_initialization = 'cold', + particles_per_cell = 0, + mass = 1.0, + charge = -1.0, + charge_density = 0.0, + boundary_conditions = [ + ["remove", "remove"], + ], + keep_interpolated_fields = ["Ex", "Ey", "Ez", "Wx", "Wy", "Wz"], +) + +def By(t): + return 1e-7 * math.sin(t) +def Bz(t): + return 0.1 * math.sin(t) + +Laser( + box_side = "xmin", + space_time_profile = [By, Bz], +) + +DiagScalar(every = 20) + +DiagFields( + every = 20, + time_average = 1, + fields = ["Ex", "Ey", "Ez"] +) + +DiagParticleBinning( + deposited_quantity = "weight", + every = 20, + species = ["hydrogen"], + axes = [ + ["charge", -0.5, 1.5, 2] + ] +) + +DiagParticleBinning( + deposited_quantity = "weight", + every = 20, + species = ["carbon"], + axes = [ + ["charge", -0.5, 6.5, 7] + ] +) + +DiagTrackParticles( + species = "electron", + every = [1,1000,30], + attributes = ["x","px","py","pz","w","Wy"] +) + +DiagNewParticles( + species = "electron", + every = 100, + attributes = ["x","py","w","q"], +) diff --git a/benchmarks/tst_collisions3_AM_uniformity.py b/benchmarks/tst_collisions3_AM_uniformity.py new file mode 100644 index 000000000..5c6b63f13 --- /dev/null +++ b/benchmarks/tst_collisions3_AM_uniformity.py @@ -0,0 +1,87 @@ +# --------------------------------------------- +# SIMULATION PARAMETERS FOR THE PIC-CODE SMILEI +# --------------------------------------------- + +import math +L0 = 2.*math.pi # conversion from normalization length to wavelength + + +Main( + geometry = "AMcylindrical", + + number_of_patches = [ 4, 4 ], + + interpolation_order = 2, + number_of_AM = 1, + + timestep = 1 * L0, + simulation_time = 10 * L0, + + time_fields_frozen = 100000000000., + + cell_length = [L0, L0], + grid_length = [64.*L0, 64*L0], + + EM_boundary_conditions = [ ["periodic", "periodic"], ["silver-muller", "buneman"] ], + + solve_poisson = False, + + reference_angular_frequency_SI = L0 * 3e8 /1.e-6, +) + + +ion = "ion" +eon = "eon" + +Species( + name = eon, + position_initialization = "random", + momentum_initialization = "cold", + particles_per_cell= 100, + mass = 1.0, + charge = -1.0, + charge_density = 1., + mean_velocity = [0.8, 0., 0.], + temperature = [0.]*3, + time_frozen = 100000000.0, + boundary_conditions = [ + ["periodic", "periodic"], + ["remove", "remove"], + ], +) + +Species( + name = ion, + position_initialization = "random", + momentum_initialization = "cold", + particles_per_cell= 100, + mass = 1836.0*13., + charge = 3.0, + charge_density = 1., + mean_velocity = [0., 0., 0.], + temperature = [0.]*3, + time_frozen = 100000000.0, + boundary_conditions = [ + ["periodic", "periodic"], + ["remove", "reflective"], + ], + atomic_number = 13 +) + +Collisions( + species1 = [eon], + species2 = [ion], + coulomb_log = 3, +) + +def radius(particles): return np.sqrt(particles.y**2 + particles.z**2) + +DiagParticleBinning( + deposited_quantity = "weight_vy_py", + every = 4, + species = [ion], + axes = [ + ["x", 0, Main.grid_length[0], 64], + [radius, 0, Main.grid_length[1], 64] + ] +) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 5712bfeae..07993d322 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of May 2024, at least 192 papers have been published covering a broad range of topics: +As of October 2024, at least 211 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -51,6 +51,127 @@ Following is the distribution of these topics in the listed publications up to N You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Jirka2024] + + M. Jirka and S. V. Bulanov, + `Effects of Colliding Laser Pulses Polarization on e- e+ Cascade Development in Extreme Focusing`, + `Physical Review Letters 133, 125001 (2024) `_ + +.. [Plotnikov2024] + + I. Plotnikov, A. J. van Marle, C. Guépin, A. Marcowith and Pierrick Martin, + `Kinetic simulations of electron–positron induced streaming instability in the context of gamma-ray halos around pulsars`, + `Astronomy & Astrophysics 688, A134 (2024) `_ + +.. [Mukherjee2024] + + A. Mukherjee and Daniel Seipt, + `Laser polarization control of ionization-injected electron beams and x-ray radiation in laser wakefield accelerators`, + `Plasma Physics and Controlled Fusion 66, 085001 (2024) `_ + +.. [Yao2024] + + W. Yao, R. Lelièvre, T. Waltenspiel, I. Cohen, A. Allaoua, P. Antici, A. Beck, E. Cohen, X. Davoine, E. d’Humières, Q. Ducasse, E. Filippov, C. Gautier, L. Gremillet, P. Koseoglou, D. Michaeli, D. Papadopoulos, S. Pikuz, I. Pomerantz, F. Trompier, Y. Yuan, F. Mathieu and Julien Fuchs, + `Enhanced Energy, Conversion Efficiency and Collimation of Protons Driven by High-Contrast and Ultrashort Laser Pulses`, + `Applied Sciences 14, 6101 (2024) `_ + +.. [Dmitriev2024] + + E. Dmitriev and Ph. Korneev, + `Angular momentum gain by electrons under the action of intense structured light`, + `Physical Review A 110, 013514 (2024) `_ + +.. [Yu2024] + + H. Yu, Q. Xia and Jun Fang, + `Nonthermal Acceleration of Electrons, Positrons, and Protons at a Nonrelativistic Quasi-parallel Collisionless Shock`, + `The Astrophysical Journal 969, 13 (2024) `_ + + +.. [Liu2024] + + B. Liu, B. Lei, Y. Gao, M. Wen, and K. Zhu, + `Plasma opacity induced by laser-driven movement of background ions`, + `Plasma Physics and Controlled Fusion 66, 115004 (2024) `_ + +.. [Martin2024] + + H. Martin, R. W. Paddock, M. W. von der Leyen, V. Eliseev, R. T. Ruskov, R. Timmis, J. J. Lee, A. James, and P. A. Norreys, + `Electrothermal filamentation of igniting plasmas`, + `Physical Review E 110, 035205 (2024) `_ + +.. [Horny2024] + + V. Horný, P. G. Bleotu, D. Ursescu, V. Malka, and P. Tomassini, + `Efficient laser wakefield accelerator in pump depletion dominated bubble regime`, + `Physical Review E 110, 035202 (2024) `_ + +.. [DeAndres2024] + + A. De Andres, S. Bhadoria, J. T. Marmolejo, A. Muschet, P. Fischer, H. R. Barzegar, T. Blackburn, A. Gonoskov, D. Hanstorp, M. Marklund and L. Veisz, + `Unforeseen advantage of looser focusing in vacuum laser acceleration`, + `Nature Communications Physics 7, 293 (2024) `_ + +.. [Yoon2024] + + Y. D. Yoon, M. Laishram, T. E. Moore, and G. S. Yun, + `Non-equilibrium formation and relaxation of magnetic flux ropes at kinetic scales`, + `Nature Communications Physics 7, 297 (2024) `_ + +.. [Laishram2024] + + M. Laishram, G. S. Yun, and Y. D. Yoon, + `Magnetogenesis via the canonical battery effect`, + `Physical Review Research 6, L032052 (2024) `_ + +.. [Kang2024] + + H. L. Kang, Y. D. Yoon, M.-H. Cho and G. S. Yun, + `Fast nonlinear scattering of runaway electron beams through resonant interactions with plasma waves`, + `Nuclear Fusion 64, 10 (2024) `_ + +.. [Gonzalez-Izquierdo2024] + + B. Gonzalez-Izquierdo, P. Fischer, M. Touati, J. Hartmann, M. Speicher, V. Scutelnic, G. Bodini, A. Fazzini, M. M. Guenther, A. K. Haerle, K. Kenney, E. Schork, S. Bruce, M. Spinks, H. J. Quevedo, A. Helal, M. Medina, E. Gaul, H. Ruhl, M. Schollmeier, S. Steinke, and G. Korn, + `Efficient laser-driven proton acceleration from a petawatt contrast-enhanced second harmonic mixed-glass laser system`, + `Physics of Plasmas 31, 083105 (2024) `_ + +.. [Ma2024] + + Z. Ma, Y. Wang, Y. Yang, Y. Wang, K. Zhao, Y. Li, C. Fu, W. He, Y. Ma, + `Simulation of nuclear isomer production in laser-induced plasma`, + `Matter and Radiation at Extremes 9, 055201 (2024) `_ + +.. [Kleij2024] + + P. S. Kleij, S. Marini, M. Caetano de Sousa, M. Grech, C. Riconda, M. Raynaud, + `Photon emission and radiation reaction effects in surface plasma waves in ultra-high intensities`, + `Physics of Plasmas 31, 072111 (2024) `_ + +.. [Ghizzo2024] + + A. Ghizzo, D. Del Sarto, H. Betar, + `Collisionless heating in Vlasov plasma and turbulence-driven filamentation aspects`, + `Physics of Plasmas 31, 072109 (2024) `_ + +.. [Gelfer2024] + + E. G. Gelfer, A. M. Fedotov, O. Klimo, and S. Weber, + `Coherent radiation of an electron bunch colliding with an intense laser pulse`, + `Physical Review Research 6, L032013 (2024) `_ + +.. [Zagidullin2024] + + R. Zagidullin, V. Zorina, J. W. Wang, S. G. Rykovanov, + `Polarization control of attosecond pulses from laser-nanofoil interactions using an external magnetic field`, + `Physics of Plasmas 31, 073303 (2024) `_ + +.. [Marini2024] + + S. Marini, D. F. G. Minenna, F. Massimo, L. Batista, V. Bencini, A. Chancé, N. Chauvin, S. Doebert, J. Farmer, E. Gschwendtner, I. Moulanier, P. Muggli, D. Uriot, B. Cros, and P. A. P. Nghiem, + `Beam physics studies for a high charge and high beam quality laser-plasma accelerator`, + `Physical Review Accelerators and Beams 27, 063401 (2024) `_ + .. [Sikorski2024] P. Sikorski, A. G. R. Thomas, S. S. Bulanov, M. Zepf and D. Seipt, @@ -385,7 +506,7 @@ Following is the distribution of these topics in the listed publications up to N V. Istokskaia, M. Tosca, L. Giuffrida, J. Psikal, F. Grepl, V. Kantarelou, S. Stancek, S. Di Siena, A. Hadjikyriacou, A. McIlvenny, Y. Levy, J. Huynh, M. Cimrman, P. Pleskunov, D. Nikitin, A. Choukourov, F. Belloni, A. Picciotto, S. Kar, M. Borghesi, A. Lucianetti, T. Mocek and D. Margarone, `A multi-MeV alpha particle source via proton-boron fusion driven by a 10-GW tabletop laser`, - `Communications Physics 6, 27 (2023) `_ + `Nature Communications Physics 6, 27 (2023) `_ .. [Yoon2023] @@ -428,6 +549,12 @@ Following is the distribution of these topics in the listed publications up to N S. Marini, M. Grech, P. S. Kleij, M. Raynaud and C. Riconda, `Electron acceleration by laser plasma wedge interaction`, `Physical Review Research 5, 013115 (2023) `_ + +.. [Miloshevsky2023] + + G. Miloshevsky, + `Particle-in-Cell Modeling of Omega Experiments on Ablation of Plasmas`, + `IEEE Transactions on Plasma Science 51, 4 (2023) `_ .. [Blackman2022] @@ -507,7 +634,7 @@ Following is the distribution of these topics in the listed publications up to N `Injection of electron beams into two laser wakefields and generation of electron rings`, `Physical Review E 106, 055202 (2022) `_ -.. [Kumar2022b] +.. [Ku2022] S. Ku., R. Dhawan, D.K. Singh and H. K. Malik, `Diagnostic of laser wakefield acceleration with ultra – Short laser pulse by using SMILEI PIC code`, @@ -519,12 +646,6 @@ Following is the distribution of these topics in the listed publications up to N `Comparative study of ultrashort single-pulse and multi-pulse driven laser wakefield acceleration`, `Laser Physics Letters 20, 026001 (2022) `_ -.. [Miloshevsky2022] - - G. Miloshevsky, - `Pic Modeling of Omega Experiments on Ablation of Plasmas`, - `2022 IEEE International Conference on Plasma Science (ICOPS), ICOPS45751.2022.9813047 (2022) `_ - .. [Zhang2022b] Y. Zhang, F. Wang, J. Liu and J. Sun, @@ -628,12 +749,6 @@ Following is the distribution of these topics in the listed publications up to N `Subcycle terahertz field waveforms clocked by attosecond high-harmonic pulses from relativistic laser plasmas`, `Journal of Applied Physics 131, 103104 (2022) `_ -.. [Umstadter2022] - - D. Umstadter - `Controlled Injection of Electrons for Improved Performance of Laser-Wakefield Acceleration`, - `United States Department of Energy Technical Report (2022) `_ - .. [Massimo2022] F. Massimo, M. Lobet, J. Derouillat, A. Beck, G. Bouchard, M. Grech, F. Pérez, T. Vinci, @@ -880,7 +995,7 @@ Following is the distribution of these topics in the listed publications up to N W. Yao, A. Fazzini, S. N. Chen, K. Burdonov, P. Antici, J. Béard, S. Bolaños, A. Ciardi, R. Diab, E. D. Filippov, S. Kisyov, V. Lelasseux, M. Miceli, Q. Moreno, V. Nastasa, S. Orlando, S. Pikuz, D. C. Popescu, G. Revet, X. Ribeyre, E. d’Humières and J. Fuchs, `Laboratory evidence for proton energization by collisionless shock surfing`, - `Nat. Phys. 17, 1177-1182 (2021) `_ + `Nature Physics 17, 1177-1182 (2021) `_ .. [Gelfer2021] diff --git a/doc/Sphinx/Overview/releases.rst b/doc/Sphinx/Overview/releases.rst index 9e2b1445b..85afde2aa 100755 --- a/doc/Sphinx/Overview/releases.rst +++ b/doc/Sphinx/Overview/releases.rst @@ -28,12 +28,14 @@ Changes made in the repository (not released) * Prescribed fields in AM geometry. * Particle reflective boundary conditions at Rmax in AM geometry. * 1st order Ruyten shape function in AM geometry. + * Support for collisions in single mode AM geometry. * **Bug fixes**: * Tunnel ionization was wrong in some cases for high atomic numbers. * Custom functions in ``ParticleBinning`` crashed with python 3.12. * Species-specific diagnostics in AM geometry with vectorization. + * Frozen particles in AM geometry with adaptive vectorization. * Happi's ``average`` argument would sometimes be missing the last bin. * 1D projector on GPU without diagnostics diff --git a/doc/Sphinx/Understand/GPU_offloading.rst b/doc/Sphinx/Understand/GPU_offloading.rst index 6b2ea599a..d3a82c37c 100644 --- a/doc/Sphinx/Understand/GPU_offloading.rst +++ b/doc/Sphinx/Understand/GPU_offloading.rst @@ -20,6 +20,8 @@ the announced exaflopic supercomputers will include GPUs. * Cartesian geometry in 1D, 2D and in 3D , for order 2 * Diagnostics: Field, Probes, Scalar, ParticleBinning, TrackParticles * Moving Window + * Boundary conditions for Fields: Periodic, reflective and silver-muller are supported (no PML or BM) + * Boundary conditions for Particles: Periodic, Reflective, thermal, remove and stop are supported * A few key features remain to be implemented (AM geometry, ionization, PML, envelope, additional physics), but the fundamentals of the code are ported. diff --git a/doc/Sphinx/Understand/collisions.rst b/doc/Sphinx/Understand/collisions.rst index 281ff0764..dc89d3e8c 100755 --- a/doc/Sphinx/Understand/collisions.rst +++ b/doc/Sphinx/Understand/collisions.rst @@ -309,21 +309,38 @@ the ion: \sum\limits_{p=0}^{k-1} R^{i+k}_{i+p} \left(\bar{P}^{i+k} - \bar{P}^{i+p}\right) \prod\limits_{j=0,j\ne p}^{k-1} R^{i+p}_{i+j} & - \quad\mathrm{if}\quad 0U`. + + ---- Test cases for ionization diff --git a/doc/Sphinx/Use/GPU_version.rst b/doc/Sphinx/Use/GPU_version.rst index 2228c0ea1..c2633b1a7 100755 --- a/doc/Sphinx/Use/GPU_version.rst +++ b/doc/Sphinx/Use/GPU_version.rst @@ -16,8 +16,6 @@ This page contains the links of this documentation to compile and run SMILEI on ---- -Known issues -^^^^^^^^^^^^ +Important note: -2D and 3D runs may crash with A2000 & A6000 GPUs (used in laptops and worstations respectively, -they are not 'production GPUs' which are designed for 64 bits floating point operations ) +The biggest challenge to execute SMILEI on an accelerator is the correct installation of the openmpi library. It needs to be compiled with nvc++ after configuring (ie. ./configure --options) with the appropriate options specific to your system diff --git a/doc/Sphinx/Use/install_linux_GPU.rst b/doc/Sphinx/Use/install_linux_GPU.rst index 197cd499b..fdd97258b 100644 --- a/doc/Sphinx/Use/install_linux_GPU.rst +++ b/doc/Sphinx/Use/install_linux_GPU.rst @@ -6,6 +6,9 @@ First, make sure you have a recent version of CMAKE, and the other libraries to compile Smilei on CPU as usual. In particular, for this example, you need GCC <= 12. +The installation protocol showed below uses the openmpi included in nvhpc. This approach often results in segfault at runtime (note that nvidia will remove openmpi from nvhpc in the future). +The "proper" way, which is much harder, consists in installing openmpi compiled with nvhpc ( + Make a directory to store all the nvidia tools. We call it $NVDIR: .. code:: bash @@ -72,3 +75,20 @@ To run: source nvidia_env.sh smilei namelist.py + + +As an example of a "simple" openmpi installation +Openmpi dependencies such as zlib, hwloc and libevent should first be compiled with nvc++ + +.. code:: bash + export cuda=PATH_TO_YOUR_NVHPC_FOLDER/Linux_x86_64/24.5/cuda + wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz + tar -xzf openmpi-4.1.5.tar.gz + cd openmpi-4.1.5 + mkdir build + cd build + CC=nvc++ CXX=nvc++ CFLAGS=-fPIC CXXFLAGS=-fPIC ../configure --with-hwloc --enable-mpirun-prefix-by-default --prefix=PATH_TO_openmpi/openmpi-4.1.6/build --enable-mpi-cxx --without-verb --with-cuda=$cuda --disable-mpi-fortran -with-libevent=PATH_TO_libevent/libevent-2.1.12-stable/build + make -j 4 all + make install + +Because of the complexity of the configure for openmpi, we recommend using your supercomputer support to use smilei on GPUs. diff --git a/doc/Sphinx/Use/namelist.rst b/doc/Sphinx/Use/namelist.rst index 163aa8654..484512179 100755 --- a/doc/Sphinx/Use/namelist.rst +++ b/doc/Sphinx/Use/namelist.rst @@ -1390,8 +1390,8 @@ Lasers ^^^^^^ A laser consists in applying oscillating boundary conditions for the magnetic -field on one of the box sides. The only boundary condition that supports lasers -is ``"silver-muller"`` (see :py:data:`EM_boundary_conditions`). +field on one of the box sides. The only boundary conditions that support lasers +are ``"silver-muller"`` and ``"PML"`` (see :py:data:`EM_boundary_conditions`). There are several syntaxes to introduce a laser in :program:`Smilei`: .. note:: @@ -1440,10 +1440,10 @@ There are several syntaxes to introduce a laser in :program:`Smilei`: .. py:data:: space_time_profile_AM - :type: A list of maximum 2 x ``number_of_AM`` *python* functions. + :type: A list of maximum 2 x ``number_of_AM`` complex valued *python* functions. - These profiles define the first modes of :math:`B_r` and :math:`B_\theta` in the - order shown in the above example. Undefined modes are considered zero. + These profiles define the first modes of :math:`B_r` and :math:`B_\theta` of the laser in the + order shown in the above example. Higher undefined modes are considered zero. This can be used only in ``AMcylindrical`` geometry. In this geometry a two-dimensional :math:`(x,r)` grid is used and the laser is injected from a :math:`x` boundary, thus the provided profiles must be a function of :math:`(r,t)`. diff --git a/doc/Sphinx/_static/workshopLogo.svg b/doc/Sphinx/_static/workshopLogo.svg new file mode 100644 index 000000000..9cb595c1d --- /dev/null +++ b/doc/Sphinx/_static/workshopLogo.svg @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + diff --git a/doc/Sphinx/index.rst b/doc/Sphinx/index.rst index b3ca0b413..045a35879 100755 --- a/doc/Sphinx/index.rst +++ b/doc/Sphinx/index.rst @@ -12,16 +12,15 @@ Open-source, collaborative, user-friendly and designed for high performances on it is applied to a wide range of physics studies: from relativistic laser-plasma interaction to astrophysics. -.. - .. raw:: html +.. raw:: html -.. - -
- 4th user & training workshop: 8-10 Nov 2023 in Prague (Czechia) + +
+ workshop +
+ 5th user & training workshop:
19-21st March 2025 in Madrid (Spain)
-..
diff --git a/doc/Sphinx/smilei_theme/layout.html b/doc/Sphinx/smilei_theme/layout.html index 1bed82e81..b477d1466 100755 --- a/doc/Sphinx/smilei_theme/layout.html +++ b/doc/Sphinx/smilei_theme/layout.html @@ -99,6 +99,12 @@ +
+ + workshop + +
{%- for menu in menus %} diff --git a/src/Collisions/BinaryProcesses.cpp b/src/Collisions/BinaryProcesses.cpp index 107d12c8f..344ff05ba 100644 --- a/src/Collisions/BinaryProcesses.cpp +++ b/src/Collisions/BinaryProcesses.cpp @@ -162,7 +162,7 @@ void BinaryProcesses::calculate_debye_length( Params ¶ms, Patch *patch ) // compute debye length squared in code units patch->debye_length_squared[ibin] = 1./inv_D2; // apply lower limit to the debye length (minimum interatomic distance) - double rmin2 = pow( coeff*density_max, -2./3. ); + double rmin2 = 1.0 / cbrt( coeff*density_max * coeff*density_max ) ; if( patch->debye_length_squared[ibin] < rmin2 ) { patch->debye_length_squared[ibin] = rmin2; } @@ -292,8 +292,8 @@ void BinaryProcesses::apply( Params ¶ms, Patch *patch, int itime, vectoratomic_number; rate .resize( atomic_number ); - irate.resize( atomic_number ); + rate_product.resize( atomic_number ); prob .resize( atomic_number ); ionization_electrons_ = CI->ionization_electrons_; new_electrons.initialize( 0, CI->new_electrons ); @@ -177,7 +177,7 @@ void CollisionalIonization::calculate( double gamma_s, double gammae, double gam // Loop for multiple ionization // k+1 is the number of ionizations const int kmax = atomic_number-Zstar-1; - double cs, w, e, cum_prob = 0; + double cs, w, e, cum_prob = 0, A = 1.; for( int k = 0; k <= kmax; k++ ) { // Calculate the location x (~log of energy) in the databases const double x = a2*log( a1*( gamma_s-1. ) ); @@ -203,24 +203,24 @@ void CollisionalIonization::calculate( double gamma_s, double gammae, double gam } rate[k] = K*cs/gammae ; // k-th ionization rate - irate[k] = 1./rate[k] ; // k-th ionization inverse rate prob[k] = exp( -rate[k] ); // k-th ionization probability + rate_product[k] = 1.; // Calculate the cumulative probability for k-th ionization (Nuter et al, 2011) if( k==0 ) { - cum_prob = prob[k]; + cum_prob = prob[k]; // cumulative probability } else { + double sum = 0.; for( int p=0; pmomentum( 0, ie ) *= pr; pe->momentum( 1, ie ) *= pr; pe->momentum( 2, ie ) *= pr; diff --git a/src/Collisions/CollisionalIonization.h b/src/Collisions/CollisionalIonization.h index 06a9d72b0..90bb90fd0 100755 --- a/src/Collisions/CollisionalIonization.h +++ b/src/Collisions/CollisionalIonization.h @@ -73,7 +73,7 @@ class CollisionalIonization : public BinaryProcess //! Current ionization rate array (one cell per number of ionization events) std::vector rate; - std::vector irate; + std::vector rate_product; //! Current ionization probability array (one cell per number of ionization events) std::vector prob; diff --git a/src/Collisions/Collisions.cpp b/src/Collisions/Collisions.cpp index f38b1b414..41618959b 100755 --- a/src/Collisions/Collisions.cpp +++ b/src/Collisions/Collisions.cpp @@ -22,7 +22,7 @@ Collisions::Collisions( coeff1_ = 4.046650232e-21*params.reference_angular_frequency_SI; // h*omega/(2*me*c^2) coeff2_ = 2.817940327e-15*params.reference_angular_frequency_SI/299792458.; // re omega / c coeff3_ = coeff2_ * coulomb_log_factor_; - coeff4_ = pow( 3.*coeff2_, -1./3. ); + coeff4_ = 1.0 / cbrt( 3.*coeff2_); } diff --git a/src/Diagnostic/DiagnosticScreen.cpp b/src/Diagnostic/DiagnosticScreen.cpp index 92b95e1ac..f98b2fd64 100755 --- a/src/Diagnostic/DiagnosticScreen.cpp +++ b/src/Diagnostic/DiagnosticScreen.cpp @@ -75,7 +75,7 @@ DiagnosticScreen::DiagnosticScreen( if( params.nDim_particle > 1 ) { screen_vector_a[0] = -screen_unitvector[1]; screen_vector_a[1] = screen_unitvector[0]; - double norm = sqrt( pow( screen_vector_a[0], 2 ) + pow( screen_vector_a[1], 2 ) ); + double norm = sqrt( screen_vector_a[0] * screen_vector_a[0] + screen_vector_a[1] * screen_vector_a[1] ); if( norm < 1.e-8 ) { screen_vector_a[0] = 0.; screen_vector_a[1] = 1.; @@ -132,7 +132,7 @@ DiagnosticScreen::DiagnosticScreen( ERROR( errorPrefix << ": axis `theta` not available for `" << screen_shape << "` screen" ); } for( idim=0; idimcenter_[idim] - screen_point[idim], 2 ); + distance_to_center += ( patch->center_[idim] - screen_point[idim] ) * ( patch->center_[idim] - screen_point[idim] ); } distance_to_center = sqrt( distance_to_center ); if( abs( screen_vectornorm - distance_to_center ) > patch->radius ) { @@ -196,10 +196,10 @@ void DiagnosticScreen::run( Patch *patch, int, SimWindow *simWindow ) } else if( screen_type == 2 ) { // cylinder double distance_to_axis = 0.; for( unsigned int idim=0; idimcenter_[(idim+1)%ndim] - screen_point[(idim+1)%ndim] ) * screen_unitvector[(idim+2)%ndim] - -( patch->center_[(idim+2)%ndim] - screen_point[(idim+2)%ndim] ) * screen_unitvector[(idim+1)%ndim] - , 2 ); + + distance_to_axis += ( ( patch->center_[(idim+1)%ndim] - screen_point[(idim+1)%ndim] ) * screen_unitvector[(idim+2)%ndim] + -( patch->center_[(idim+2)%ndim] - screen_point[(idim+2)%ndim] ) * screen_unitvector[(idim+1)%ndim] ) * ( ( patch->center_[(idim+1)%ndim] - screen_point[(idim+1)%ndim] ) * screen_unitvector[(idim+2)%ndim] + -( patch->center_[(idim+2)%ndim] - screen_point[(idim+2)%ndim] ) * screen_unitvector[(idim+1)%ndim] ); } distance_to_axis = sqrt( distance_to_axis ); if( abs( screen_vectornorm - distance_to_axis ) > patch->radius ) { @@ -260,8 +260,9 @@ void DiagnosticScreen::run( Patch *patch, int, SimWindow *simWindow ) double side_old = 0.; double dtg = dt / s->particles->LorentzFactor( ipart ); for( unsigned int idim=0; idimparticles->Position[idim][ipart] - screen_point[idim], 2 ); - side_old += pow( s->particles->Position[idim][ipart] - dtg*( s->particles->Momentum[idim][ipart] ) - screen_point[idim], 2 ); + side += ( s->particles->Position[idim][ipart] - screen_point[idim] ) * ( s->particles->Position[idim][ipart] - screen_point[idim] ); + side_old += ( s->particles->Position[idim][ipart] - dtg*( s->particles->Momentum[idim][ipart] ) - screen_point[idim] ) * + ( s->particles->Position[idim][ipart] - dtg*( s->particles->Momentum[idim][ipart] ) - screen_point[idim] ) ; } side = screen_vectornorm-sqrt( side ); side_old = screen_vectornorm-sqrt( side_old ); @@ -284,10 +285,12 @@ void DiagnosticScreen::run( Patch *patch, int, SimWindow *simWindow ) for( unsigned int idim=0; idimparticles->Position[(idim+1)%ndim][ipart] - screen_point[(idim+1)%ndim]; double u2 = s->particles->Position[(idim+2)%ndim][ipart] - screen_point[(idim+2)%ndim]; - side += pow( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim], 2 ); + side += ( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] ) * + ( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] ); u1 -= dtg * s->particles->Momentum[(idim+1)%ndim][ipart]; u2 -= dtg * s->particles->Momentum[(idim+1)%ndim][ipart]; - side_old += pow( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim], 2 ); + side_old += ( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] ) * + ( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] ); } side = r2 - side; side_old = r2 - side_old; diff --git a/src/Diagnostic/Histogram.h b/src/Diagnostic/Histogram.h index 49e970869..71c50f4ff 100755 --- a/src/Diagnostic/Histogram.h +++ b/src/Diagnostic/Histogram.h @@ -318,9 +318,9 @@ class HistogramAxis_p : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = s->mass_ * sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = s->mass_ * sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -329,9 +329,9 @@ class HistogramAxis_p : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -347,9 +347,9 @@ class HistogramAxis_gamma : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -358,9 +358,9 @@ class HistogramAxis_gamma : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -376,9 +376,9 @@ class HistogramAxis_ekin : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = s->mass_ * ( sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ) - 1. ); + array[ipart] = s->mass_ * ( sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) - 1. ); } } // Photons @@ -387,9 +387,9 @@ class HistogramAxis_ekin : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -406,9 +406,9 @@ class HistogramAxis_vx : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[0][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -418,9 +418,9 @@ class HistogramAxis_vx : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[0][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -437,9 +437,9 @@ class HistogramAxis_vy : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[1][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -449,9 +449,9 @@ class HistogramAxis_vy : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[1][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -468,9 +468,9 @@ class HistogramAxis_vz : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[2][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -480,9 +480,9 @@ class HistogramAxis_vz : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[2][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -496,9 +496,9 @@ class HistogramAxis_v : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = pow( 1. + 1./( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ), -0.5 ); + array[ipart] = 1.0 / sqrt( 1. + 1./( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) ); } }; }; @@ -513,11 +513,11 @@ class HistogramAxis_vperp2 : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = ( pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) - ) / ( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = ( s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] + ) / ( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -526,11 +526,11 @@ class HistogramAxis_vperp2 : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = ( pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) - ) / ( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = ( s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] + ) / ( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -567,24 +567,26 @@ class HistogramAxis_user_function : public HistogramAxis public: HistogramAxis_user_function( PyObject *type_object ) : HistogramAxis(), - function( type_object ), - particleData( 0 ) - { - }; + function( type_object ) + {}; ~HistogramAxis_user_function() { + SMILEI_PY_ACQUIRE_GIL Py_DECREF( function ); + SMILEI_PY_RELEASE_GIL }; private: void calculate_locations( Species *s, double *array, int *index, unsigned int npart, SimWindow * ) { + PyArrayObject *ret; SMILEI_PY_ACQUIRE_GIL - // Expose particle data as numpy arrays - particleData.resize( npart ); - particleData.set( s->particles ); - // run the function - PyArrayObject *ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( function, particleData.get(), NULL ); - particleData.clear(); + { + // Expose particle data as numpy arrays + ParticleData particleData( npart ); + particleData.set( s->particles ); + // run the function + ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( function, particleData.get(), NULL ); + } // Copy the result to "array" double *arr = ( double * ) PyArray_GETPTR1( ret, 0 ); for( unsigned int ipart = 0 ; ipart < npart ; ipart++ ) @@ -599,7 +601,6 @@ class HistogramAxis_user_function : public HistogramAxis }; PyObject *function; - ParticleData particleData; }; #endif @@ -646,9 +647,9 @@ class Histogram_jx : public Histogram } array[ipart] = s->particles->Weight[ipart] * ( double )( s->particles->Charge[ipart] ) * s->particles->Momentum[0][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -659,9 +660,9 @@ class Histogram_jx : public Histogram } array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -680,9 +681,9 @@ class Histogram_jy : public Histogram } array[ipart] = s->particles->Weight[ipart] * ( double )( s->particles->Charge[ipart] ) * s->particles->Momentum[1][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -693,9 +694,9 @@ class Histogram_jy : public Histogram } array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[1][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -714,9 +715,9 @@ class Histogram_jz : public Histogram } array[ipart] = s->particles->Weight[ipart] * ( double )( s->particles->Charge[ipart] ) * s->particles->Momentum[2][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -727,9 +728,9 @@ class Histogram_jz : public Histogram } array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[2][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -747,9 +748,9 @@ class Histogram_ekin : public Histogram continue; } array[ipart] = s->mass_ * s->particles->Weight[ipart] - * ( sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ) - 1. ); + * ( sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) - 1. ); } } // Photons @@ -759,9 +760,9 @@ class Histogram_ekin : public Histogram continue; } array[ipart] = s->particles->Weight[ipart] - * ( sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ) ); + * ( sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) ); } } }; @@ -807,9 +808,9 @@ class Histogram_p : public Histogram continue; } array[ipart] = s->mass_ * s->particles->Weight[ipart] - * sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -819,9 +820,9 @@ class Histogram_p : public Histogram continue; } array[ipart] = s->particles->Weight[ipart] - * sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -917,10 +918,10 @@ class Histogram_pressure_xx : public Histogram continue; } array[ipart] = s->mass_ * s->particles->Weight[ipart] - * pow( s->particles->Momentum[0][ipart], 2 ) - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] ) + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -930,10 +931,10 @@ class Histogram_pressure_xx : public Histogram continue; } array[ipart] = s->particles->Weight[ipart] - * pow( s->particles->Momentum[0][ipart], 2 ) - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] ) + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -951,10 +952,10 @@ class Histogram_pressure_yy : public Histogram continue; } array[ipart] = s->mass_ * s->particles->Weight[ipart] - * pow( s->particles->Momentum[1][ipart], 2 ) - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] ) + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -964,10 +965,10 @@ class Histogram_pressure_yy : public Histogram continue; } array[ipart] = s->particles->Weight[ipart] - * pow( s->particles->Momentum[1][ipart], 2 ) - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] ) + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -985,10 +986,10 @@ class Histogram_pressure_zz : public Histogram continue; } array[ipart] = s->mass_ * s->particles->Weight[ipart] - * pow( s->particles->Momentum[2][ipart], 2 ) - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -998,10 +999,10 @@ class Histogram_pressure_zz : public Histogram continue; } array[ipart] = s->particles->Weight[ipart] - * pow( s->particles->Momentum[2][ipart], 2 ) - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -1021,9 +1022,9 @@ class Histogram_pressure_xy : public Histogram array[ipart] = s->mass_ * s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] * s->particles->Momentum[1][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -1035,9 +1036,9 @@ class Histogram_pressure_xy : public Histogram array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] * s->particles->Momentum[1][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -1057,9 +1058,9 @@ class Histogram_pressure_xz : public Histogram array[ipart] = s->mass_ * s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] * s->particles->Momentum[2][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -1071,9 +1072,9 @@ class Histogram_pressure_xz : public Histogram array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] * s->particles->Momentum[2][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -1093,9 +1094,9 @@ class Histogram_pressure_yz : public Histogram array[ipart] = s->mass_ * s->particles->Weight[ipart] * s->particles->Momentum[1][ipart] * s->particles->Momentum[2][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -1107,9 +1108,9 @@ class Histogram_pressure_yz : public Histogram array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[1][ipart] * s->particles->Momentum[2][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -1128,9 +1129,9 @@ class Histogram_ekin_vx : public Histogram } array[ipart] = s->mass_ * s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] - * ( 1. - 1./sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ) ); + * ( 1. - 1./sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) ); } } // Photons @@ -1141,9 +1142,9 @@ class Histogram_ekin_vx : public Histogram } array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -1155,24 +1156,27 @@ class Histogram_user_function : public Histogram public: Histogram_user_function( PyObject *deposited_quantity_object ) : Histogram(), - function( deposited_quantity_object ), - particleData( 0 ) + function( deposited_quantity_object ) {}; ~Histogram_user_function() { + SMILEI_PY_ACQUIRE_GIL Py_DECREF( function ); + SMILEI_PY_RELEASE_GIL }; private: void valuate( Species *s, double *array, int *index ) { unsigned int npart = s->getNbrOfParticles(); + PyArrayObject *ret; SMILEI_PY_ACQUIRE_GIL // Expose particle data as numpy arrays - particleData.resize( npart ); - particleData.set( s->particles ); - // run the function - PyArrayObject *ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( function, particleData.get(), NULL ); - particleData.clear(); + { + ParticleData particleData( npart ); + particleData.set( s->particles ); + // run the function + ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( function, particleData.get(), NULL ); + } // Copy the result to "array" double *arr = ( double * ) PyArray_GETPTR1( ret, 0 ); for( unsigned int ipart = 0 ; ipart < npart ; ipart++ ) { @@ -1186,7 +1190,6 @@ class Histogram_user_function : public Histogram }; PyObject *function; - ParticleData particleData; }; #endif diff --git a/src/Diagnostic/HistogramFactory.h b/src/Diagnostic/HistogramFactory.h index fe3b53651..bb0f8256c 100755 --- a/src/Diagnostic/HistogramFactory.h +++ b/src/Diagnostic/HistogramFactory.h @@ -86,9 +86,13 @@ class HistogramFactory #ifdef SMILEI_USE_NUMPY // Test the function with temporary, "fake" particles double *dummy = NULL; - ParticleData test( params.nDim_particle, deposited_quantity_object, deposited_quantityPrefix, dummy ); - histogram = new Histogram_user_function( deposited_quantity_object ); - histogram->deposited_quantity = "user_function"; + SMILEI_PY_ACQUIRE_GIL + { + ParticleData test( params.nDim_particle, deposited_quantity_object, deposited_quantityPrefix, dummy ); + histogram = new Histogram_user_function( deposited_quantity_object ); + histogram->deposited_quantity = "user_function"; + } + SMILEI_PY_RELEASE_GIL #else ERROR( deposited_quantityPrefix << " should be a string" ); #endif diff --git a/src/ElectroMagn/Laser.cpp b/src/ElectroMagn/Laser.cpp index eb2efee00..299e39c48 100755 --- a/src/ElectroMagn/Laser.cpp +++ b/src/ElectroMagn/Laser.cpp @@ -453,12 +453,9 @@ void LaserProfileSeparable::initFields( Params ¶ms, Patch *patch, ElectroMag double LaserProfileSeparable::getAmplitude( std::vector, double t, int j, int k ) { double amp; - #pragma omp critical - { - double omega = omega_ * chirpProfile_->valueAt( t ); - double phi = ( *phase )( j, k ); - amp = timeProfile_->valueAt( t-( phi+delay_phase_ )/omega ) * ( *space_envelope )( j, k ) * sin( omega*t - phi ); - } + double omega = omega_ * chirpProfile_->valueAt( t ); + double phi = ( *phase )( j, k ); + amp = timeProfile_->valueAt( t-( phi+delay_phase_ )/omega ) * ( *space_envelope )( j, k ) * sin( omega*t - phi ); return amp; } @@ -562,10 +559,7 @@ double LaserProfileFile::getAmplitude( std::vector pos, double t, int j, for( unsigned int i=0; ivalueAt( pos, t ); - } + amp *= extraProfile->valueAt( pos, t ); return amp; } diff --git a/src/ElectroMagn/Laser.h b/src/ElectroMagn/Laser.h index 1a848b6a9..134feff89 100755 --- a/src/ElectroMagn/Laser.h +++ b/src/ElectroMagn/Laser.h @@ -139,7 +139,6 @@ class LaserProfileNonSeparable : public LaserProfile inline double getAmplitude( std::vector pos, double t, int, int ) override { double amp; - #pragma omp critical amp = spaceAndTimeProfile_->valueAt( pos, t ); return amp; } @@ -147,7 +146,6 @@ class LaserProfileNonSeparable : public LaserProfile inline std::complex getAmplitudecomplex( std::vector pos, double t, int, int ) override { std::complex amp; - #pragma omp critical amp = spaceAndTimeProfile_->complexValueAt( pos, t ); return amp; } diff --git a/src/ElectroMagnBC/ElectroMagnBC1D_refl.cpp b/src/ElectroMagnBC/ElectroMagnBC1D_refl.cpp index 9d7518d78..47a5152b1 100755 --- a/src/ElectroMagnBC/ElectroMagnBC1D_refl.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC1D_refl.cpp @@ -35,20 +35,31 @@ void ElectroMagnBC1D_refl::apply( ElectroMagn *EMfields, double, Patch *patch ) { if( i_boundary_ == 0 ) { if( patch->isXmin() ) { + const Field *B[3]{ EMfields->Bx_, EMfields->By_, EMfields->Bz_ }; + double *const __restrict__ By1D = B[1]->data_; + double *const __restrict__ Bz1D = B[2]->data_; // Application over the full-ghost cell - //Field1D* Ex1D = static_cast(EMfields->Ex_); - //Field1D* Ey1D = static_cast(EMfields->Ey_); - //Field1D* Ez1D = static_cast(EMfields->Ez_); - //Field1D* Bx1D = static_cast(EMfields->Bx_); - Field1D *By1D = static_cast( EMfields->By_ ); - Field1D *Bz1D = static_cast( EMfields->Bz_ ); + //Field1D *By1D = static_cast( EMfields->By_ ); + //Field1D *Bz1D = static_cast( EMfields->Bz_ ); +#ifdef SMILEI_ACCELERATOR_GPU_OACC + const int sizeofB1 = B[1]->number_of_points_; + const int sizeofB2 = B[2]->number_of_points_; +#endif // force constant magnetic fields in the ghost cells +#ifdef SMILEI_ACCELERATOR_GPU_OACC + #pragma acc parallel present(By1D[0:sizeofB1],Bz1D[0:sizeofB2]) + #pragma acc loop gang worker vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target + #pragma omp teams distribute parallel for +#endif for( unsigned int i=oversize_; i>0; i-- ) { - //(*Bx1D)(i-1) = (*Bx1D)(i); - ( *By1D )( i-1 ) = ( *By1D )( i ); - ( *Bz1D )( i-1 ) = ( *Bz1D )( i ); + //( *By1D )( i-1 ) = ( *By1D )( i ); + //( *Bz1D )( i-1 ) = ( *Bz1D )( i ); + By1D[i-1] = By1D[i]; + Bz1D[i-1] = Bz1D[i]; } // // force 0 electric fields in the ghost cells @@ -60,7 +71,6 @@ void ElectroMagnBC1D_refl::apply( ElectroMagn *EMfields, double, Patch *patch ) // (*Ez1D)(i) = 0.0; // } - /* DEFINITION BY NICO // The other fields are already defined everywhere, so no need for boundary conditions. @@ -76,21 +86,34 @@ void ElectroMagnBC1D_refl::apply( ElectroMagn *EMfields, double, Patch *patch ) }//if Xmin } else { if( patch->isXmax() ) { + const Field *B[3]{ EMfields->Bx_, EMfields->By_, EMfields->Bz_ }; + double *const __restrict__ By1D = B[1]->data_; + double *const __restrict__ Bz1D = B[2]->data_; +#ifdef SMILEI_ACCELERATOR_GPU_OACC + const int sizeofB1 = B[1]->number_of_points_; + const int sizeofB2 = B[2]->number_of_points_; +#endif + const unsigned int nxd = n_d[0]; // application of Bcs over the full ghost cells - //Field1D* Ex1D = static_cast(EMfields->Ex_); - //Field1D* Ey1D = static_cast(EMfields->Ey_); - //Field1D* Ez1D = static_cast(EMfields->Ez_); - //Field1D* Bx1D = static_cast(EMfields->Bx_); - Field1D *By1D = static_cast( EMfields->By_ ); - Field1D *Bz1D = static_cast( EMfields->Bz_ ); + //Field1D *By1D = static_cast( EMfields->By_ ); + //Field1D *Bz1D = static_cast( EMfields->Bz_ ); // force constant magnetic fields in the ghost cells // for (unsigned int i=n_p[0]-oversize_; i b1( n_p[axis1_], 0. ); double *const __restrict__ db1 = b1.data(); - const unsigned int n1p = n_p[axis1_]; + const unsigned int n1p = n_p[axis1_]; const unsigned int n1d = n_d[axis1_]; const unsigned int nyp = n_p[1]; const unsigned int nyd = n_d[1]; const unsigned int iB0 = iB_[0]; - const unsigned int p0 = iB_[0] - sign_; + const unsigned int p0 = iB_[0] - sign_; const unsigned int p1 = iB_[1] - sign_; const unsigned int iB1 = iB_[1]; - const unsigned int iB2 = iB_[2]; + const unsigned int iB2 = iB_[2]; const unsigned int p2 = iB_[2] - sign_; const int b1_size = n1p ; const int b2_size = n1d ; std::vector pos( 1 ); - if( ! vecLaser.empty() ) { + if( ! vecLaser.empty() ) { for( unsigned int j=isBoundary1min ; jgetDomainLocalMin( axis1_ ) + ( ( int )j - ( int )EMfields->oversize[axis1_] )*d[axis1_]; for( unsigned int ilaser=0; ilaser< vecLaser.size(); ilaser++ ) { diff --git a/src/ElectroMagnBC/ElectroMagnBC2D_refl.cpp b/src/ElectroMagnBC/ElectroMagnBC2D_refl.cpp index 6a6e4fdc7..a3d01be23 100755 --- a/src/ElectroMagnBC/ElectroMagnBC2D_refl.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC2D_refl.cpp @@ -31,35 +31,56 @@ void ElectroMagnBC2D_refl::apply( ElectroMagn *EMfields, double, Patch *patch ) { if( i_boundary_ == 0 && patch->isXmin() ) { + const Field *B[3]{ EMfields->Bx_, EMfields->By_, EMfields->Bz_ }; + double *const __restrict__ By2D = B[1]->data_; + double *const __restrict__ Bz2D = B[2]->data_; +#ifdef SMILEI_ACCELERATOR_GPU_OACC + const int sizeofB1 = B[1]->number_of_points_; + const int sizeofB2 = B[2]->number_of_points_; +#endif + const unsigned int nyp = n_p[1]; + const unsigned int nyd = n_d[1]; // APPLICATION OF BCs OVER THE FULL GHOST CELL REGION // Static cast of the fields - //Field2D* Ex2D = static_cast(EMfields->Ex_); - //Field2D* Ey2D = static_cast(EMfields->Ey_); - //Field2D* Ez2D = static_cast(EMfields->Ez_); - //Field2D* Bx2D = static_cast(EMfields->Bx_); - Field2D *By2D = static_cast( EMfields->By_ ); - Field2D *Bz2D = static_cast( EMfields->Bz_ ); + //Field2D *By2D = static_cast( EMfields->By_ ); + //Field2D *Bz2D = static_cast( EMfields->Bz_ ); // FORCE CONSTANT MAGNETIC FIELDS - // // for Bx^(p,d) - // for (unsigned int i=oversize_; i>0; i--) { - // for (unsigned int j=0 ; j0; i-- ) { - for( unsigned int j=0 ; j0; i-- ) { - for( unsigned int j=0 ; jisXmax() ) { + const Field *B[3]{ EMfields->Bx_, EMfields->By_, EMfields->Bz_ }; + double *const __restrict__ By2D = B[1]->data_; + double *const __restrict__ Bz2D = B[2]->data_; +#ifdef SMILEI_ACCELERATOR_GPU_OACC + const int sizeofB1 = B[1]->number_of_points_; + const int sizeofB2 = B[2]->number_of_points_; +#endif + const unsigned int nxp = n_p[0]; + const unsigned int nxd = n_d[0]; + const unsigned int nyp = n_p[1]; + const unsigned int nyd = n_d[1]; // Static cast of the fields - //Field2D* Ex2D = static_cast(EMfields->Ex_); - //Field2D* Ey2D = static_cast(EMfields->Ey_); - //Field2D* Ez2D = static_cast(EMfields->Ez_); - //Field2D* Bx2D = static_cast(EMfields->Bx_); - Field2D *By2D = static_cast( EMfields->By_ ); - Field2D *Bz2D = static_cast( EMfields->Bz_ ); + //Field2D *By2D = static_cast( EMfields->By_ ); + //Field2D *Bz2D = static_cast( EMfields->Bz_ ); // FORCE CONSTANT MAGNETIC FIELDS - // // for Bx^(p,d) - // for (unsigned int i=n_p[0]-oversize_; iisYmin() ) { + const Field *B[3]{ EMfields->Bx_, EMfields->By_, EMfields->Bz_ }; + double *const __restrict__ Bx2D = B[0]->data_; + double *const __restrict__ Bz2D = B[2]->data_; +#ifdef SMILEI_ACCELERATOR_GPU_OACC + const int sizeofB0 = B[0]->number_of_points_; + const int sizeofB2 = B[2]->number_of_points_; +#endif + const unsigned int nxp = n_p[0]; + const unsigned int nxd = n_d[0]; + const unsigned int nyp = n_p[1]; + const unsigned int nyd = n_d[1]; // APPLICATION OF BCs OVER THE FULL GHOST CELL REGION // Static cast of the fields - //Field2D* Ex2D = static_cast(EMfields->Ex_); - //Field2D* Ey2D = static_cast(EMfields->Ey_); - //Field2D* Ez2D = static_cast(EMfields->Ez_); - Field2D *Bx2D = static_cast( EMfields->Bx_ ); - //Field2D* By2D = static_cast(EMfields->By_); - Field2D *Bz2D = static_cast( EMfields->Bz_ ); + //Field2D *Bx2D = static_cast( EMfields->Bx_ ); + //Field2D *Bz2D = static_cast( EMfields->Bz_ ); // FORCE CONSTANT MAGNETIC FIELDS // for Bx^(p,d) - for( unsigned int i=0; i0 ; j-- ) { - ( *Bx2D )( i, j-1 ) = ( *Bx2D )( i, j ); - }//j - }//i - - // // for By^(d,p) - // for (unsigned int i=0; i0 ; j--) { - // (*By2D)(i,j-1) = (*By2D)(i,j); - // }//j - // }//i + //( *Bx2D )( i, j-1 ) = ( *Bx2D )( i, j ); + Bx2D[i*nyd + j-1] = Bx2D[i*nyd + j]; + } + } // for Bz^(d,d) - for( unsigned int i=0; i0 ; j-- ) { - ( *Bz2D )( i, j-1 ) = ( *Bz2D )( i, j ); +#ifdef SMILEI_ACCELERATOR_GPU_OACC + #pragma acc parallel present(Bz2D[0:sizeofB2],) + #pragma acc loop gang +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target + #pragma omp teams distribute parallel for +#endif + for( unsigned int i=0; i0 ; j-- ) { + //( *Bz2D )( i, j-1 ) = ( *Bz2D )( i, j ); + Bz2D[i*nyd + j-1] = Bz2D[i*nyd + j]; } } @@ -268,34 +333,56 @@ void ElectroMagnBC2D_refl::apply( ElectroMagn *EMfields, double, Patch *patch ) } else if( i_boundary_ == 3 && patch->isYmax() ) { + const Field *B[3]{ EMfields->Bx_, EMfields->By_, EMfields->Bz_ }; + double *const __restrict__ Bx2D = B[0]->data_; + double *const __restrict__ Bz2D = B[2]->data_; +#ifdef SMILEI_ACCELERATOR_GPU_OACC + const int sizeofB0 = B[0]->number_of_points_; + const int sizeofB2 = B[2]->number_of_points_; +#endif + const unsigned int nxp = n_p[0]; + const unsigned int nxd = n_d[0]; + const unsigned int nyp = n_p[1]; + const unsigned int nyd = n_d[1]; // Static cast of the fields - //Field2D* Ex2D = static_cast(EMfields->Ex_); - //Field2D* Ey2D = static_cast(EMfields->Ey_); - //Field2D* Ez2D = static_cast(EMfields->Ez_); - Field2D *Bx2D = static_cast( EMfields->Bx_ ); - //Field2D* By2D = static_cast(EMfields->By_); - Field2D *Bz2D = static_cast( EMfields->Bz_ ); + //Field2D *Bx2D = static_cast( EMfields->Bx_ ); + //Field2D *Bz2D = static_cast( EMfields->Bz_ ); // FORCE CONSTANT MAGNETIC FIELDS // for Bx^(p,d) - for( unsigned int i=0; iBx_, EMfields->By_, EMfields->Bz_ }; + double *const __restrict__ Bx3D = B[0]->data_; + double *const __restrict__ By3D = B[1]->data_; + double *const __restrict__ Bz3D = B[2]->data_; + const unsigned int nzp = n_p[2]; + const unsigned int nzd = n_d[2]; + const unsigned int nxp = n_p[0]; + const unsigned int nxd = n_d[0]; + const unsigned int nyp = n_p[1]; + const unsigned int nyd = n_d[1]; + + const unsigned int nyz_pd = n_p[1] * n_d[2]; + const unsigned int nyz_dp = n_d[1] * n_p[2]; + const unsigned int nyz_dd = n_d[1] * n_d[2]; +#ifdef SMILEI_ACCELERATOR_GPU_OACC + const int sizeofB0 = B[0]->number_of_points_; + const int sizeofB1 = B[1]->number_of_points_; + const int sizeofB2 = B[2]->number_of_points_; +#endif if( i_boundary_ == 0 && patch->isXmin() ) { // APPLICATION OF BCs OVER THE FULL GHOST CELL REGION // Static cast of the fields - Field3D *By3D = static_cast( EMfields->By_ ); - Field3D *Bz3D = static_cast( EMfields->Bz_ ); + //Field3D *By3D = static_cast( EMfields->By_ ); + //Field3D *Bz3D = static_cast( EMfields->Bz_ ); // FORCE CONSTANT MAGNETIC FIELDS // for By^(d,p,d) +#ifdef SMILEI_ACCELERATOR_GPU_OACC + #pragma acc parallel present(By3D[0:sizeofB1]) + #pragma acc loop gang +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target + #pragma omp teams distribute parallel for +#endif for( unsigned int i=oversize_x; i>0; i-- ) { - for( unsigned int j=0 ; j0; i-- ) { - for( unsigned int j=0 ; jisXmax() ) { // Static cast of the fields - Field3D *By3D = static_cast( EMfields->By_ ); - Field3D *Bz3D = static_cast( EMfields->Bz_ ); + //Field3D *By3D = static_cast( EMfields->By_ ); + //Field3D *Bz3D = static_cast( EMfields->Bz_ ); // FORCE CONSTANT MAGNETIC FIELDS // for By^(d,p,d) - for( unsigned int i=n_d[0]-oversize_x; iisYmin() ) { // Static cast of the fields - Field3D *Bx3D = static_cast( EMfields->Bx_ ); - Field3D *Bz3D = static_cast( EMfields->Bz_ ); + //Field3D *Bx3D = static_cast( EMfields->Bx_ ); + //Field3D *Bz3D = static_cast( EMfields->Bz_ ); // FORCE CONSTANT MAGNETIC FIELDS // for Bx^(p,d,d) - for( unsigned int i=0; i0 ; j-- ) { - for( unsigned int k=0; k0 ; j-- ) { - for( unsigned int k=0; kisYmax() ) { // Static cast of the fields - Field3D *Bx3D = static_cast( EMfields->Bx_ ); - Field3D *Bz3D = static_cast( EMfields->Bz_ ); + //Field3D *Bx3D = static_cast( EMfields->Bx_ ); + //Field3D *Bz3D = static_cast( EMfields->Bz_ ); // FORCE CONSTANT MAGNETIC FIELDS // for Bx^(p,d,d) - for( unsigned int i=0; iisZmin() ) { // Static cast of the fields - Field3D *Bx3D = static_cast( EMfields->Bx_ ); - Field3D *By3D = static_cast( EMfields->By_ ); + //Field3D *Bx3D = static_cast( EMfields->Bx_ ); + //Field3D *By3D = static_cast( EMfields->By_ ); // FORCE CONSTANT MAGNETIC FIELDS // for Bx^(p,d,d) - for( unsigned int i=0; i0 ; k-- ) { - ( *Bx3D )( i, j, k-1 ) = ( *Bx3D )( i, j, k ); + //( *Bx3D )( i, j, k-1 ) = ( *Bx3D )( i, j, k ); + Bx3D[i*nyz_dd + j*nzd + k-1] = Bx3D[i*nyz_dd + j*nzd + k]; } } } // for By^(d,p,d) - for( unsigned int i=0; i0 ; k-- ) { - ( *By3D )( i, j, k-1 ) = ( *By3D )( i, j, k ); + //( *By3D )( i, j, k-1 ) = ( *By3D )( i, j, k ); + By3D[i*nyz_pd + j*nzd + k-1] = By3D[i*nyz_pd + j*nzd + k]; } } } @@ -169,25 +295,47 @@ void ElectroMagnBC3D_refl::apply( ElectroMagn *EMfields, double, Patch *patch ) } else if( i_boundary_==5 && patch->isZmax() ) { // Static cast of the fields - Field3D *Bx3D = static_cast( EMfields->Bx_ ); - Field3D *By3D = static_cast( EMfields->By_ ); + //Field3D *Bx3D = static_cast( EMfields->Bx_ ); + //Field3D *By3D = static_cast( EMfields->By_ ); // FORCE CONSTANT MAGNETIC FIELDS // for Bx^(p,d,d) - for( unsigned int i=0; i( idx_p[0] ); - delta2 = delta * delta; // pow( delta_p[0], 2 ); // square of the normalized distance to the central node + delta2 = delta * delta; // pow ( delta_p[0], 2 ); // square of the normalized distance to the central node delta_p[0] = delta; // normalized distance to the central node coeffxp[0] = 0.5 * ( delta2 - delta_p[0] + 0.25 ); diff --git a/src/Ionization/Ionization.cpp b/src/Ionization/Ionization.cpp index bf52b7b79..8ba734f2c 100755 --- a/src/Ionization/Ionization.cpp +++ b/src/Ionization/Ionization.cpp @@ -22,17 +22,6 @@ Ionization::Ionization(Params ¶ms, Species *species) EC_to_au = 3.314742578e-15 * reference_angular_frequency_SI; // hbar omega / (me c^2 alpha^3) au_to_w0 = 4.134137172e+16 / reference_angular_frequency_SI; // alpha^2 me c^2 / (hbar omega) - atomic_number_ = species->atomic_number_; - Potential.resize(atomic_number_); - Azimuthal_quantum_number.resize(atomic_number_); - - one_third = 1.0 / 3.0; - - alpha_tunnel.resize(atomic_number_); - beta_tunnel.resize(atomic_number_); - gamma_tunnel.resize(atomic_number_); - - #ifdef _OMPTASKS new_electrons_per_bin = new Particles[species->Nbins]; ion_charge_per_bin_.resize(species->Nbins); @@ -50,40 +39,33 @@ void Ionization::joinNewElectrons(unsigned int Nbins) // Resize new_electrons size_t total_n_new = 0; - for (size_t ibin = 0; ibin < Nbins; ibin++) - { + for (size_t ibin = 0; ibin < Nbins; ibin++) { total_n_new += new_electrons_per_bin[ibin].size(); } new_electrons.createParticles(total_n_new); // Also resize ion_charge_ if necessary - if (save_ion_charge_) - { + if (save_ion_charge_) { ion_charge_.resize(start + total_n_new); } // Move each new_electrons_per_bin into new_electrons - for (size_t ibin = 0; ibin < Nbins; ibin++) - { + for (size_t ibin = 0; ibin < Nbins; ibin++) { size_t n_new = new_electrons_per_bin[ibin].size(); - for (size_t i = 0; i < new_electrons.dimension(); i++) - { + for (size_t i = 0; i < new_electrons.dimension(); i++) { copy(&new_electrons_per_bin[ibin].position(i, 0), &new_electrons_per_bin[ibin].position(i, n_new), &new_electrons.position(i, start)); } - for (size_t i = 0; i < new_electrons.dimension(); i++) - { + for (size_t i = 0; i < new_electrons.dimension(); i++) { copy(&new_electrons_per_bin[ibin].momentum(i, 0), &new_electrons_per_bin[ibin].momentum(i, n_new), &new_electrons.momentum(i, start)); } copy(&new_electrons_per_bin[ibin].weight(0), &new_electrons_per_bin[ibin].weight(n_new), &new_electrons.weight(start)); copy(&new_electrons_per_bin[ibin].charge(0), &new_electrons_per_bin[ibin].charge(n_new), &new_electrons.charge(start)); new_electrons_per_bin[ibin].clear(); - if (save_ion_charge_) - { + if (save_ion_charge_) { copy(ion_charge_per_bin_[ibin].begin(), ion_charge_per_bin_[ibin].end(), ion_charge_.begin() + start); ion_charge_per_bin_[ibin].clear(); } start += n_new; } } - diff --git a/src/Ionization/Ionization.h b/src/Ionization/Ionization.h index 6ce28719c..61004857a 100755 --- a/src/Ionization/Ionization.h +++ b/src/Ionization/Ionization.h @@ -47,7 +47,6 @@ class Ionization double au_to_mec2; double EC_to_au; double au_to_w0; - double one_third; double reference_angular_frequency_SI; double dt; @@ -56,10 +55,6 @@ class Ionization unsigned int nDim_particle; double ionized_species_invmass; - unsigned int atomic_number_; - std::vector Potential, Azimuthal_quantum_number; - std::vector alpha_tunnel, beta_tunnel, gamma_tunnel; - private: }; diff --git a/src/Ionization/IonizationBSI.cpp b/src/Ionization/IonizationBSI.cpp index 6d9aa0190..99bc18239 100644 --- a/src/Ionization/IonizationBSI.cpp +++ b/src/Ionization/IonizationBSI.cpp @@ -1,62 +1,82 @@ -#include "IonizationTunnel.h" #include "IonizationTables.h" +#include "IonizationTunnel.h" template <> -IonizationTunnel<3>::IonizationTunnel(Params ¶ms, Species *species) : Ionization(params, species) -{ - DEBUG("Creating the Tunnel BSI Ionizaton class"); - - // Ionization potential & quantum numbers (all in atomic units 1 au = 27.2116 eV) - for (int Z = 0; Z < (int)atomic_number_; Z++) - { - DEBUG("Z : " << Z); - - Potential[Z] = IonizationTables::ionization_energy(atomic_number_, Z) * eV_to_au; - Azimuthal_quantum_number[Z] = IonizationTables::azimuthal_atomic_number(atomic_number_, Z); - - DEBUG("potential: " << Potential[Z] << " Az.q.num: " << Azimuthal_quantum_number[Z]); - - double cst = ((double)Z + 1.0) * sqrt(2.0 / Potential[Z]); - alpha_tunnel[Z] = cst - 1.0; - beta_tunnel[Z] = pow(2, alpha_tunnel[Z]) * (8. * Azimuthal_quantum_number[Z] + 4.0) / (cst * tgamma(cst)) * - Potential[Z] * au_to_w0; - gamma_tunnel[Z] = 2.0 * pow(2.0 * Potential[Z], 1.5); - } - DEBUG("Finished Creating the BSI Tunnel Ionizaton class"); +IonizationTunnel<3>::IonizationTunnel(Params ¶ms, Species *species) + : Ionization(params, species) { + DEBUG("Creating the Tunnel BSI Ionizaton class"); + + atomic_number_ = species->atomic_number_; + Potential.resize(atomic_number_); + Azimuthal_quantum_number.resize(atomic_number_); + + one_third = 1.0 / 3.0; + + alpha_tunnel.resize(atomic_number_); + beta_tunnel.resize(atomic_number_); + gamma_tunnel.resize(atomic_number_); + + // Ionization potential & quantum numbers (all in atomic units 1 au = 27.2116 + // eV) + for (int Z = 0; Z < (int)atomic_number_; Z++) { + DEBUG("Z : " << Z); + + Potential[Z] = + IonizationTables::ionization_energy(atomic_number_, Z) * eV_to_au; + Azimuthal_quantum_number[Z] = + IonizationTables::azimuthal_atomic_number(atomic_number_, Z); + + DEBUG("potential: " << Potential[Z] + << " Az.q.num: " << Azimuthal_quantum_number[Z]); + + double cst = ((double)Z + 1.0) * sqrt(2.0 / Potential[Z]); + alpha_tunnel[Z] = cst - 1.0; + beta_tunnel[Z] = pow(2, alpha_tunnel[Z]) * + (8. * Azimuthal_quantum_number[Z] + 4.0) / + (cst * tgamma(cst)) * Potential[Z] * au_to_w0; + gamma_tunnel[Z] = 2.0 * pow(2.0 * Potential[Z], 1.5); + } + DEBUG("Finished Creating the BSI Tunnel Ionizaton class"); } template <> -double IonizationTunnel<3>::ionizationRate(const int Z, const double E, const double oldZ) -{ - auto normal = [this](const int Z, const double E) -> double { - double delta = gamma_tunnel[Z] / E; - return beta_tunnel[Z] * exp(-delta * one_third + alpha_tunnel[Z] * log(delta)); - }; - - auto linear = [this](const int Z, const double E) -> double { - const double ratio_of_IPs = IH / IonizationTables::ionization_energy(atomic_number_, Z); - return au_to_w0 * (0.8 * E * pow(ratio_of_IPs, 0.5)); - }; - auto quadratic = [this](const int Z, const double E) -> double { - const double ratio_of_IPs = IH / IonizationTables::ionization_energy(atomic_number_, Z); - return au_to_w0 * (2.4 * (pow(E, 2)) * pow(ratio_of_IPs, 2)); - }; - - double BSI_rate_quadratic = quadratic(oldZ+1, E); - double BSI_rate_linear = linear(oldZ+1, E); - double Tunnel_rate = normal(oldZ, E); - - if (BSI_rate_quadratic >= BSI_rate_linear) - { - return quadratic(Z, E); - } - else if (std::min(Tunnel_rate, BSI_rate_quadratic) == BSI_rate_quadratic) - { - return linear(Z, E); - } - else - { - return normal(Z, E); - } +template <> +inline double IonizationTunnel<3>::ionizationRate<1>(const int Z, + const double E) { + double ratio_of_IPs = + IH / IonizationTables::ionization_energy(atomic_number_, Z); + double BSI_rate_quadratic = 2.4 * (pow(E, 2))*pow(ratio_of_IPs, 2) * au_to_w0; + double BSI_rate_linear = 0.8 * E * pow(ratio_of_IPs, 0.5) * au_to_w0; + double delta = gamma_tunnel[Z] / E; + double Tunnel_rate = + beta_tunnel[Z] * exp(-delta / 3.0 + alpha_tunnel[Z] * log(delta)); + + if (BSI_rate_quadratic >= BSI_rate_linear) { + rate_formula = 2; + return BSI_rate_linear; + } else if (std::min(Tunnel_rate, BSI_rate_quadratic) == BSI_rate_quadratic) { + rate_formula = 1; + return BSI_rate_quadratic; + } else { + rate_formula = 0; + return Tunnel_rate; + } +} + +template <> +template <> +inline double IonizationTunnel<3>::ionizationRate<2>(const int newZ, + const double E) { + double ratio_of_IPs_newZ = + IH / IonizationTables::ionization_energy(atomic_number_, newZ); + double delta = gamma_tunnel[newZ] / E; + if (rate_formula == 1) { + return au_to_w0 * (2.4 * (pow(E, 2))*pow(ratio_of_IPs_newZ, 2)); + } else if (rate_formula == 2) { + return au_to_w0 * (0.8 * E * pow(ratio_of_IPs_newZ, 0.5)); + } else { + return beta_tunnel[newZ] * + exp(-delta * one_third + alpha_tunnel[newZ] * log(delta)); + } } diff --git a/src/Ionization/IonizationFactory.h b/src/Ionization/IonizationFactory.h index a7039bec0..d3eabeeaa 100644 --- a/src/Ionization/IonizationFactory.h +++ b/src/Ionization/IonizationFactory.h @@ -43,11 +43,11 @@ class IonizationFactory checkMaxCharge(species); checkNotLaserEnvelopeModel(params); Ionize = new IonizationTunnel<1>(params, species); // FullPPT - } else if (model == "tunnel_TL") { + } else if (model == "Tong_Lin") { checkMaxCharge(species); checkNotLaserEnvelopeModel(params); - Ionize = new IonizationTunnel<2>(params, species); // Tong&Ling - } else if (model == "tunnel_BSI") { + Ionize = new IonizationTunnel<2>(params, species); // Tong&Lin + } else if (model == "barrier_suppression") { checkMaxCharge(species); checkNotLaserEnvelopeModel(params); Ionize = new IonizationTunnel<3>(params, species); // BSI diff --git a/src/Ionization/IonizationFromRate.cpp b/src/Ionization/IonizationFromRate.cpp index 57358f425..870ec4a47 100755 --- a/src/Ionization/IonizationFromRate.cpp +++ b/src/Ionization/IonizationFromRate.cpp @@ -38,14 +38,13 @@ void IonizationFromRate::operator()( Particles *particles, unsigned int ipart_mi #ifdef SMILEI_USE_NUMPY // Run python to evaluate the ionization rate for each particle - PyArrayObject *ret; unsigned int npart = ipart_max - ipart_min; - #pragma omp critical + SMILEI_PY_ACQUIRE_GIL { ParticleData particleData( npart ); particleData.startAt( ipart_min ); particleData.set( particles ); - ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( ionization_rate_, particleData.get(), NULL ); + PyArrayObject *ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( ionization_rate_, particleData.get(), NULL ); PyTools::checkPyError(); if( ret == NULL ) { ERROR( "ionization_rate profile has not provided a correct result" ); @@ -58,6 +57,7 @@ void IonizationFromRate::operator()( Particles *particles, unsigned int ipart_mi } Py_DECREF( ret ); } + SMILEI_PY_RELEASE_GIL #endif diff --git a/src/Ionization/IonizationFullPPT.cpp b/src/Ionization/IonizationFullPPT.cpp index d51de62cf..40d6d81cb 100644 --- a/src/Ionization/IonizationFullPPT.cpp +++ b/src/Ionization/IonizationFullPPT.cpp @@ -1,16 +1,25 @@ -#include "IonizationTunnel.h" #include "IonizationTables.h" +#include "IonizationTunnel.h" template <> IonizationTunnel<1>::IonizationTunnel(Params ¶ms, Species *species) : Ionization(params, species) { DEBUG("Creating the Tunnel Ionizaton class"); + atomic_number_ = species->atomic_number_; + Potential.resize(atomic_number_); + Azimuthal_quantum_number.resize(atomic_number_); + + one_third = 1.0 / 3.0; + + alpha_tunnel.resize(atomic_number_); + beta_tunnel.resize(atomic_number_); + gamma_tunnel.resize(atomic_number_); + // Ionization potential & quantum numbers (all in atomic units 1 au = 27.2116 eV) Magnetic_quantum_number.resize(atomic_number_); - for (unsigned int Z = 0; Z < atomic_number_; Z++) - { + for (unsigned int Z = 0; Z < atomic_number_; Z++) { DEBUG("Z : " << Z); Potential[Z] = IonizationTables::ionization_energy(atomic_number_, Z) * eV_to_au; @@ -31,5 +40,3 @@ IonizationTunnel<1>::IonizationTunnel(Params ¶ms, Species *species) : Ioniza DEBUG("Finished Creating the Tunnel Ionizaton class"); } - - diff --git a/src/Ionization/IonizationTL.cpp b/src/Ionization/IonizationTL.cpp index a44ad1439..84d06f5f0 100644 --- a/src/Ionization/IonizationTL.cpp +++ b/src/Ionization/IonizationTL.cpp @@ -1,40 +1,59 @@ -#include "IonizationTunnel.h" #include "IonizationTables.h" +#include "IonizationTunnel.h" template <> -IonizationTunnel<2>::IonizationTunnel(Params ¶ms, Species *species) : Ionization(params, species) -{ - DEBUG("Creating the Tong-Lin Tunnel Ionizaton class"); - - ionization_tl_parameter_ = - species->ionization_tl_parameter_; // species->ionization_tl_parameter_ is double - // Varies from 6 to 9. This is the alpha parameter in Tong-Lin exponential, see Eq. (6) in [M F Ciappina and S V Popruzhenko 2020 Laser Phys. Lett. 17 025301 2020]. - lambda_tunnel.resize(atomic_number_); - - // Ionization potential & quantum numbers (all in atomic units 1 au = 27.2116 eV) - for (int Z = 0; Z < (int)atomic_number_; Z++) - { - DEBUG("Z : " << Z); - Potential[Z] = IonizationTables::ionization_energy(atomic_number_, Z) * eV_to_au; - Azimuthal_quantum_number[Z] = IonizationTables::azimuthal_atomic_number(atomic_number_, Z); - - DEBUG("potential: " << Potential[Z] << " Az.q.num: " << Azimuthal_quantum_number[Z]); - - double cst = ((double)Z + 1.0) * sqrt(2.0 / Potential[Z]); - alpha_tunnel[Z] = cst - 1.0; - beta_tunnel[Z] = pow(2, alpha_tunnel[Z]) * (8. * Azimuthal_quantum_number[Z] + 4.0) / (cst * tgamma(cst)) * - Potential[Z] * au_to_w0; - gamma_tunnel[Z] = 2.0 * pow(2.0 * Potential[Z], 1.5); - lambda_tunnel[Z] = ionization_tl_parameter_ * pow(cst, 2) / gamma_tunnel[Z]; - } - - DEBUG("Finished Creating the Tong-Lin Tunnel Ionizaton class"); +IonizationTunnel<2>::IonizationTunnel(Params ¶ms, Species *species) + : Ionization(params, species) { + DEBUG("Creating the Tong-Lin Tunnel Ionizaton class"); + + atomic_number_ = species->atomic_number_; + Potential.resize(atomic_number_); + Azimuthal_quantum_number.resize(atomic_number_); + + one_third = 1.0 / 3.0; + + alpha_tunnel.resize(atomic_number_); + beta_tunnel.resize(atomic_number_); + gamma_tunnel.resize(atomic_number_); + + ionization_tl_parameter_ = + species->ionization_tl_parameter_; // species->ionization_tl_parameter_ is + // double Varies from 6 to 9. This is + // the alpha parameter in Tong-Lin + // exponential, see Eq. (6) in [M F + // Ciappina and S V Popruzhenko 2020 + // Laser Phys. Lett. 17 025301 2020]. + lambda_tunnel.resize(atomic_number_); + + // Ionization potential & quantum numbers (all in atomic units 1 au = 27.2116 + // eV) + for (int Z = 0; Z < (int)atomic_number_; Z++) { + DEBUG("Z : " << Z); + Potential[Z] = + IonizationTables::ionization_energy(atomic_number_, Z) * eV_to_au; + Azimuthal_quantum_number[Z] = + IonizationTables::azimuthal_atomic_number(atomic_number_, Z); + + DEBUG("potential: " << Potential[Z] + << " Az.q.num: " << Azimuthal_quantum_number[Z]); + + double cst = ((double)Z + 1.0) * sqrt(2.0 / Potential[Z]); + alpha_tunnel[Z] = cst - 1.0; + beta_tunnel[Z] = pow(2, alpha_tunnel[Z]) * + (8. * Azimuthal_quantum_number[Z] + 4.0) / + (cst * tgamma(cst)) * Potential[Z] * au_to_w0; + gamma_tunnel[Z] = 2.0 * pow(2.0 * Potential[Z], 1.5); + lambda_tunnel[Z] = ionization_tl_parameter_ * pow(cst, 2) / gamma_tunnel[Z]; + } + + DEBUG("Finished Creating the Tong-Lin Tunnel Ionizaton class"); } template <> -double IonizationTunnel<2>::ionizationRate(const int Z, const double E, const double oldZ) -{ - const double delta = gamma_tunnel[Z] / E; - return beta_tunnel[Z] * exp(-delta * one_third + alpha_tunnel[Z] * log(delta) - E * lambda_tunnel[Z]); +template +inline double IonizationTunnel<2>::ionizationRate(const int Z, const double E) { + const double delta = gamma_tunnel[Z] / E; + return beta_tunnel[Z] * + exp(-delta * one_third + alpha_tunnel[Z] * log(delta) - + E * lambda_tunnel[Z]); } - diff --git a/src/Ionization/IonizationTunnel.cpp b/src/Ionization/IonizationTunnel.cpp index 2bd478f89..0ce8d7852 100644 --- a/src/Ionization/IonizationTunnel.cpp +++ b/src/Ionization/IonizationTunnel.cpp @@ -1,31 +1,44 @@ #include "IonizationTunnel.h" -#include "IonizationTables.h" +#include "IonizationTables.h" // Classic: 1 template <> -IonizationTunnel<0>::IonizationTunnel(Params ¶ms, Species *species) : Ionization(params, species) -{ - DEBUG("Creating the Tunnel Ionizaton class"); - - // Ionization potential & quantum numbers (all in atomic units 1 au = 27.2116 eV) - for (unsigned int Z = 0; Z < atomic_number_; Z++) - { - DEBUG("Z : " << Z); - - Potential[Z] = IonizationTables::ionization_energy(atomic_number_, Z) * eV_to_au; - Azimuthal_quantum_number[Z] = IonizationTables::azimuthal_atomic_number(atomic_number_, Z); - - DEBUG("Potential: " << Potential[Z] << " Az.q.num: " << Azimuthal_quantum_number[Z]); - - double cst = ((double)Z + 1.0) * sqrt(2.0 / Potential[Z]); - alpha_tunnel[Z] = cst - 1.0; - beta_tunnel[Z] = pow(2, alpha_tunnel[Z]) * (8. * Azimuthal_quantum_number[Z] + 4.0) / (cst * tgamma(cst)) * - Potential[Z] * au_to_w0; - gamma_tunnel[Z] = 2.0 * pow(2.0 * Potential[Z], 1.5); - } - - DEBUG("Finished Creating the Tunnel Ionizaton class"); +IonizationTunnel<0>::IonizationTunnel(Params ¶ms, Species *species) + : Ionization(params, species) { + DEBUG("Creating the Tunnel Ionizaton class"); + + atomic_number_ = species->atomic_number_; + Potential.resize(atomic_number_); + Azimuthal_quantum_number.resize(atomic_number_); + + one_third = 1.0 / 3.0; + + alpha_tunnel.resize(atomic_number_); + beta_tunnel.resize(atomic_number_); + gamma_tunnel.resize(atomic_number_); + + // Ionization potential & quantum numbers (all in atomic units 1 au = 27.2116 + // eV) + for (unsigned int Z = 0; Z < atomic_number_; Z++) { + DEBUG("Z : " << Z); + + Potential[Z] = + IonizationTables::ionization_energy(atomic_number_, Z) * eV_to_au; + Azimuthal_quantum_number[Z] = + IonizationTables::azimuthal_atomic_number(atomic_number_, Z); + + DEBUG("Potential: " << Potential[Z] + << " Az.q.num: " << Azimuthal_quantum_number[Z]); + + double cst = ((double)Z + 1.0) * sqrt(2.0 / Potential[Z]); + alpha_tunnel[Z] = cst - 1.0; + beta_tunnel[Z] = pow(2, alpha_tunnel[Z]) * + (8. * Azimuthal_quantum_number[Z] + 4.0) / + (cst * tgamma(cst)) * Potential[Z] * au_to_w0; + gamma_tunnel[Z] = 2.0 * sqrt(2.0 * Potential[Z] * 2.0 * Potential[Z] * 2.0 * + Potential[Z]); + } + + DEBUG("Finished Creating the Tunnel Ionizaton class"); } - - diff --git a/src/Ionization/IonizationTunnel.h b/src/Ionization/IonizationTunnel.h index 69ffc7eb3..65967405b 100644 --- a/src/Ionization/IonizationTunnel.h +++ b/src/Ionization/IonizationTunnel.h @@ -6,178 +6,196 @@ #include #include "Ionization.h" -#include "Tools.h" - #include "Particles.h" #include "Species.h" +#include "Tools.h" class Particles; -template -class IonizationTunnel : public Ionization -{ - public: - inline IonizationTunnel(Params ¶ms, Species *species); +template class IonizationTunnel : public Ionization { +public: + inline IonizationTunnel(Params ¶ms, Species *species); - inline void operator()(Particles *, unsigned int, unsigned int, std::vector *, Patch *, Projector *, - int ipart_ref = 0) override; + inline void operator()(Particles *, unsigned int, unsigned int, + std::vector *, Patch *, Projector *, + int ipart_ref = 0) override; - //! method for tunnel ionization with tasks - inline void ionizationTunnelWithTasks(Particles *, unsigned int, unsigned int, std::vector *, Patch *, Projector *, - int, int, double *b_Jx, double *b_Jy, double *b_Jz, int ipart_ref = 0) override; + //! method for tunnel ionization with tasks + inline void ionizationTunnelWithTasks(Particles *, unsigned int, unsigned int, + std::vector *, Patch *, + Projector *, int, int, double *b_Jx, + double *b_Jy, double *b_Jz, + int ipart_ref = 0) override; - private: - inline double ionizationRate(const int Z, const double E, double oldZ); +private: + template + inline double ionizationRate(const int Z, const double E); - // To be conditionally prepared - // FullPPT - std::vector Magnetic_quantum_number; + double one_third; + unsigned int atomic_number_; + std::vector Potential, Azimuthal_quantum_number; + std::vector alpha_tunnel, beta_tunnel, gamma_tunnel; - // Tong&Ling - double ionization_tl_parameter_; - std::vector lambda_tunnel; + // To be conditionally prepared + // FullPPT + std::vector Magnetic_quantum_number; - // BSI - const double au_to_eV = 27.2116; - const double IH = 13.598434005136; + // Tong&Ling + double ionization_tl_parameter_; + std::vector lambda_tunnel; + + // BSI + const double au_to_eV = 27.2116; + const double IH = 13.598434005136; + int rate_formula; }; template -inline void IonizationTunnel::operator()( Particles *particles, unsigned int ipart_min, unsigned int ipart_max, - vector *Epart, Patch *patch, Projector *Proj, int ipart_ref) -{ -unsigned int Z, Zp1, newZ, k_times; - double TotalIonizPot, E, invE, factorJion, ran_p, Mult, D_sum, P_sum, Pint_tunnel; - vector IonizRate_tunnel( atomic_number_ ), Dnom_tunnel( atomic_number_ ); - LocalFields Jion; - double factorJion_0 = au_to_mec2 * EC_to_au*EC_to_au * invdt; - - int nparts = Epart->size()/3; - double *Ex = &( ( *Epart )[0*nparts] ); - double *Ey = &( ( *Epart )[1*nparts] ); - double *Ez = &( ( *Epart )[2*nparts] ); - - for( unsigned int ipart=ipart_min ; ipartcharge( ipart ) ); - - // If ion already fully ionized then skip - if( Z==atomic_number_ ) { - continue; - } - - // Absolute value of the electric field normalized in atomic units - E = EC_to_au * sqrt( pow( *( Ex+ipart-ipart_ref ), 2 ) - +pow( *( Ey+ipart-ipart_ref ), 2 ) - +pow( *( Ez+ipart-ipart_ref ), 2 ) ); - if( E<1e-10 ) { - continue; - } - - // -------------------------------- - // Start of the Monte-Carlo routine - // -------------------------------- - - invE = 1./E; - factorJion = factorJion_0 * invE*invE; - ran_p = patch->rand_->uniform(); - IonizRate_tunnel[Z] = ionizationRate(Z, E, Z); - - // Total ionization potential (used to compute the ionization current) - TotalIonizPot = 0.0; - - // k_times will give the nb of ionization events - k_times = 0; - Zp1=Z+1; - - if( Zp1 == atomic_number_ ) { - // if ionization of the last electron: single ionization - // ----------------------------------------------------- - if( ran_p < 1.0 -exp( -IonizRate_tunnel[Z]*dt ) ) { - TotalIonizPot += Potential[Z]; - k_times = 1; - } - - } else { - // else : multiple ionization can occur in one time-step - // partial & final ionization are decoupled (see Nuter Phys. Plasmas) - // ------------------------------------------------------------------------- - - // initialization - Mult = 1.0; - Dnom_tunnel[0]=1.0; - Pint_tunnel = exp( -IonizRate_tunnel[Z]*dt ); // cummulative prob. - - //multiple ionization loop while Pint_tunnel < ran_p and still partial ionization - while( ( Pint_tunnel < ran_p ) and ( k_times < atomic_number_-Zp1 ) ) { - newZ = Zp1+k_times; - IonizRate_tunnel[newZ] = ionizationRate(newZ, E, Z); - D_sum = 0.0; - P_sum = 0.0; - Mult *= IonizRate_tunnel[Z+k_times]; - for( unsigned int i=0; iran_p ) && ( k_times==atomic_number_-Zp1 ) ) { - TotalIonizPot += Potential[atomic_number_-1]; - k_times++; - } - }//END Multiple ionization routine - - // Compute ionization current - if (patch->EMfields->Jx_ != NULL){ // For the moment ionization current is not accounted for in AM geometry - factorJion *= TotalIonizPot; - Jion.x = factorJion * *( Ex+ipart ); - Jion.y = factorJion * *( Ey+ipart ); - Jion.z = factorJion * *( Ez+ipart ); - - Proj->ionizationCurrents( patch->EMfields->Jx_, patch->EMfields->Jy_, patch->EMfields->Jz_, *particles, ipart, Jion ); - } - - // Creation of the new electrons - // (variable weights are used) - // ----------------------------- - - if( k_times !=0 ) { - new_electrons.createParticle(); - int idNew = new_electrons.size() - 1; - for( unsigned int i=0; iposition( i, ipart ); - } - for( unsigned int i=0; i<3; i++ ) { - new_electrons.momentum( i, idNew ) = particles->momentum( i, ipart )*ionized_species_invmass; - } - new_electrons.weight( idNew )=double( k_times )*particles->weight( ipart ); - new_electrons.charge( idNew )=-1; - - if( save_ion_charge_ ) { - ion_charge_.push_back( particles->charge( ipart ) ); - } - - // Increase the charge of the particle - particles->charge( ipart ) += k_times; +inline void IonizationTunnel::operator()( + Particles *particles, unsigned int ipart_min, unsigned int ipart_max, + vector *Epart, Patch *patch, Projector *Proj, int ipart_ref) { + unsigned int Z, Zp1, newZ, k_times; + double TotalIonizPot, E, invE, factorJion, ran_p, Mult, D_sum, P_sum, + Pint_tunnel; + vector IonizRate_tunnel(atomic_number_), + Dnom_tunnel(atomic_number_, 0.); + LocalFields Jion; + double factorJion_0 = au_to_mec2 * EC_to_au * EC_to_au * invdt; + + int nparts = Epart->size() / 3; + double *Ex = &((*Epart)[0 * nparts]); + double *Ey = &((*Epart)[1 * nparts]); + double *Ez = &((*Epart)[2 * nparts]); + + for (unsigned int ipart = ipart_min; ipart < ipart_max; ipart++) { + // Current charge state of the ion + Z = (unsigned int)(particles->charge(ipart)); + + // If ion already fully ionized then skip + if (Z == atomic_number_) { + continue; + } + + // Absolute value of the electric field normalized in atomic units + E = EC_to_au * sqrt(*(Ex + ipart - ipart_ref) * *(Ex + ipart - ipart_ref) + + *(Ey + ipart - ipart_ref) * *(Ey + ipart - ipart_ref) + + *(Ez + ipart - ipart_ref) * *(Ez + ipart - ipart_ref)); + if (E < 1e-10) { + continue; + } + + // -------------------------------- + // Start of the Monte-Carlo routine + // -------------------------------- + + invE = 1. / E; + factorJion = factorJion_0 * invE * invE; + ran_p = patch->rand_->uniform(); + IonizRate_tunnel[Z] = ionizationRate<1>(Z, E); + + // Total ionization potential (used to compute the ionization current) + TotalIonizPot = 0.0; + + // k_times will give the nb of ionization events + k_times = 0; + Zp1 = Z + 1; + + if (Zp1 == atomic_number_) { + // if ionization of the last electron: single ionization + // ----------------------------------------------------- + if (ran_p < 1.0 - exp(-IonizRate_tunnel[Z] * dt)) { + TotalIonizPot += Potential[Z]; + k_times = 1; + } + + } else { + // else : multiple ionization can occur in one time-step + // partial & final ionization are decoupled (see Nuter Phys. + // Plasmas) + // ------------------------------------------------------------------------- + + // initialization + Mult = 1.0; + Dnom_tunnel[0] = 1.0; + Pint_tunnel = exp(-IonizRate_tunnel[Z] * dt); // cummulative prob. + + // multiple ionization loop while Pint_tunnel < ran_p and still partial + // ionization + while ((Pint_tunnel < ran_p) and (k_times < atomic_number_ - Zp1)) { + newZ = Zp1 + k_times; + IonizRate_tunnel[newZ] = ionizationRate<2>(newZ, E); + D_sum = 0.0; + P_sum = 0.0; + Mult *= IonizRate_tunnel[Z + k_times]; + for (unsigned int i = 0; i < k_times + 1; i++) { + Dnom_tunnel[i] = Dnom_tunnel[i] / + (IonizRate_tunnel[newZ] - IonizRate_tunnel[Z + i]); + D_sum += Dnom_tunnel[i]; + P_sum += exp(-IonizRate_tunnel[Z + i] * dt) * Dnom_tunnel[i]; } - - } // Loop on particles + Dnom_tunnel[k_times + 1] -= D_sum; + P_sum = P_sum + + Dnom_tunnel[k_times + 1] * exp(-IonizRate_tunnel[newZ] * dt); + Pint_tunnel = Pint_tunnel + P_sum * Mult; + + TotalIonizPot += Potential[Z + k_times]; + k_times++; + } // END while + + // final ionization (of last electron) + if (((1.0 - Pint_tunnel) > ran_p) && (k_times == atomic_number_ - Zp1)) { + TotalIonizPot += Potential[atomic_number_ - 1]; + k_times++; + } + } // END Multiple ionization routine + + // Compute ionization current + if (patch->EMfields->Jx_ != NULL) { // For the moment ionization current is + // not accounted for in AM geometry + factorJion *= TotalIonizPot; + Jion.x = factorJion * *(Ex + ipart); + Jion.y = factorJion * *(Ey + ipart); + Jion.z = factorJion * *(Ez + ipart); + + Proj->ionizationCurrents(patch->EMfields->Jx_, patch->EMfields->Jy_, + patch->EMfields->Jz_, *particles, ipart, Jion); + } + + // Creation of the new electrons + // (variable weights are used) + // ----------------------------- + + if (k_times != 0) { + new_electrons.createParticle(); + int idNew = new_electrons.size() - 1; + for (unsigned int i = 0; i < new_electrons.dimension(); i++) { + new_electrons.position(i, idNew) = particles->position(i, ipart); + } + for (unsigned int i = 0; i < 3; i++) { + new_electrons.momentum(i, idNew) = + particles->momentum(i, ipart) * ionized_species_invmass; + } + new_electrons.weight(idNew) = double(k_times) * particles->weight(ipart); + new_electrons.charge(idNew) = -1; + + if (save_ion_charge_) { + ion_charge_.push_back(particles->charge(ipart)); + } + + // Increase the charge of the particle + particles->charge(ipart) += k_times; + } + + } // Loop on particles } template -inline double IonizationTunnel::ionizationRate(const int Z, const double E, const double oldZ) -{ - double delta = gamma_tunnel[Z] / E; - return beta_tunnel[Z] * exp(-delta * one_third + alpha_tunnel[Z] * log(delta)); +template +inline double IonizationTunnel::ionizationRate(const int Z, + const double E) { + double delta = gamma_tunnel[Z] / E; + return beta_tunnel[Z] * + exp(-delta * one_third + alpha_tunnel[Z] * log(delta)); } // IonizationTunnel : 0 @@ -193,149 +211,168 @@ template <> IonizationTunnel<2>::IonizationTunnel(Params ¶ms, Species *species); template <> -double IonizationTunnel<2>::ionizationRate(const int Z, const double E, const double oldZ); +template +inline double IonizationTunnel<2>::ionizationRate(const int Z, const double E); // BSI: 3 template <> IonizationTunnel<3>::IonizationTunnel(Params ¶ms, Species *species); template <> -double IonizationTunnel<3>::ionizationRate(const int Z, const double E, const double oldZ); +template <> +double IonizationTunnel<3>::ionizationRate<1>(const int Z, const double E); + +template <> +template <> +double IonizationTunnel<3>::ionizationRate<2>(const int Z, const double E); template -void IonizationTunnel::ionizationTunnelWithTasks( Particles *particles, unsigned int ipart_min, unsigned int ipart_max, - vector *Epart, Patch *patch, Projector *Proj, int ibin, int bin_shift, - double *b_Jx, double *b_Jy, double *b_Jz, int ipart_ref ) -{ - - unsigned int Z, Zp1, newZ, k_times; - double TotalIonizPot, E, invE, factorJion, delta, ran_p, Mult, D_sum, P_sum, Pint_tunnel; - vector IonizRate_tunnel( atomic_number_ ), Dnom_tunnel( atomic_number_ ); - LocalFields Jion; - double factorJion_0 = au_to_mec2 * EC_to_au*EC_to_au * invdt; - - int nparts = Epart->size()/3; - double *Ex = &( ( *Epart )[0*nparts] ); - double *Ey = &( ( *Epart )[1*nparts] ); - double *Ez = &( ( *Epart )[2*nparts] ); - - for( unsigned int ipart=ipart_min ; ipartcharge( ipart ) ); - - // If ion already fully ionized then skip - if( Z==atomic_number_ ) { - continue; - } - - // Absolute value of the electric field normalized in atomic units - E = EC_to_au * sqrt( pow( *( Ex+ipart-ipart_ref ), 2 ) - +pow( *( Ey+ipart-ipart_ref ), 2 ) - +pow( *( Ez+ipart-ipart_ref ), 2 ) ); - if( E<1e-10 ) { - continue; - } - - // -------------------------------- - // Start of the Monte-Carlo routine - // -------------------------------- - - invE = 1./E; - factorJion = factorJion_0 * invE*invE; - delta = gamma_tunnel[Z]*invE; - ran_p = patch->rand_->uniform(); - IonizRate_tunnel[Z] = beta_tunnel[Z] * exp( -delta*one_third + alpha_tunnel[Z]*log( delta ) ); - - // Total ionization potential (used to compute the ionization current) - TotalIonizPot = 0.0; - - // k_times will give the nb of ionization events - k_times = 0; - Zp1=Z+1; - - if( Zp1 == atomic_number_ ) { - // if ionization of the last electron: single ionization - // ----------------------------------------------------- - if( ran_p < 1.0 -exp( -IonizRate_tunnel[Z]*dt ) ) { - TotalIonizPot += Potential[Z]; - k_times = 1; - } - - } else { - // else : multiple ionization can occur in one time-step - // partial & final ionization are decoupled (see Nuter Phys. Plasmas) - // ------------------------------------------------------------------------- - - // initialization - Mult = 1.0; - Dnom_tunnel[0]=1.0; - Pint_tunnel = exp( -IonizRate_tunnel[Z]*dt ); // cummulative prob. - - //multiple ionization loop while Pint_tunnel < ran_p and still partial ionization - while( ( Pint_tunnel < ran_p ) and ( k_times < atomic_number_-Zp1 ) ) { - newZ = Zp1+k_times; - delta = gamma_tunnel[newZ]*invE; - IonizRate_tunnel[newZ] = beta_tunnel[newZ] - * exp( -delta*one_third+alpha_tunnel[newZ]*log( delta ) ); - D_sum = 0.0; - P_sum = 0.0; - Mult *= IonizRate_tunnel[Z+k_times]; - for( unsigned int i=0; iran_p ) && ( k_times==atomic_number_-Zp1 ) ) { - TotalIonizPot += Potential[atomic_number_-1]; - k_times++; - } - }//END Multiple ionization routine - - // Compute ionization current - if (b_Jx != NULL){ // For the moment ionization current is not accounted for in AM geometry - factorJion *= TotalIonizPot; - Jion.x = factorJion * *( Ex+ipart ); - Jion.y = factorJion * *( Ey+ipart ); - Jion.z = factorJion * *( Ez+ipart ); - - Proj->ionizationCurrentsForTasks( b_Jx, b_Jy, b_Jz, *particles, ipart, Jion, bin_shift ); - } - - // Creation of the new electrons - // (variable weights are used) - // ----------------------------- - if( k_times !=0 ) { - new_electrons_per_bin[ibin].createParticle(); - int idNew = new_electrons_per_bin[ibin].size() - 1;//cout<<"ibin "<position( i, ipart ); - } - for( unsigned int i=0; i<3; i++ ) { - new_electrons_per_bin[ibin].momentum( i, idNew ) = particles->momentum( i, ipart )*ionized_species_invmass; - } - new_electrons_per_bin[ibin].weight( idNew )=double( k_times )*particles->weight( ipart ); - new_electrons_per_bin[ibin].charge( idNew )=-1; - - if( save_ion_charge_ ) { - ion_charge_per_bin_[ibin].push_back( particles->charge( ipart ) ); - } - - // // Increase the charge of the particle - particles->charge( ipart ) += k_times; +void IonizationTunnel::ionizationTunnelWithTasks( + Particles *particles, unsigned int ipart_min, unsigned int ipart_max, + vector *Epart, Patch *patch, Projector *Proj, int ibin, + int bin_shift, double *b_Jx, double *b_Jy, double *b_Jz, int ipart_ref) { + unsigned int Z, Zp1, newZ, k_times; + double TotalIonizPot, E, invE, factorJion, delta, ran_p, Mult, D_sum, P_sum, + Pint_tunnel; + vector IonizRate_tunnel(atomic_number_), Dnom_tunnel(atomic_number_); + LocalFields Jion; + double factorJion_0 = au_to_mec2 * EC_to_au * EC_to_au * invdt; + + int nparts = Epart->size() / 3; + double *Ex = &((*Epart)[0 * nparts]); + double *Ey = &((*Epart)[1 * nparts]); + double *Ez = &((*Epart)[2 * nparts]); + + for (unsigned int ipart = ipart_min; ipart < ipart_max; ipart++) { + // Current charge state of the ion + Z = (unsigned int)(particles->charge(ipart)); + + // If ion already fully ionized then skip + if (Z == atomic_number_) { + continue; + } + + // Absolute value of the electric field normalized in atomic units + E = EC_to_au * sqrt(*(Ex + ipart - ipart_ref) * *(Ex + ipart - ipart_ref) + + *(Ey + ipart - ipart_ref) * *(Ey + ipart - ipart_ref) + + *(Ez + ipart - ipart_ref) * *(Ez + ipart - ipart_ref)); + if (E < 1e-10) { + continue; + } + + // -------------------------------- + // Start of the Monte-Carlo routine + // -------------------------------- + + invE = 1. / E; + factorJion = factorJion_0 * invE * invE; + delta = gamma_tunnel[Z] * invE; + ran_p = patch->rand_->uniform(); + IonizRate_tunnel[Z] = + beta_tunnel[Z] * exp(-delta * one_third + alpha_tunnel[Z] * log(delta)); + + // Total ionization potential (used to compute the ionization current) + TotalIonizPot = 0.0; + + // k_times will give the nb of ionization events + k_times = 0; + Zp1 = Z + 1; + + if (Zp1 == atomic_number_) { + // if ionization of the last electron: single ionization + // ----------------------------------------------------- + if (ran_p < 1.0 - exp(-IonizRate_tunnel[Z] * dt)) { + TotalIonizPot += Potential[Z]; + k_times = 1; + } + + } else { + // else : multiple ionization can occur in one time-step + // partial & final ionization are decoupled (see Nuter Phys. + // Plasmas) + // ------------------------------------------------------------------------- + + // initialization + Mult = 1.0; + Dnom_tunnel[0] = 1.0; + Pint_tunnel = exp(-IonizRate_tunnel[Z] * dt); // cummulative prob. + + // multiple ionization loop while Pint_tunnel < ran_p and still partial + // ionization + while ((Pint_tunnel < ran_p) and (k_times < atomic_number_ - Zp1)) { + newZ = Zp1 + k_times; + delta = gamma_tunnel[newZ] * invE; + IonizRate_tunnel[newZ] = + beta_tunnel[newZ] * + exp(-delta * one_third + alpha_tunnel[newZ] * log(delta)); + D_sum = 0.0; + P_sum = 0.0; + Mult *= IonizRate_tunnel[Z + k_times]; + for (unsigned int i = 0; i < k_times + 1; i++) { + Dnom_tunnel[i] = Dnom_tunnel[i] / + (IonizRate_tunnel[newZ] - IonizRate_tunnel[Z + i]); + D_sum += Dnom_tunnel[i]; + P_sum += exp(-IonizRate_tunnel[Z + i] * dt) * Dnom_tunnel[i]; } - - } // Loop on particles -} + Dnom_tunnel[k_times + 1] -= D_sum; + P_sum = P_sum + + Dnom_tunnel[k_times + 1] * exp(-IonizRate_tunnel[newZ] * dt); + Pint_tunnel = Pint_tunnel + P_sum * Mult; + + TotalIonizPot += Potential[Z + k_times]; + k_times++; + } // END while + // final ionization (of last electron) + if (((1.0 - Pint_tunnel) > ran_p) && (k_times == atomic_number_ - Zp1)) { + TotalIonizPot += Potential[atomic_number_ - 1]; + k_times++; + } + } // END Multiple ionization routine + + // Compute ionization current + if (b_Jx != NULL) { // For the moment ionization current is not accounted + // for in AM geometry + factorJion *= TotalIonizPot; + Jion.x = factorJion * *(Ex + ipart); + Jion.y = factorJion * *(Ey + ipart); + Jion.z = factorJion * *(Ez + ipart); + + Proj->ionizationCurrentsForTasks(b_Jx, b_Jy, b_Jz, *particles, ipart, + Jion, bin_shift); + } + + // Creation of the new electrons + // (variable weights are used) + // ----------------------------- + if (k_times != 0) { + new_electrons_per_bin[ibin].createParticle(); + int idNew = new_electrons_per_bin[ibin].size() - + 1; // cout<<"ibin "<position(i, ipart); + } + for (unsigned int i = 0; i < 3; i++) { + new_electrons_per_bin[ibin].momentum(i, idNew) = + particles->momentum(i, ipart) * ionized_species_invmass; + } + new_electrons_per_bin[ibin].weight(idNew) = + double(k_times) * particles->weight(ipart); + new_electrons_per_bin[ibin].charge(idNew) = -1; + + if (save_ion_charge_) { + ion_charge_per_bin_[ibin].push_back(particles->charge(ipart)); + } + + // // Increase the charge of the particle + particles->charge(ipart) += k_times; + } + + } // Loop on particles +} #endif diff --git a/src/Ionization/IonizationTunnelEnvelopeAveraged.cpp b/src/Ionization/IonizationTunnelEnvelopeAveraged.cpp index f53ec02f2..b3993a736 100644 --- a/src/Ionization/IonizationTunnelEnvelopeAveraged.cpp +++ b/src/Ionization/IonizationTunnelEnvelopeAveraged.cpp @@ -40,8 +40,8 @@ IonizationTunnelEnvelopeAveraged::IonizationTunnelEnvelopeAveraged( Params ¶ double cst = ( ( double )Z+1.0 ) * sqrt( 2.0/Potential[Z] ); alpha_tunnel[Z] = cst-1.0; // 2(n^*)-1 beta_tunnel[Z] = pow( 2, alpha_tunnel[Z] ) * ( 8.*Azimuthal_quantum_number[Z]+4.0 ) / ( cst*tgamma( cst ) ) * Potential[Z] * au_to_w0; - gamma_tunnel[Z] = 2.0 * pow( 2.0*Potential[Z], 1.5 ); // 2*(2I_p)^{3/2} - Ip_times2_to_minus3ov4[Z] = pow(2.*Potential[Z],-0.75); // (2I_p)^{-3/4} + gamma_tunnel[Z] = 2.0 * sqrt( 2.0*Potential[Z] * 2.0*Potential[Z] * 2.0*Potential[Z] ); // 2*(2I_p)^{3/2} + Ip_times2_to_minus3ov4[Z] = 1.0 / sqrt(sqrt((2.*Potential[Z] * 2.*Potential[Z] * 2.*Potential[Z]))); // (2I_p)^{-3/4} } ellipticity = params.envelope_ellipticity; @@ -82,13 +82,14 @@ void IonizationTunnelEnvelopeAveraged::envelopeIonization( Particles *particles, // Absolute value of the electric field |E_plasma| (from the plasma) normalized in atomic units - E_sq = pow(EC_to_au,2) * (pow( *( Ex+ipart-ipart_ref ), 2 ) - +pow( *( Ey+ipart-ipart_ref ), 2 ) - +pow( *( Ez+ipart-ipart_ref ), 2 ) ); + E_sq = (EC_to_au * EC_to_au) * ( ( *( Ex+ipart-ipart_ref ) ) * ( *( Ex+ipart-ipart_ref ) ) + + ( *( Ey+ipart-ipart_ref ) ) * ( *( Ey+ipart-ipart_ref ) ) + + ( *( Ez+ipart-ipart_ref ) ) * ( *( Ez+ipart-ipart_ref ) ) ); // Laser envelope electric field normalized in atomic units, using both transverse and longitudinal components: // |E_envelope|^2 = |Env_E|^2 + |Env_Ex|^2 - EnvE_sq = pow(EC_to_au,2)*( pow( *( E_env+ipart-ipart_ref ), 2 ) ) + pow(EC_to_au,2)*( pow( *( Ex_env+ipart-ipart_ref ), 2 ) ); + EnvE_sq = (EC_to_au * EC_to_au) * ( *( E_env+ipart-ipart_ref )) * ( *( E_env+ipart-ipart_ref ) ) + (EC_to_au * EC_to_au) * + ( *( Ex_env+ipart-ipart_ref ) ) * ( *( Ex_env+ipart-ipart_ref ) ); // Effective electric field for ionization: // |E| = sqrt(|E_plasma|^2+|E_envelope|^2) @@ -109,7 +110,7 @@ void IonizationTunnelEnvelopeAveraged::envelopeIonization( Particles *particles, // Corrections on averaged ionization rate given by the polarization ellipticity if( ellipticity==0. ){ // linear polarization - coeff_ellipticity_in_ionization_rate = pow((3./M_PI)/delta*2.,0.5); + coeff_ellipticity_in_ionization_rate = sqrt((3./M_PI)/delta*2.); } else if( ellipticity==1. ){ // circular polarization coeff_ellipticity_in_ionization_rate = 1.; // for circular polarization, the ionization rate is unchanged } @@ -145,8 +146,8 @@ void IonizationTunnelEnvelopeAveraged::envelopeIonization( Particles *particles, // Corrections on averaged ionization rate given by the polarization ellipticity if( ellipticity==0. ){ // linear polarization - //coeff_ellipticity_in_ionization_rate = pow((3./M_PI)/(gamma_tunnel[newZ-1]*invE)*2.,0.5); - coeff_ellipticity_in_ionization_rate = pow((3./M_PI)/delta*2.,0.5); + //coeff_ellipticity_in_ionization_rate = sqrt((3./M_PI)/(gamma_tunnel[newZ-1]*invE)*2.); + coeff_ellipticity_in_ionization_rate = sqrt((3./M_PI)/delta*2.); } else if( ellipticity==1. ){ // circular polarization coeff_ellipticity_in_ionization_rate = 1.; // for circular polarization, the ionization rate is unchanged } diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp index 8136f36ff..98c316337 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp @@ -448,7 +448,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, for( int ipair=i_pair_start; ipair < i_pair_start+mBW_pair_creation_sampling_[0]; ipair++ ) { // Momentum - const double p = std::sqrt( std::pow( 1.+pair_chi[0]*inv_chiph_gammaph, 2 )-1 ); + const double p = std::sqrt( ( 1.+pair_chi[0]*inv_chiph_gammaph)*( 1.+pair_chi[0]*inv_chiph_gammaph) - 1 ); pair0_momentum_x[ipair] = p*ux; pair0_momentum_y[ipair] = p*uy; pair0_momentum_z[ipair] = p*uz; @@ -511,7 +511,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, for( auto ipair=i_pair_start; ipair < i_pair_start + mBW_pair_creation_sampling_[1]; ipair++ ) { // Momentum - const double p = std::sqrt( std::pow( 1.+pair_chi[1]*inv_chiph_gammaph, 2 )-1 ); + const double p = std::sqrt( ( 1.+pair_chi[1]*inv_chiph_gammaph) * ( 1.+pair_chi[1]*inv_chiph_gammaph) - 1 ); pair1_momentum_x[ipair] = p*ux; pair1_momentum_y[ipair] = p*uy; pair1_momentum_z[ipair] = p*uz; @@ -569,7 +569,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, for( int idNew=nparticles-mBW_pair_creation_sampling_[k]; idNew= T_.size_-1 ) { ichiph = T_.size_-2; - dNBWdt = 2.067731275227008*std::pow( photon_chi, 5.0/3.0 ); + dNBWdt = 2.067731275227008 * cbrt(photon_chi*photon_chi*photon_chi*photon_chi*photon_chi); } else { // Upper and lower values for linear interpolation const double logchiphm = ichiph*T_.delta_ + T_.log10_min_; diff --git a/src/Params/Params.cpp b/src/Params/Params.cpp index 822c4eb93..c3424f269 100755 --- a/src/Params/Params.cpp +++ b/src/Params/Params.cpp @@ -103,6 +103,9 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : string seterr( "seterr" ); string sChar( "s" ); Py_DECREF( PyObject_CallMethod( numpy, &seterr[0], &sChar[0], "ignore" ) ); + string numpy_version = ""; + PyTools::getAttr( numpy, "__version__", numpy_version ); + MESSAGE( "Numpy version " << numpy_version ); Py_DECREF( numpy ); #else WARNING("Numpy not found. Some options will not be available"); @@ -868,7 +871,8 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : if( geometry!="1Dcartesian" && geometry!="2Dcartesian" && geometry!="3Dcartesian" ) { - ERROR_NAMELIST( "Collisions only valid for cartesian geometries for the moment", LINK_NAMELIST + std::string("#collisions-reactions") ); + //ERROR_NAMELIST( "Collisions only valid for cartesian geometries for the moment", LINK_NAMELIST + std::string("#collisions-reactions") ); + WARNING( "Collisions in AM geometry is experimental and valid only with a single mode" ); } } @@ -1533,12 +1537,12 @@ void Params::print_parallelism_params( SmileiMPI *smpi ) #else MESSAGE( 1, "OpenMP disabled" ); #endif -#ifdef _OMPTASKS - MESSAGE( 1, "OpenMP task parallelization activated"); -#else - MESSAGE( 1, "OpenMP task parallelization not activated"); -#endif - MESSAGE( "" ); +// #ifdef _OMPTASKS +// MESSAGE( 1, "OpenMP task parallelization activated"); +// #else +// MESSAGE( 1, "OpenMP task parallelization not activated"); +// #endif +// MESSAGE( "" ); ostringstream np; np << "Number of patches: " << number_of_patches[0]; @@ -1835,7 +1839,7 @@ void Params::multiple_decompose_3D() // Number of domain in 3D // Decomposition in 2 times, X and larger side double tmp = (double)(number_of_patches[0]*number_of_patches[0]) / (double)(number_of_patches[1]*number_of_patches[2]); - number_of_region[0] = min( sz, max(1, (int) pow( (double)sz*tmp, 1./3. ) ) ); + number_of_region[0] = min( sz, max(1, (int) (cbrt (sz*tmp)) ) ); int rest = (int)(sz / number_of_region[0]); while ( (int)number_of_region[0]*rest != sz ) { diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp index 55579a2c7..aa86586cf 100755 --- a/src/ParticleBC/BoundaryConditionType.cpp +++ b/src/ParticleBC/BoundaryConditionType.cpp @@ -9,7 +9,6 @@ #include "BoundaryConditionType.h" #include "Params.h" -#include "tabulatedFunctions.h" #include "userFunctions.h" @@ -132,12 +131,20 @@ void reflect_particle_wall( Species *species, int imin, int imax, int direction, energy_change = 0.; // no energy loss during reflection double* position = species->particles->getPtrPosition(direction); double* momentum = species->particles->getPtrMomentum(direction); + double* invgf_p = invgf.data(); +#ifdef SMILEI_ACCELERATOR_GPU_OACC + #pragma acc parallel deviceptr(position,momentum,invgf_p) + #pragma acc loop gang worker vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target is_device_ptr(position,momentum,invgf_p) + #pragma omp teams distribute parallel for +#endif for (int ipart=imin ; ipart= limit_sup*limit_sup ) { - double LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + double LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); energy_change += weight[ ipart ]*( LorentzFactor-1.0 ); // energy lost REDUCTION charge[ ipart ] = 0; cell_keys[ipart] = -1; @@ -343,7 +350,7 @@ void remove_photon_inf( Species *species, int imin, int imax, int direction, dou int* cell_keys = species->particles->getPtrCellKeys(); for (int ipart=imin ; ipartparticles->getPtrCellKeys(); for (int ipart=imin ; ipart= limit_sup) { - double momentumNorm = sqrt( pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + double momentumNorm = sqrt( momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); energy_change += weight[ ipart ]*( momentumNorm ); // energy lost REDUCTION charge[ ipart ] = 0; cell_keys[ipart] = -1; @@ -373,42 +380,62 @@ void remove_photon_sup( Species *species, int imin, int imax, int direction, dou void stop_particle_inf( Species *species, int imin, int imax, int direction, double limit_inf, double /*dt*/, std::vector &/*invgf*/, Random * /*rand*/, double &energy_change ) { - energy_change = 0; + double change_in_energy = 0.0; double* position = species->particles->getPtrPosition(direction); double* momentum_x = species->particles->getPtrMomentum(0); double* momentum_y = species->particles->getPtrMomentum(1); double* momentum_z = species->particles->getPtrMomentum(2); double* weight = species->particles->getPtrWeight(); +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target is_device_ptr( position, momentum_x, momentum_y, momentum_z, weight ) map( tofrom : change_in_energy ) + #pragma omp teams distribute parallel for reduction( + : change_in_energy ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight) + #pragma acc loop gang worker vector reduction(+ : change_in_energy) +#else + #pragma omp simd reduction(+ : change_in_energy) +#endif for (int ipart=imin ; ipart &/*invgf*/, Random * /*rand*/, double &energy_change ) { - energy_change = 0; + double change_in_energy = 0.0; double* position = species->particles->getPtrPosition(direction); double* momentum_x = species->particles->getPtrMomentum(0); double* momentum_y = species->particles->getPtrMomentum(1); double* momentum_z = species->particles->getPtrMomentum(2); double* weight = species->particles->getPtrWeight(); +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target is_device_ptr( position, momentum_x, momentum_y, momentum_z, weight ) map( tofrom : change_in_energy ) + #pragma omp teams distribute parallel for reduction( + : change_in_energy ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight) + #pragma acc loop gang worker vector reduction(+ : change_in_energy) +#else + #pragma omp simd reduction(+ : change_in_energy) +#endif for (int ipart=imin ; ipart= limit_sup) { - double LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); - energy_change = weight[ ipart ]*( LorentzFactor-1.0 ); // energy lost REDUCTION + double LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); + change_in_energy += weight[ ipart ]*( LorentzFactor-1.0 ); // energy lost REDUCTION position[ ipart ] = 2.*limit_sup - position[ ipart ]; momentum_x[ ipart ] = 0.; momentum_y[ ipart ] = 0.; momentum_z[ ipart ] = 0.; } } + energy_change = change_in_energy; } void stop_particle_wall( Species *species, int imin, int imax, int direction, double wall_position, double dt, std::vector &invgf, Random * /*rand*/, double &energy_change ) @@ -424,7 +451,7 @@ void stop_particle_wall( Species *species, int imin, int imax, int direction, do double particle_position = position[ipart]; double particle_position_old = particle_position - dt*invgf[ipart]*momentum[ipart]; if ( ( wall_position-particle_position_old )*( wall_position-particle_position )<0 ) { - double LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + double LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); energy_change += weight[ ipart ]*( LorentzFactor-1.0 ); // energy lost REDUCTION position[ ipart ] = 2.*wall_position - position[ ipart ]; momentum_x[ ipart ] = 0.; @@ -447,7 +474,7 @@ void stop_particle_AM( Species *species, int imin, int imax, int /*direction*/, for (int ipart=imin ; ipart= limit_sup*limit_sup ) { - double LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + double LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); energy_change += weight[ ipart ]*( LorentzFactor-1.0 ); // energy lost REDUCTION double distance_to_axis = sqrt( distance2ToAxis ); // limit_pos = 2*limit_pos @@ -479,95 +506,158 @@ void thermalize_particle_inf( Species *species, int imin, int imax, int directio double* momentum_y = species->particles->getPtrMomentum(1); double* momentum_z = species->particles->getPtrMomentum(2); double* weight = species->particles->getPtrWeight(); +#if defined( SMILEI_ACCELERATOR_GPU ) + uint32_t xorshift32_state = rand->xorshift32_state; +#endif + double change_in_energy = 0.0; + double thermal_momentum = species->thermal_momentum_[direction]; + double thermal_momentum1; + double thermal_momentum2; + double v0 = species->thermal_velocity_[0]; + if (nDim>1) { + thermal_momentum1 = species->thermal_momentum_[(direction+1)%nDim]; + if (nDim>2) { + thermal_momentum2 = species->thermal_momentum_[(direction+2)%nDim]; + } + } + double vx, vy, vz, v2, g, gm1, Lxx, Lyy, Lzz, Lxy, Lxz, Lyz; + // mean-velocity + vx = -species->thermal_boundary_velocity_[0]; + vy = -species->thermal_boundary_velocity_[1]; + vz = -species->thermal_boundary_velocity_[2]; + v2 = vx*vx + vy*vy + vz*vz; + if( v2>0. ) { + g = 1.0/sqrt( 1.0-v2 ); + gm1 = g - 1.0; + // compute the different component of the Matrix block of the Lorentz transformation + Lxx = 1.0 + gm1 * vx*vx/v2; + Lyy = 1.0 + gm1 * vy*vy/v2; + Lzz = 1.0 + gm1 * vz*vz/v2; + Lxy = gm1 * vx*vy/v2; + Lxz = gm1 * vx*vz/v2; + Lyz = gm1 * vy*vz/v2; + } - energy_change = 0; - for (int ipart=imin ; ipart3.0*species->thermal_velocity_[0] ) { //IF VELOCITY > 3*THERMAL VELOCITY THEN THERMALIZE IT - - // velocity of the particle after thermalization/reflection - //for (int i=0; inDim_fields; i++) { - - // change of velocity in the direction normal to the reflection plane - double sign_vel = -momentum[ ipart ]/std::abs( momentum[ ipart ] ); - momentum[ ipart ] = sign_vel * species->thermal_momentum_[direction] * std::sqrt( -std::log( 1.0-rand->uniform1() ) ); - - // change of momentum in the direction(s) along the reflection plane - if (nDim>1) { - momentumRefl_2D[ipart] = species->thermal_momentum_[(direction+1)%nDim] * perp_rand( rand ); - if (nDim>2) { - momentumRefl_3D[ipart] = species->thermal_momentum_[(direction+2)%nDim] * perp_rand( rand ); - } - } - - // Adding the mean velocity (using relativistic composition) - double vx, vy, vz, v2, g, gm1, Lxx, Lyy, Lzz, Lxy, Lxz, Lyz, gp, px, py, pz; - // mean-velocity - vx = -species->thermal_boundary_velocity_[0]; - vy = -species->thermal_boundary_velocity_[1]; - vz = -species->thermal_boundary_velocity_[2]; - v2 = vx*vx + vy*vy + vz*vz; - if( v2>0. ) { - - g = 1.0/sqrt( 1.0-v2 ); - gm1 = g - 1.0; - - // compute the different component of the Matrix block of the Lorentz transformation - Lxx = 1.0 + gm1 * vx*vx/v2; - Lyy = 1.0 + gm1 * vy*vy/v2; - Lzz = 1.0 + gm1 * vz*vz/v2; - Lxy = gm1 * vx*vy/v2; - Lxz = gm1 * vx*vz/v2; - Lyz = gm1 * vy*vz/v2; - - // Lorentz transformation of the momentum - gp = sqrt( 1.0 + pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); - px = -gp*g*vx + Lxx * momentum_x[ ipart ] + Lxy * momentum_y[ ipart ] + Lxz * momentum_z[ ipart ]; - py = -gp*g*vy + Lxy * momentum_x[ ipart ] + Lyy * momentum_y[ ipart ] + Lyz * momentum_z[ ipart ]; - pz = -gp*g*vz + Lxz * momentum_x[ ipart ] + Lyz * momentum_y[ ipart ] + Lzz * momentum_z[ ipart ]; - momentum_x[ ipart ] = px; - momentum_y[ ipart ] = py; - momentum_z[ ipart ] = pz; - - }//ENDif vel != 0 - - } else { // IF VELOCITY < 3*THERMAL SIMPLY REFLECT IT - momentum[ ipart ] = -momentum[ ipart ]; - - }// endif on v vs. thermal_velocity_ - - // position of the particle after reflection - position[ ipart ] = 2.*limit_inf - position[ ipart ]; - - // energy lost during thermalization - LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); - energy_change += weight[ ipart ]*( initial_energy - LorentzFactor+1.0 ); - - - /* HERE IS AN ATTEMPT TO INTRODUCE A SPACE DEPENDENCE ON THE BCs - // double val_min(params.dens_profile.vacuum_length[1]), val_max(params.dens_profile.vacuum_length[1]+params.dens_profile.length_params_y[0]); +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target is_device_ptr( position, momentum, momentumRefl_2D, momentumRefl_3D, momentum_x, momentum_y, momentum_z, weight ) map( tofrom : change_in_energy ) + #pragma omp teams distribute thread_limit(32) reduction( + : change_in_energy ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc parallel loop gang vector_length(32) reduction(+ : change_in_energy) independent deviceptr(position, momentum, momentumRefl_2D, momentumRefl_3D,momentum_x,momentum_y,momentum_z,weight) +#else + #pragma omp simd reduction(+ : change_in_energy) + for (int ipart = imin ; ipart < imax ; ++ipart ) { +#endif +#if defined( SMILEI_ACCELERATOR_GPU) + for (int ichunk = imin/32 ; ichunk < imax/32 ; ++ichunk ) { - if ( ( species->particles->position(1,ipart) >= val_min ) && ( species->particles->position(1,ipart) <= val_max ) ) { - // nrj computed during diagnostics - species->particles->position(direction, ipart) = limit_pos - species->particles->position(direction, ipart); - species->particles->momentum(direction, ipart) = sqrt(params.thermal_velocity_[direction]) * tabFcts.erfinv( rand->uniform() ); - } - else { - stop_particle( species->particles, ipart, direction, limit_pos, params, energy_change ); +#if defined( SMILEI_ACCELERATOR_GPU ) + uint32_t xorshift32_state_local = xorshift32_state + ichunk; + uint32_t xorshift32_state_array[32]; +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc loop seq +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + //#pragma omp single // does not work with rocm +#endif + // boucle sur les particules de ce chunk pour remplir xorshift32_state_array[...] avec le state local + for( int i = 0; i < 32; ++i ){ + xorshift32_state_array[i] = Random_namespace::xorshift32(xorshift32_state_local); + } +#endif + // boucle sur les particules de ce chunk qui utilise xorshift32_state_array[i] + int istart = ichunk==(imin/32) ? imin%32 : 0; + int iend = ichunk==(imax/32) ? imax%32 : 32; +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc loop vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp parallel for +#endif + for( int i = istart; i < iend ; ++i ){ + int ipart = ichunk * 32 + i; +#endif + if ( position[ ipart ] < limit_inf) { + // checking the particle's velocity compared to the thermal one + double p2 = momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart]; + double LorentzFactor = sqrt( 1.+p2 ); + double v = sqrt( p2 )/LorentzFactor; + + // energy before thermalization + double initial_energy = LorentzFactor - 1.0; + // Apply bcs depending on the particle velocity + // -------------------------------------------- + if( v > 3.0 * v0) { //IF VELOCITY > 3*THERMAL VELOCITY THEN THERMALIZE IT + + // velocity of the particle after thermalization/reflection + //for (int i=0; inDim_fields; i++) { + // change of velocity in the direction normal to the reflection plane + double sign_vel = -momentum[ ipart ]/std::abs( momentum[ ipart ] ); + #if defined( SMILEI_ACCELERATOR_GPU ) + momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - Random_namespace::uniform1(xorshift32_state_array[i]) ) ); + #else + momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - rand->uniform1() ) ); + #endif + + // change of momentum in the direction(s) along the reflection plane + if (nDim>1) { + #if defined( SMILEI_ACCELERATOR_GPU ) + momentumRefl_2D[ ipart ] = thermal_momentum1 * Random_namespace::perp_rand_dp(xorshift32_state_array[i]); + #else + momentumRefl_2D[ ipart ] = thermal_momentum1 * perp_rand( rand ); + #endif + if (nDim>2) { + #if defined( SMILEI_ACCELERATOR_GPU ) + momentumRefl_3D[ ipart ] = thermal_momentum2 * Random_namespace::perp_rand_dp(xorshift32_state_array[i]); + #else + momentumRefl_3D[ ipart ] = thermal_momentum2 * perp_rand( rand ); + #endif + } + } + // Adding the mean velocity (using relativistic composition) + double gp, px, py, pz; + if( v2>0. ) { + // Lorentz transformation of the momentum + gp = sqrt( 1.0 + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); + px = -gp*g*vx + Lxx * momentum_x[ ipart ] + Lxy * momentum_y[ ipart ] + Lxz * momentum_z[ ipart ]; + py = -gp*g*vy + Lxy * momentum_x[ ipart ] + Lyy * momentum_y[ ipart ] + Lyz * momentum_z[ ipart ]; + pz = -gp*g*vz + Lxz * momentum_x[ ipart ] + Lyz * momentum_y[ ipart ] + Lzz * momentum_z[ ipart ]; + momentum_x[ ipart ] = px; + momentum_y[ ipart ] = py; + momentum_z[ ipart ] = pz; + }//ENDif vel != 0 + } else { // IF VELOCITY < 3*THERMAL SIMPLY REFLECT IT + momentum[ ipart ] = -momentum[ ipart ]; + + }// endif on v vs. thermal_velocity_ + + // position of the particle after reflection + position[ ipart ] = 2.*limit_inf - position[ ipart ]; + + // energy lost during thermalization + LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); + change_in_energy += weight[ ipart ] * ( initial_energy - LorentzFactor + 1.0 ); + + + // HERE IS AN ATTEMPT TO INTRODUCE A SPACE DEPENDENCE ON THE BCs + // double val_min(params.dens_profile.vacuum_length[1]), val_max(params.dens_profile.vacuum_length[1]+params.dens_profile.length_params_y[0]); + + //if ( ( species->particles->position(1,ipart) >= val_min ) && ( species->particles->position(1,ipart) <= val_max ) ) { + // nrj computed during diagnostics + //species->particles->position(direction, ipart) = limit_pos - species->particles->position(direction, ipart); + //species->particles->momentum(direction, ipart) = sqrt(params.thermal_velocity_[direction]) * tabFcts.erfinv( rand->uniform() ); + //} + //else { + //stop_particle( species->particles, ipart, direction, limit_pos, params, energy_change ); + //} + } - */ } +#if defined( SMILEI_ACCELERATOR_GPU ) } +#endif + energy_change = change_in_energy; +#if defined( SMILEI_ACCELERATOR_GPU ) + xorshift32_state += 32; + rand->xorshift32_state = xorshift32_state; +#endif } void thermalize_particle_sup( Species *species, int imin, int imax, int direction, double limit_sup, double /*dt*/, std::vector &/*invgf*/, Random * rand, double &energy_change ) @@ -581,95 +671,160 @@ void thermalize_particle_sup( Species *species, int imin, int imax, int directio double* momentum_y = species->particles->getPtrMomentum(1); double* momentum_z = species->particles->getPtrMomentum(2); double* weight = species->particles->getPtrWeight(); - - energy_change = 0; - for (int ipart=imin ; ipart= limit_sup) { - // checking the particle's velocity compared to the thermal one - double p2 = pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ); - double LorentzFactor = sqrt( 1.+p2 ); - double v = sqrt( p2 )/LorentzFactor; - - // energy before thermalization - double initial_energy = LorentzFactor-1.0; - - // Apply bcs depending on the particle velocity - // -------------------------------------------- - if( v>3.0*species->thermal_velocity_[0] ) { //IF VELOCITY > 3*THERMAL VELOCITY THEN THERMALIZE IT - - // velocity of the particle after thermalization/reflection - //for (int i=0; inDim_fields; i++) { - - // change of velocity in the direction normal to the reflection plane - double sign_vel = -momentum[ ipart ]/std::abs( momentum[ ipart ] ); - momentum[ ipart ] = sign_vel * species->thermal_momentum_[direction] * std::sqrt( -std::log( 1.0-rand->uniform1() ) ); - - // change of momentum in the direction(s) along the reflection plane - if (nDim>1) { - momentumRefl_2D[ ipart ] = species->thermal_momentum_[(direction+1)%nDim] * perp_rand( rand ); - if (nDim>2) { - momentumRefl_3D[ ipart ] = species->thermal_momentum_[(direction+2)%nDim] * perp_rand( rand ); +#if defined( SMILEI_ACCELERATOR_GPU ) + uint32_t xorshift32_state = rand->xorshift32_state; +#endif + double change_in_energy = 0.0; + double thermal_momentum = species->thermal_momentum_[direction]; + double thermal_momentum1; + double thermal_momentum2; + double v0 = species->thermal_velocity_[0]; + if (nDim>1) { + thermal_momentum1 = species->thermal_momentum_[(direction+1)%nDim]; + if (nDim>2) { + thermal_momentum2 = species->thermal_momentum_[(direction+2)%nDim]; + } + } + double vx, vy, vz, v2, g, gm1, Lxx, Lyy, Lzz, Lxy, Lxz, Lyz; + // mean-velocity + vx = -species->thermal_boundary_velocity_[0]; + vy = -species->thermal_boundary_velocity_[1]; + vz = -species->thermal_boundary_velocity_[2]; + v2 = vx*vx + vy*vy + vz*vz; + if( v2>0. ) { + g = 1.0/sqrt( 1.0-v2 ); + gm1 = g - 1.0; + // compute the different component of the Matrix block of the Lorentz transformation + Lxx = 1.0 + gm1 * vx*vx/v2; + Lyy = 1.0 + gm1 * vy*vy/v2; + Lzz = 1.0 + gm1 * vz*vz/v2; + Lxy = gm1 * vx*vy/v2; + Lxz = gm1 * vx*vz/v2; + Lyz = gm1 * vy*vz/v2; + } +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target is_device_ptr( position, momentum, momentumRefl_2D, momentumRefl_3D, momentum_x, momentum_y, momentum_z, weight ) map( tofrom : change_in_energy ) + #pragma omp teams distribute thread_limit(32) reduction( + : change_in_energy ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc parallel loop gang vector_length(32) reduction(+ : change_in_energy) independent deviceptr(position, momentum, momentumRefl_2D, momentumRefl_3D,momentum_x,momentum_y,momentum_z,weight) +#else + #pragma omp simd reduction(+ : change_in_energy) + for (int ipart = imin ; ipart < imax ; ++ipart ) { +#endif +#if defined( SMILEI_ACCELERATOR_GPU) + for (int ichunk = imin/32 ; ichunk < imax/32 ; ++ichunk ) { + +#if defined( SMILEI_ACCELERATOR_GPU ) + uint32_t xorshift32_state_local = xorshift32_state + ichunk; + uint32_t xorshift32_state_array[32]; +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc loop seq +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + //#pragma omp single // does not work with rocm +#endif + // boucle sur les particules de ce chunk pour remplir xorshift32_state_array[...] avec le state local + for( int i = 0; i < 32; ++i ){ + xorshift32_state_array[i] = Random_namespace::xorshift32(xorshift32_state_local); + } +#endif + int istart = ichunk==(imin/32) ? imin%32 : 0; + int iend = ichunk==(imax/32) ? imax%32 : 32; +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc loop vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp parallel for +#endif + for( int i = istart; i < iend ; ++i ){ + int ipart = ichunk * 32 + i; +#endif + if ( position[ ipart ] >= limit_sup) { + // checking the particle's velocity compared to the thermal one + double p2 = momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart]; + double LorentzFactor = sqrt( 1.+p2 ); + double v = sqrt( p2 )/LorentzFactor; + + // energy before thermalization + double initial_energy = LorentzFactor - 1.0; + + // Apply bcs depending on the particle velocity + // -------------------------------------------- + if( v > 3.0 * v0 ) { //IF VELOCITY > 3*THERMAL VELOCITY THEN THERMALIZE IT + + // velocity of the particle after thermalization/reflection + //for (int i=0; inDim_fields; i++) { + + // change of velocity in the direction normal to the reflection plane + double sign_vel = -momentum[ ipart ]/std::abs( momentum[ ipart ] ); + #if defined( SMILEI_ACCELERATOR_GPU ) + momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - Random_namespace::uniform1(xorshift32_state_array[i]) ) ); + #else + momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - rand->uniform1() ) ); + #endif + + // change of momentum in the direction(s) along the reflection plane + if (nDim>1) { + #if defined( SMILEI_ACCELERATOR_GPU ) + momentumRefl_2D[ ipart ] = thermal_momentum1 * Random_namespace::perp_rand_dp(xorshift32_state_array[i]); + #else + momentumRefl_2D[ ipart ] = thermal_momentum1 * perp_rand( rand ); + #endif + if (nDim>2) { + #if defined( SMILEI_ACCELERATOR_GPU ) + momentumRefl_3D[ ipart ] = thermal_momentum2 * Random_namespace::perp_rand_dp(xorshift32_state_array[i]); + #else + momentumRefl_3D[ ipart ] = thermal_momentum2 * perp_rand( rand ); + #endif + } } - } - - // Adding the mean velocity (using relativistic composition) - double vx, vy, vz, v2, g, gm1, Lxx, Lyy, Lzz, Lxy, Lxz, Lyz, gp, px, py, pz; - // mean-velocity - vx = -species->thermal_boundary_velocity_[0]; - vy = -species->thermal_boundary_velocity_[1]; - vz = -species->thermal_boundary_velocity_[2]; - v2 = vx*vx + vy*vy + vz*vz; - if( v2>0. ) { - - g = 1.0/sqrt( 1.0-v2 ); - gm1 = g - 1.0; - - // compute the different component of the Matrix block of the Lorentz transformation - Lxx = 1.0 + gm1 * vx*vx/v2; - Lyy = 1.0 + gm1 * vy*vy/v2; - Lzz = 1.0 + gm1 * vz*vz/v2; - Lxy = gm1 * vx*vy/v2; - Lxz = gm1 * vx*vz/v2; - Lyz = gm1 * vy*vz/v2; - - // Lorentz transformation of the momentum - gp = sqrt( 1.0 + pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); - px = -gp*g*vx + Lxx * momentum_x[ ipart ] + Lxy * momentum_y[ ipart ] + Lxz * momentum_z[ ipart ]; - py = -gp*g*vy + Lxy * momentum_x[ ipart ] + Lyy * momentum_y[ ipart ] + Lyz * momentum_z[ ipart ]; - pz = -gp*g*vz + Lxz * momentum_x[ ipart ] + Lyz * momentum_y[ ipart ] + Lzz * momentum_z[ ipart ]; - momentum_x[ ipart ] = px; - momentum_y[ ipart ] = py; - momentum_z[ ipart ] = pz; - - }//ENDif vel != 0 - } else { // IF VELOCITY < 3*THERMAL SIMPLY REFLECT IT - momentum[ ipart ] = -momentum[ ipart ]; - - }// endif on v vs. thermal_velocity_ - - // position of the particle after reflection - position[ ipart ] = 2.*limit_sup - position[ ipart ]; - - // energy lost during thermalization - LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); - energy_change += weight[ ipart ]*( initial_energy - LorentzFactor+1.0 ); - - - /* HERE IS AN ATTEMPT TO INTRODUCE A SPACE DEPENDENCE ON THE BCs - // double val_min(params.dens_profile.vacuum_length[1]), val_max(params.dens_profile.vacuum_length[1]+params.dens_profile.length_params_y[0]); - - if ( ( species->particles->position(1,ipart) >= val_min ) && ( species->particles->position(1,ipart) <= val_max ) ) { - // nrj computed during diagnostics - species->particles->position(direction, ipart) = limit_pos - species->particles->position(direction, ipart); - species->particles->momentum(direction, ipart) = sqrt(params.thermal_velocity_[direction]) * tabFcts.erfinv( rand->uniform() ); - } - else { - stop_particle( species->particles, ipart, direction, limit_pos, params, energy_change ); + // Adding the mean velocity (using relativistic composition) + double gp, px, py, pz; + // mean-velocity + if( v2>0. ) { + // Lorentz transformation of the momentum + gp = sqrt( 1.0 + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); + px = -gp*g*vx + Lxx * momentum_x[ ipart ] + Lxy * momentum_y[ ipart ] + Lxz * momentum_z[ ipart ]; + py = -gp*g*vy + Lxy * momentum_x[ ipart ] + Lyy * momentum_y[ ipart ] + Lyz * momentum_z[ ipart ]; + pz = -gp*g*vz + Lxz * momentum_x[ ipart ] + Lyz * momentum_y[ ipart ] + Lzz * momentum_z[ ipart ]; + momentum_x[ ipart ] = px; + momentum_y[ ipart ] = py; + momentum_z[ ipart ] = pz; + }//ENDif vel != 0 + + } else { // IF VELOCITY < 3*THERMAL SIMPLY REFLECT IT + momentum[ ipart ] = -momentum[ ipart ]; + + }// endif on v vs. thermal_velocity_ + + // position of the particle after reflection + position[ ipart ] = 2.*limit_sup - position[ ipart ]; + + // energy lost during thermalization + LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); + change_in_energy += weight[ ipart ] * ( initial_energy - LorentzFactor + 1.0 ); + + /* HERE IS AN ATTEMPT TO INTRODUCE A SPACE DEPENDENCE ON THE BCs + // double val_min(params.dens_profile.vacuum_length[1]), val_max(params.dens_profile.vacuum_length[1]+params.dens_profile.length_params_y[0]); + + if ( ( species->particles->position(1,ipart) >= val_min ) && ( species->particles->position(1,ipart) <= val_max ) ) { + // nrj computed during diagnostics + species->particles->position(direction, ipart) = limit_pos - species->particles->position(direction, ipart); + species->particles->momentum(direction, ipart) = sqrt(params.thermal_velocity_[direction]) * tabFcts.erfinv( rand->uniform() ); + } + else { + stop_particle( species->particles, ipart, direction, limit_pos, params, energy_change ); + } + */ } - */ } +#if defined( SMILEI_ACCELERATOR_GPU ) } +#endif + energy_change = change_in_energy; +#if defined( SMILEI_ACCELERATOR_GPU ) + xorshift32_state += 32; + rand->xorshift32_state = xorshift32_state; +#endif } @@ -691,7 +846,7 @@ void thermalize_particle_wall( Species *species, int imin, int imax, int directi double particle_position_old = particle_position - dt*invgf[ipart]*species->particles->Momentum[direction][ipart]; if ( ( wall_position-particle_position_old )*( wall_position-particle_position )<0 ) { // checking the particle's velocity compared to the thermal one - double p2 = pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ); + double p2 = momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart]; double LorentzFactor = sqrt( 1.+p2 ); double v = sqrt( p2 )/LorentzFactor; @@ -739,7 +894,7 @@ void thermalize_particle_wall( Species *species, int imin, int imax, int directi Lyz = gm1 * vy*vz/v2; // Lorentz transformation of the momentum - gp = sqrt( 1.0 + pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + gp = sqrt( 1.0 + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); px = -gp*g*vx + Lxx * momentum_x[ ipart ] + Lxy * momentum_y[ ipart ] + Lxz * momentum_z[ ipart ]; py = -gp*g*vy + Lxy * momentum_x[ ipart ] + Lyy * momentum_y[ ipart ] + Lyz * momentum_z[ ipart ]; pz = -gp*g*vz + Lxz * momentum_x[ ipart ] + Lyz * momentum_y[ ipart ] + Lzz * momentum_z[ ipart ]; @@ -758,7 +913,7 @@ void thermalize_particle_wall( Species *species, int imin, int imax, int directi position[ ipart ] = 2.*wall_position - position[ ipart ]; // energy lost during thermalization - LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); energy_change += weight[ ipart ]*( initial_energy - LorentzFactor+1.0 ); diff --git a/src/ParticleBC/BoundaryConditionType.h b/src/ParticleBC/BoundaryConditionType.h index 3a89e9758..c4912ed12 100755 --- a/src/ParticleBC/BoundaryConditionType.h +++ b/src/ParticleBC/BoundaryConditionType.h @@ -13,11 +13,11 @@ #include "Particles.h" #include "Species.h" #include "Params.h" -#include "tabulatedFunctions.h" #include "userFunctions.h" +#include "Random.h" inline double perp_rand( Random * rand ) { - double a = userFunctions::erfinv( rand->uniform1() ); + double a = userFunctions::erfinv_dp( rand->uniform1() ); if( rand->cointoss() ) { a *= -1.; } diff --git a/src/ParticleBC/PartBoundCond.h b/src/ParticleBC/PartBoundCond.h index 7afd6ca9c..e03aaaf79 100755 --- a/src/ParticleBC/PartBoundCond.h +++ b/src/ParticleBC/PartBoundCond.h @@ -4,7 +4,6 @@ #include "Params.h" #include "Species.h" #include "Particles.h" -#include "tabulatedFunctions.h" class Patch; diff --git a/src/ParticleBC/PartWall.h b/src/ParticleBC/PartWall.h index 45aeb6fde..1f118d828 100755 --- a/src/ParticleBC/PartWall.h +++ b/src/ParticleBC/PartWall.h @@ -3,7 +3,6 @@ #include "Params.h" #include "Random.h" -#include "tabulatedFunctions.h" class Patch; class Species; diff --git a/src/Particles/ParticleCreator.cpp b/src/Particles/ParticleCreator.cpp index 270d0fee1..dc12e6089 100644 --- a/src/Particles/ParticleCreator.cpp +++ b/src/Particles/ParticleCreator.cpp @@ -843,7 +843,7 @@ void ParticleCreator::createMomentum( std::string momentum_initialization, for( unsigned int p=iPart; puniform2() ); double theta = rand->uniform_2pi(); - double psm = sqrt( pow( 1.0+energies[p-iPart], 2 )-1.0 ); + double psm = sqrt( ( 1.0 + energies[p-iPart]) * ( 1.0 + energies[p-iPart]) - 1.0 ); particles->momentum( 0, p ) = psm*cos( theta )*sin( phi ); particles->momentum( 1, p ) = psm*sin( theta )*sin( phi ); @@ -1018,7 +1018,7 @@ std::vector ParticleCreator::maxwellJuttner( Species * species, unsigned // Calculate the inverse of F lnlnU = log( -log( U ) ); if( lnlnU>2. ) { - invF = 3.*sqrt( M_PI )/4. * pow( U, 2./3. ); + invF = 3.*sqrt( M_PI )/4. * cbrt( U*U ); } else if( lnlnU<-19. ) { invF = 1.; } else { @@ -1047,7 +1047,7 @@ std::vector ParticleCreator::maxwellJuttner( Species * species, unsigned // Calculate the inverse of H at the point log(1.-U) + H0 lnU = log( -log( 1.-U ) - H0 ); if( lnU<-26. ) { - invH = pow( -6.*U, 1./3. ); + invH = cbrt( -6.*U ); } else if( lnU>12. ) { invH = -U + 11.35 * pow( -U, 0.06 ); } else { diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index 20b9c2ea6..f1b327f3c 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -310,7 +310,7 @@ class Particles //! Method used to get the Particle Lorentz factor inline double LorentzFactor( unsigned int ipart ) { - return sqrt( 1.+pow( momentum( 0, ipart ), 2 )+pow( momentum( 1, ipart ), 2 )+pow( momentum( 2, ipart ), 2 ) ); + return sqrt( 1. + momentum( 0, ipart ) * momentum( 0, ipart ) + momentum( 1, ipart ) * momentum( 1, ipart ) + momentum( 2, ipart ) * momentum( 2, ipart ) ); } //! Method used to get the inverse Particle Lorentz factor @@ -322,7 +322,7 @@ class Particles //! Method used to get the momentum norm which is also the normalized photon energy inline double momentumNorm( unsigned int ipart ) { - return sqrt( pow( momentum( 0, ipart ), 2 )+pow( momentum( 1, ipart ), 2 )+pow( momentum( 2, ipart ), 2 ) ); + return sqrt( momentum( 0, ipart ) * momentum( 0, ipart ) + momentum( 1, ipart ) * momentum( 1, ipart ) + momentum( 2, ipart ) * momentum( 2, ipart ) ); } void resetIds() diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index 1a96ab654..b24877bcd 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -149,10 +149,10 @@ void Patch::initStep3( Params ¶ms, SmileiMPI *smpi, unsigned int n_moved ) min_local_[i] = ( Pcoordinates[i] )*( params.patch_size_[i]*params.cell_length[i] ); max_local_[i] = ( Pcoordinates[i]+1 )*( params.patch_size_[i]*params.cell_length[i] ); cell_starting_global_index[i] += Pcoordinates[i]*params.patch_size_[i]; - cell_starting_global_index_noGC[i] = Pcoordinates[i]*params.patch_size_[i]; + cell_starting_global_index_noGC[i] = Pcoordinates[i]*params.patch_size_[i]; cell_starting_global_index[i] -= params.oversize[i]; center_[i] = ( min_local_[i]+max_local_[i] )*0.5; - radius += pow( max_local_[i] - center_[i] + params.cell_length[i], 2 ); + radius += ( max_local_[i] - center_[i] + params.cell_length[i] ) * ( max_local_[i] - center_[i] + params.cell_length[i] ); } radius = sqrt( radius ); @@ -327,9 +327,9 @@ void Patch::setLocationAndAllocateFields( Params ¶ms, DomainDecomposition *d min_local_[iDim] = params.offset_map[iDim][ijk[iDim]] * params.cell_length[iDim]; max_local_[iDim] = (params.offset_map[iDim][ijk[iDim]]+params.region_size_[iDim]) * params.cell_length[iDim]; center_[iDim] = ( min_local_[iDim]+max_local_[iDim] )*0.5; - radius += pow( max_local_[iDim] - center_[iDim] + params.cell_length[iDim], 2 ); + radius += ( max_local_[iDim] - center_[iDim] + params.cell_length[iDim]) * ( max_local_[iDim] - center_[iDim] + params.cell_length[iDim]); cell_starting_global_index[iDim] = params.offset_map[iDim][ijk[iDim]]; - cell_starting_global_index_noGC[iDim] = params.offset_map[iDim][ijk[iDim]]; + cell_starting_global_index_noGC[iDim] = params.offset_map[iDim][ijk[iDim]]; // Neighbor before if( ijk[iDim] > 0 ) { unsigned int IJK[3] = { ijk[0], ijk[1], ijk[2] }; @@ -396,7 +396,7 @@ void Patch::setLocationAndAllocateFields( Params ¶ms, DomainDecomposition *d max_local_[iDim] = params.global_size_[iDim]*params.cell_length[iDim]; center_[iDim] = ( min_local_[iDim]+max_local_[iDim] )*0.5; - radius += pow( max_local_[iDim] - center_[iDim] + params.cell_length[iDim], 2 ); + radius += ( max_local_[iDim] - center_[iDim] + params.cell_length[iDim] ) * ( max_local_[iDim] - center_[iDim] + params.cell_length[iDim] ); cell_starting_global_index[iDim] = -oversize[iDim]; diff --git a/src/Patch/PatchAM.h b/src/Patch/PatchAM.h index 9d8f2c17c..df58a8016 100755 --- a/src/Patch/PatchAM.h +++ b/src/Patch/PatchAM.h @@ -24,10 +24,24 @@ class PatchAM final : public Patch //! Return the volume (or surface or length depending on simulation dimension) //! of one cell at the position of a given particle - double getPrimalCellVolume( Particles *, unsigned int, Params & ) override final + double getPrimalCellVolume( Particles *p, unsigned int ipart, Params ¶ms ) override final { - ERROR( "getPrimalCellVolume not implemented in geometry AM" ); - return cell_volume; + double factor = 1.; + + double halfcell = 0.5 * params.cell_length[0]; + if( p->position(0,ipart) - getDomainLocalMin(0) < halfcell + || getDomainLocalMax(0) - p->position(0,ipart) < halfcell ) { + factor *= 0.5; + } + + halfcell = 0.5 * params.cell_length[1]; + double radius = sqrt(p->position(1,ipart)*p->position(1,ipart) + p->position(2,ipart)*p->position(2,ipart)); + if( radius - getDomainLocalMin(1) < halfcell + || getDomainLocalMax(1) - radius < halfcell ) { + factor *= 0.5; + } + + return factor * cell_volume * radius; }; //! Given several arrays (x,y,z), return indices of points in patch diff --git a/src/Patch/VectorPatch.cpp b/src/Patch/VectorPatch.cpp index bbce82778..b3d2190a2 100755 --- a/src/Patch/VectorPatch.cpp +++ b/src/Patch/VectorPatch.cpp @@ -540,7 +540,7 @@ void VectorPatch::injectParticlesFromBoundaries(Params ¶ms, Timers &timers, timers.particleInjection.restart(); //#pragma omp for schedule(runtime) - #pragma omp single + #pragma omp master for( unsigned int ipatch=0 ; ipatchsize() ; ipatch++ ) { Patch * patch = ( *this )( ipatch ); @@ -779,6 +779,7 @@ void VectorPatch::injectParticlesFromBoundaries(Params ¶ms, Timers &timers, } } // end for ipatch + #pragma omp barrier timers.particleInjection.update( params.printNow( itime ) ); } @@ -1169,6 +1170,7 @@ void VectorPatch::finalizeSyncAndBCFields( Params ¶ms, SmileiMPI *smpi, SimW } timers.maxwellBC.restart(); + SMILEI_PY_SAVE_MASTER_THREAD #pragma omp for schedule(static) for( unsigned int ipatch=0 ; ipatchsize() ; ipatch++ ) { // Applies boundary conditions on B @@ -1176,6 +1178,7 @@ void VectorPatch::finalizeSyncAndBCFields( Params ¶ms, SmileiMPI *smpi, SimW ( *this )( ipatch )->EMfields->boundaryConditions( time_dual, ( *this )( ipatch ), simWindow ); } + SMILEI_PY_RESTORE_MASTER_THREAD SyncVectorPatch::exchangeForPML( params, (*this), smpi ); #pragma omp for schedule(static) @@ -1284,9 +1287,6 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int for( unsigned int idiag = 0 ; idiag < globalDiags.size() ; idiag++ ) { if( globalDiags[idiag]->timeSelection->theTimeIsNow( itime ) ) { - //std::cout << " " << dynamic_cast( globalDiags[idiag] ) - // << std::endl; - if (dynamic_cast( globalDiags[idiag])) { //need_particles = true; //need_fields = true; @@ -1314,7 +1314,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int } else if (dynamic_cast(localDiags[idiag])) { need_fields = true; } else if (dynamic_cast(localDiags[idiag])) { - // Nothing to be done + // Nothing to be done } else { need_particles = true; need_fields = true; @@ -1508,12 +1508,6 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int } timers.diags.update(); - if( itime==0 ) { - for( unsigned int idiag = 0 ; idiag < diag_timers_.size() ; idiag++ ) { - diag_timers_[idiag]->reboot(); - } - } - } // END runAllDiags // --------------------------------------------------------------------------------------------------------------------- @@ -1636,13 +1630,14 @@ void VectorPatch::runAllDiagsTasks( Params &, SmileiMPI *smpi, unsigned int itim } timers.diags.update(); - if (itime==0) { - for( unsigned int idiag = 0 ; idiag < diag_timers_.size() ; idiag++ ) - diag_timers_[idiag]->reboot(); - } - } // END runAllDiags +void VectorPatch::rebootDiagTimers() +{ + for( unsigned int idiag = 0 ; idiag < diag_timers_.size() ; idiag++ ) { + diag_timers_[idiag]->reboot(); + } +} // --------------------------------------------------------------------------------------------------------------------- // Check if rho is null (MPI & patch sync) @@ -4033,9 +4028,10 @@ void VectorPatch::applyAntennas( double time ) } else { // Get intensity from antenna of the first patch - #pragma omp single + #pragma omp master antenna_intensity_ = patches_[0]->EMfields->antennas[iAntenna].time_profile->valueAt( time ); - + #pragma omp barrier + // Loop patches to apply #pragma omp for schedule(static) for( unsigned int ipatch=0 ; ipatchdiagPartEventTracing( time_dual, params.timestep); #endif + SMILEI_PY_SAVE_MASTER_THREAD #pragma omp for schedule(runtime) for( unsigned int ipatch=0 ; ipatchsize() ; ipatch++ ) { ( *this )( ipatch )->EMfields->restartRhoJ(); @@ -5017,6 +5014,7 @@ void VectorPatch::dynamicsWithoutTasks( Params ¶ms, } // end loop on species //MESSAGE("species dynamics"); } // end loop on patches + SMILEI_PY_RESTORE_MASTER_THREAD } void VectorPatch::ponderomotiveUpdateSusceptibilityAndMomentumWithoutTasks( Params ¶ms, diff --git a/src/Patch/VectorPatch.h b/src/Patch/VectorPatch.h index d2e12e545..dab9d0ce2 100755 --- a/src/Patch/VectorPatch.h +++ b/src/Patch/VectorPatch.h @@ -238,6 +238,7 @@ public : SimWindow *simWindow ); void runAllDiagsTasks( Params ¶ms, SmileiMPI *smpi, unsigned int itime, Timers &timers, SimWindow *simWindow ); + void rebootDiagTimers(); void initAllDiags( Params ¶ms, SmileiMPI *smpi ); void closeAllDiags( SmileiMPI *smpi ); diff --git a/src/Profiles/Function.cpp b/src/Profiles/Function.cpp index 6a0cf2ac0..d39994722 100755 --- a/src/Profiles/Function.cpp +++ b/src/Profiles/Function.cpp @@ -8,81 +8,144 @@ using namespace std; // 1D double Function_Python1D::valueAt( double time ) { - return PyTools::runPyFunction( py_profile, time ); + double v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction( py_profile, time ); + SMILEI_PY_RELEASE_GIL + return v; } double Function_Python1D::valueAt( vector, double time ) { - return PyTools::runPyFunction( py_profile, time ); + double v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction( py_profile, time ); + SMILEI_PY_RELEASE_GIL + return v; } double Function_Python1D::valueAt( vector x_cell ) { - return PyTools::runPyFunction( py_profile, x_cell[0] ); + double v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction( py_profile, x_cell[0] ); + SMILEI_PY_RELEASE_GIL + return v; } // 2D double Function_Python2D::valueAt( vector x_cell, double time ) { - return PyTools::runPyFunction( py_profile, x_cell[0], time ); + double v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction( py_profile, x_cell[0], time ); + SMILEI_PY_RELEASE_GIL + return v; } double Function_Python2D::valueAt( vector x_cell ) { - return PyTools::runPyFunction( py_profile, x_cell[0], x_cell[1] ); + double v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction( py_profile, x_cell[0], x_cell[1] ); + SMILEI_PY_RELEASE_GIL + return v; } // 2D complex std::complex Function_Python2D::complexValueAt( vector x_cell, double time ) { - return PyTools::runPyFunction>( py_profile, x_cell[0], time ); + std::complex v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction>( py_profile, x_cell[0], time ); + SMILEI_PY_RELEASE_GIL + return v; } std::complex Function_Python2D::complexValueAt( vector x_cell ) { - return PyTools::runPyFunction>( py_profile, x_cell[0], x_cell[1] ); + std::complex v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction>( py_profile, x_cell[0], x_cell[1] ); + SMILEI_PY_RELEASE_GIL + return v; } // 3D double Function_Python3D::valueAt( vector x_cell, double time ) { - return PyTools::runPyFunction( py_profile, x_cell[0], x_cell[1], time ); + double v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction( py_profile, x_cell[0], x_cell[1], time ); + SMILEI_PY_RELEASE_GIL + return v; } double Function_Python3D::valueAt( vector x_cell ) { - return PyTools::runPyFunction( py_profile, x_cell[0], x_cell[1], x_cell[2] ); + double v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction( py_profile, x_cell[0], x_cell[1], x_cell[2] ); + SMILEI_PY_RELEASE_GIL + return v; } // 3D complex std::complex Function_Python3D::complexValueAt( vector x_cell, double time ) { - return PyTools::runPyFunction>( py_profile, x_cell[0], x_cell[1], time ); + std::complex v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction>( py_profile, x_cell[0], x_cell[1], time ); + SMILEI_PY_RELEASE_GIL + return v; } // 4D double Function_Python4D::valueAt( vector x_cell, double time ) { - return PyTools::runPyFunction( py_profile, x_cell[0], x_cell[1], x_cell[2], time ); + double v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction( py_profile, x_cell[0], x_cell[1], x_cell[2], time ); + SMILEI_PY_RELEASE_GIL + return v; } // 4D complex std::complex Function_Python4D::complexValueAt( vector x_cell, double time ) { - return PyTools::runPyFunction>( py_profile, x_cell[0], x_cell[1], x_cell[2], time ); + std::complex v; + SMILEI_PY_ACQUIRE_GIL + v = PyTools::runPyFunction>( py_profile, x_cell[0], x_cell[1], x_cell[2], time ); + SMILEI_PY_RELEASE_GIL + return v; } // Special cases for locations specified in numpy arrays #ifdef SMILEI_USE_NUMPY PyArrayObject *Function_Python1D::valueAt( std::vector x ) { - return ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], NULL ); + PyArrayObject * v; + SMILEI_PY_ACQUIRE_GIL + v = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], NULL ); + SMILEI_PY_RELEASE_GIL + return v; } PyArrayObject *Function_Python2D::valueAt( std::vector x ) { - return ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], NULL ); + PyArrayObject * v; + SMILEI_PY_ACQUIRE_GIL + v = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], NULL ); + SMILEI_PY_RELEASE_GIL + return v; } PyArrayObject *Function_Python3D::valueAt( std::vector x ) { - return ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], x[2], NULL ); + PyArrayObject * v; + SMILEI_PY_ACQUIRE_GIL + v = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], x[2], NULL ); + SMILEI_PY_RELEASE_GIL + return v; } PyArrayObject *Function_Python2D::complexValueAt( std::vector x ) { + PyArrayObject *cvalues; + SMILEI_PY_ACQUIRE_GIL PyObject *values = PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], NULL ); - PyArrayObject *cvalues = ( PyArrayObject * )PyObject_CallMethod( values, const_cast("astype"), const_cast("s"), const_cast("complex"), NULL ); + cvalues = ( PyArrayObject * )PyObject_CallMethod( values, const_cast("astype"), const_cast("s"), const_cast("complex"), NULL ); Py_DECREF( values ); + SMILEI_PY_RELEASE_GIL return cvalues; } @@ -90,45 +153,63 @@ PyArrayObject *Function_Python2D::complexValueAt( std::vector x PyArrayObject *Function_Python1D::valueAt( std::vector, double time ) { + PyArrayObject * ret; + SMILEI_PY_ACQUIRE_GIL PyObject *t = PyFloat_FromDouble( time ); - PyArrayObject * ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, t, NULL ); + ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, t, NULL ); Py_DECREF( t ); + SMILEI_PY_RELEASE_GIL return ret; } PyArrayObject *Function_Python2D::valueAt( std::vector x, double time ) { + PyArrayObject * ret; + SMILEI_PY_ACQUIRE_GIL PyObject *t = PyFloat_FromDouble( time ); - PyArrayObject * ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], t, NULL ); + ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], t, NULL ); Py_DECREF( t ); + SMILEI_PY_RELEASE_GIL return ret; } PyArrayObject *Function_Python3D::valueAt( std::vector x, double time ) { + PyArrayObject * ret; + SMILEI_PY_ACQUIRE_GIL PyObject *t = PyFloat_FromDouble( time ); - PyArrayObject * ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], t, NULL ); + ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], t, NULL ); Py_DECREF( t ); + SMILEI_PY_RELEASE_GIL return ret; } PyArrayObject *Function_Python4D::valueAt( std::vector x, double time ) { + PyArrayObject * ret; + SMILEI_PY_ACQUIRE_GIL PyObject *t = PyFloat_FromDouble( time ); - PyArrayObject * ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], x[2], t, NULL ); + ret = ( PyArrayObject * )PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], x[2], t, NULL ); Py_DECREF( t ); + SMILEI_PY_RELEASE_GIL return ret; }PyArrayObject *Function_Python4D::complexValueAt( std::vector x, double time ) { + PyArrayObject *cvalues; + SMILEI_PY_ACQUIRE_GIL PyObject *t = PyFloat_FromDouble( time ); PyObject * values = PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], x[2], t, NULL ); Py_DECREF( t ); - PyArrayObject *cvalues = ( PyArrayObject * )PyObject_CallMethod( values, const_cast("astype"), const_cast("s"), const_cast("complex"), NULL ); + cvalues = ( PyArrayObject * )PyObject_CallMethod( values, const_cast("astype"), const_cast("s"), const_cast("complex"), NULL ); Py_DECREF( values ); + SMILEI_PY_RELEASE_GIL return cvalues; } PyArrayObject *Function_Python4D::complexValueAt( std::vector x, PyArrayObject *t ) { + PyArrayObject *cvalues; + SMILEI_PY_ACQUIRE_GIL PyObject *values = PyObject_CallFunctionObjArgs( py_profile, x[0], x[1], x[2], t, NULL ); - PyArrayObject *cvalues = ( PyArrayObject * )PyObject_CallMethod( values, const_cast("astype"), const_cast("s"), const_cast("complex"), NULL ); + cvalues = ( PyArrayObject * )PyObject_CallMethod( values, const_cast("astype"), const_cast("s"), const_cast("complex"), NULL ); Py_DECREF( values ); + SMILEI_PY_RELEASE_GIL return cvalues; } diff --git a/src/Pusher/PusherHigueraCary.cpp b/src/Pusher/PusherHigueraCary.cpp index c85189fff..456624f40 100755 --- a/src/Pusher/PusherHigueraCary.cpp +++ b/src/Pusher/PusherHigueraCary.cpp @@ -117,10 +117,11 @@ void PusherHigueraCary::operator()( Particles &particles, SmileiMPI *smpi, int i // beta**2 const double beta2 = Tx*Tx + Ty*Ty + Tz*Tz; + const double Tum = Tx*umx + Ty*umy + Tz*umz; // Equivalent of 1/\gamma_{new} in the paper const double local_invgf = 1./std::sqrt( 0.5*( gfm2 - beta2 + - std::sqrt( (gfm2 - beta2)*(gfm2 - beta2) + 4.0*( beta2 + std::pow( Tx*umx + Ty*umy + Tz*umz, 2 ) ) ) ) ); + std::sqrt( (gfm2 - beta2)*(gfm2 - beta2) + 4.0*( beta2 + Tum * Tum ) ) ) ); // Rotation in the magnetic field Tx *= local_invgf; diff --git a/src/Radiation/Radiation.cpp b/src/Radiation/Radiation.cpp index 19ea3f648..e9791dc2a 100755 --- a/src/Radiation/Radiation.cpp +++ b/src/Radiation/Radiation.cpp @@ -83,7 +83,7 @@ void Radiation::computeParticlesChi( Particles &particles, double charge_over_mass2; // 1/mass^2 - double one_over_mass_square = pow( one_over_mass_, 2. ); + double one_over_mass_square = one_over_mass_ * one_over_mass_; // Temporary Lorentz factor double gamma; diff --git a/src/Radiation/Radiation.h b/src/Radiation/Radiation.h index 0755d0f3e..d1b1ee979 100755 --- a/src/Radiation/Radiation.h +++ b/src/Radiation/Radiation.h @@ -76,10 +76,10 @@ class Radiation { return std::fabs( charge_over_mass2 )*inv_norm_E_Schwinger_ - * std::sqrt( std::fabs( std::pow( Ex*px + Ey*py + Ez*pz, 2 ) - - std::pow( gamma*Ex - By*pz + Bz*py, 2 ) - - std::pow( gamma*Ey - Bz*px + Bx*pz, 2 ) - - std::pow( gamma*Ez - Bx*py + By*px, 2 ) ) ); + * std::sqrt( std::fabs( (Ex*px + Ey*py + Ez*pz) * (Ex*px + Ey*py + Ez*pz) + - (gamma*Ex - By*pz + Bz*py) * (gamma*Ex - By*pz + Bz*py) + - (gamma*Ey - Bz*px + Bx*pz) * (gamma*Ey - Bz*px + Bx*pz) + - (gamma*Ez - Bx*py + By*px) * (gamma*Ez - Bx*py + By*px) ) ); }; //! Computation of the quantum parameter for the given diff --git a/src/Radiation/RadiationDiagRadiationSpectrum.cpp b/src/Radiation/RadiationDiagRadiationSpectrum.cpp index 32ab07205..5138dfe06 100644 --- a/src/Radiation/RadiationDiagRadiationSpectrum.cpp +++ b/src/Radiation/RadiationDiagRadiationSpectrum.cpp @@ -72,7 +72,7 @@ void RadiationDiagRadiationSpectrum::operator() ( double charge_over_mass2; // 1/mass^2 - const double one_over_mass_2 = std::pow(one_over_mass_,2.); + const double one_over_mass_2 = one_over_mass_ * one_over_mass_; // Temporary Lorentz factor double gamma; diff --git a/src/Radiation/RadiationTables.h b/src/Radiation/RadiationTables.h index 77bcac8e2..77a47f07f 100755 --- a/src/Radiation/RadiationTables.h +++ b/src/Radiation/RadiationTables.h @@ -131,8 +131,9 @@ class RadiationTables //#pragma omp declare simd static inline double __attribute__((always_inline)) computeRidgersFit( double particle_chi ) { - return std::pow( 1.0 + 4.8*( 1.0+particle_chi )*std::log( 1.0 + 1.7*particle_chi ) - + 2.44*particle_chi*particle_chi, -2.0/3.0 ); + double a = 1.0 + 4.8 * ( 1.0 + particle_chi )*std::log( 1.0 + 1.7 * particle_chi ) + + 2.44 * particle_chi * particle_chi; + return 1.0 / std::cbrt( a * a ); }; //! Get of the classical continuous radiated energy during dt diff --git a/src/Radiation/RadiationTools.h b/src/Radiation/RadiationTools.h index 1746c894e..21966e899 100644 --- a/src/Radiation/RadiationTools.h +++ b/src/Radiation/RadiationTools.h @@ -94,10 +94,7 @@ class RadiationTools { const double chi2 = particle_chi * particle_chi; const double chi3 = chi2 * particle_chi; return chi3*1.9846415503393384 - *std::pow( - 1.0 + (1. + 4.528*particle_chi)*std::log(1.+12.29*particle_chi) + 4.632*chi2 - ,-7./6. - ); + *std::pow(1.0 + (1. + 4.528*particle_chi)*std::log(1.+12.29*particle_chi) + 4.632*chi2,-7./6.); } //! Computation of the function g of Erber using the Ridgers @@ -107,10 +104,11 @@ class RadiationTools { #ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif - static inline double __attribute__((always_inline)) computeGRidgers(double particle_chi) + static inline double __attribute__((always_inline)) computeGRidgers(double particle_chi) // this is a duplicate of computeRidgersFit from radiationTables.h { - return std::pow(1. + 4.8*(1.0+particle_chi)*std::log(1. + 1.7*particle_chi) - + 2.44*particle_chi*particle_chi,-2./3.); + double a = 1.0 + 4.8 * ( 1.0 + particle_chi )*std::log( 1.0 + 1.7 * particle_chi ) + + 2.44 * particle_chi * particle_chi; + return 1.0 / std::cbrt( a * a ); }; // ----------------------------------------------------------------------------- @@ -122,8 +120,8 @@ class RadiationTools { #endif static inline double __attribute__((always_inline)) computeF1Nu(double nu) { - if (nu<0.1) return 2.149528241483088*std::pow(nu,-0.6666666666666667) - 1.813799364234217; - else if (nu>10) return 1.253314137315500*std::pow(nu,-0.5)*exp(-nu); + if (nu<0.1) return 2.149528241483088/std::cbrt(nu*nu) - 1.813799364234217; + else if (nu>10) return 1.253314137315500/std::sqrt(nu)*exp(-nu); else { const double lognu = std::log(nu); double lognu_power_n = lognu; @@ -142,10 +140,10 @@ class RadiationTools { return std::exp(f); - /*return exp(-1.042081355552157e-02 * pow(lognu,5) - -5.349995695960174e-02 * pow(lognu,4) - -1.570476212230771e-01 * pow(lognu,3) - -4.575331390887448e-01 * pow(lognu,2) + /*return exp(-1.042081355552157e-02 * pow (lognu,5) + -5.349995695960174e-02 * pow (lognu,4) + -1.570476212230771e-01 * pow (lognu,3) + -4.575331390887448e-01 * pow (lognu,2) -1.687909081004528e+00 * lognu -4.341018460806052e-01) ;*/ } @@ -160,8 +158,8 @@ class RadiationTools { #endif static inline double __attribute__((always_inline)) computeF2Nu(double nu) { - if (nu<0.05) return 1.074764120720013*std::pow(nu,-0.6666666666666667); - else if (nu>10) return 1.253314137315500*std::pow(nu,-0.5)*exp(-nu); + if (nu<0.05) return 1.074764120720013/std::cbrt(nu*nu) ; + else if (nu>10) return 1.253314137315500/std::sqrt(nu)*exp(-nu); else { const double lognu = std::log(nu); double lognu_power_n = lognu; diff --git a/src/Smilei.cpp b/src/Smilei.cpp index 11e43976e..335c37bbc 100755 --- a/src/Smilei.cpp +++ b/src/Smilei.cpp @@ -423,12 +423,15 @@ int main( int argc, char *argv[] ) if( !params.restart ) { TITLE( "Running diags at time t = 0" ); + #pragma omp parallel shared( smpi, params, vecPatches, simWindow ) + { #ifdef _OMPTASKS - vecPatches.runAllDiagsTasks( params, &smpi, 0, timers, simWindow ); + vecPatches.runAllDiagsTasks( params, &smpi, 0, timers, simWindow ); #else - vecPatches.runAllDiags( params, &smpi, 0, timers, simWindow ); + vecPatches.runAllDiags( params, &smpi, 0, timers, simWindow ); #endif - + } + vecPatches.rebootDiagTimers(); } TITLE( "Species creation summary" ); @@ -650,7 +653,7 @@ int main( int argc, char *argv[] ) // Standard fields operations (maxwell + comms + boundary conditions) are completed // apply prescribed fields can be considered if requested if( vecPatches(0)->EMfields->prescribedFields.size() ) { - #pragma omp single + #pragma omp master vecPatches.applyPrescribedFields( time_prim ); #pragma omp barrier } @@ -796,7 +799,6 @@ int main( int argc, char *argv[] ) // END MAIN CODE // --------------------------------------------------------------------------------------------------------------------- - int executeTestMode( VectorPatch &vecPatches, Region ®ion, SmileiMPI *smpi, diff --git a/src/Species/SpeciesFactory.h b/src/Species/SpeciesFactory.h index 8d5476d8a..37ece0c6f 100755 --- a/src/Species/SpeciesFactory.h +++ b/src/Species/SpeciesFactory.h @@ -671,7 +671,7 @@ class SpeciesFactory } if( (params.geometry=="AMcylindrical") && ( this_species->boundary_conditions_[1][1] != "remove" ) && ( this_species->boundary_conditions_[1][1] != "stop" ) && ( this_species->boundary_conditions_[1][1] != "reflective" ) ) { ERROR_NAMELIST( - " In AM geometry particle boundary conditions supported in Rmax are 'remove' and 'stop' ", + " In AM geometry particle boundary conditions supported in Rmax are 'remove', 'reflective' and 'stop' ", LINK_NAMELIST + std::string("#species") ); } @@ -763,7 +763,7 @@ class SpeciesFactory LINK_NAMELIST + std::string("#species") ); } - if( (model == "tunnel") || (model == "tunnel_BSI") || (model == "tunnel_TL") || (model == "tunnel_full_PPT") ){ + if( (model == "tunnel") || (model == "barrier_suppression") || (model == "Tong_Lin") || (model == "tunnel_full_PPT") ){ if (params.Laser_Envelope_model){ ERROR_NAMELIST("An envelope is present, so tunnel_envelope or tunnel_envelope_averaged ionization model should be selected for species "< &count, /*#pragma omp declare simd double SpeciesMetrics::get_particle_computation_time_vectorization(const double log_particle_number) { - return -7.983397022180499e-05 * pow(log_particle_number,4) - -1.220834603123080e-02 * pow(log_particle_number,3) -+ 2.262009704511124e-01 * pow(log_particle_number,2) - -1.346529777726451e+00 * log_particle_number -+ 3.053068997965275e+00; + return 3.053068997965275e+00 + log_particle_number * (-1.346529777726451e+00 + log_particle_number * (2.262009704511124e-01 + + log_particle_number * ( -1.220834603123080e-02 - 7.983397022180499e-05 * log_particle_number ) ) ) ; };*/ //! Evaluate the time necessary to compute `particle_number` particles @@ -127,46 +124,28 @@ float SpeciesMetrics::get_particle_computation_time_vectorization( const float l { // Cascade lake 6248 (Ex: Jean Zay) #if defined __INTEL_CASCADELAKE_6248 - return -3.878426186471072e-03 * pow(log_particle_number,4) - + 3.143999691029673e-02 * pow(log_particle_number,3) - + 6.520005335065826e-02 * pow(log_particle_number,2) - -1.103410559576951e+00 * log_particle_number - + 2.851575999756124e+00; + return 2.851575999756124e+00 + log_particle_number * ( -1.103410559576951e+00 + log_particle_number * ( 6.520005335065826e-02 + + log_particle_number * ( 3.143999691029673e-02 - 3.878426186471072e-03 * log_particle_number ) ) ) ; // Skylake 8168 (Ex: Irene Joliot-Curie) #elif defined __INTEL_SKYLAKE_8168 - return -5.500324176161280e-03 * pow( log_particle_number, 4 ) - + 5.302690106220765e-02 * pow( log_particle_number, 3 ) - -2.390999177899332e-02 * pow( log_particle_number, 2 ) - -1.018178658950980e+00 * log_particle_number - + 2.873965603217334e+00; + return 2.873965603217334e+00 + log_particle_number * ( -1.018178658950980e+00 + log_particle_number * ( -2.390999177899332e-02 + + log_particle_number * ( 5.302690106220765e-02 - 5.500324176161280e-03 * log_particle_number ) ) ) ; // Knight Landings Intel Xeon Phi 7250 (Ex: Frioul) #elif defined __INTEL_KNL_7250 - return + 9.287025545185804e-03 * pow( log_particle_number, 4 ) - -1.252595460426959e-01 * pow( log_particle_number, 3 ) - + 6.609030611761257e-01 * pow( log_particle_number, 2 ) - -1.948861281215199e+00 * log_particle_number - + 3.391615458521049e+00; + return 3.391615458521049e+00 + log_particle_number * ( -1.948861281215199e+00 + log_particle_number * ( 6.609030611761257e-01 + + log_particle_number * ( -1.252595460426959e-01 + 9.287025545185804e-03 * log_particle_number ) ) ) ; // Broadwell Intel Xeon E5-2697 v4 (Ex: Tornado) #elif defined __INTEL_BDW_E5_2697_V4 - return -4.732086199743545e-03 * pow( log_particle_number, 4 ) - + 3.249709067117774e-02 * pow( log_particle_number, 3 ) - + 1.940828611778672e-01 * pow( log_particle_number, 2 ) - -2.010116307618810e+00 * log_particle_number - + 4.661824411143119e+00; + return 4.661824411143119e+00 + log_particle_number * ( -2.010116307618810e+00 + log_particle_number * (1.940828611778672e-01 + + log_particle_number * ( 3.249709067117774e-02 - 4.732086199743545e-03 * log_particle_number ) ) ) ; // Haswell Intel Xeon E5-2680 v3 (Ex: Jureca) #elif defined __INTEL_HSW_E5_2680_v3 - return -4.127980207551420e-03 * pow( log_particle_number, 4 ) - + 3.688297004269906e-02 * pow( log_particle_number, 3 ) - + 3.666171703120181e-02 * pow( log_particle_number, 2 ) - -1.066920754145127e+00 * log_particle_number - + 2.893485213852858e+00; + return 2.893485213852858e+00 + log_particle_number * ( -1.066920754145127e+00 + log_particle_number * ( 3.666171703120181e-02 + + log_particle_number * ( 3.688297004269906e-02 - 4.127980207551420e-03 * log_particle_number ) ) ) ; // General fit #else - return -1.760649180606238e-03 * pow(log_particle_number,4) - + 8.410553824987992e-03 * pow(log_particle_number,3) - + 1.447576003168199e-01 * pow(log_particle_number,2) - -1.192593070397785e+00 * log_particle_number - + 2.855507642982689e+00; + return 2.855507642982689e+00 + log_particle_number * ( -1.192593070397785e+00 + log_particle_number * ( 1.447576003168199e-01 + + log_particle_number * ( 8.410553824987992e-03 - 1.760649180606238e-03 * log_particle_number ) ) ) ; #endif }; diff --git a/src/Species/SpeciesV.cpp b/src/Species/SpeciesV.cpp index 4a4199b63..cb173963d 100755 --- a/src/Species/SpeciesV.cpp +++ b/src/Species/SpeciesV.cpp @@ -1758,7 +1758,7 @@ void SpeciesV::mergeParticles( double time_dual ) // for (unsigned int ip = 0; ip < (unsigned int)(particles->last_index.back()) ; ip++) { // weight_before += particles->weight(ip); - // energy_before += sqrt(1 + pow(particles->momentum(0,ip),2) + pow(particles->momentum(1,ip),2) + pow(particles->momentum(2,ip),2)); + // energy_before += sqrt(1 + particles->momentum(0,ip) * particles->momentum(0,ip) + particles->momentum(1,ip) * particles->momentum(1,ip) + particles->momentum(2,ip) * particles->momentum(2,ip)); // } // For each cell, we apply independently the merging process @@ -1787,9 +1787,7 @@ void SpeciesV::mergeParticles( double time_dual ) // for (unsigned int ip = 0; ip < (unsigned int)(particles->last_index.back()) ; ip++) { // weight_after += particles->weight(ip); - // energy_after += sqrt(1 + pow(particles->momentum(0,ip),2) - // + pow(particles->momentum(1,ip),2) - // + pow(particles->momentum(2,ip),2)); + // energy_after += sqrt(1 + particles->momentum(0,ip) * particles->momentum(0,ip) + particles->momentum(1,ip) * particles->momentum(1,ip) + particles->momentum(2,ip) * particles->momentum(2,ip)); // } // // if (weight_before != weight_after) { diff --git a/src/Species/SpeciesVAdaptive.cpp b/src/Species/SpeciesVAdaptive.cpp index 273362561..4152a5e7c 100755 --- a/src/Species/SpeciesVAdaptive.cpp +++ b/src/Species/SpeciesVAdaptive.cpp @@ -346,18 +346,33 @@ void SpeciesVAdaptive::scalarDynamics( double time_dual, unsigned int ispec, if (time_dual <= time_frozen_ && diag_flag &&( !particles->is_test ) ) { //immobile particle (at the moment only project density) + if( params.geometry != "AMcylindrical" ) { - smpi->traceEventIfDiagTracing(diag_PartEventTracing, ithread,0,3); - double *b_rho=nullptr; - for( unsigned int ibin = 0 ; ibin < particles->first_index.size() ; ibin ++ ) { //Loop for projection on buffer_proj - - b_rho = EMfields->rho_s[ispec] ? &( *EMfields->rho_s[ispec] )( 0 ) : &( *EMfields->rho_ )( 0 ) ; - for( iPart=particles->first_index[ibin] ; ( int )iPartlast_index[ibin]; iPart++ ) { - Proj->basic( b_rho, ( *particles ), iPart, 0 ); + smpi->traceEventIfDiagTracing(diag_PartEventTracing, ithread,0,3); + double *b_rho = EMfields->rho_s[ispec] ? &( *EMfields->rho_s[ispec] )( 0 ) : &( *EMfields->rho_ )( 0 ) ; + for( unsigned int scell = 0 ; scell < particles->first_index.size() ; scell ++ ) { //Loop for projection on buffer_proj + for( iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { + Proj->basic( b_rho, ( *particles ), iPart, 0 ); + } } + smpi->traceEventIfDiagTracing(diag_PartEventTracing, ithread,1,3); + + } else { // AM case + ElectroMagnAM *emAM = static_cast( EMfields ); + int n_species = patch->vecSpecies.size(); + + smpi->traceEventIfDiagTracing(diag_PartEventTracing, ithread,0,3); + for( unsigned int imode = 0; imode *b_rho = emAM->rho_AM_s[ifield] ? &( *emAM->rho_AM_s[ifield] )( 0 ) : &( *emAM->rho_AM_[imode] )( 0 ) ; + for( unsigned int scell = 0 ; scell < particles->first_index.size() ; scell ++ ) { //Loop for projection on buffer_proj + for( int iPart=particles->first_index[scell] ; iPartlast_index[scell]; iPart++ ) { + Proj->basicForComplex( b_rho, ( *particles ), iPart, 0, imode ); + } + } + } + smpi->traceEventIfDiagTracing(diag_PartEventTracing, ithread,1,3); } - smpi->traceEventIfDiagTracing(diag_PartEventTracing, ithread,1,3); - } }//END scalarDynamics diff --git a/src/Tools/PyTools.h b/src/Tools/PyTools.h index a756f3db8..0a5ee96f4 100755 --- a/src/Tools/PyTools.h +++ b/src/Tools/PyTools.h @@ -44,8 +44,8 @@ // } // SMILEI_PY_RESTORE_MASTER_THREAD #if PY_MAJOR_VERSION > 2 && PY_MINOR_VERSION > 11 - #define SMILEI_PY_SAVE_MASTER_THREAD PyThreadState *_save = NULL; _Pragma("omp master") if( Py_IsInitialized() ) _save = PyEval_SaveThread(); - #define SMILEI_PY_RESTORE_MASTER_THREAD _Pragma("omp master") if( Py_IsInitialized() ) PyEval_RestoreThread( _save ); + #define SMILEI_PY_SAVE_MASTER_THREAD PyThreadState *_save = NULL; _Pragma("omp master") if( Py_IsInitialized() ) _save = PyEval_SaveThread(); _Pragma("omp barrier") + #define SMILEI_PY_RESTORE_MASTER_THREAD _Pragma("omp master") if( Py_IsInitialized() ) PyEval_RestoreThread( _save ); _Pragma("omp barrier") #define SMILEI_PY_ACQUIRE_GIL PyGILState_STATE _state = PyGILState_Ensure(); #define SMILEI_PY_RELEASE_GIL PyGILState_Release( _state ); #else diff --git a/src/Tools/Random.h b/src/Tools/Random.h index 1355de28e..c57ac2550 100755 --- a/src/Tools/Random.h +++ b/src/Tools/Random.h @@ -4,6 +4,35 @@ #include #include #include +#include "userFunctions.h" + +namespace Random_namespace // in order to use the random functions without having access to the class random +{ + + inline uint32_t xorshift32(uint32_t xorshift32_state) + { + // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" + xorshift32_state ^= xorshift32_state << 13; + xorshift32_state ^= xorshift32_state >> 17; + xorshift32_state ^= xorshift32_state << 5; + return xorshift32_state; + } + + inline double uniform1(uint32_t xorshift32_state) { + constexpr double xorshift32_invmax1 = (1.-1e-11)/4294967296.; + return xorshift32(xorshift32_state) * xorshift32_invmax1; + } + + inline double perp_rand_dp(uint32_t xorshift32_state) { + double a = userFunctions::erfinv_dp( uniform1(xorshift32_state) ); + // technically we could also use the erfinv() function fron cuda, it would require compiling with -cuda though ... + // the study showed the gap in perf for BC thermal was not worth the added depend + if( xorshift32(xorshift32_state) & 1 ) { // rand->cointoss() + a *= -1.; + } + return a; + } +} class Random { diff --git a/src/Tools/tabulatedFunctions.cpp b/src/Tools/tabulatedFunctions.cpp deleted file mode 100755 index e31506cea..000000000 --- a/src/Tools/tabulatedFunctions.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include "tabulatedFunctions.h" -#include -#include -#include -#include -#include - - -using namespace std; - -// --------------------------------------------------------------------------------------------------------------------- -// Inverse error function -// --------------------------------------------------------------------------------------------------------------------- - -// method used to load the tabulated function -// ------------------------------------------ -void erfinv::prepare() -{ - if( erfinv_tab_.size()==0 ) { - - erfinv_tabSize_ = 1000; - erfinv_xmin_ = 0.0001; - erfinv_xmax_ = 0.9999; - erfinv_alpha_ = log( erfinv_xmax_/erfinv_xmin_ )/( double )( erfinv_tabSize_-1 ); - - erfinv_tab_.resize( erfinv_tabSize_ ); - erfinv_x_.resize( erfinv_tabSize_ ); - - // ----------------------------------- - // TABULATE THE INVERSE ERROR FUNCTION - // (using a logarithmic scale) - // ----------------------------------- - double tiny=1.e-10; // numerical parameter (~precision) - - for( unsigned int n=0; ntiny ) { - vm=0.5*( vl+vr ); - if( erfinv_x_[n]>erf( vm ) ) { - vl=vm; - } else { - vr=vm; - } - } - erfinv_tab_[n] = 0.5*( vl+vr ); - - }//n - - } else { - DEBUG( "trying to call this again!" ); - }//needLoad - -} - -// method used to compute the value for a given x (use linear interpolation on log. scale axis) -// -------------------------------------------------------------------------------------------- -double erfinv::call( double x ) -{ - - double pi = M_PI; - double val = 0.0; - - if( x <= erfinv_x_[0] ) { - val = 0.5*sqrt( pi )*x + pi/24.0 *pow( x, 3 ); - } else if( x >= erfinv_x_.back() ) { - double eta = -log( sqrt( pi )*( 1.0-x ) ); - double log_eta = log( eta ); - val = sqrt( eta - 0.5*log_eta + ( 0.25*log_eta-0.5 )/eta ); - } else { - unsigned int n = floor( log( erfinv_xmax_/( 1.0-x ) )/erfinv_alpha_ ); - double wl = ( erfinv_x_[n+1]-x )/( erfinv_x_[n+1]-erfinv_x_[n] ); - double wr = ( x-erfinv_x_[n] ) /( erfinv_x_[n+1]-erfinv_x_[n] ); - val = wl*erfinv_tab_[n] + wr*erfinv_tab_[n+1]; - } - - return val; -} - - - - - - diff --git a/src/Tools/tabulatedFunctions.h b/src/Tools/tabulatedFunctions.h deleted file mode 100755 index 161eea70d..000000000 --- a/src/Tools/tabulatedFunctions.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef TABULATEDFUNCTIONS_H -#define TABULATEDFUNCTIONS_H - -#include -#include -#include -#include -#include -#include "Tools.h" - - - -//! singleton class of tabulated functions - -class erfinv -{ -public: - static erfinv &instance() - { - static erfinv one_and_only_instance; // Guaranteed to be destroyed. - // Instantiated on first use. - return one_and_only_instance; - } - - //! returns inverse error function of a double x - double call( double x ); - - //! needs to be called one time before using erfinv - void prepare(); - -protected: - // creator is private for singletons - erfinv() {}; - erfinv( erfinv const & ); // avoid implementation of this - void operator=( erfinv const & ); // avoid implementation of this - -private: - - //! number of points used to sample the fct - unsigned int erfinv_tabSize_; - - //! mininum value for x - double erfinv_xmin_; - - //! maximum value for x - double erfinv_xmax_; - - //! constant used to compute the different values of x used during the sampling - double erfinv_alpha_; - - //! vector storing the values of x used to sample erfinv - std::vector erfinv_x_; - - //! vector storing the sampled values of erfinv - std::vector erfinv_tab_; - -}; - - -#endif - - - - diff --git a/src/Tools/userFunctions.cpp b/src/Tools/userFunctions.cpp deleted file mode 100755 index 2e540b322..000000000 --- a/src/Tools/userFunctions.cpp +++ /dev/null @@ -1,207 +0,0 @@ -#include -#include -#include "userFunctions.h" - -#include "Params.h" - -//! inverse error function is taken from NIST -double userFunctions::erfinv( double x ) -{ - if( x < -1. || x > 1. ) { - return std::numeric_limits::quiet_NaN(); - } - - if( x == 0 ) { - return 0; - } - - int sign_x=( x > 0? 1 : -1 ); - - double r; - if( x <= 0.686 ) { - double x2 = x * x; - r = x * ( ( ( -0.140543331 * x2 + 0.914624893 ) * x2 + -1.645349621 ) * x2 + 0.886226899 ); - r /= ( ( ( 0.012229801 * x2 + -0.329097515 ) * x2 + 1.442710462 ) * x2 + -2.118377725 ) * x2 + 1.; - } else { - double y = sqrt( -log( ( 1. - x ) / 2. ) ); - r = ( ( ( 1.641345311 * y + 3.429567803 ) * y + -1.62490649 ) * y + -1.970840454 ); - r /= ( 1.637067800 * y + 3.543889200 ) * y + 1.; - } - - r *= ( double )sign_x; - x *= ( double )sign_x; - - r -= ( erf( r ) - x ) / ( 2. / sqrt( M_PI ) * exp( -r*r ) ); - - return r; -} - -//! inverse error function is taken from M.B. Giles. 'Approximating the erfinv function'. In GPU Computing Gems, volume 2, Morgan Kaufmann, 2011. -double userFunctions::erfinv2( double x ) -{ - double w, p; - w = -log( ( 1.0-x )*( 1.0+x ) ); - - if( w < 5.000000 ) { - w = w - 2.500000; - p = +2.81022636000e-08 ; - p = +3.43273939000e-07 + p*w; - p = -3.52338770000e-06 + p*w; - p = -4.39150654000e-06 + p*w; - p = +0.00021858087e+00 + p*w; - p = -0.00125372503e+00 + p*w; - p = -0.00417768164e+00 + p*w; - p = +0.24664072700e+00 + p*w; - p = +1.50140941000e+00 + p*w; - } else { - w = sqrt( w ) - 3.000000; - p = -0.000200214257 ; - p = +0.000100950558 + p*w; - p = +0.001349343220 + p*w; - p = -0.003673428440 + p*w; - p = +0.005739507730 + p*w; - p = -0.007622461300 + p*w; - p = +0.009438870470 + p*w; - p = +1.001674060000 + p*w; - p = +2.832976820000 + p*w; - } - return p*x; -} - -// ---------------------------------------------------------------------------- -//! \brief Distribute equally the load into chunk of an array -//! and return the number of elements for the specified chunk number. -// -//! \param chunk number -//! \param nb_chunks Total number of MPI tasks -//! \param nb_elems Total number of element to be distributed -//! \param imin Index of the first element for chunk -//! \param nb_loc_elems Number of element for chunk -// ---------------------------------------------------------------------------- -void userFunctions::distributeArray( int chunk, - int nb_chunks, - int nb_elems, - int &imin, - int &nb_loc_elems ) -{ - // If more ranks than elements, - // only a part of the processes will work - if( nb_chunks >= nb_elems ) { - if( chunk < nb_elems ) { - imin = chunk; - nb_loc_elems = 1; - } else { - imin = nb_elems; - nb_loc_elems = 0; - } - } else { - - int quotient; - int remainder; - - // Part of the load equally distributed - quotient = nb_elems/nb_chunks; - - // Remaining load to be distributed after balanced repartition - remainder = nb_elems%nb_chunks; - - if( chunk < remainder ) { - imin = chunk*quotient+chunk; - nb_loc_elems = quotient + 1; - } else { - imin = remainder + chunk*quotient; - nb_loc_elems = quotient; - } - } -} - -// ---------------------------------------------------------------------------- -//! \brief Distribute equally 1D array into chunks -//! This function returns tables of indexes and length for each chunk -// -//! \param nb_chunks Total number of chunks -//! \param nb_elems Total number of element to be distributed -//! \param imin_table Index of the first element for each chunk -//! \param length_table Number of element for each chunk -// ---------------------------------------------------------------------------- -void userFunctions::distributeArray( - int nb_chunks, - int nb_elems, - int *imin_table, - int *length_table ) -{ - - // If more chunks than elements, - // only a part of the processes will work - if( nb_chunks >= nb_elems ) { - #pragma omp simd - for( int chunk = 0 ; chunk < nb_elems ; chunk ++ ) { - imin_table[chunk] = chunk; - length_table[chunk] = 1; - } - #pragma omp simd - for( int chunk = nb_elems ; chunk < nb_chunks ; chunk ++ ) { - imin_table[chunk] = nb_elems; - length_table[chunk] = 0; - } - } else { - - int quotient; - int remainder; - - // Part of the load equally distributed - quotient = nb_elems/nb_chunks; - - // Remaining load to be distributed after balanced repartition - remainder = nb_elems%nb_chunks; - - #pragma omp simd - for( int chunk = 0 ; chunk < remainder ; chunk ++ ) { - imin_table[chunk] = chunk*quotient+chunk; - length_table[chunk] = quotient + 1; - } - #pragma omp simd - for( int chunk = remainder ; chunk < nb_chunks ; chunk ++ ) { - imin_table[chunk] = remainder + chunk*quotient; - length_table[chunk] = quotient; - } - } -} - - -// ---------------------------------------------------------------------------- -//! \brief This function uses a bijection algorithm in a monotonic double array -//! to find the corresponding index i so that elem is between array[i] -//! and array[i+1]. -// -//! \param array array in which to find the value -//! \param elem element to be found -//! \param nb_elem number of elements -// ---------------------------------------------------------------------------- -// template -// int userFunctions::searchValuesInMonotonicArray( T * array, -// T elem, -// int nb_elems ) -// { -// int imin = 0; // lower bound -// int imax = nb_elems-1; // upper bound -// int imid = 0; -// -// if( elem == array[0] ) { -// return 0; -// } else if( elem == array[nb_elems-1] ) { -// return nb_elems-2; -// } else { -// while( imax - imin > 1 ) { -// imid= ( imin + imax )/2; -// //imid= (imin + imax)>>1; -// if( elem >= array[imid] ) { -// imin = imid; -// } else { -// imax = imid; -// } -// } -// return imin; -// } -// } - diff --git a/src/Tools/userFunctions.h b/src/Tools/userFunctions.h index d9525723d..c3e39b26d 100755 --- a/src/Tools/userFunctions.h +++ b/src/Tools/userFunctions.h @@ -6,28 +6,233 @@ #ifndef USERFUNCTIONS_H #define USERFUNCTIONS_H -class userFunctions +namespace userFunctions { +// removed all static properties since we are using a namespace -public: + //! inverse error function is taken from M.B. Giles. 'Approximating the erfinv function'. In GPU Computing Gems, volume 2, Morgan Kaufmann, 2011. + inline double erfinv_sp( double x ) + { + double w, p; + w = -log( ( 1.0 - x ) * ( 1.0 + x ) ); + + if( w < 5.000000 ) { + w = w - 2.500000; + p = +2.81022636000e-08 ; + p = +3.43273939000e-07 + p*w; + p = -3.52338770000e-06 + p*w; + p = -4.39150654000e-06 + p*w; + p = +0.00021858087e+00 + p*w; + p = -0.00125372503e+00 + p*w; + p = -0.00417768164e+00 + p*w; + p = +0.24664072700e+00 + p*w; + p = +1.50140941000e+00 + p*w; + } else { + w = sqrt( w ) - 3.000000; + p = -0.000200214257 ; + p = +0.000100950558 + p*w; + p = +0.001349343220 + p*w; + p = -0.003673428440 + p*w; + p = +0.005739507730 + p*w; + p = -0.007622461300 + p*w; + p = +0.009438870470 + p*w; + p = +1.001674060000 + p*w; + p = +2.832976820000 + p*w; + } + return p*x; + } + /** + * copied from erfinv_DP_1.cu by Prof. Mike Giles. + * https://people.maths.ox.ac.uk/gilesm/ + * https://people.maths.ox.ac.uk/gilesm/codes/erfinv/ + * + * Original code is written for CUDA. + * Mutsuo Saito modified original code for C++. + */ + inline double erfinv_dp(double x) + { + double w, p; + double sign; + if (x > 0) { + sign = 1.0; + } else { + sign = -1.0; + x = abs(x); + } + w = - log( (1.0 - x) * (1.0 + x) ); - static double erfinv( double x ); - static double erfinv2( double x ); + if ( w < 6.250000 ) { + w = w - 3.125000; + p = -3.6444120640178196996e-21; + p = -1.685059138182016589e-19 + p*w; + p = 1.2858480715256400167e-18 + p*w; + p = 1.115787767802518096e-17 + p*w; + p = -1.333171662854620906e-16 + p*w; + p = 2.0972767875968561637e-17 + p*w; + p = 6.6376381343583238325e-15 + p*w; + p = -4.0545662729752068639e-14 + p*w; + p = -8.1519341976054721522e-14 + p*w; + p = 2.6335093153082322977e-12 + p*w; + p = -1.2975133253453532498e-11 + p*w; + p = -5.4154120542946279317e-11 + p*w; + p = 1.051212273321532285e-09 + p*w; + p = -4.1126339803469836976e-09 + p*w; + p = -2.9070369957882005086e-08 + p*w; + p = 4.2347877827932403518e-07 + p*w; + p = -1.3654692000834678645e-06 + p*w; + p = -1.3882523362786468719e-05 + p*w; + p = 0.0001867342080340571352 + p*w; + p = -0.00074070253416626697512 + p*w; + p = -0.0060336708714301490533 + p*w; + p = 0.24015818242558961693 + p*w; + p = 1.6536545626831027356 + p*w; + } + else if ( w < 16.000000 ) { + w = sqrt(w) - 3.250000; + p = 2.2137376921775787049e-09; + p = 9.0756561938885390979e-08 + p*w; + p = -2.7517406297064545428e-07 + p*w; + p = 1.8239629214389227755e-08 + p*w; + p = 1.5027403968909827627e-06 + p*w; + p = -4.013867526981545969e-06 + p*w; + p = 2.9234449089955446044e-06 + p*w; + p = 1.2475304481671778723e-05 + p*w; + p = -4.7318229009055733981e-05 + p*w; + p = 6.8284851459573175448e-05 + p*w; + p = 2.4031110387097893999e-05 + p*w; + p = -0.0003550375203628474796 + p*w; + p = 0.00095328937973738049703 + p*w; + p = -0.0016882755560235047313 + p*w; + p = 0.0024914420961078508066 + p*w; + p = -0.0037512085075692412107 + p*w; + p = 0.005370914553590063617 + p*w; + p = 1.0052589676941592334 + p*w; + p = 3.0838856104922207635 + p*w; + } + else { + w = sqrt(w) - 5.000000; + p = -2.7109920616438573243e-11; + p = -2.5556418169965252055e-10 + p*w; + p = 1.5076572693500548083e-09 + p*w; + p = -3.7894654401267369937e-09 + p*w; + p = 7.6157012080783393804e-09 + p*w; + p = -1.4960026627149240478e-08 + p*w; + p = 2.9147953450901080826e-08 + p*w; + p = -6.7711997758452339498e-08 + p*w; + p = 2.2900482228026654717e-07 + p*w; + p = -9.9298272942317002539e-07 + p*w; + p = 4.5260625972231537039e-06 + p*w; + p = -1.9681778105531670567e-05 + p*w; + p = 7.5995277030017761139e-05 + p*w; + p = -0.00021503011930044477347 + p*w; + p = -0.00013871931833623122026 + p*w; + p = 1.0103004648645343977 + p*w; + p = 4.8499064014085844221 + p*w; + } + return sign * p * x; + } - //! Load repartition in 1d between MPI processes - static void distributeArray( int rank, - int nb_ranks, - int nb_elems, - int &imin, - int &nb_loc_elems ); + //! Load repartition in 1d between MPI processes + // ---------------------------------------------------------------------------- + //! \brief Distribute equally the load into chunk of an array + //! and return the number of elements for the specified chunk number. + // + //! \param chunk number + //! \param nb_chunks Total number of MPI tasks + //! \param nb_elems Total number of element to be distributed + //! \param imin Index of the first element for chunk + //! \param nb_loc_elems Number of element for chunk + // ---------------------------------------------------------------------------- + inline void distributeArray( int chunk, + int nb_chunks, + int nb_elems, + int &imin, + int &nb_loc_elems ) + { + // If more ranks than elements, + // only a part of the processes will work + if( nb_chunks >= nb_elems ) { + if( chunk < nb_elems ) { + imin = chunk; + nb_loc_elems = 1; + } else { + imin = nb_elems; + nb_loc_elems = 0; + } + } else { + + int quotient; + int remainder; + + // Part of the load equally distributed + quotient = nb_elems/nb_chunks; + + // Remaining load to be distributed after balanced repartition + remainder = nb_elems%nb_chunks; + + if( chunk < remainder ) { + imin = chunk*quotient+chunk; + nb_loc_elems = quotient + 1; + } else { + imin = remainder + chunk*quotient; + nb_loc_elems = quotient; + } + } + } //! Load repartition in 1d between MPI processes. - //! This function returns tables of indexes and length for all rank - static void distributeArray( + // ---------------------------------------------------------------------------- + //! \brief Distribute equally 1D array into chunks + //! This function returns tables of indexes and length for each chunk + // + //! \param nb_chunks Total number of chunks + //! \param nb_elems Total number of element to be distributed + //! \param imin_table Index of the first element for each chunk + //! \param length_table Number of element for each chunk + // ---------------------------------------------------------------------------- + inline void distributeArray( int nb_chunks, int nb_elems, int *imin_table, - int *length_table ); + int *length_table ) + { + + // If more chunks than elements, + // only a part of the processes will work + if( nb_chunks >= nb_elems ) { + #pragma omp simd + for( int chunk = 0 ; chunk < nb_elems ; chunk ++ ) { + imin_table[chunk] = chunk; + length_table[chunk] = 1; + } + #pragma omp simd + for( int chunk = nb_elems ; chunk < nb_chunks ; chunk ++ ) { + imin_table[chunk] = nb_elems; + length_table[chunk] = 0; + } + } else { + + int quotient; + int remainder; + + // Part of the load equally distributed + quotient = nb_elems/nb_chunks; + + // Remaining load to be distributed after balanced repartition + remainder = nb_elems%nb_chunks; + + #pragma omp simd + for( int chunk = 0 ; chunk < remainder ; chunk ++ ) { + imin_table[chunk] = chunk*quotient+chunk; + length_table[chunk] = quotient + 1; + } + #pragma omp simd + for( int chunk = remainder ; chunk < nb_chunks ; chunk ++ ) { + imin_table[chunk] = remainder + chunk*quotient; + length_table[chunk] = quotient; + } + } + } //! \brief This function uses a bijection algorithm in a monotonic double array //! to find the corresponding index i so that elem is between array[i] @@ -40,34 +245,30 @@ class userFunctions #pragma acc routine seq #endif template - static int searchValuesInMonotonicArray( T * array, + inline int searchValuesInMonotonicArray( T * array, T elem, int nb_elems ) -{ - int imin = 0; // lower bound - int imax = nb_elems-1; // upper bound - int imid = 0; - - if( elem == array[0] ) { - return 0; - } else if( elem == array[nb_elems-1] ) { - return nb_elems-2; - } else { - while( imax - imin > 1 ) { - imid= ( imin + imax )/2; - //imid= (imin + imax)>>1; - if( elem >= array[imid] ) { - imin = imid; - } else { - imax = imid; + { + int imin = 0; // lower bound + int imax = nb_elems-1; // upper bound + int imid = 0; + + if( elem == array[0] ) { + return 0; + } else if( elem == array[nb_elems-1] ) { + return nb_elems-2; + } else { + while( imax - imin > 1 ) { + imid= ( imin + imax )/2; + //imid= (imin + imax)>>1; + if( elem >= array[imid] ) { + imin = imid; + } else { + imax = imid; + } } + return imin; } - return imin; } } - -private: - - -}; #endif diff --git a/validation/analyses/gpu_validate_tst1d_03_thermal_expansion.py b/validation/analyses/gpu_validate_tst1d_03_thermal_expansion.py new file mode 100755 index 000000000..767d21ad4 --- /dev/null +++ b/validation/analyses/gpu_validate_tst1d_03_thermal_expansion.py @@ -0,0 +1,26 @@ +import os, re, numpy as np +import happi +from scipy.signal import butter, filtfilt +b, a = butter(5, 0.2, btype='low', analog=False) + + +#S = happi.Open("./restart*", verbose=False) + +S = happi.Open(verbose=False) + +eon_spectrum = S.ParticleBinning.Diag2().get() +ekin = eon_spectrum["ekin"] +eon_spectrum = np.mean(eon_spectrum["data"], axis=0) +eon_spectrum_filt = filtfilt(b, a, eon_spectrum) +# # theory +# Te = S.namelist.Species["eon"].temperature[0] +# factor = S.namelist.Species["eon"].number_density.xplateau / S.namelist.Main.grid_length[0] +# theoretical_spectrum = factor*2./Te * (ekin/np.pi/Te)**0.5 * np.exp(-ekin/Te) +# plt.plot(ekin, eon_spectrum_filt, '.-') +# plt.plot(ekin, theoretical_spectrum, '-') +Validate("Electron spectrum", eon_spectrum_filt, 20. ) + + +rho = S.Field.Field0.Rho_ion(timesteps=11800).getData()[0] +rho_filt = filtfilt(b, a, rho) +Validate("Final ion profile", rho_filt[::10], 0.15) diff --git a/validation/analyses/gpu_validate_tst2d_01_plasma_mirror.py b/validation/analyses/gpu_validate_tst2d_01_plasma_mirror.py new file mode 100755 index 000000000..e958d40ee --- /dev/null +++ b/validation/analyses/gpu_validate_tst2d_01_plasma_mirror.py @@ -0,0 +1,17 @@ +import os, re, numpy as np, math +from scipy.ndimage import gaussian_filter as gfilt +import happi + +#S = happi.Open(["./restart*"], verbose=False) + +S = happi.Open(verbose=False) + +# COMPARE THE TOTAL NUMBER OF CREATED IONS +nion = S.Scalar.Ntot_ion(timesteps=0).getData()[0] +Validate("Number of ions at iteration 0", nion) + +# COMPARE THE Ey FIELD +Ey = S.Field.Field0.Ey(timesteps=800).getData()[0] +Ey = gfilt(Ey, 6) # smoothing +Ey = Ey[50:200:4, 300::4] # zoom on the reflected laser +Validate("Ey field at iteration 800", Ey, 0.02) diff --git a/validation/analyses/validate_tst1d_05_tunnel_ionisation.py b/validation/analyses/validate_tst1d_05_tunnel_ionisation.py index 19148bd74..fe26ad1d5 100755 --- a/validation/analyses/validate_tst1d_05_tunnel_ionisation.py +++ b/validation/analyses/validate_tst1d_05_tunnel_ionisation.py @@ -93,7 +93,7 @@ def calculate_ionization(Ip, l): d = S.TrackParticles("electron", axes=["Id","x","Wy"], timesteps=1000).getData() keep = d["Id"] > 0 order = np.argsort(d["x"][keep]) -Validate("Track electron x", d["x"][keep][order][::200], 1e-4) +Validate("Track electron x", d["x"][keep][order][::200], 2e-4) Validate("Track electron Wy", gaussian_filter(maximum_filter1d(d["Wy"][keep][order],20),200)[::200], 1e-5) # NEW PARTICLES DIAGNOSTIC diff --git a/validation/analyses/validate_tst1d_25_bsi_ionisation.py b/validation/analyses/validate_tst1d_25_bsi_ionisation.py new file mode 100755 index 000000000..3cf4da166 --- /dev/null +++ b/validation/analyses/validate_tst1d_25_bsi_ionisation.py @@ -0,0 +1,49 @@ +import numpy as np +from scipy.ndimage import gaussian_filter, maximum_filter1d +import happi +import sys + +S = happi.Open(["./restart*"], verbose=False) + +# COMPARE THE Ey FIELD +Ey = S.Field.Field0.Ey(timesteps=1000).getData()[0] +Validate("Ey field at iteration 1000", Ey, 0.001) + +# COMPARE THE Ez FIELD +Ez = S.Field.Field0.Ez(timesteps=1000).getData()[0] +Validate("Ez field at iteration 1000", Ez, 1e-9) + +# hydrogen +charge_distribution = S.ParticleBinning.Diag0().getData() +charge_distribution /= charge_distribution[0].sum() +n = charge_distribution[0].size +mean_charge = [np.sum(d*np.arange(n)) for d in charge_distribution] +Validate("Hydrogen mean charge vs time", mean_charge, 0.03) + +# carbon +charge_distribution = S.ParticleBinning.Diag1().getData() +charge_distribution /= charge_distribution[0].sum() +n = charge_distribution[0].size +mean_charge = [np.sum(d*np.arange(n)) for d in charge_distribution] +Validate("Carbon mean charge vs time", mean_charge, 0.03) + +# SCALARS RELATED TO SPECIES +Validate("Scalar Dens_electron", S.Scalar.Dens_electron().getData(), 0.003) +Validate("Scalar Ntot_electron", S.Scalar.Ntot_electron().getData(), 100.) +Validate("Scalar Zavg_carbon" , S.Scalar.Zavg_carbon ().getData(), 0.2) + +# TRACKING DIAGNOSTIC +d = S.TrackParticles("electron", axes=["Id","x","Wy"], timesteps=1000).getData() +keep = d["Id"] > 0 +order = np.argsort(d["x"][keep]) +Validate("Track electron x", d["x"][keep][order][::200], 1e-4) +Validate("Track electron Wy", gaussian_filter(maximum_filter1d(d["Wy"][keep][order],20),200)[::200], 1e-5) + +# NEW PARTICLES DIAGNOSTIC +d = S.NewParticles.electron().get() +t = d["t"] +q = d["q"] +Validate("DiagNewParticles: number of particles", t.size, 5. ) +tavg = [np.mean(t[q==i]) for i in [0,1,2,3]] +Validate("DiagNewParticles: time vs ionization state", tavg, 0.01 ) + diff --git a/validation/analyses/validate_tst1d_26_TL_ionisation.py b/validation/analyses/validate_tst1d_26_TL_ionisation.py new file mode 100755 index 000000000..19148bd74 --- /dev/null +++ b/validation/analyses/validate_tst1d_26_TL_ionisation.py @@ -0,0 +1,105 @@ +import os, re, numpy as np, math +from scipy.interpolate import interp1d +from scipy.ndimage import gaussian_filter, maximum_filter1d +from h5py import File +from glob import glob +import happi + +S = happi.Open(["./restart*"], verbose=False) + + + +# COMPARE THE Ey FIELD +Ey = S.Field.Field0.Ey(timesteps=1000).getData()[0] +Validate("Ey field at iteration 1000", Ey, 0.001) + +# COMPARE THE Ez FIELD +Ez = S.Field.Field0.Ez(timesteps=1000).getData()[0] +Validate("Ez field at iteration 1000", Ez, 1e-9) + +# VERIFY THE IONIZATION RATE Vs THEORY +w_r = S.namelist.Main.reference_angular_frequency_SI +au_to_w0 = 4.134137172e+16 / w_r +Ec_to_au = 3.314742578e-15 * w_r +a0 = S.namelist.Laser[0].space_envelope[1] + +def calculate_ionization(Ip, l): + Zat = len(Ip) + Z = np.arange(Zat) + nstar = (Z+1.) * (2.*Ip)**-0.5 + alpha = 2.*nstar - 1. + gc = np.array([math.gamma(c) for c in 2.*nstar]) + beta = 2.**(2.*nstar-1.)/(2.*nstar)/gc * (8.*l+4.) * Ip * au_to_w0 + gamma = 2.*(2.*Ip)**1.5 + + tmax = 0.3*2.*np.pi + dt = 2.*np.pi/200. + prev_n = np.zeros((Zat+1,)); prev_n[0]=1. + times = [] + Zstar = [] + for t in np.arange(0, tmax, dt): + E = abs(a0 * (math.sin(t-0.05) if t>0.05 else 0.)) *Ec_to_au + if E > 1e-5: + n = np.zeros((Zat+1,)) + for z in range(0,Zat+1): + Wp = 0. + if z < Zat: + deltap = gamma[z]/E + if deltap>1e-18: + Wp = beta[z] * deltap**alpha[z] * math.exp(-deltap/3.) + n[z] = (1.-Wp*dt/2.)/(1.+Wp*dt/2.)*prev_n[z] + if z > 0: + deltam = gamma[z-1]/E + if deltam>1e-18: + Wm = beta[z-1] * deltam**alpha[z-1] * math.exp(-deltam/3.) + n[z] += Wm*dt/(1.+Wm*dt/2.)*prev_n[z-1] + prev_n = n + times += [t] + Zstar += [ np.sum(prev_n*np.arange(Zat+1))/np.sum(prev_n) ] + return times, Zstar + +# hydrogen +charge_distribution = S.ParticleBinning.Diag0().getData() +charge_distribution /= charge_distribution[0].sum() +n = charge_distribution[0].size +mean_charge = [np.sum(d*np.arange(n)) for d in charge_distribution] +# # theory +# Ip = np.array([13.5984])/27.2114 +# l = np.array([0]) +# t, Zs = calculate_ionization(Ip, l) +# times = charge["times"]*S.namelist.Main.timestep +# Zs_theory = interp1d(t, Zs) (times) +Validate("Hydrogen mean charge vs time", mean_charge, 0.03) + +# carbon +charge_distribution = S.ParticleBinning.Diag1().getData() +charge_distribution /= charge_distribution[0].sum() +n = charge_distribution[0].size +mean_charge = [np.sum(d*np.arange(n)) for d in charge_distribution] +# # theory +# Ip = np.array([11.2602,24.3845,47.8877,64.4935,392.0905,489.9931]) /27.2114 +# l = np.array([1,1,0,0,0,0]) +# t, Zs = calculate_ionization(Ip, l) +# times = charge["times"]*S.namelist.Main.timestep +# Zs_theory = interp1d(t, Zs) (times) +Validate("Carbon mean charge vs time", mean_charge, 0.03) + +# SCALARS RELATED TO SPECIES +Validate("Scalar Dens_electron", S.Scalar.Dens_electron().getData(), 0.003) +Validate("Scalar Ntot_electron", S.Scalar.Ntot_electron().getData(), 100.) +Validate("Scalar Zavg_carbon" , S.Scalar.Zavg_carbon ().getData(), 0.2) + +# TRACKING DIAGNOSTIC +d = S.TrackParticles("electron", axes=["Id","x","Wy"], timesteps=1000).getData() +keep = d["Id"] > 0 +order = np.argsort(d["x"][keep]) +Validate("Track electron x", d["x"][keep][order][::200], 1e-4) +Validate("Track electron Wy", gaussian_filter(maximum_filter1d(d["Wy"][keep][order],20),200)[::200], 1e-5) + +# NEW PARTICLES DIAGNOSTIC +d = S.NewParticles.electron().get() +t = d["t"] +q = d["q"] +Validate("DiagNewParticles: number of particles", t.size, 5. ) +tavg = [np.mean(t[q==i]) for i in [0,1,2,3]] +Validate("DiagNewParticles: time vs ionization state", tavg, 0.01 ) diff --git a/validation/analyses/validate_tst1d_27_fullPPT_ionisation.py b/validation/analyses/validate_tst1d_27_fullPPT_ionisation.py new file mode 100755 index 000000000..19148bd74 --- /dev/null +++ b/validation/analyses/validate_tst1d_27_fullPPT_ionisation.py @@ -0,0 +1,105 @@ +import os, re, numpy as np, math +from scipy.interpolate import interp1d +from scipy.ndimage import gaussian_filter, maximum_filter1d +from h5py import File +from glob import glob +import happi + +S = happi.Open(["./restart*"], verbose=False) + + + +# COMPARE THE Ey FIELD +Ey = S.Field.Field0.Ey(timesteps=1000).getData()[0] +Validate("Ey field at iteration 1000", Ey, 0.001) + +# COMPARE THE Ez FIELD +Ez = S.Field.Field0.Ez(timesteps=1000).getData()[0] +Validate("Ez field at iteration 1000", Ez, 1e-9) + +# VERIFY THE IONIZATION RATE Vs THEORY +w_r = S.namelist.Main.reference_angular_frequency_SI +au_to_w0 = 4.134137172e+16 / w_r +Ec_to_au = 3.314742578e-15 * w_r +a0 = S.namelist.Laser[0].space_envelope[1] + +def calculate_ionization(Ip, l): + Zat = len(Ip) + Z = np.arange(Zat) + nstar = (Z+1.) * (2.*Ip)**-0.5 + alpha = 2.*nstar - 1. + gc = np.array([math.gamma(c) for c in 2.*nstar]) + beta = 2.**(2.*nstar-1.)/(2.*nstar)/gc * (8.*l+4.) * Ip * au_to_w0 + gamma = 2.*(2.*Ip)**1.5 + + tmax = 0.3*2.*np.pi + dt = 2.*np.pi/200. + prev_n = np.zeros((Zat+1,)); prev_n[0]=1. + times = [] + Zstar = [] + for t in np.arange(0, tmax, dt): + E = abs(a0 * (math.sin(t-0.05) if t>0.05 else 0.)) *Ec_to_au + if E > 1e-5: + n = np.zeros((Zat+1,)) + for z in range(0,Zat+1): + Wp = 0. + if z < Zat: + deltap = gamma[z]/E + if deltap>1e-18: + Wp = beta[z] * deltap**alpha[z] * math.exp(-deltap/3.) + n[z] = (1.-Wp*dt/2.)/(1.+Wp*dt/2.)*prev_n[z] + if z > 0: + deltam = gamma[z-1]/E + if deltam>1e-18: + Wm = beta[z-1] * deltam**alpha[z-1] * math.exp(-deltam/3.) + n[z] += Wm*dt/(1.+Wm*dt/2.)*prev_n[z-1] + prev_n = n + times += [t] + Zstar += [ np.sum(prev_n*np.arange(Zat+1))/np.sum(prev_n) ] + return times, Zstar + +# hydrogen +charge_distribution = S.ParticleBinning.Diag0().getData() +charge_distribution /= charge_distribution[0].sum() +n = charge_distribution[0].size +mean_charge = [np.sum(d*np.arange(n)) for d in charge_distribution] +# # theory +# Ip = np.array([13.5984])/27.2114 +# l = np.array([0]) +# t, Zs = calculate_ionization(Ip, l) +# times = charge["times"]*S.namelist.Main.timestep +# Zs_theory = interp1d(t, Zs) (times) +Validate("Hydrogen mean charge vs time", mean_charge, 0.03) + +# carbon +charge_distribution = S.ParticleBinning.Diag1().getData() +charge_distribution /= charge_distribution[0].sum() +n = charge_distribution[0].size +mean_charge = [np.sum(d*np.arange(n)) for d in charge_distribution] +# # theory +# Ip = np.array([11.2602,24.3845,47.8877,64.4935,392.0905,489.9931]) /27.2114 +# l = np.array([1,1,0,0,0,0]) +# t, Zs = calculate_ionization(Ip, l) +# times = charge["times"]*S.namelist.Main.timestep +# Zs_theory = interp1d(t, Zs) (times) +Validate("Carbon mean charge vs time", mean_charge, 0.03) + +# SCALARS RELATED TO SPECIES +Validate("Scalar Dens_electron", S.Scalar.Dens_electron().getData(), 0.003) +Validate("Scalar Ntot_electron", S.Scalar.Ntot_electron().getData(), 100.) +Validate("Scalar Zavg_carbon" , S.Scalar.Zavg_carbon ().getData(), 0.2) + +# TRACKING DIAGNOSTIC +d = S.TrackParticles("electron", axes=["Id","x","Wy"], timesteps=1000).getData() +keep = d["Id"] > 0 +order = np.argsort(d["x"][keep]) +Validate("Track electron x", d["x"][keep][order][::200], 1e-4) +Validate("Track electron Wy", gaussian_filter(maximum_filter1d(d["Wy"][keep][order],20),200)[::200], 1e-5) + +# NEW PARTICLES DIAGNOSTIC +d = S.NewParticles.electron().get() +t = d["t"] +q = d["q"] +Validate("DiagNewParticles: number of particles", t.size, 5. ) +tavg = [np.mean(t[q==i]) for i in [0,1,2,3]] +Validate("DiagNewParticles: time vs ionization state", tavg, 0.01 ) diff --git a/validation/analyses/validate_tst_collisions3_AM_uniformity.py b/validation/analyses/validate_tst_collisions3_AM_uniformity.py new file mode 100644 index 000000000..edb52201a --- /dev/null +++ b/validation/analyses/validate_tst_collisions3_AM_uniformity.py @@ -0,0 +1,7 @@ +import happi + +S = happi.Open(["./restart*"], verbose=False) + +Validate("ion Pyy vs x", S.ParticleBinning(0, sum={"y":"all"}).getData()[-1], 0.4e-10) +Validate("ion Pyy vs y", S.ParticleBinning(0, sum={"x":"all"}).getData()[-1], 0.4e-10) + diff --git a/validation/references/tst1d_25_bsi_ionisation.py.txt b/validation/references/tst1d_25_bsi_ionisation.py.txt new file mode 100644 index 000000000..819098e0b Binary files /dev/null and b/validation/references/tst1d_25_bsi_ionisation.py.txt differ diff --git a/validation/references/tst1d_26_TL_ionisation.py.txt b/validation/references/tst1d_26_TL_ionisation.py.txt new file mode 100644 index 000000000..faf76598b Binary files /dev/null and b/validation/references/tst1d_26_TL_ionisation.py.txt differ diff --git a/validation/references/tst1d_27_fullPPT_ionisation.py.txt b/validation/references/tst1d_27_fullPPT_ionisation.py.txt new file mode 100644 index 000000000..ead3bd462 Binary files /dev/null and b/validation/references/tst1d_27_fullPPT_ionisation.py.txt differ diff --git a/validation/references/tst_collisions3_AM_uniformity.py.txt b/validation/references/tst_collisions3_AM_uniformity.py.txt new file mode 100644 index 000000000..146b36250 Binary files /dev/null and b/validation/references/tst_collisions3_AM_uniformity.py.txt differ