Triumvirate C++ API 0.5.0.post1.dev301+g026f21751
Three-point clustering measurements in large-scale structure analyses.
Loading...
Searching...
No Matches
field.cpp
Go to the documentation of this file.
1// Triumvirate: Three-Point Clustering Measurements in LSS
2//
3// Copyright (C) 2023 Mike S Wang & Naonori S Sugiyama [GPL-3.0-or-later]
4//
5// This file is part of the Triumvirate program. See the COPYRIGHT
6// and LICENCE files at the top-level directory of this distribution
7// for details of copyright and licensing.
8//
9// This program is free software: you can redistribute it and/or modify it
10// under the terms of the GNU General Public License as published by
11// the Free Software Foundation, either version 3 of the License, or
12// (at your option) any later version.
13//
14// This program is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
17// See the GNU General Public License for more details.
18//
19// You should have received a copy of the GNU General Public License
20// along with this program. If not, see <https://www.gnu.org/licenses/>.
21
28
29#include "field.hpp"
30
31namespace trva = trv::array;
32namespace trvs = trv::sys;
33namespace trvm = trv::maths;
34
35namespace trv {
36
37// ***********************************************************************
38// Mesh field
39// ***********************************************************************
40
41// -----------------------------------------------------------------------
42// Life cycle
43// -----------------------------------------------------------------------
44
46 trv::ParameterSet& params, bool plan_ini, const std::string& name
47) {
48 // Attach the full parameter set to @ref trv::MeshField.
49 this->params = params;
50 this->name = name;
51
52 trvs::logger.reset_level(params.verbose);
53
54 // Initialise the field (and its shadow field if interlacing is used)
55 // and increase allocated memory.
56 this->field = fftw_alloc_complex(this->params.nmesh);
57
63
64 if (this->params.interlace == "true") {
65 this->field_s = fftw_alloc_complex(this->params.nmesh);
66// #ifdef TRV_USE_HIP
67// if (trvs::is_gpu_enabled()) {
68// HIP_EXEC(hipMalloc(
69// &this->d_field_s, sizeof(fft_double_complex) * this->params.nmesh
70// ));
71// }
72// #endif // TRV_USE_HIP
73
79 }
80
81 this->reset_density_field(); // initialise; likely redundant but safe
82
83 // Initialise FFT(W) plans.
84 if (plan_ini) {
85#if defined(TRV_USE_OMP) && defined(TRV_USE_FFTWOMP)
86 // Initialise FFTW multithreading.
87 fftw_plan_with_nthreads(omp_get_max_threads());
88#endif // TRV_USE_OMP && TRV_USE_FFTWOMP
89
90 // Initialise FFTW wisdom (effective when not in GPU mode).
91 bool import_fftw_wisdom_f = false;
92 bool import_fftw_wisdom_b = false;
93 bool export_fftw_wisdom_f = false;
94 bool export_fftw_wisdom_b = false;
95 std::FILE* fftw_wisdom_file_f = nullptr;
96 std::FILE* fftw_wisdom_file_b = nullptr;
97
98 if (this->params.use_fftw_wisdom != "") {
100 fftw_wisdom_file_f =
101 std::fopen(this->params.fftw_wisdom_file_f.c_str(), "r");
102 if (fftw_wisdom_file_f != nullptr) {
103 import_fftw_wisdom_f = true;
104 export_fftw_wisdom_f = false;
105 if (this->params.fftw_scheme == "patient") {
106 if (trvs::currTask == 0) {
107 trvs::logger.info(
108 "FFTW planner flag is set to `FFTW_PATIENT`. "
109 "Ensure that the FFTW wisdom file '%s' imported has been "
110 "generated with an equivalent or higher planner flag.",
111 this->params.fftw_wisdom_file_f.c_str()
112 );
113 }
114 }
115 } else {
116 import_fftw_wisdom_f = false;
117 export_fftw_wisdom_f = true;
118 if (trvs::currTask == 0) {
119 trvs::logger.info(
120 "No FFTW wisdom file for forward transforms "
121 "could be imported: %s",
122 this->params.fftw_wisdom_file_f.c_str()
123 );
124 }
125 }
126 }
127
129 fftw_wisdom_file_b =
130 fopen(this->params.fftw_wisdom_file_b.c_str(), "r");
131 if (fftw_wisdom_file_b != nullptr) {
132 import_fftw_wisdom_b = true;
133 export_fftw_wisdom_b = false;
134 if (this->params.fftw_scheme == "patient") {
135 if (trvs::currTask == 0) {
136 trvs::logger.info(
137 "FFTW planner flag is set to `FFTW_PATIENT`. "
138 "Ensure that the FFTW wisdom file '%s' imported has been "
139 "generated with an equivalent or higher planner flag.",
140 this->params.fftw_wisdom_file_b.c_str()
141 );
142 }
143 }
144 } else {
145 import_fftw_wisdom_b = false;
146 export_fftw_wisdom_b = true;
147 if (trvs::currTask == 0) {
148 trvs::logger.info(
149 "No FFTW wisdom file for backward transforms "
150 "could be imported: %s",
151 this->params.fftw_wisdom_file_b.c_str()
152 );
153 }
154 }
155 }
156 }
157
158 if (import_fftw_wisdom_f) {
159 fftw_import_wisdom_from_filename(
160 this->params.fftw_wisdom_file_f.c_str()
161 );
163 if (trvs::currTask == 0) {
164 trvs::logger.info(
165 "FFTW wisdom file for forward transforms has been imported: %s",
166 this->params.fftw_wisdom_file_f.c_str()
167 );
168 }
169 }
170
171 // Initialise GPU context (effective when in GPU mode).
172 std::vector<int> gpus = trvs::get_gpu_ids();
173 if (trvs::is_gpu_enabled()) {
174#ifdef _CUDA_STREAM
175 CUDA_EXEC(cudaStreamCreate(&this->custream));
176#endif // _CUDA_STREAM
177 }
178
179#ifdef TRV_USE_HIP
180 if (trvs::is_gpu_enabled()) {
181 HIP_EXEC(hipMalloc(
182 &this->d_field, sizeof(fft_double_complex) * this->params.nmesh
183 ));
184 // trvs::gbytesMemGPU +=
185 // trvs::size_in_gb<fft_double_complex>(this->params.nmesh);
186 // trvs::update_maxmem(trvs::is_gpu_enabled());
187 }
188#endif // TRV_USE_HIP
189
190 auto pre_plan_f_timept = std::chrono::system_clock::now();
191 if (trvs::is_gpu_enabled()) {
192#if defined(TRV_USE_HIP)
193 HIPFFT_EXEC(hipfftPlan3d(
194 &this->transform_gpu,
195 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
196 HIPFFT_Z2Z
197 ));
198 trvs::gbytesMemGPU += trvs::worksize_in_gb(
199 this->transform_gpu,
200 HIPFFT_Z2Z,
201 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
202 gpus
203 );
205#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
206 CUFFT_EXEC(cufftCreate(&this->transform_gpu));
207
208 #ifdef _CUDA_STREAM
209 CUFFT_EXEC(cufftSetStream(this->transform_gpu, this->custream));
210 #endif // _CUDA_STREAM
211
212 if (!trvs::is_gpu_single()) {
213 CUFFT_EXEC(cufftXtSetGPUs(
214 this->transform_gpu, gpus.size(), gpus.data()
215 ));
216 }
217
218 std::size_t workspace_sizes[gpus.size()];
219 CUFFT_EXEC(cufftMakePlan3d(
220 this->transform_gpu,
221 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
222 CUFFT_Z2Z,
223 workspace_sizes
224 ));
225
226 if (trvs::is_gpu_single()) {
227 CUDA_EXEC(cudaMalloc(
228 &this->d_field, sizeof(fft_double_complex) * this->params.nmesh
229 ));
230 } else {
231 CUFFT_EXEC(cufftXtMalloc(
232 this->transform_gpu, &this->field_desc,
233 CUFFT_XT_FORMAT_INPLACE_SHUFFLED
234 ));
235 }
236 trvs::gbytesMemGPU += trvs::worksize_in_gb(
237 this->transform_gpu,
238 CUFFT_Z2Z,
239 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
240 workspace_sizes,
241 gpus
242 );
244#endif // TRV_USE_HIP
245 } else {
246 this->transform = fftw_plan_dft_3d(
247 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
248 this->field, this->field,
249 FFTW_FORWARD, this->params.fftw_planner_flag
250 );
251 }
252 auto post_plan_f_timept = std::chrono::system_clock::now();
253
254 double plan_f_time = std::chrono::duration<double>(
255 post_plan_f_timept - pre_plan_f_timept
256 ).count();
257 if (import_fftw_wisdom_f && plan_f_time > 1.) {
258 if (trvs::currTask == 0) {
259 trvs::logger.info(
260 "FFTW plan for forward transforms took %.3f (> 1.) seconds "
261 "despite importing wisdom file, which may have been created "
262 "under different runtime conditions. "
263 "A new wisdom file will be exported.",
264 plan_f_time
265 );
266 }
267 export_fftw_wisdom_f = true;
268 }
269
270 if (export_fftw_wisdom_f) {
271 fftw_export_wisdom_to_filename(
272 this->params.fftw_wisdom_file_f.c_str()
273 );
274 if (trvs::currTask == 0) {
275 trvs::logger.info(
276 "FFTW wisdom file for forward transforms has been exported: %s",
277 this->params.fftw_wisdom_file_f.c_str()
278 );
279 }
281 }
282
283 if (import_fftw_wisdom_b) {
284 fftw_import_wisdom_from_filename(
285 this->params.fftw_wisdom_file_b.c_str()
286 );
288 if (trvs::currTask == 0) {
289 trvs::logger.info(
290 "FFTW wisdom file for backward transforms has been imported: %s",
291 this->params.fftw_wisdom_file_b.c_str()
292 );
293 }
294 }
295
296 auto pre_plan_b_timept = std::chrono::system_clock::now();
297 if (trvs::is_gpu_enabled()) {
298 } else {
299 this->inv_transform = fftw_plan_dft_3d(
300 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
301 this->field, this->field,
302 FFTW_BACKWARD, this->params.fftw_planner_flag
303 );
304 }
305 auto post_plan_b_timept = std::chrono::system_clock::now();
306
307 double plan_b_time = std::chrono::duration<double>(
308 post_plan_b_timept - pre_plan_b_timept
309 ).count();
310 if (import_fftw_wisdom_b && plan_b_time > 1.) {
311 if (trvs::currTask == 0) {
312 trvs::logger.info(
313 "FFTW plan for backward transforms took %.3f (> 1.) seconds "
314 "despite importing wisdom file, which may have been created "
315 "under different runtime conditions. "
316 "A new wisdom file will be exported.",
317 plan_b_time
318 );
319 }
320 export_fftw_wisdom_b = true;
321 }
322
323 if (export_fftw_wisdom_b) {
324 fftw_export_wisdom_to_filename(
325 this->params.fftw_wisdom_file_b.c_str()
326 );
327 if (trvs::currTask == 0) {
328 trvs::logger.info(
329 "FFTW wisdom file for backward transforms has been exported: %s",
330 this->params.fftw_wisdom_file_b.c_str()
331 );
332 }
334 }
335
336 if (this->params.interlace == "true") {
337 if (trvs::is_gpu_enabled()) {
338 } else {
339 this->transform_s = fftw_plan_dft_3d(
340 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
341 this->field_s, this->field_s,
342 FFTW_FORWARD, this->params.fftw_planner_flag
343 );
344 }
345 }
346 this->plan_ini = true;
347
348 if (fftw_wisdom_file_f != nullptr) {std::fclose(fftw_wisdom_file_f);}
349 if (fftw_wisdom_file_b != nullptr) {std::fclose(fftw_wisdom_file_b);}
350 }
351
352 // Calculate grid sizes in configuration space.
353 this->dr[0] = this->params.boxsize[0] / this->params.ngrid[0];
354 this->dr[1] = this->params.boxsize[1] / this->params.ngrid[1];
355 this->dr[2] = this->params.boxsize[2] / this->params.ngrid[2];
356
357 // Calculate fundamental wavenumbers in Fourier space.
358 this->dk[0] = 2.*M_PI / this->params.boxsize[0];
359 this->dk[1] = 2.*M_PI / this->params.boxsize[1];
360 this->dk[2] = 2.*M_PI / this->params.boxsize[2];
361
362 // Calculate mesh volume and mesh grid cell volume.
363 this->vol = this->params.volume;
364 this->vol_cell = this->vol / double(this->params.nmesh);
365}
366
369 fftw_plan& transform, fftw_plan& inv_transform,
370 const std::string& name
371) {
372 if (trvs::is_gpu_enabled()) {
373 if (trvs::currTask == 0) {
374 trvs::logger.error(
375 "MeshField constructor called with external FFTW plans in GPU mode."
376 );
377 }
378 throw std::runtime_error(
379 "MeshField constructor called with external FFTW plans in GPU mode."
380 );
381 }
382
383 // Attach the full parameter set to @ref trv::MeshField.
384 this->params = params;
385 this->name = name;
386
387 trvs::logger.reset_level(params.verbose);
388
389 // Initialise the field (and its shadow field if interlacing is used)
390 // and increase allocated memory.
391 this->field = fftw_alloc_complex(this->params.nmesh);
392
394 trvs::count_grid += 1;
398
399 if (this->params.interlace == "true") {
400 this->field_s = fftw_alloc_complex(this->params.nmesh);
401
403 trvs::count_grid += 1;
407 }
408
409 this->reset_density_field(); // initialise; likely redundant but safe
410
411 // Initialise FFTW plans.
412 this->transform = transform;
413 this->inv_transform = inv_transform;
414 if (this->params.interlace == "true") {
415 this->transform_s = transform;
416 }
417 this->plan_ext = true;
418
419 // Calculate grid sizes in configuration space.
420 this->dr[0] = this->params.boxsize[0] / this->params.ngrid[0];
421 this->dr[1] = this->params.boxsize[1] / this->params.ngrid[1];
422 this->dr[2] = this->params.boxsize[2] / this->params.ngrid[2];
423
424 // Calculate fundamental wavenumbers in Fourier space.
425 this->dk[0] = 2.*M_PI / this->params.boxsize[0];
426 this->dk[1] = 2.*M_PI / this->params.boxsize[1];
427 this->dk[2] = 2.*M_PI / this->params.boxsize[2];
428
429 // Calculate mesh volume and mesh grid cell volume.
430 this->vol = this->params.volume;
431 this->vol_cell = this->vol / double(this->params.nmesh);
432}
433
435 if (this->plan_ini) {
436 if (trvs::is_gpu_enabled()) {
437 std::vector<int> gpus = trvs::get_gpu_ids();
438 std::vector<std::size_t> workspace_sizes(gpus.size());
439#if defined(TRV_USE_HIP)
440 HIP_EXEC(hipFree(this->d_field));
441 // trvs::gbytesMemGPU -=
442 // trvs::size_in_gb<fft_double_complex>(this->params.nmesh);
443 trvs::gbytesMemGPU -= trvs::worksize_in_gb(
444 this->transform_gpu,
445 HIPFFT_Z2Z,
446 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
447 gpus
448 );
449 HIPFFT_EXEC(hipfftDestroy(this->transform_gpu));
450#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
451 if (trvs::is_gpu_single()) {
452 CUDA_EXEC(cudaFree(this->d_field));
453 } else {
454 CUFFT_EXEC(cufftXtFree(this->field_desc));
455 }
456 trvs::gbytesMemGPU -= trvs::worksize_in_gb(
457 this->transform_gpu,
458 CUFFT_Z2Z,
459 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
460 workspace_sizes.data(),
461 gpus
462 );
463 CUFFT_EXEC(cufftDestroy(this->transform_gpu));
464 #ifdef _CUDA_STREAM
465 // Destroy CUDA streams.
466 CUDA_EXEC(cudaStreamDestroy(this->custream));
467 #endif // _CUDA_STREAM
468#endif
469 } else {
470 fftw_destroy_plan(this->transform);
471 fftw_destroy_plan(this->inv_transform);
472 if (this->params.interlace == "true") {
473 fftw_destroy_plan(this->transform_s);
474 }
475 }
476 }
477
478 if (this->window_assign_order != -1) {
480 trvs::count_grid -= .5;
482 }
483
484 if (this->field != nullptr) {
485 fftw_free(this->field);
486 this->field = nullptr;
488 trvs::count_grid -= 1;
490 }
491 if (this->field_s != nullptr) {
492 fftw_free(this->field_s);
493 this->field_s = nullptr;
495 trvs::count_grid -= 1;
497 }
498}
499
501#ifdef TRV_USE_OMP
502#pragma omp parallel for simd
503#endif // TRV_USE_OMP
504 for (long long gid = 0; gid < this->params.nmesh; gid++) {
505 this->field[gid][0] = 0.;
506 this->field[gid][1] = 0.;
507 }
508 if (this->params.interlace == "true") {
509#ifdef TRV_USE_OMP
510#pragma omp parallel for simd
511#endif // TRV_USE_OMP
512 for (long long gid = 0; gid < this->params.nmesh; gid++) {
513 this->field_s[gid][0] = 0.;
514 this->field_s[gid][1] = 0.;
515 }
516 }
517}
518
519
520// -----------------------------------------------------------------------
521// Operators & reserved methods
522// -----------------------------------------------------------------------
523
524const fftw_complex& MeshField::operator[](long long gid) {
525 return this->field[gid];
526}
527
528
529// -----------------------------------------------------------------------
530// Mesh grid properties
531// -----------------------------------------------------------------------
532
533long long MeshField::ret_grid_index(int i, int j, int k) {
534 long long idx_grid =
535 (i * static_cast<long long>(this->params.ngrid[1]) + j)
536 * this->params.ngrid[2] + k;
537 return idx_grid;
538}
539
540void MeshField::shift_grid_indices_fourier(int& i, int& j, int& k) {
541 i = (i < this->params.ngrid[0]/2) ? i : i - this->params.ngrid[0];
542 j = (j < this->params.ngrid[1]/2) ? j : j - this->params.ngrid[1];
543 k = (k < this->params.ngrid[2]/2) ? k : k - this->params.ngrid[2];
544}
545
546void MeshField::get_grid_pos_vector(int i, int j, int k, double rvec[3]) {
547 rvec[0] = (i < this->params.ngrid[0]/2) ?
548 i * this->dr[0] : (i - this->params.ngrid[0]) * this->dr[0];
549 rvec[1] = (j < this->params.ngrid[1]/2) ?
550 j * this->dr[1] : (j - this->params.ngrid[1]) * this->dr[1];
551 rvec[2] = (k < this->params.ngrid[2]/2) ?
552 k * this->dr[2] : (k - this->params.ngrid[2]) * this->dr[2];
553}
554
555void MeshField::get_grid_wavevector(int i, int j, int k, double kvec[3]) {
556 kvec[0] = (i < this->params.ngrid[0]/2) ?
557 i * this->dk[0] : (i - this->params.ngrid[0]) * this->dk[0];
558 kvec[1] = (j < this->params.ngrid[1]/2) ?
559 j * this->dk[1] : (j - this->params.ngrid[1]) * this->dk[1];
560 kvec[2] = (k < this->params.ngrid[2]/2) ?
561 k * this->dk[2] : (k - this->params.ngrid[2]) * this->dk[2];
562}
563
564
565// -----------------------------------------------------------------------
566// Mesh assignment
567// -----------------------------------------------------------------------
568
570 ParticleCatalogue& particles, fftw_complex* weights
571) {
572 if (trvs::currTask == 0) {
573 trvs::logger.debug(
574 "Performing mesh assignment scheme '%s' to '%s'.",
575 this->params.assignment.c_str(),
576 this->name.c_str()
577 );
578 }
579
580 for (int iaxis = 0; iaxis < 3; iaxis++) {
581 double extent = particles.pos_max[iaxis] - particles.pos_min[iaxis];
582 if (params.boxsize[iaxis] < extent) {
583 if (trvs::currTask == 0) {
584 trvs::logger.warn(
585 "Box size in dimension %d is smaller than catalogue extents: "
586 "%.3f < %.3f.",
587 iaxis, params.boxsize[iaxis], extent
588 );
589 }
590 }
591 }
592
593 if (this->params.assignment == "ngp") {
594 this->assign_weighted_field_to_mesh_ngp(particles, weights);
595 } else
596 if (this->params.assignment == "cic") {
597 this->assign_weighted_field_to_mesh_cic(particles, weights);
598 } else
599 if (this->params.assignment == "tsc") {
600 this->assign_weighted_field_to_mesh_tsc(particles, weights);
601 } else
602 if (this->params.assignment == "pcs") {
603 this->assign_weighted_field_to_mesh_pcs(particles, weights);
604 } else {
605 if (trvs::currTask == 0) {
606 trvs::logger.error(
607 "Unsupported mesh assignment scheme: '%s'.",
608 this->params.assignment.c_str()
609 );
610 };
612 "Unsupported mesh assignment scheme: '%s'.",
613 this->params.assignment.c_str()
614 );
615 }
616}
617
618void MeshField::assign_weighted_field_to_mesh_ngp(
619 ParticleCatalogue& particles, fftw_complex* weight
620) {
621 // Set interpolation order, i.e. number of grids, per dimension,
622 // to which a single particle is assigned.
623 const int order = 1;
624
625 // Here the field is given by Σᵢ wᵢ δᴰ(x - xᵢ),
626 // where δᴰ ↔ δᴷ / dV, dV =: `vol_cell`.
627 const double inv_vol_cell = 1 / this->vol_cell;
628
629 // Reset field values to zero.
630 this->reset_density_field();
631
632 // Perform assignment.
633#ifdef TRV_USE_OMP
634#pragma omp parallel for
635#endif // TRV_USE_OMP
636 for (int pid = 0; pid < particles.ntotal; pid++) {
637 int ijk[order][3]; // grid index coordinates of covered grid cells
638 double win[order][3]; // sampling window
639 long long gid = 0; // flattened grid cell index
640
641 for (int iaxis = 0; iaxis < 3; iaxis++) {
642 // Carefully set covered sampling window grid indices.
643 double loc_grid = this->params.ngrid[iaxis]
644 * particles[pid].pos[iaxis] / this->params.boxsize[iaxis];
645
646 int idx_grid = int(loc_grid);
647 if (loc_grid - idx_grid >= 0.5) {
648 idx_grid = (idx_grid == this->params.ngrid[iaxis] - 1)
649 ? 0 : idx_grid + 1;
650 }
651
652 ijk[0][iaxis] = idx_grid;
653
654 // Set sampling window value (only 0th element as ``order == 1``).
655 win[0][iaxis] = 1.;
656 }
657
658 for (int iloc = 0; iloc < order; iloc++) {
659 for (int jloc = 0; jloc < order; jloc++) {
660 for (int kloc = 0; kloc < order; kloc++) {
661 gid = this->ret_grid_index(
662 ijk[iloc][0], ijk[jloc][1], ijk[kloc][2]
663 );
664 if (0 <= gid && gid < this->params.nmesh) {
665OMP_ATOMIC
666 this->field[gid][0] += inv_vol_cell
667 * weight[pid][0] * win[iloc][0] * win[jloc][1] * win[kloc][2];
668OMP_ATOMIC
669 this->field[gid][1] += inv_vol_cell
670 * weight[pid][1] * win[iloc][0] * win[jloc][1] * win[kloc][2];
671 }
672 }
673 }
674 }
675 }
676
677 // Perform interlacing if needed.
678 if (this->params.interlace == "true") {
679#ifdef TRV_USE_OMP
680#pragma omp parallel for
681#endif // TRV_USE_OMP
682 for (int pid = 0; pid < particles.ntotal; pid++) {
683 int ijk[order][3];
684 double win[order][3];
685 long long gid = 0;
686
687 for (int iaxis = 0; iaxis < 3; iaxis++) {
688 // Apply a half-grid shift and impose the periodic boundary condition.
689 double loc_grid = this->params.ngrid[iaxis]
690 * particles[pid].pos[iaxis] / this->params.boxsize[iaxis] + 0.5;
691
692 if (loc_grid > this->params.ngrid[iaxis]) {
693 loc_grid -= this->params.ngrid[iaxis];
694 }
695
696 int idx_grid = int(loc_grid);
697 if (loc_grid - idx_grid >= 0.5) {
698 idx_grid = (idx_grid == this->params.ngrid[iaxis] - 1)
699 ? 0 : idx_grid + 1;
700 }
701
702 ijk[0][iaxis] = idx_grid;
703
704 win[0][iaxis] = 1.;
705 }
706
707 for (int iloc = 0; iloc < order; iloc++) {
708 for (int jloc = 0; jloc < order; jloc++) {
709 for (int kloc = 0; kloc < order; kloc++) {
710 gid = this->ret_grid_index(
711 ijk[iloc][0], ijk[jloc][1], ijk[kloc][2]
712 );
713 if (0 <= gid && gid < this->params.nmesh) {
714OMP_ATOMIC
715 this->field_s[gid][0] += inv_vol_cell
716 * weight[pid][0] * win[iloc][0] * win[jloc][1] * win[kloc][2];
717OMP_ATOMIC
718 this->field_s[gid][1] += inv_vol_cell
719 * weight[pid][1] * win[iloc][0] * win[jloc][1] * win[kloc][2];
720 }
721 }
722 }
723 }
724 }
725 }
726}
727
728void MeshField::assign_weighted_field_to_mesh_cic(
729 ParticleCatalogue& particles, fftw_complex* weight
730) {
731 // Set interpolation order, i.e. number of grids, per dimension,
732 // to which a single particle is assigned.
733 const int order = 2;
734
735 // Here the field is given by Σᵢ wᵢ δᴰ(x - xᵢ),
736 // where δᴰ ↔ δᴷ / dV, dV =: `vol_cell`.
737 const double inv_vol_cell = 1 / this->vol_cell;
738
739 // Reset field values to zero.
740 this->reset_density_field();
741
742 // Perform assignment.
743#ifdef TRV_USE_OMP
744#pragma omp parallel for
745#endif // TRV_USE_OMP
746 for (int pid = 0; pid < particles.ntotal; pid++) {
747 int ijk[order][3]; // grid index coordinates of covered grid cells
748 double win[order][3]; // sampling window
749 long long gid = 0; // flattened grid cell index
750
751 for (int iaxis = 0; iaxis < 3; iaxis++) {
752 // Carefully set covered sampling window grid indices.
753 double loc_grid = this->params.ngrid[iaxis]
754 * particles[pid].pos[iaxis] / this->params.boxsize[iaxis];
755
756 int idx_grid = int(loc_grid);
757
758 ijk[0][iaxis] = idx_grid;
759 ijk[1][iaxis] = (idx_grid == this->params.ngrid[iaxis] - 1)
760 ? 0 : idx_grid + 1;
761
762 // Set sampling window value (up to the 1st element as `order == 2`).
763 double s = loc_grid - idx_grid; // particle-to-grid grid-index distance
764
765 win[0][iaxis] = 1. - s;
766 win[1][iaxis] = s;
767 }
768
769 for (int iloc = 0; iloc < order; iloc++) {
770 for (int jloc = 0; jloc < order; jloc++) {
771 for (int kloc = 0; kloc < order; kloc++) {
772 gid = this->ret_grid_index(
773 ijk[iloc][0], ijk[jloc][1], ijk[kloc][2]
774 );
775 if (0 <= gid && gid < this->params.nmesh) {
776OMP_ATOMIC
777 this->field[gid][0] += inv_vol_cell
778 * weight[pid][0] * win[iloc][0] * win[jloc][1] * win[kloc][2];
779OMP_ATOMIC
780 this->field[gid][1] += inv_vol_cell
781 * weight[pid][1] * win[iloc][0] * win[jloc][1] * win[kloc][2];
782 }
783 }
784 }
785 }
786 }
787
788 // Perform interlacing if needed.
789 if (this->params.interlace == "true") {
790#ifdef TRV_USE_OMP
791#pragma omp parallel for
792#endif // TRV_USE_OMP
793 for (int pid = 0; pid < particles.ntotal; pid++) {
794 int ijk[order][3];
795 double win[order][3];
796 long long gid = 0;
797
798 for (int iaxis = 0; iaxis < 3; iaxis++) {
799 // Apply a half-grid shift and impose the periodic boundary condition.
800 double loc_grid = this->params.ngrid[iaxis]
801 * particles[pid].pos[iaxis] / this->params.boxsize[iaxis] + 0.5;
802
803 if (loc_grid > this->params.ngrid[iaxis]) {
804 loc_grid -= this->params.ngrid[iaxis];
805 }
806
807 int idx_grid = int(loc_grid);
808
809 ijk[0][iaxis] = idx_grid;
810 ijk[1][iaxis] = (idx_grid == this->params.ngrid[iaxis] - 1)
811 ? 0 : idx_grid + 1;
812
813 double s = loc_grid - idx_grid;
814 win[0][iaxis] = 1. - s;
815 win[1][iaxis] = s;
816 }
817
818 for (int iloc = 0; iloc < order; iloc++) {
819 for (int jloc = 0; jloc < order; jloc++) {
820 for (int kloc = 0; kloc < order; kloc++) {
821 gid = this->ret_grid_index(
822 ijk[iloc][0], ijk[jloc][1], ijk[kloc][2]
823 );
824 if (0 <= gid && gid < this->params.nmesh) {
825OMP_ATOMIC
826 this->field_s[gid][0] += inv_vol_cell
827 * weight[pid][0] * win[iloc][0] * win[jloc][1] * win[kloc][2];
828OMP_ATOMIC
829 this->field_s[gid][1] += inv_vol_cell
830 * weight[pid][1] * win[iloc][0] * win[jloc][1] * win[kloc][2];
831 }
832 }
833 }
834 }
835 }
836 }
837}
838
839void MeshField::assign_weighted_field_to_mesh_tsc(
840 ParticleCatalogue& particles, fftw_complex* weight
841) {
842 // Set interpolation order, i.e. number of grids, per dimension,
843 // to which a single particle is assigned.
844 const int order = 3;
845
846 // Here the field is given by Σᵢ wᵢ δᴰ(x - xᵢ),
847 // where δᴰ ↔ δᴷ / dV, dV =: `vol_cell`.
848 const double inv_vol_cell = 1 / this->vol_cell;
849
850 // Reset field values to zero.
851 this->reset_density_field();
852
853 // Perform assignment.
854#ifdef TRV_USE_OMP
855#pragma omp parallel for
856#endif // TRV_USE_OMP
857 for (int pid = 0; pid < particles.ntotal; pid++) {
858 int ijk[order][3]; // grid index coordinates of covered grid cells
859 double win[order][3]; // sampling window
860 long long gid = 0; // flattened grid cell index
861
862 for (int iaxis = 0; iaxis < 3; iaxis++) {
863 // Carefully set covered sampling window grid indices.
864 double loc_grid = this->params.ngrid[iaxis]
865 * particles[pid].pos[iaxis] / this->params.boxsize[iaxis];
866
867 int idx_grid = int(loc_grid);
868
869 if (loc_grid - idx_grid < 0.5) {
870 ijk[0][iaxis] = (idx_grid == 0)
871 ? this->params.ngrid[iaxis] - 1 : idx_grid - 1;
872 ijk[1][iaxis] = idx_grid;
873 ijk[2][iaxis] = (idx_grid == this->params.ngrid[iaxis] - 1)
874 ? 0 : idx_grid + 1;
875 } else {
876 ijk[0][iaxis] = idx_grid;
877 ijk[1][iaxis] = (idx_grid == this->params.ngrid[iaxis] - 1)
878 ? 0 : idx_grid + 1;
879 ijk[2][iaxis] = (ijk[1][iaxis] == this->params.ngrid[iaxis] - 1)
880 ? 0 : ijk[1][iaxis] + 1;
881 }
882
883 // Set sampling window value (up to the 2nd element as `order == 3`).
884 double s = loc_grid - idx_grid;
885
886 if (s < 0.5) {
887 win[0][iaxis] = 1./2 * (1./2 - s) * (1./2 - s);
888 win[1][iaxis] = 3./4 - s * s;
889 win[2][iaxis] = 1./2 * (1./2 + s) * (1./2 + s);
890 } else {
891 s = 1 - s;
892 win[0][iaxis] = 1./2 * (1./2 + s) * (1./2 + s);
893 win[1][iaxis] = 3./4 - s * s;
894 win[2][iaxis] = 1./2 * (1./2 - s) * (1./2 - s);
895 }
896 }
897
898 for (int iloc = 0; iloc < order; iloc++) {
899 for (int jloc = 0; jloc < order; jloc++) {
900 for (int kloc = 0; kloc < order; kloc++) {
901 gid = this->ret_grid_index(
902 ijk[iloc][0], ijk[jloc][1], ijk[kloc][2]
903 );
904 if (0 <= gid && gid < this->params.nmesh) {
905OMP_ATOMIC
906 this->field[gid][0] += inv_vol_cell
907 * weight[pid][0] * win[iloc][0] * win[jloc][1] * win[kloc][2];
908OMP_ATOMIC
909 this->field[gid][1] += inv_vol_cell
910 * weight[pid][1] * win[iloc][0] * win[jloc][1] * win[kloc][2];
911 }
912 }
913 }
914 }
915 }
916
917 // Perform interlacing if needed.
918 if (this->params.interlace == "true") {
919#ifdef TRV_USE_OMP
920#pragma omp parallel for
921#endif // TRV_USE_OMP
922 for (int pid = 0; pid < particles.ntotal; pid++) {
923 int ijk[order][3];
924 double win[order][3];
925 long long gid = 0;
926
927 for (int iaxis = 0; iaxis < 3; iaxis++) {
928 // Apply a half-grid shift and impose the periodic boundary condition.
929 double loc_grid = this->params.ngrid[iaxis]
930 * particles[pid].pos[iaxis] / this->params.boxsize[iaxis] + 0.5;
931
932 if (loc_grid > this->params.ngrid[iaxis]) {
933 loc_grid -= this->params.ngrid[iaxis];
934 }
935
936 int idx_grid = int(loc_grid);
937
938 if (loc_grid - idx_grid < 0.5) {
939 ijk[0][iaxis] = (idx_grid == 0)
940 ? this->params.ngrid[iaxis] - 1 : idx_grid - 1;
941 ijk[1][iaxis] = idx_grid;
942 ijk[2][iaxis] = (idx_grid == this->params.ngrid[iaxis] - 1)
943 ? 0 : idx_grid + 1;
944 } else {
945 ijk[0][iaxis] = idx_grid;
946 ijk[1][iaxis] = (idx_grid == this->params.ngrid[iaxis] - 1)
947 ? 0 : ijk[0][iaxis] + 1;
948 ijk[2][iaxis] = (idx_grid == this->params.ngrid[iaxis] - 1)
949 ? 0 : ijk[1][iaxis] + 1;
950 }
951
952 double s = loc_grid - idx_grid;
953
954 if (s < 0.5) {
955 win[0][iaxis] = 1./2 * (1./2 - s) * (1./2 - s);
956 win[1][iaxis] = 3./4 - s * s;
957 win[2][iaxis] = 1./2 * (1./2 + s) * (1./2 + s);
958 } else {
959 s = 1 - s;
960 win[0][iaxis] = 1./2 * (1./2 + s) * (1./2 + s);
961 win[1][iaxis] = 3./4 - s * s;
962 win[2][iaxis] = 1./2 * (1./2 - s) * (1./2 - s);
963 }
964 }
965
966 for (int iloc = 0; iloc < order; iloc++) {
967 for (int jloc = 0; jloc < order; jloc++) {
968 for (int kloc = 0; kloc < order; kloc++) {
969 gid = this->ret_grid_index(
970 ijk[iloc][0], ijk[jloc][1], ijk[kloc][2]
971 );
972 if (0 <= gid && gid < this->params.nmesh) {
973OMP_ATOMIC
974 this->field_s[gid][0] += inv_vol_cell
975 * weight[pid][0] * win[iloc][0] * win[jloc][1] * win[kloc][2];
976OMP_ATOMIC
977 this->field_s[gid][1] += inv_vol_cell
978 * weight[pid][1] * win[iloc][0] * win[jloc][1] * win[kloc][2];
979 }
980 }
981 }
982 }
983 }
984 }
985}
986
987void MeshField::assign_weighted_field_to_mesh_pcs(
988 ParticleCatalogue& particles, fftw_complex* weight
989) {
990 // Set interpolation order, i.e. number of grids, per dimension,
991 // to which a single particle is assigned.
992 const int order = 4;
993
994 // Here the field is given by Σᵢ wᵢ δᴰ(x - xᵢ),
995 // where δᴰ ↔ δᴷ / dV, dV =: `vol_cell`.
996 const double inv_vol_cell = 1 / this->vol_cell;
997
998 // Reset field values to zero.
999 this->reset_density_field();
1000
1001 // Perform assignment.
1002#ifdef TRV_USE_OMP
1003#pragma omp parallel for
1004#endif // TRV_USE_OMP
1005 for (int pid = 0; pid < particles.ntotal; pid++) {
1006 int ijk[order][3]; // grid index coordinates of covered grid cells
1007 double win[order][3]; // sampling window
1008 long long gid = 0; // flattened grid cell index
1009
1010 for (int iaxis = 0; iaxis < 3; iaxis++) {
1011 // Carefully set covered sampling window grid indices.
1012 double loc_grid = this->params.ngrid[iaxis]
1013 * particles[pid].pos[iaxis] / this->params.boxsize[iaxis];
1014
1015 int idx_grid = int(loc_grid);
1016
1017 ijk[0][iaxis] = (idx_grid == 0)
1018 ? this->params.ngrid[iaxis] - 1 : idx_grid - 1;
1019 ijk[1][iaxis] = idx_grid;
1020 ijk[2][iaxis] = (idx_grid == this->params.ngrid[iaxis] - 1)
1021 ? 0 : idx_grid + 1;
1022 ijk[3][iaxis] = (ijk[2][iaxis] == this->params.ngrid[iaxis] - 1)
1023 ? 0 : ijk[2][iaxis] + 1;
1024
1025 // Set sampling window value (up to the 2nd element as `order == 4`).
1026 double s = loc_grid - idx_grid;
1027
1028 win[0][iaxis] = 1./6 * (1. - s) * (1. - s) * (1. - s);
1029 win[1][iaxis] = 1./6 * (4. - 6. * s * s + 3. * s * s * s);
1030 win[2][iaxis] = 1./6 * (
1031 4. - 6. * (1. - s) * (1. - s) + 3. * (1. - s) * (1. - s) * (1. - s)
1032 );
1033 win[3][iaxis] = 1./6 * s * s * s;
1034 }
1035
1036 for (int iloc = 0; iloc < order; iloc++) {
1037 for (int jloc = 0; jloc < order; jloc++) {
1038 for (int kloc = 0; kloc < order; kloc++) {
1039 gid = this->ret_grid_index(
1040 ijk[iloc][0], ijk[jloc][1], ijk[kloc][2]
1041 );
1042 if (0 <= gid && gid < this->params.nmesh) {
1043OMP_ATOMIC
1044 this->field[gid][0] += inv_vol_cell
1045 * weight[pid][0] * win[iloc][0] * win[jloc][1] * win[kloc][2];
1046OMP_ATOMIC
1047 this->field[gid][1] += inv_vol_cell
1048 * weight[pid][1] * win[iloc][0] * win[jloc][1] * win[kloc][2];
1049 }
1050 }
1051 }
1052 }
1053 }
1054
1055 // Perform interlacing if needed.
1056 if (this->params.interlace == "true") {
1057#ifdef TRV_USE_OMP
1058#pragma omp parallel for
1059#endif // TRV_USE_OMP
1060 for (int pid = 0; pid < particles.ntotal; pid++) {
1061 int ijk[order][3];
1062 double win[order][3];
1063 long long gid = 0;
1064
1065 for (int iaxis = 0; iaxis < 3; iaxis++) {
1066 // Apply a half-grid shift and impose the periodic boundary condition.
1067 double loc_grid = this->params.ngrid[iaxis]
1068 * particles[pid].pos[iaxis] / this->params.boxsize[iaxis] + 0.5;
1069
1070 if (loc_grid > this->params.ngrid[iaxis]) {
1071 loc_grid -= this->params.ngrid[iaxis];
1072 }
1073
1074 int idx_grid = int(loc_grid);
1075
1076 ijk[0][iaxis] = (idx_grid == 0)
1077 ? this->params.ngrid[iaxis] - 1 : idx_grid - 1;
1078 ijk[1][iaxis] = idx_grid;
1079 ijk[2][iaxis] = (idx_grid == this->params.ngrid[iaxis] - 1)
1080 ? 0 : idx_grid + 1;
1081 ijk[3][iaxis] = (ijk[2][iaxis] == this->params.ngrid[iaxis] - 1)
1082 ? 0 : ijk[2][iaxis] + 1;
1083 double s = loc_grid - idx_grid;
1084
1085 win[0][iaxis] = 1./6 * (1. - s) * (1. - s) * (1. - s);
1086 win[1][iaxis] = 1./6 * (4. - 6. * s * s + 3. * s * s * s);
1087 win[2][iaxis] = 1./6 * (
1088 4. - 6. * (1. - s) * (1. - s) + 3. * (1. - s) * (1. - s) * (1. - s)
1089 );
1090 win[3][iaxis] = 1./6 * s * s * s;
1091 }
1092
1093 for (int iloc = 0; iloc < order; iloc++) {
1094 for (int jloc = 0; jloc < order; jloc++) {
1095 for (int kloc = 0; kloc < order; kloc++) {
1096 gid = this->ret_grid_index(
1097 ijk[iloc][0], ijk[jloc][1], ijk[kloc][2]
1098 );
1099 if (0 <= gid && gid < this->params.nmesh) {
1100OMP_ATOMIC
1101 this->field_s[gid][0] += inv_vol_cell
1102 * weight[pid][0] * win[iloc][0] * win[jloc][1] * win[kloc][2];
1103OMP_ATOMIC
1104 this->field_s[gid][1] += inv_vol_cell
1105 * weight[pid][1] * win[iloc][0] * win[jloc][1] * win[kloc][2];
1106 }
1107 }
1108 }
1109 }
1110 }
1111 }
1112}
1113
1114double MeshField::calc_assignment_window_in_fourier(
1115 int i, int j, int k, int order
1116) {
1117 if (order < 0) {
1118 if (trvs::currTask == 0) {
1120 "Invalid window assignment order: %d. Must be non-negative.",
1121 order
1122 );
1123 }
1124 throw trvs::InvalidParameterError(
1125 "Invalid window assignment order: %d. Must be non-negative.",
1126 order
1127 );
1128 }
1129
1130 // Return the pre-computed window value.
1131 if (order == this->window_assign_order) {
1132 long long idx_grid = this->ret_grid_index(i, j, k);
1133 return this->window[idx_grid];
1134 }
1135
1136 this->shift_grid_indices_fourier(i, j, k);
1137
1138 double u_x = M_PI * i / double(this->params.ngrid[0]);
1139 double u_y = M_PI * j / double(this->params.ngrid[1]);
1140 double u_z = M_PI * k / double(this->params.ngrid[2]);
1141
1142 // Note sin(u) / u -> 1 as u -> 0.
1143 double wk_x = (i != 0) ? std::sin(u_x) / u_x : 1.;
1144 double wk_y = (j != 0) ? std::sin(u_y) / u_y : 1.;
1145 double wk_z = (k != 0) ? std::sin(u_z) / u_z : 1.;
1146
1147 double wk = wk_x * wk_y * wk_z;
1148
1149 return std::pow(wk, order);
1150}
1151
1152void MeshField::compute_assignment_window_in_fourier(int order) {
1153 if (order < 0) {
1154 if (trvs::currTask == 0) {
1156 "Invalid window assignment order: %d. Must be non-negative.",
1157 order
1158 );
1159 }
1160 throw trvs::InvalidParameterError(
1161 "Invalid window assignment order: %d. Must be non-negative.",
1162 order
1163 );
1164 }
1165
1166 if (this->window_assign_order == order) {return;} // if computed already
1167
1168 if (trvs::currTask == 0) {
1170 "Computing interpolation window in Fourier space "
1171 "for assignment order %d.",
1172 order
1173 );
1174 }
1175
1176 if (this->window_assign_order == -1) {
1177 this->window.resize(this->params.nmesh, 0.); // if not yet initialised
1178
1179 trvs::count_rgrid += 1;
1180 trvs::count_grid += .5;
1184 }
1185
1186#ifdef TRV_USE_OMP
1187#pragma omp parallel for collapse(3)
1188#endif // TRV_USE_OMP
1189 for (int i = 0; i < this->params.ngrid[0]; i++) {
1190 for (int j = 0; j < this->params.ngrid[1]; j++) {
1191 for (int k = 0; k < this->params.ngrid[2]; k++) {
1192 long long idx_grid = this->ret_grid_index(i, j, k);
1193 this->window[idx_grid] = this->calc_assignment_window_in_fourier(
1194 i, j, k, order
1195 );
1196 }
1197 }
1198 }
1199
1200 this->window_assign_order = order; // set assignment order/flag
1201}
1202
1203
1204// -----------------------------------------------------------------------
1205// Field computations
1206// -----------------------------------------------------------------------
1207
1209 fftw_complex* unit_weight = fftw_alloc_complex(particles.ntotal);
1210
1213
1214#ifdef TRV_USE_OMP
1215#pragma omp parallel for simd
1216#endif // TRV_USE_OMP
1217 for (int pid = 0; pid < particles.ntotal; pid++) {
1218 unit_weight[pid][0] = 1.;
1219 unit_weight[pid][1] = 0.;
1220 }
1221
1222 this->assign_weighted_field_to_mesh(particles, unit_weight);
1223
1224 fftw_free(unit_weight);
1225
1227}
1228
1230 ParticleCatalogue& particles
1231) {
1232 this->compute_unweighted_field(particles);
1233
1234 // Subtract the global mean density to compute fluctuations, i.e. δn.
1235 double nbar = double(particles.ntotal) / this->vol;
1236
1237#ifdef TRV_USE_OMP
1238#pragma omp parallel for simd
1239#endif // TRV_USE_OMP
1240 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1241 this->field[gid][0] -= nbar;
1242 // this->field[gid][1] -= 0.; (unused)
1243 }
1244}
1245
1247 ParticleCatalogue& particles_data, ParticleCatalogue& particles_rand,
1248 LineOfSight* los_data, LineOfSight* los_rand,
1249 double alpha, int ell, int m
1250) {
1251 fftw_complex* weight_kern = fftw_alloc_complex(particles_data.ntotal);
1252
1255
1256#ifdef TRV_USE_OMP
1257#pragma omp parallel for
1258#endif // TRV_USE_OMP
1259 for (int pid = 0; pid < particles_data.ntotal; pid++) {
1260 double los_[3] = {
1261 los_data[pid].pos[0], los_data[pid].pos[1], los_data[pid].pos[2]
1262 };
1263
1264 std::complex<double> ylm = trvm::SphericalHarmonicCalculator::
1266
1267 weight_kern[pid][0] = ylm.real() * particles_data[pid].w;
1268 weight_kern[pid][1] = ylm.imag() * particles_data[pid].w;
1269 }
1270
1271 this->assign_weighted_field_to_mesh(particles_data, weight_kern);
1272
1273 fftw_free(weight_kern);
1274
1276
1277 // Compute the weighted random-source field.
1278 weight_kern = fftw_alloc_complex(particles_rand.ntotal);
1279
1282
1283#ifdef TRV_USE_OMP
1284#pragma omp parallel for
1285#endif // TRV_USE_OMP
1286 for (int pid = 0; pid < particles_rand.ntotal; pid++) {
1287 double los_[3] = {
1288 los_rand[pid].pos[0], los_rand[pid].pos[1], los_rand[pid].pos[2]
1289 };
1290
1291 std::complex<double> ylm = trvm::SphericalHarmonicCalculator::
1293
1294 weight_kern[pid][0] = ylm.real() * particles_rand[pid].w;
1295 weight_kern[pid][1] = ylm.imag() * particles_rand[pid].w;
1296 }
1297
1298 MeshField field_rand(this->params, false, "`field_rand`");
1299 field_rand.assign_weighted_field_to_mesh(particles_rand, weight_kern);
1300
1301 fftw_free(weight_kern);
1302
1304
1305 // Subtract to compute fluctuations, i.e. δn_LM.
1306#ifdef TRV_USE_OMP
1307#pragma omp parallel for simd
1308#endif // TRV_USE_OMP
1309 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1310 this->field[gid][0] -= alpha * field_rand[gid][0];
1311 this->field[gid][1] -= alpha * field_rand[gid][1];
1312 }
1313
1314 if (this->params.interlace == "true") {
1315#ifdef TRV_USE_OMP
1316#pragma omp parallel for simd
1317#endif // TRV_USE_OMP
1318 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1319 this->field_s[gid][0] -= alpha * field_rand.field_s[gid][0];
1320 this->field_s[gid][1] -= alpha * field_rand.field_s[gid][1];
1321 }
1322 }
1323}
1324
1326 ParticleCatalogue& particles, LineOfSight* los,
1327 double alpha, int ell, int m
1328) {
1329 // Compute the weighted field.
1330 fftw_complex* weight_kern = fftw_alloc_complex(particles.ntotal);
1331
1334
1335#ifdef TRV_USE_OMP
1336#pragma omp parallel for
1337#endif // TRV_USE_OMP
1338 for (int pid = 0; pid < particles.ntotal; pid++) {
1339 double los_[3] = {los[pid].pos[0], los[pid].pos[1], los[pid].pos[2]};
1340
1341 std::complex<double> ylm = trvm::SphericalHarmonicCalculator::
1343
1344 weight_kern[pid][0] = ylm.real() * particles[pid].w;
1345 weight_kern[pid][1] = ylm.imag() * particles[pid].w;
1346 }
1347
1348 this->assign_weighted_field_to_mesh(particles, weight_kern);
1349
1350 fftw_free(weight_kern);
1351
1353
1354 // Apply the normalising alpha contrast.
1355#ifdef TRV_USE_OMP
1356#pragma omp parallel for simd
1357#endif // TRV_USE_OMP
1358 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1359 this->field[gid][0] *= alpha;
1360 this->field[gid][1] *= alpha;
1361 }
1362}
1363
1365 ParticleCatalogue& particles_data, ParticleCatalogue& particles_rand,
1366 LineOfSight* los_data, LineOfSight* los_rand,
1367 double alpha,
1368 int ell, int m
1369) {
1370 // Compute the quadratic weighted data-source field.
1371 fftw_complex* weight_kern = fftw_alloc_complex(particles_data.ntotal);
1372
1375
1376#ifdef TRV_USE_OMP
1377#pragma omp parallel for
1378#endif // TRV_USE_OMP
1379 for (int pid = 0; pid < particles_data.ntotal; pid++) {
1380 double los_[3] = {
1381 los_data[pid].pos[0], los_data[pid].pos[1], los_data[pid].pos[2]
1382 };
1383
1384 std::complex<double> ylm = trvm::SphericalHarmonicCalculator::
1386
1387 ylm = std::conj(ylm); // additional conjugation
1388
1389 weight_kern[pid][0] = ylm.real() * std::pow(particles_data[pid].w, 2);
1390 weight_kern[pid][1] = ylm.imag() * std::pow(particles_data[pid].w, 2);
1391 }
1392
1393 this->assign_weighted_field_to_mesh(particles_data, weight_kern);
1394
1395 fftw_free(weight_kern);
1396
1398
1399 // Compute the quadratic weighted random-source field.
1400 weight_kern = fftw_alloc_complex(particles_rand.ntotal);
1401
1404
1405#ifdef TRV_USE_OMP
1406#pragma omp parallel for
1407#endif // TRV_USE_OMP
1408 for (int pid = 0; pid < particles_rand.ntotal; pid++) {
1409 double los_[3] = {
1410 los_rand[pid].pos[0], los_rand[pid].pos[1], los_rand[pid].pos[2]
1411 };
1412
1413 std::complex<double> ylm = trvm::SphericalHarmonicCalculator::
1415
1416 ylm = std::conj(ylm); // additional conjugation
1417
1418 weight_kern[pid][0] = ylm.real() * std::pow(particles_rand[pid].w, 2);
1419 weight_kern[pid][1] = ylm.imag() * std::pow(particles_rand[pid].w, 2);
1420 }
1421
1422 MeshField field_rand(this->params, false, "`field_rand`");
1423 field_rand.assign_weighted_field_to_mesh(particles_rand, weight_kern);
1424
1425 fftw_free(weight_kern);
1426
1428
1429 // Add to compute quadratic fluctuations, i.e. N_LM.
1430#ifdef TRV_USE_OMP
1431#pragma omp parallel for simd
1432#endif // TRV_USE_OMP
1433 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1434 this->field[gid][0] += std::pow(alpha, 2) * field_rand[gid][0];
1435 this->field[gid][1] += std::pow(alpha, 2) * field_rand[gid][1];
1436 }
1437
1438 if (this->params.interlace == "true") {
1439#ifdef TRV_USE_OMP
1440#pragma omp parallel for simd
1441#endif // TRV_USE_OMP
1442 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1443 this->field_s[gid][0] += std::pow(alpha, 2) * field_rand.field_s[gid][0];
1444 this->field_s[gid][1] += std::pow(alpha, 2) * field_rand.field_s[gid][1];
1445 }
1446 }
1447}
1448
1450 ParticleCatalogue& particles, LineOfSight* los,
1451 double alpha, int ell, int m
1452) {
1453 // Compute the quadratic weighted field.
1454 fftw_complex* weight_kern = fftw_alloc_complex(particles.ntotal);
1455
1458
1459#ifdef TRV_USE_OMP
1460#pragma omp parallel for
1461#endif // TRV_USE_OMP
1462 for (int pid = 0; pid < particles.ntotal; pid++) {
1463 double los_[3] = {los[pid].pos[0], los[pid].pos[1], los[pid].pos[2]};
1464
1465 std::complex<double> ylm = trvm::SphericalHarmonicCalculator::
1467
1468 ylm = std::conj(ylm); // conjugation is essential
1469
1470 weight_kern[pid][0] = ylm.real() * std::pow(particles[pid].w, 2);
1471 weight_kern[pid][1] = ylm.imag() * std::pow(particles[pid].w, 2);
1472 }
1473
1474 this->assign_weighted_field_to_mesh(particles, weight_kern);
1475
1476 fftw_free(weight_kern);
1477
1479
1480 // Apply mean-density matching normalisation (i.e. alpha contrast)
1481 // to compute N_LM.
1482#ifdef TRV_USE_OMP
1483#pragma omp parallel for simd
1484#endif // TRV_USE_OMP
1485 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1486 this->field[gid][0] *= std::pow(alpha, 2);
1487 this->field[gid][1] *= std::pow(alpha, 2);
1488 }
1489}
1490
1491
1492// -----------------------------------------------------------------------
1493// Field transforms
1494// -----------------------------------------------------------------------
1495
1497 if (trvs::currTask == 0) {
1498 trvs::logger.debug(
1499 "Performing Fourier transform of '%s'.", this->name.c_str()
1500 );
1501 }
1502
1503 // Apply FFT volume normalisation, where ∫d³x ↔ dV Σᵢ, dV =: `vol_cell`.
1504#ifdef TRV_USE_OMP
1505#pragma omp parallel for simd
1506#endif // TRV_USE_OMP
1507 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1508 this->field[gid][0] *= this->vol_cell;
1509 this->field[gid][1] *= this->vol_cell;
1510 }
1511
1512 // Perform FFT.
1513 if (trvs::is_gpu_enabled()) {
1514#if defined(TRV_USE_HIP)
1515 trva::copy_complex_array_htod(
1516 this->field, this->d_field, this->params.nmesh
1517 );
1518 HIPFFT_EXEC(hipfftExecZ2Z(
1519 this->transform_gpu, this->d_field, this->d_field,
1520 HIPFFT_FORWARD
1521 ));
1522 trva::copy_complex_array_dtoh(
1523 this->d_field, this->field, this->params.nmesh
1524 );
1525#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
1526 if (trvs::is_gpu_single()) {
1527 trva::copy_complex_array_htod(
1528 this->field, this->d_field, this->params.nmesh
1529 );
1530 CUFFT_EXEC(cufftXtExec(
1531 this->transform_gpu, this->d_field, this->d_field,
1532 CUFFT_FORWARD
1533 ));
1534 trva::copy_complex_array_dtoh(
1535 this->d_field, this->field, this->params.nmesh
1536 );
1537 } else {
1538 trva::copy_complex_array_htod_mgpu(
1539 this->transform_gpu, this->field_desc, this->field
1540 );
1541 CUFFT_EXEC(cufftXtExecDescriptor(
1542 this->transform_gpu, this->field_desc, this->field_desc,
1543 CUFFT_FORWARD
1544 ));
1545 trva::copy_complex_array_dtoh_mgpu(
1546 this->transform_gpu, this->field_desc, this->field
1547 );
1548 }
1549#endif // TRV_USE_HIP
1550 } else {
1551 if (this->plan_ext) {
1552 fftw_execute_dft(this->transform, this->field, this->field);
1553 } else {
1554 fftw_execute(this->transform);
1555 }
1556 }
1557 trvs::count_fft += 1;
1558
1559 // Interlace with the shadow field.
1560 if (this->params.interlace == "true") {
1561#ifdef TRV_USE_OMP
1562#pragma omp parallel for simd
1563#endif // TRV_USE_OMP
1564 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1565 this->field_s[gid][0] *= this->vol_cell;
1566 this->field_s[gid][1] *= this->vol_cell;
1567 }
1568
1569 if (trvs::is_gpu_enabled()) {
1570#if defined(TRV_USE_HIP)
1571 trva::copy_complex_array_htod(
1572 this->field_s, this->d_field, this->params.nmesh
1573 );
1574 HIPFFT_EXEC(hipfftExecZ2Z(
1575 this->transform_gpu, this->d_field, this->d_field,
1576 HIPFFT_FORWARD
1577 ));
1578 trva::copy_complex_array_dtoh(
1579 this->d_field, this->field_s, this->params.nmesh
1580 );
1581#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
1582 if (trvs::is_gpu_single()) {
1583 trva::copy_complex_array_htod(
1584 this->field_s, this->d_field, this->params.nmesh
1585 );
1586 CUFFT_EXEC(cufftXtExec(
1587 this->transform_gpu, this->d_field, this->d_field,
1588 CUFFT_FORWARD
1589 ));
1590 trva::copy_complex_array_dtoh(
1591 this->d_field, this->field_s, this->params.nmesh
1592 );
1593 } else {
1594 trva::copy_complex_array_htod_mgpu(
1595 this->transform_gpu, this->field_desc, this->field_s
1596 );
1597 CUFFT_EXEC(cufftXtExecDescriptor(
1598 this->transform_gpu, this->field_desc, this->field_desc,
1599 CUFFT_FORWARD
1600 ));
1601 trva::copy_complex_array_dtoh_mgpu(
1602 this->transform_gpu, this->field_desc, this->field_s
1603 );
1604 }
1605#endif // TRV_USE_HIP
1606 } else {
1607 if (this->plan_ext) {
1608 fftw_execute_dft(this->transform_s, this->field_s, this->field_s);
1609 } else {
1610 fftw_execute(this->transform_s);
1611 }
1612 }
1613 trvs::count_fft += 1;
1614
1615#ifdef TRV_USE_OMP
1616#pragma omp parallel for collapse(3)
1617#endif // TRV_USE_OMP
1618 for (int i = 0; i < this->params.ngrid[0]; i++) {
1619 for (int j = 0; j < this->params.ngrid[1]; j++) {
1620 for (int k = 0; k < this->params.ngrid[2]; k++) {
1621 long long idx_grid = this->ret_grid_index(i, j, k);
1622
1623 // Calculate the index vector representing the grid cell.
1624 double m[3];
1625 m[0] = (i < this->params.ngrid[0]/2)
1626 ? double(i) / this->params.ngrid[0]
1627 : double(i) / this->params.ngrid[0] - 1;
1628 m[1] = (j < this->params.ngrid[1]/2)
1629 ? double(j) / this->params.ngrid[1]
1630 : double(j) / this->params.ngrid[1] - 1;
1631 m[2] = (k < this->params.ngrid[2]/2)
1632 ? double(k) / this->params.ngrid[2]
1633 : double(k) / this->params.ngrid[2] - 1;
1634
1635 // Multiply by the phase factor from the half-grid shift and
1636 // add the shadow mesh field contribution. Note the positive
1637 // sign of `arg`.
1638 double arg = M_PI * (m[0] + m[1] + m[2]);
1639
1640 this->field[idx_grid][0] +=
1641 std::cos(arg) * this->field_s[idx_grid][0]
1642 - std::sin(arg) * this->field_s[idx_grid][1]
1643 ;
1644 this->field[idx_grid][1] +=
1645 std::sin(arg) * this->field_s[idx_grid][0]
1646 + std::cos(arg) * this->field_s[idx_grid][1]
1647 ;
1648
1649 this->field[idx_grid][0] /= 2.;
1650 this->field[idx_grid][1] /= 2.;
1651 }
1652 }
1653 }
1654 }
1655}
1656
1658 if (trvs::currTask == 0) {
1659 trvs::logger.debug(
1660 "Performing inverse Fourier transform of '%s'.", this->name.c_str()
1661 );
1662 }
1663
1664 // Apply inverse FFT volume normalisation, where ∫d³k/(2π)³ ↔ (1/V) Σᵢ,
1665 // V =: `vol`.
1666#ifdef TRV_USE_OMP
1667#pragma omp parallel for simd
1668#endif // TRV_USE_OMP
1669 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1670 this->field[gid][0] /= this->vol;
1671 this->field[gid][1] /= this->vol;
1672 }
1673
1674 // Perform inverse FFT.
1675 if (trvs::is_gpu_enabled()) {
1676#if defined(TRV_USE_HIP)
1677 trva::copy_complex_array_htod(
1678 this->field, this->d_field, this->params.nmesh
1679 );
1680 HIPFFT_EXEC(hipfftExecZ2Z(
1681 this->transform_gpu, this->d_field, this->d_field,
1682 HIPFFT_BACKWARD
1683 ));
1684 trva::copy_complex_array_dtoh(
1685 this->d_field, this->field, this->params.nmesh
1686 );
1687#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
1688 if (trvs::is_gpu_single()) {
1689 trva::copy_complex_array_htod(
1690 this->field, this->d_field, this->params.nmesh
1691 );
1692 CUFFT_EXEC(cufftXtExec(
1693 this->transform_gpu, this->d_field, this->d_field,
1694 CUFFT_INVERSE
1695 ));
1696 trva::copy_complex_array_dtoh(
1697 this->d_field, this->field, this->params.nmesh
1698 );
1699 } else {
1700 trva::copy_complex_array_htod_mgpu(
1701 this->transform_gpu, this->field_desc, this->field
1702 );
1703 CUFFT_EXEC(cufftXtExecDescriptor(
1704 this->transform_gpu, this->field_desc, this->field_desc,
1705 CUFFT_INVERSE
1706 ));
1707 trva::copy_complex_array_dtoh_mgpu(
1708 this->transform_gpu, this->field_desc, this->field
1709 );
1710 }
1711#endif // TRV_USE_HIP
1712 } else {
1713 if (this->plan_ext) {
1714 fftw_execute_dft(this->inv_transform, this->field, this->field);
1715 } else {
1716 fftw_execute(this->inv_transform);
1717 }
1718 }
1719 trvs::count_ifft += 1;
1720}
1721
1722
1723// -----------------------------------------------------------------------
1724// Field operations
1725// -----------------------------------------------------------------------
1726
1728 if (trvs::currTask == 0) {
1729 trvs::logger.debug(
1730 "Applying wide-angle power-law kernel to '%s'.", this->name.c_str()
1731 );
1732 }
1733
1734 // CAVEAT: Discretionary choice such that eps_r / r = O(1.e-9).
1735 const double eps_r = 1.e-6;
1736
1737#ifdef TRV_USE_OMP
1738#pragma omp parallel for collapse(3)
1739#endif // TRV_USE_OMP
1740 for (int i = 0; i < this->params.ngrid[0]; i++) {
1741 for (int j = 0; j < this->params.ngrid[1]; j++) {
1742 for (int k = 0; k < this->params.ngrid[2]; k++) {
1743 long long idx_grid = this->ret_grid_index(i, j, k);
1744
1745 double rv[3];
1746 this->get_grid_pos_vector(i, j, k, rv);
1747
1748 double r_ = trvm::get_vec3d_magnitude(rv);
1749
1750 if (r_ < eps_r) {
1751 // this->field[idx_grid][0] *= 0.; (unused)
1752 // this->field[idx_grid][1] *= 0.; (unused)
1753 } else {
1754 this->field[idx_grid][0] *=
1755 std::pow(r_, - this->params.i_wa - this->params.j_wa);
1756 this->field[idx_grid][1] *=
1757 std::pow(r_, - this->params.i_wa - this->params.j_wa);
1758 }
1759 }
1760 }
1761 }
1762}
1763
1765 if (trvs::currTask == 0) {
1766 trvs::logger.debug(
1767 "Applying assignment compensation to '%s'.", this->name.c_str()
1768 );
1769 }
1770
1771 this->compute_assignment_window_in_fourier(this->params.assignment_order);
1772
1773#ifdef TRV_USE_OMP
1774#pragma omp parallel for collapse(3)
1775#endif // TRV_USE_OMP
1776 for (int i = 0; i < this->params.ngrid[0]; i++) {
1777 for (int j = 0; j < this->params.ngrid[1]; j++) {
1778 for (int k = 0; k < this->params.ngrid[2]; k++) {
1779 long long idx_grid = this->ret_grid_index(i, j, k);
1780 this->field[idx_grid][0] /= this->window[idx_grid];
1781 this->field[idx_grid][1] /= this->window[idx_grid];
1782 }
1783 }
1784 }
1785}
1786
1787
1788// -----------------------------------------------------------------------
1789// One-point statistics
1790// -----------------------------------------------------------------------
1791
1793 MeshField& field_fourier, std::vector< std::complex<double> >& ylm,
1794 double k_lower, double k_upper,
1795 double& k_eff, int& nmodes
1796) {
1797 if (trvs::currTask == 0) {
1798 trvs::logger.debug(
1799 "Performing inverse Fourier transform to spherical harmonic weighted "
1800 "'%s' in wavenumber interval [%f, %f).",
1801 this->name.c_str(), k_lower, k_upper
1802 );
1803 }
1804
1805 // Reset effective wavenumber and wavevector modes.
1806 k_eff = 0.;
1807 nmodes = 0;
1808
1809 // Perform wavevector mode binning in the band.
1810 this->compute_assignment_window_in_fourier(this->params.assignment_order);
1811
1812#ifdef TRV_USE_OMP
1813#pragma omp parallel for collapse(3) reduction(+:k_eff, nmodes)
1814#endif // TRV_USE_OMP
1815 for (int i = 0; i < this->params.ngrid[0]; i++) {
1816 for (int j = 0; j < this->params.ngrid[1]; j++) {
1817 for (int k = 0; k < this->params.ngrid[2]; k++) {
1818 long long idx_grid = this->ret_grid_index(i, j, k);
1819
1820 double kv[3];
1821 this->get_grid_wavevector(i, j, k, kv);
1822
1823 double k_ = trvm::get_vec3d_magnitude(kv);
1824
1825 // Determine the grid cell contribution to the band.
1826 if (k_lower <= k_ && k_ < k_upper) {
1827 std::complex<double> fk(
1828 field_fourier[idx_grid][0], field_fourier[idx_grid][1]
1829 );
1830
1831 // Apply assignment compensation.
1832 fk /= this->window[idx_grid];
1833
1834 // Weight the field.
1835 this->field[idx_grid][0] = (ylm[idx_grid] * fk).real();
1836 this->field[idx_grid][1] = (ylm[idx_grid] * fk).imag();
1837
1838 k_eff += k_;
1839 nmodes++;
1840 } else {
1841 // This is necessary when the field is not reset to zero.
1842 this->field[idx_grid][0] = 0.;
1843 this->field[idx_grid][1] = 0.;
1844 }
1845 }
1846 }
1847 }
1848
1849 // Perform inverse FFT.
1850 if (trvs::is_gpu_enabled()) {
1851#if defined(TRV_USE_HIP)
1852 trva::copy_complex_array_htod(
1853 this->field, this->d_field, this->params.nmesh
1854 );
1855 HIPFFT_EXEC(hipfftExecZ2Z(
1856 this->transform_gpu, this->d_field, this->d_field,
1857 HIPFFT_BACKWARD
1858 ));
1859 trva::copy_complex_array_dtoh(
1860 this->d_field, this->field, this->params.nmesh
1861 );
1862#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
1863 if (trvs::is_gpu_single()) {
1864 trva::copy_complex_array_htod(
1865 this->field, this->d_field, this->params.nmesh
1866 );
1867 CUFFT_EXEC(cufftXtExec(
1868 this->transform_gpu, this->d_field, this->d_field,
1869 CUFFT_INVERSE
1870 ));
1871 trva::copy_complex_array_dtoh(
1872 this->d_field, this->field, this->params.nmesh
1873 );
1874 } else {
1875 trva::copy_complex_array_htod_mgpu(
1876 this->transform_gpu, this->field_desc, this->field
1877 );
1878 CUFFT_EXEC(cufftXtExecDescriptor(
1879 this->transform_gpu, this->field_desc, this->field_desc,
1880 CUFFT_INVERSE
1881 ));
1882 trva::copy_complex_array_dtoh_mgpu(
1883 this->transform_gpu, this->field_desc, this->field
1884 );
1885 }
1886#endif // TRV_USE_HIP
1887 } else {
1888 if (this->plan_ext) {
1889 fftw_execute_dft(this->inv_transform, this->field, this->field);
1890 } else {
1891 fftw_execute(this->inv_transform);
1892 }
1893 }
1894 trvs::count_ifft += 1;
1895
1896 // Average over wavevector modes in the band.
1897#ifdef TRV_USE_OMP
1898#pragma omp parallel for simd
1899#endif // TRV_USE_OMP
1900 for (long long gid = 0; gid < this->params.nmesh; gid++) {
1901 this->field[gid][0] /= double(nmodes);
1902 this->field[gid][1] /= double(nmodes);
1903 }
1904
1905 k_eff /= double(nmodes);
1906}
1907
1909 MeshField& field_fourier,
1910 std::vector< std::complex<double> >& ylm,
1912 double r
1913) {
1914 if (trvs::currTask == 0) {
1915 trvs::logger.debug(
1916 "Performing inverse Fourier transform to spherical Bessel weighted "
1917 "'%s' at separation r = %f.",
1918 this->name.c_str(), r
1919 );
1920 }
1921
1922 // Compute the field weighted by the spherical Bessel function and
1923 // reduced spherical harmonics.
1924 this->compute_assignment_window_in_fourier(this->params.assignment_order);
1925
1926#ifdef TRV_USE_OMP
1927#pragma omp parallel
1928#endif // TRV_USE_OMP
1929{
1930 // Create thread-private copies of the spherical Bessel function calculator.
1931 trvm::SphericalBesselCalculator sjl_thread(sjl);
1932
1933#ifdef TRV_USE_OMP
1934#pragma omp for collapse(3)
1935#endif // TRV_USE_OMP
1936 for (int i = 0; i < this->params.ngrid[0]; i++) {
1937 for (int j = 0; j < this->params.ngrid[1]; j++) {
1938 for (int k = 0; k < this->params.ngrid[2]; k++) {
1939 long long idx_grid = this->ret_grid_index(i, j, k);
1940
1941 double kv[3];
1942 this->get_grid_wavevector(i, j, k, kv);
1943
1944 double k_ = trvm::get_vec3d_magnitude(kv);
1945
1946 // Apply assignment compensation.
1947 std::complex<double> fk(
1948 field_fourier[idx_grid][0], field_fourier[idx_grid][1]
1949 );
1950
1951 fk /= this->window[idx_grid];
1952
1953 // Weight the field including the volume normalisation,
1954 // where ∫d³k/(2π)³ ↔ (1/V) Σᵢ, V =: `vol`.
1955 this->field[idx_grid][0] =
1956 sjl_thread.eval(k_ * r) * (ylm[idx_grid] * fk).real() / this->vol;
1957 this->field[idx_grid][1] =
1958 sjl_thread.eval(k_ * r) * (ylm[idx_grid] * fk).imag() / this->vol;
1959 }
1960 }
1961 }
1962}
1963
1964 // Perform inverse FFT.
1965 if (trvs::is_gpu_enabled()) {
1966#if defined(TRV_USE_HIP)
1967 trva::copy_complex_array_htod(
1968 this->field, this->d_field, this->params.nmesh
1969 );
1970 HIPFFT_EXEC(hipfftExecZ2Z(
1971 this->transform_gpu, this->d_field, this->d_field,
1972 HIPFFT_BACKWARD
1973 ));
1974 trva::copy_complex_array_dtoh(
1975 this->d_field, this->field, this->params.nmesh
1976 );
1977#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
1978 if (trvs::is_gpu_single()) {
1979 trva::copy_complex_array_htod(
1980 this->field, this->d_field, this->params.nmesh
1981 );
1982 CUFFT_EXEC(cufftXtExec(
1983 this->transform_gpu, this->d_field, this->d_field,
1984 CUFFT_INVERSE
1985 ));
1986 trva::copy_complex_array_dtoh(
1987 this->d_field, this->field, this->params.nmesh
1988 );
1989 } else {
1990 trva::copy_complex_array_htod_mgpu(
1991 this->transform_gpu, this->field_desc, this->field
1992 );
1993 CUFFT_EXEC(cufftXtExecDescriptor(
1994 this->transform_gpu, this->field_desc, this->field_desc,
1995 CUFFT_INVERSE
1996 ));
1997 trva::copy_complex_array_dtoh_mgpu(
1998 this->transform_gpu, this->field_desc, this->field
1999 );
2000 }
2001#endif // TRV_USE_HIP
2002 } else {
2003 if (this->plan_ext) {
2004 fftw_execute_dft(this->inv_transform, this->field, this->field);
2005 } else {
2006 fftw_execute(this->inv_transform);
2007 }
2008 }
2009 trvs::count_ifft += 1;
2010}
2011
2012
2013// -----------------------------------------------------------------------
2014// Misc
2015// -----------------------------------------------------------------------
2016
2018 ParticleCatalogue& particles, int order
2019) {
2020 // Initialise the weight field.
2021 fftw_complex* weight = fftw_alloc_complex(particles.ntotal);
2022
2025
2026#ifdef TRV_USE_OMP
2027#if defined(__GNUC__) && !defined(__clang__)
2028#pragma omp parallel for simd
2029#else // !__GNUC__ || __clang__
2030#pragma omp parallel for
2031#endif // __GNUC__ && !__clang__
2032#endif // TRV_USE_OMP
2033 for (int pid = 0; pid < particles.ntotal; pid++) {
2034 weight[pid][0] = particles[pid].w;
2035 weight[pid][1] = 0.;
2036 }
2037
2038 // Compute the weighted field.
2039 this->assign_weighted_field_to_mesh(particles, weight);
2040
2041 fftw_free(weight);
2042
2044
2045 // Compute normalisation volume integral, where ∫d³x ↔ dV Σᵢ,
2046 // dV =: `vol_cell`.
2047 double vol_int = 0.;
2048
2049#ifdef TRV_USE_OMP
2050#if defined(__GNUC__) && !defined(__clang__)
2051#pragma omp parallel for simd reduction(+:vol_int)
2052#else // !__GNUC__ || __clang__
2053#pragma omp parallel for reduction(+:vol_int)
2054#endif // __GNUC__ && !__clang__
2055#endif // TRV_USE_OMP
2056 for (long long gid = 0; gid < this->params.nmesh; gid++) {
2057 vol_int += std::pow(this->field[gid][0], order);
2058 }
2059
2060 vol_int *= this->vol_cell;
2061
2062 double norm_factor = 1. / vol_int;
2063
2064 return norm_factor;
2065}
2066
2067
2068// ***********************************************************************
2069// Field statistics
2070// ***********************************************************************
2071
2072// -----------------------------------------------------------------------
2073// Life cycle
2074// -----------------------------------------------------------------------
2075
2077 this->params = params;
2078
2079 this->reset_stats();
2080
2081 // Calculate grid sizes in configuration space.
2082 this->dr[0] = this->params.boxsize[0] / this->params.ngrid[0];
2083 this->dr[1] = this->params.boxsize[1] / this->params.ngrid[1];
2084 this->dr[2] = this->params.boxsize[2] / this->params.ngrid[2];
2085
2086 // Calculate fundamental wavenumbers in Fourier space.
2087 this->dk[0] = 2.*M_PI / this->params.boxsize[0];
2088 this->dk[1] = 2.*M_PI / this->params.boxsize[1];
2089 this->dk[2] = 2.*M_PI / this->params.boxsize[2];
2090
2091 // Calculate mesh volume and mesh grid cell volume.
2092 this->vol = this->params.volume;
2093 this->vol_cell = this->vol / double(this->params.nmesh);
2094
2095 // Set up FFTW plans.
2096 if (plan_ini) {
2097 this->twopt_3d = fftw_alloc_complex(this->params.nmesh);
2098#ifdef TRV_USE_HIP
2099 if (trvs::is_gpu_enabled()) {
2100 HIP_EXEC(hipMalloc(
2101 &this->d_twopt_3d, sizeof(fft_double_complex) * this->params.nmesh
2102 ));
2103 // trvs::gbytesMemGPU +=
2104 // trvs::size_in_gb<fft_double_complex>(this->params.nmesh);
2105 // trvs::update_maxmem(trvs::is_gpu_enabled());
2106 }
2107#endif // TRV_USE_HIP
2108
2109 trvs::count_cgrid += 1;
2110 trvs::count_grid += 1;
2114
2115#if defined(TRV_USE_OMP) && defined(TRV_USE_FFTWOMP)
2116 fftw_plan_with_nthreads(omp_get_max_threads());
2117#endif // TRV_USE_OMP && TRV_USE_FFTWOMP
2118
2119 // Initialise GPU context (effective when in GPU mode).
2120 std::vector<int> gpus = trvs::get_gpu_ids();
2121 if (trvs::is_gpu_enabled()) {
2122#ifdef _CUDA_STREAM
2123 CUDA_EXEC(cudaStreamCreate(&this->custream));
2124#endif // _CUDA_STREAM
2125 }
2126
2127 if (trvs::is_gpu_enabled()) {
2128#if defined(TRV_USE_HIP)
2129 HIPFFT_EXEC(hipfftPlan3d(
2130 &this->inv_transform_gpu,
2131 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
2132 HIPFFT_Z2Z
2133 ));
2134 trvs::gbytesMemGPU += trvs::worksize_in_gb(
2135 this->inv_transform_gpu,
2136 HIPFFT_Z2Z,
2137 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
2138 gpus
2139 );
2141#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
2142 CUFFT_EXEC(cufftCreate(&this->inv_transform_gpu));
2143
2144 #ifdef _CUDA_STREAM
2145 CUFFT_EXEC(cufftSetStream(this->inv_transform_gpu, this->custream));
2146 #endif // _CUDA_STREAM
2147
2148 if (!trvs::is_gpu_single()) {
2149 CUFFT_EXEC(cufftXtSetGPUs(
2150 this->inv_transform_gpu, gpus.size(), gpus.data()
2151 ));
2152 }
2153
2154 std::size_t workspace_sizes[gpus.size()];
2155 CUFFT_EXEC(cufftMakePlan3d(
2156 this->inv_transform_gpu,
2157 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
2158 CUFFT_Z2Z,
2159 workspace_sizes
2160 ));
2161
2162 if (trvs::is_gpu_single()) {
2163 CUDA_EXEC(cudaMalloc(
2164 &this->d_twopt_3d, sizeof(fft_double_complex) * this->params.nmesh
2165 ));
2166 } else {
2167 CUFFT_EXEC(cufftXtMalloc(
2168 this->inv_transform_gpu, &this->twopt_3d_desc,
2169 CUFFT_XT_FORMAT_INPLACE_SHUFFLED
2170 ));
2171 }
2172 trvs::gbytesMemGPU += trvs::worksize_in_gb(
2173 this->inv_transform_gpu,
2174 CUFFT_Z2Z,
2175 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
2176 workspace_sizes,
2177 gpus
2178 );
2180#endif // TRV_USE_HIP
2181 } else {
2182 this->inv_transform = fftw_plan_dft_3d(
2183 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
2184 this->twopt_3d, this->twopt_3d,
2185 FFTW_BACKWARD, this->params.fftw_planner_flag
2186 );
2187 }
2188
2189 this->plan_ini = true;
2190 }
2191}
2192
2194 if (this->alias_ini) {
2195 trvs::count_rgrid -= 1;
2196 trvs::count_grid -= .5;
2197 trvs::gbytesMem -= trvs::size_in_gb<double>(this->params.nmesh);
2198 }
2199
2200 if (this->plan_ini) {
2201 if (trvs::is_gpu_enabled()) {
2202 std::vector<int> gpus = trvs::get_gpu_ids();
2203 std::vector<std::size_t> workspace_sizes(gpus.size());
2204#if defined(TRV_USE_HIP)
2205 HIP_EXEC(hipFree(this->d_twopt_3d));
2206 // trvs::gbytesMemGPU -=
2207 // trvs::size_in_gb<fft_double_complex>(this->params.nmesh);
2208 trvs::gbytesMemGPU -= trvs::worksize_in_gb(
2209 this->inv_transform_gpu,
2210 HIPFFT_Z2Z,
2211 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
2212 gpus
2213 );
2214 HIPFFT_EXEC(hipfftDestroy(this->inv_transform_gpu));
2215#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
2216 if (trvs::is_gpu_single()) {
2217 CUDA_EXEC(cudaFree(this->d_twopt_3d));
2218 } else {
2219 CUFFT_EXEC(cufftXtFree(this->twopt_3d_desc));
2220 }
2221 trvs::gbytesMemGPU -= trvs::worksize_in_gb(
2222 this->inv_transform_gpu,
2223 CUFFT_Z2Z,
2224 this->params.ngrid[0], this->params.ngrid[1], this->params.ngrid[2],
2225 workspace_sizes.data(),
2226 gpus
2227 );
2228 CUFFT_EXEC(cufftDestroy(this->inv_transform_gpu));
2229 #ifdef _CUDA_STREAM
2230 // Destroy CUDA streams.
2231 CUDA_EXEC(cudaStreamDestroy(this->custream));
2232 #endif // _CUDA_STREAM
2233#endif // TRV_USE_HIP
2234 } else {
2235 fftw_destroy_plan(this->inv_transform);
2236 }
2237
2238 if (this->twopt_3d != nullptr) {
2239 fftw_free(this->twopt_3d);
2240 this->twopt_3d = nullptr;
2241 trvs::count_cgrid -= 1;
2242 trvs::count_grid -= 1;
2243 trvs::gbytesMem -= trvs::size_in_gb<fftw_complex>(this->params.nmesh);
2244 }
2245 }
2246}
2247
2249 std::fill(this->nmodes.begin(), this->nmodes.end(), 0);
2250 std::fill(this->npairs.begin(), this->npairs.end(), 0);
2251 std::fill(this->k.begin(), this->k.end(), 0.);
2252 std::fill(this->r.begin(), this->r.end(), 0.);
2253 std::fill(this->sn.begin(), this->sn.end(), 0.);
2254 std::fill(this->pk.begin(), this->pk.end(), 0.);
2255 std::fill(this->xi.begin(), this->xi.end(), 0.);
2256}
2257
2258void FieldStats::resize_stats(int num_bins){
2259 this->nmodes.resize(num_bins);
2260 this->npairs.resize(num_bins);
2261 this->k.resize(num_bins);
2262 this->r.resize(num_bins);
2263 this->sn.resize(num_bins);
2264 this->pk.resize(num_bins);
2265 this->xi.resize(num_bins);
2266}
2267
2268
2269// -----------------------------------------------------------------------
2270// Utilities
2271// -----------------------------------------------------------------------
2272
2273bool FieldStats::if_fields_compatible(
2274 MeshField& field_a, MeshField& field_b
2275) {
2276 bool flag_compatible = true;
2277
2278 // Check if physical dimensions match.
2279 for (int iaxis = 0; iaxis < 3; iaxis++) {
2280 if (
2281 this->params.boxsize[iaxis] != field_a.params.boxsize[iaxis]
2282 || this->params.boxsize[iaxis] != field_b.params.boxsize[iaxis]
2283 || this->params.ngrid[iaxis] != field_a.params.ngrid[iaxis]
2284 || this->params.ngrid[iaxis] != field_b.params.ngrid[iaxis]
2285 ) {
2286 flag_compatible = false;
2287 }
2288 }
2289
2290 // Check if derived physical dimensions match. This is usually
2291 // redundant but parameters may have been altered.
2292 if (
2293 this->params.nmesh != field_a.params.nmesh
2294 || this->params.nmesh != field_b.params.nmesh
2295 || this->params.volume != field_b.params.volume
2296 || this->params.volume != field_b.params.volume
2297 ) {
2298 flag_compatible = false;
2299 }
2300
2301 return flag_compatible;
2302}
2303
2304long long FieldStats::ret_grid_index(int i, int j, int k) {
2305 long long idx_grid =
2306 (i * static_cast<long long>(this->params.ngrid[1]) + j)
2307 * this->params.ngrid[2] + k;
2308 return idx_grid;
2309}
2310
2311void FieldStats::shift_grid_indices_fourier(int& i, int& j, int& k) {
2312 i = (i < this->params.ngrid[0]/2) ? i : i - this->params.ngrid[0];
2313 j = (j < this->params.ngrid[1]/2) ? j : j - this->params.ngrid[1];
2314 k = (k < this->params.ngrid[2]/2) ? k : k - this->params.ngrid[2];
2315}
2316
2318 trv::Binning& binning, const std::string& save_file={}
2319) {
2320 double cellsizes[3];
2321 if (binning.space == "config") {
2322 cellsizes[0] = this->dr[0];
2323 cellsizes[1] = this->dr[1];
2324 cellsizes[2] = this->dr[2];
2325 } else
2326 if (binning.space == "fourier") {
2327 cellsizes[0] = this->dk[0];
2328 cellsizes[1] = this->dk[1];
2329 cellsizes[2] = this->dk[2];
2330 } else {
2331 if (trvs::currTask == 0) {
2333 "Invalid binning space: '%s'.", binning.space.c_str()
2334 );
2335 }
2336 throw trvs::InvalidDataError(
2337 "Invalid binning space: '%s'.", binning.space.c_str()
2338 );
2339 }
2340
2341 // Restrict the mesh grid index ranges.
2342 double b = binning.bin_max;
2343
2344 int lrange_upper[3], rrange_lower[3];
2345 if (binning.space == "config") {
2346 for (int iaxis = 0; iaxis < 3; iaxis++) {
2347 lrange_upper[iaxis] = std::min(
2348 std::ceil(b * this->params.ngrid[iaxis] / this->params.boxsize[iaxis]),
2349 std::ceil(this->params.ngrid[iaxis]/2.)
2350 );
2351 rrange_lower[iaxis] = std::max(
2352 std::floor(
2353 this->params.ngrid[iaxis]
2354 - b * this->params.ngrid[iaxis] / this->params.boxsize[iaxis]
2355 ),
2356 std::floor(this->params.ngrid[iaxis]/2.)
2357 );
2358 }
2359 } else
2360 if (binning.space == "fourier") {
2361 for (int iaxis = 0; iaxis < 3; iaxis++) {
2362 lrange_upper[iaxis] = std::min(
2363 std::ceil(b * this->params.boxsize[iaxis] / (2*M_PI)),
2364 std::ceil(this->params.ngrid[iaxis]/2.)
2365 );
2366 rrange_lower[iaxis] = std::max(
2367 std::floor(
2368 this->params.ngrid[iaxis]
2369 - b * this->params.boxsize[iaxis] / (2*M_PI)
2370 ),
2371 std::floor(this->params.ngrid[iaxis]/2.)
2372 );
2373 }
2374 }
2375
2376 auto generate_range = [](
2377 int lrange_lower, int lrange_upper, int rrange_lower, int rrange_upper
2378 ) {
2379 std::vector<int> range_vector;
2380 if (lrange_upper < rrange_lower) {
2381 for (int i = lrange_lower; i <= lrange_upper; i++) {
2382 range_vector.push_back(i);
2383 }
2384 for (int i = rrange_lower; i <= rrange_upper; i++) {
2385 range_vector.push_back(i);
2386 }
2387 } else {
2388 // This can happen when sampling beyond the Nyquist scale.
2389 for (int i = lrange_lower; i <= rrange_upper; i++) {
2390 range_vector.push_back(i);
2391 }
2392 }
2393 return range_vector;
2394 };
2395
2396 std::vector<int> i_range = generate_range(
2397 0, lrange_upper[0], rrange_lower[0], params.ngrid[0] - 1
2398 );
2399 std::vector<int> j_range = generate_range(
2400 0, lrange_upper[1], rrange_lower[1], params.ngrid[1] - 1
2401 );
2402 std::vector<int> k_range = generate_range(
2403 0, lrange_upper[2], rrange_lower[2], params.ngrid[2] - 1
2404 );
2405
2406 // Record the binned vectors.
2407 trv::BinnedVectors binned_vectors;
2408
2409 binned_vectors.num_bins = binning.num_bins;
2410
2411#ifdef TRV_USE_OMP
2412#pragma omp parallel for collapse(3)
2413#endif // TRV_USE_OMP
2414 for (int i: i_range) {
2415 for (int j: j_range) {
2416 for (int k: k_range) {
2417 double vx = (i < this->params.ngrid[0]/2) ?
2418 i * cellsizes[0] : (i - this->params.ngrid[0]) * cellsizes[0];
2419 double vy = (j < this->params.ngrid[1]/2) ?
2420 j * cellsizes[1] : (j - this->params.ngrid[1]) * cellsizes[1];
2421 double vz = (k < this->params.ngrid[2]/2) ?
2422 k * cellsizes[2] : (k - this->params.ngrid[2]) * cellsizes[2];
2423
2424 double scale = trvm::get_vec3d_magnitude({vx, vy, vz});
2425
2426OMP_CRITICAL
2427{
2428 for (int ibin = 0; ibin < binning.num_bins; ibin++) {
2429 double bin_lower = binning.bin_edges[ibin];
2430 double bin_upper = binning.bin_edges[ibin + 1];
2431 if (bin_lower <= scale && scale < bin_upper) {
2432 binned_vectors.indices.push_back(ibin);
2433 binned_vectors.lower_edges.push_back(bin_lower);
2434 binned_vectors.upper_edges.push_back(bin_upper);
2435 binned_vectors.vecx.push_back(vx);
2436 binned_vectors.vecy.push_back(vy);
2437 binned_vectors.vecz.push_back(vz);
2438 binned_vectors.count++;
2439 break;
2440 }
2441 }
2442}
2443 }
2444 }
2445 }
2446
2447 trvs::gbytesMem += trvs::size_in_gb<double>(6*binned_vectors.count);
2449
2450 // Sort the binned vectors.
2451 trv::BinnedVectors binned_vectors_sorted;
2452
2453 binned_vectors_sorted.count = binned_vectors.count;
2454 binned_vectors_sorted.num_bins = binned_vectors.num_bins;
2455
2456 binned_vectors_sorted.indices.resize(binned_vectors.count);
2457 binned_vectors_sorted.lower_edges.resize(binned_vectors.count);
2458 binned_vectors_sorted.upper_edges.resize(binned_vectors.count);
2459 binned_vectors_sorted.vecx.resize(binned_vectors.count);
2460 binned_vectors_sorted.vecy.resize(binned_vectors.count);
2461 binned_vectors_sorted.vecz.resize(binned_vectors.count);
2462
2463 trvs::gbytesMem += trvs::size_in_gb<double>(6*binned_vectors.count);
2465
2466 std::vector<int> indices_sorted =
2468
2469 trvs::gbytesMem += trvs::size_in_gb<int>(binned_vectors.count);
2471
2472#ifdef TRV_USE_OMP
2473#pragma omp parallel for
2474#endif // TRV_USE_OMP
2475 for (int i = 0; i < binned_vectors.count; i++) {
2476 int idx = indices_sorted[i];
2477 binned_vectors_sorted.indices[i] = binned_vectors.indices[idx];
2478 binned_vectors_sorted.lower_edges[i] = binned_vectors.lower_edges[idx];
2479 binned_vectors_sorted.upper_edges[i] = binned_vectors.upper_edges[idx];
2480 binned_vectors_sorted.vecx[i] = binned_vectors.vecx[idx];
2481 binned_vectors_sorted.vecy[i] = binned_vectors.vecy[idx];
2482 binned_vectors_sorted.vecz[i] = binned_vectors.vecz[idx];
2483 }
2484
2485 // Save the binned vectors.
2486 if (save_file != "") {
2487 std::FILE* save_fileptr = std::fopen(save_file.c_str(), "w");
2489 save_fileptr, this->params, binned_vectors_sorted
2490 );
2491 std::fclose(save_fileptr);
2492
2493 if (trvs::currTask == 0) {
2495 "Check binned-vectors file for reference: %s", save_file.c_str()
2496 );
2497 }
2498 }
2499
2500 trvs::gbytesMem -= trvs::size_in_gb<double>(2*6*binned_vectors.count);
2501 trvs::gbytesMem -= trvs::size_in_gb<int>(binned_vectors.count);
2502
2503 return binned_vectors_sorted;
2504}
2505
2506
2507// -----------------------------------------------------------------------
2508// Binned statistics
2509// -----------------------------------------------------------------------
2510
2512 MeshField& field_a, MeshField& field_b, std::complex<double> shotnoise_amp,
2513 int ell, int m, trv::Binning& kbinning
2514) {
2515 this->resize_stats(kbinning.num_bins);
2516
2517 // Check mesh fields compatibility and reuse methods of the first mesh field.
2518 if (!this->if_fields_compatible(field_a, field_b)) {
2519 if (trvs::currTask == 0) {
2520 trvs::logger.error(
2521 "Input mesh fields have incompatible physical properties."
2522 );
2523 }
2525 "Input mesh fields have incompatible physical properties."
2526 );
2527 }
2528
2529 auto ret_grid_wavevector = [&field_a](int i, int j, int k, double kvec[3]) {
2530 field_a.get_grid_wavevector(i, j, k, kvec);
2531 };
2532
2533 this->compute_shotnoise_aliasing();
2534
2535 std::function<double(int, int, int)> calc_shotnoise_aliasing = [this](
2536 int i, int j, int k
2537 ) {
2538 long long idx_grid = ret_grid_index(i, j, k);
2539 return this->alias_sn[idx_grid];
2540 };
2541
2542 std::function<double(int, int, int)> calc_win_pk, calc_win_sn;
2543 int assignment_order = this->params.assignment_order;
2544 if (this->params.interlace == "true") {
2545 calc_win_pk = [&field_a, &field_b, &assignment_order](
2546 int i, int j, int k
2547 ) {
2548 return
2549 field_a.calc_assignment_window_in_fourier(i, j, k, assignment_order)
2550 * field_b.calc_assignment_window_in_fourier(i, j, k, assignment_order);
2551 };
2552 calc_win_sn = calc_win_pk;
2553 } else
2554 if (this->params.interlace == "false") {
2555#ifndef DBG_FLAG_NOAC
2556 calc_win_sn = calc_shotnoise_aliasing;
2557 calc_win_pk = calc_win_sn;
2558#else // !DBG_FLAG_NOAC
2559 calc_win_pk = [&field_a, &field_b, &assignment_order](
2560 int i, int j, int k
2561 ) {
2562 return
2563 field_a.calc_assignment_window_in_fourier(i, j, k, assignment_order)
2564 * field_b.calc_assignment_window_in_fourier(i, j, k, assignment_order);
2565 };
2566 calc_win_sn = calc_shotnoise_aliasing;
2567#endif // !DBG_FLAG_NOAC
2568 }
2569
2570 // Perform fine binning.
2571 // NOTE: Dynamically allocate owing to size.
2572 // CAVEAT: Discretionary choices such that 0.0 < k < 10.0.
2573 const int n_sample = 1e6;
2574 const double dk_sample = 1.e-5;
2575 if (kbinning.bin_max > n_sample * dk_sample) {
2576 if (trvs::currTask == 0) {
2577 trvs::logger.warn(
2578 "Input bin range exceeds sampled range. "
2579 "Statistics in bins beyond sampled range are uncomputed."
2580 );
2581 }
2582 }
2583
2584 int* nmodes_sample = new int[n_sample];
2585 double* k_sample = new double[n_sample];
2586 double* pk_sample_real = new double[n_sample];
2587 double* pk_sample_imag = new double[n_sample];
2588 double* sn_sample_real = new double[n_sample];
2589 double* sn_sample_imag = new double[n_sample];
2590 std::complex<double>* pk_sample = new std::complex<double>[n_sample];
2591 std::complex<double>* sn_sample = new std::complex<double>[n_sample];
2592#ifdef TRV_USE_OMP
2593#pragma omp simd
2594#endif // TRV_USE_OMP
2595 for (int i = 0; i < n_sample; i++) {
2596 nmodes_sample[i] = 0;
2597 k_sample[i] = 0.;
2598 pk_sample_real[i] = 0.;
2599 pk_sample_imag[i] = 0.;
2600 sn_sample_real[i] = 0.;
2601 sn_sample_imag[i] = 0.;
2602 }
2603
2604 this->reset_stats();
2605
2606#ifdef TRV_USE_OMP
2607#pragma omp parallel for collapse(3)
2608#endif // TRV_USE_OMP
2609 for (int i = 0; i < this->params.ngrid[0]; i++) {
2610 for (int j = 0; j < this->params.ngrid[1]; j++) {
2611 for (int k = 0; k < this->params.ngrid[2]; k++) {
2612 long long idx_grid = ret_grid_index(i, j, k);
2613
2614 double kv[3];
2615 ret_grid_wavevector(i, j, k, kv);
2616
2617 double k_ = trvm::get_vec3d_magnitude(kv);
2618
2619 int idx_k = int(k_ / dk_sample);
2620 if (0 <= idx_k && idx_k < n_sample) {
2621 std::complex<double> fa(field_a[idx_grid][0], field_a[idx_grid][1]);
2622 std::complex<double> fb(field_b[idx_grid][0], field_b[idx_grid][1]);
2623
2624 std::complex<double> pk_mode = fa * std::conj(fb);
2625 std::complex<double> sn_mode =
2626 shotnoise_amp * calc_shotnoise_aliasing(i, j, k);
2627
2628 // Apply grid corrections.
2629 double win_pk = calc_win_pk(i, j, k);
2630 double win_sn = calc_win_sn(i, j, k);
2631
2632 pk_mode /= win_pk;
2633 sn_mode /= win_sn;
2634
2635 // Weight by reduced spherical harmonics.
2636 std::complex<double> ylm = trvm::SphericalHarmonicCalculator::
2638
2639 pk_mode *= ylm;
2640 sn_mode *= ylm;
2641
2642 double pk_mode_real = pk_mode.real();
2643 double pk_mode_imag = pk_mode.imag();
2644 double sn_mode_real = sn_mode.real();
2645 double sn_mode_imag = sn_mode.imag();
2646
2647 // Add contribution.
2648OMP_ATOMIC
2649 nmodes_sample[idx_k]++;
2650OMP_ATOMIC
2651 k_sample[idx_k] += k_;
2652OMP_ATOMIC
2653 pk_sample_real[idx_k] += pk_mode_real;
2654OMP_ATOMIC
2655 pk_sample_imag[idx_k] += pk_mode_imag;
2656OMP_ATOMIC
2657 sn_sample_real[idx_k] += sn_mode_real;
2658OMP_ATOMIC
2659 sn_sample_imag[idx_k] += sn_mode_imag;
2660 }
2661 }
2662 }
2663 }
2664
2665 for (int i = 0; i < n_sample; i++) {
2666 pk_sample[i] = pk_sample_real[i] + trvm::M_I * pk_sample_imag[i];
2667 sn_sample[i] = sn_sample_real[i] + trvm::M_I * sn_sample_imag[i];
2668 }
2669
2670 // Perform binning.
2671 for (int ibin = 0; ibin < kbinning.num_bins; ibin++) {
2672 double k_lower = kbinning.bin_edges[ibin];
2673 double k_upper = kbinning.bin_edges[ibin + 1];
2674 for (int i = 0; i < n_sample; i++) {
2675 double k_ = i * dk_sample;
2676 if (k_lower <= k_ && k_ < k_upper) {
2677 this->nmodes[ibin] += nmodes_sample[i];
2678 this->k[ibin] += k_sample[i];
2679 this->pk[ibin] += pk_sample[i];
2680 this->sn[ibin] += sn_sample[i];
2681 }
2682 }
2683
2684 if (this->nmodes[ibin] != 0) {
2685 this->k[ibin] /= double(this->nmodes[ibin]);
2686 this->pk[ibin] /= double(this->nmodes[ibin]);
2687 this->sn[ibin] /= double(this->nmodes[ibin]);
2688 } else {
2689 this->k[ibin] = kbinning.bin_centres[ibin];
2690 this->pk[ibin] = 0.;
2691 this->sn[ibin] = 0.;
2692 }
2693 }
2694
2695 delete[] nmodes_sample;
2696 delete[] k_sample;
2697 delete[] pk_sample_real;
2698 delete[] pk_sample_imag;
2699 delete[] sn_sample_real;
2700 delete[] sn_sample_imag;
2701 delete[] pk_sample;
2702 delete[] sn_sample;
2703}
2704
2706 MeshField& field_a, MeshField& field_b, std::complex<double> shotnoise_amp,
2707 int ell, int m, trv::Binning& rbinning
2708) {
2709 this->resize_stats(rbinning.num_bins);
2710
2711 // Check mesh fields compatibility and reuse properties and methods of
2712 // the first mesh field.
2713 if (!this->if_fields_compatible(field_a, field_b)) {
2714 if (trvs::currTask == 0) {
2715 trvs::logger.error(
2716 "Input mesh fields have incompatible physical properties."
2717 );
2718 }
2720 "Input mesh fields have incompatible physical properties."
2721 );
2722 }
2723
2724 auto ret_grid_pos_vector = [&field_a](int i, int j, int k, double rvec[3]) {
2725 field_a.get_grid_pos_vector(i, j, k, rvec);
2726 };
2727
2728 this->compute_shotnoise_aliasing();
2729
2730 std::function<double(int, int, int)> calc_shotnoise_aliasing = [this](
2731 int i, int j, int k
2732 ) {
2733 long long idx_grid = ret_grid_index(i, j, k);
2734 return this->alias_sn[idx_grid];
2735 };
2736
2737 std::function<double(int, int, int)> calc_win_pk, calc_win_sn;
2738 int assignment_order = this->params.assignment_order;
2739 if (this->params.interlace == "true") {
2740 calc_win_pk = [&field_a, &field_b, &assignment_order](
2741 int i, int j, int k
2742 ) {
2743 return
2744 field_a.calc_assignment_window_in_fourier(i, j, k, assignment_order)
2745 * field_b.calc_assignment_window_in_fourier(i, j, k, assignment_order);
2746 };
2747 calc_win_sn = calc_win_pk;
2748 } else
2749 if (this->params.interlace == "false") {
2750#ifndef DBG_FLAG_NOAC
2751 calc_win_sn = calc_shotnoise_aliasing;
2752 calc_win_pk = calc_win_sn;
2753#else // !DBG_FLAG_NOAC
2754 calc_win_pk = [&field_a, &field_b, &assignment_order](
2755 int i, int j, int k
2756 ) {
2757 return
2758 field_a.calc_assignment_window_in_fourier(i, j, k, assignment_order)
2759 * field_b.calc_assignment_window_in_fourier(i, j, k, assignment_order);
2760 };
2761 calc_win_sn = calc_shotnoise_aliasing;
2762#endif // !DBG_FLAG_NOAC
2763 }
2764
2765 // Compute shot noise--subtracted mode powers on mesh grids.
2766 for (int i = 0; i < this->params.ngrid[0]; i++) {
2767 for (int j = 0; j < this->params.ngrid[1]; j++) {
2768 for (int k = 0; k < this->params.ngrid[2]; k++) {
2769 long long idx_grid = ret_grid_index(i, j, k);
2770
2771 std::complex<double> fa(field_a[idx_grid][0], field_a[idx_grid][1]);
2772 std::complex<double> fb(field_b[idx_grid][0], field_b[idx_grid][1]);
2773
2774 std::complex<double> pk_mode = fa * std::conj(fb);
2775 std::complex<double> sn_mode =
2776 shotnoise_amp * calc_shotnoise_aliasing(i, j, k);
2777
2778 // Apply grid corrections.
2779 double win_pk = calc_win_pk(i, j, k);
2780 double win_sn = calc_win_sn(i, j, k);
2781
2782 pk_mode /= win_pk;
2783 sn_mode /= win_sn;
2784
2785 pk_mode -= sn_mode;
2786
2787 this->twopt_3d[idx_grid][0] = pk_mode.real() / this->vol;
2788 this->twopt_3d[idx_grid][1] = pk_mode.imag() / this->vol;
2789 }
2790 }
2791 }
2792
2793 // Inverse Fourier transform.
2794 if (trvs::is_gpu_enabled()) {
2795#if defined(TRV_USE_HIP)
2796 trva::copy_complex_array_htod(
2797 this->twopt_3d, this->d_twopt_3d, this->params.nmesh
2798 );
2799 HIPFFT_EXEC(hipfftExecZ2Z(
2800 this->inv_transform_gpu, this->d_twopt_3d, this->d_twopt_3d,
2801 HIPFFT_BACKWARD
2802 ));
2803 trva::copy_complex_array_dtoh(
2804 this->d_twopt_3d, this->twopt_3d, this->params.nmesh
2805 );
2806#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
2807 if (trvs::is_gpu_single()) {
2808 trva::copy_complex_array_htod(
2809 this->twopt_3d, this->d_twopt_3d, this->params.nmesh
2810 );
2811 CUFFT_EXEC(cufftExecZ2Z(
2812 this->inv_transform_gpu, this->d_twopt_3d, this->d_twopt_3d,
2813 CUFFT_INVERSE
2814 ));
2815 trva::copy_complex_array_dtoh(
2816 this->d_twopt_3d, this->twopt_3d, this->params.nmesh
2817 );
2818 } else {
2819 trva::copy_complex_array_htod_mgpu(
2820 this->inv_transform_gpu, this->twopt_3d_desc, this->twopt_3d
2821 );
2822 CUFFT_EXEC(cufftXtExecDescriptor(
2823 this->inv_transform_gpu, this->twopt_3d_desc, this->twopt_3d_desc,
2824 CUFFT_INVERSE
2825 ));
2826 trva::copy_complex_array_dtoh_mgpu(
2827 this->inv_transform_gpu, this->twopt_3d_desc, this->twopt_3d
2828 );
2829 }
2830#endif // TRV_USE_HIP
2831 } else {
2832 if (this->plan_ini) {
2833 fftw_execute(this->inv_transform);
2834 } else {
2835 fftw_execute_dft(field_a.inv_transform, this->twopt_3d, this->twopt_3d);
2836 }
2837 }
2838 trvs::count_ifft += 1;
2839
2840 // Perform fine binning.
2841 // NOTE: Dynamically allocate owing to size.
2842 // CAVEAT: Discretionary choices such that 0 < r < 100k.
2843 const int n_sample = 1e6;
2844 const double dr_sample = 1.e-1;
2845 if (rbinning.bin_max > n_sample * dr_sample) {
2846 if (trvs::currTask == 0) {
2847 trvs::logger.warn(
2848 "Input bin range exceeds sampled range. "
2849 "Statistics in bins beyond sampled range are uncomputed."
2850 );
2851 }
2852 }
2853
2854 int* npairs_sample = new int[n_sample];
2855 double* r_sample = new double[n_sample];
2856 double* xi_sample_real = new double[n_sample];
2857 double* xi_sample_imag = new double[n_sample];
2858 std::complex<double>* xi_sample = new std::complex<double>[n_sample];
2859#ifdef TRV_USE_OMP
2860#pragma omp simd
2861#endif // TRV_USE_OMP
2862 for (int i = 0; i < n_sample; i++) {
2863 npairs_sample[i] = 0;
2864 r_sample[i] = 0.;
2865 xi_sample_real[i] = 0.;
2866 xi_sample_imag[i] = 0.;
2867 }
2868
2869 this->reset_stats();
2870
2871#ifdef TRV_USE_OMP
2872#pragma omp parallel for collapse(3)
2873#endif // TRV_USE_OMP
2874 for (int i = 0; i < this->params.ngrid[0]; i++) {
2875 for (int j = 0; j < this->params.ngrid[1]; j++) {
2876 for (int k = 0; k < this->params.ngrid[2]; k++) {
2877 long long idx_grid = ret_grid_index(i, j, k);
2878
2879 double rv[3];
2880 ret_grid_pos_vector(i, j, k, rv);
2881
2882 double r_ = trvm::get_vec3d_magnitude(rv);
2883
2884 int idx_r = int(r_ / dr_sample);
2885 if (0 <= idx_r && idx_r < n_sample) {
2886 std::complex<double> xi_pair(
2887 this->twopt_3d[idx_grid][0], this->twopt_3d[idx_grid][1]
2888 );
2889
2890 // Weight by reduced spherical harmonics.
2891 std::complex<double> ylm = trvm::SphericalHarmonicCalculator::
2893
2894 xi_pair *= ylm;
2895
2896 double xi_pair_real = xi_pair.real();
2897 double xi_pair_imag = xi_pair.imag();
2898
2899 // Add contribution.
2900OMP_ATOMIC
2901 npairs_sample[idx_r]++;
2902OMP_ATOMIC
2903 r_sample[idx_r] += r_;
2904OMP_ATOMIC
2905 xi_sample_real[idx_r] += xi_pair_real;
2906OMP_ATOMIC
2907 xi_sample_imag[idx_r] += xi_pair_imag;
2908 }
2909 }
2910 }
2911 }
2912
2913 for (int i = 0; i < n_sample; i++) {
2914 xi_sample[i] = xi_sample_real[i] + trvm::M_I * xi_sample_imag[i];
2915 }
2916
2917 // Perform binning.
2918 for (int ibin = 0; ibin < rbinning.num_bins; ibin++) {
2919 double r_lower = rbinning.bin_edges[ibin];
2920 double r_upper = rbinning.bin_edges[ibin + 1];
2921 for (int i = 0; i < n_sample; i++) {
2922 double r_ = i * dr_sample;
2923 if (r_lower <= r_ && r_ < r_upper) {
2924 this->npairs[ibin] += npairs_sample[i];
2925 this->r[ibin] += r_sample[i];
2926 this->xi[ibin] += xi_sample[i];
2927 }
2928 }
2929
2930 if (this->npairs[ibin] != 0) {
2931 this->r[ibin] /= double(this->npairs[ibin]);
2932 this->xi[ibin] /= double(this->npairs[ibin]);
2933 // this->npairs[ibin] /= 2; // reality condition
2934 } else {
2935 this->r[ibin] = rbinning.bin_centres[ibin];
2936 this->xi[ibin] = 0.;
2937 }
2938 }
2939
2940 delete[] npairs_sample;
2941 delete[] r_sample;
2942 delete[] xi_sample_real;
2943 delete[] xi_sample_imag;
2944 delete[] xi_sample;
2945}
2946
2948 MeshField& field_a, MeshField& field_b,
2949 std::vector< std::complex<double> >& ylm_a,
2950 std::vector< std::complex<double> >& ylm_b,
2951 std::complex<double> shotnoise_amp,
2952 trv::Binning& rbinning
2953) {
2954 if (trvs::currTask == 0) {
2955 trvs::logger.debug("Computing uncoupled shot noise for 3PCF.");
2956 }
2957
2958 this->resize_stats(rbinning.num_bins);
2959
2960 // Check mesh fields compatibility and reuse properties and methods of
2961 // the first mesh field.
2962 if (!this->if_fields_compatible(field_a, field_b)) {
2963 if (trvs::currTask == 0) {
2964 trvs::logger.error(
2965 "Input mesh fields have incompatible physical properties."
2966 );
2967 }
2969 "Input mesh fields have incompatible physical properties."
2970 );
2971 }
2972
2973 auto ret_grid_pos_vector = [&field_a](int i, int j, int k, double rvec[3]) {
2974 field_a.get_grid_pos_vector(i, j, k, rvec);
2975 };
2976
2977 this->compute_shotnoise_aliasing();
2978
2979 std::function<double(int, int, int)> calc_shotnoise_aliasing = [this](
2980 int i, int j, int k
2981 ) {
2982 long long idx_grid = ret_grid_index(i, j, k);
2983 return this->alias_sn[idx_grid];
2984 };
2985
2986 std::function<double(int, int, int)> calc_win_pk, calc_win_sn;
2987 int assignment_order = this->params.assignment_order;
2988 if (this->params.interlace == "true") {
2989 calc_win_pk = [&field_a, &field_b, &assignment_order](
2990 int i, int j, int k
2991 ) {
2992 return
2993 field_a.calc_assignment_window_in_fourier(i, j, k, assignment_order)
2994 * field_b.calc_assignment_window_in_fourier(i, j, k, assignment_order);
2995 };
2996 calc_win_sn = calc_win_pk;
2997 } else
2998 if (this->params.interlace == "false") {
2999#ifndef DBG_FLAG_NOAC
3000 calc_win_sn = calc_shotnoise_aliasing;
3001 calc_win_pk = calc_win_sn;
3002#else // !DBG_FLAG_NOAC
3003 calc_win_pk = [&field_a, &field_b, &assignment_order](
3004 int i, int j, int k
3005 ) {
3006 return
3007 field_a.calc_assignment_window_in_fourier(i, j, k, assignment_order)
3008 * field_b.calc_assignment_window_in_fourier(i, j, k, assignment_order);
3009 };
3010 calc_win_sn = calc_shotnoise_aliasing;
3011#endif // !DBG_FLAG_NOAC
3012 }
3013
3014 // Compute meshed statistics.
3015#ifdef TRV_USE_OMP
3016#pragma omp parallel for collapse(3)
3017#endif // TRV_USE_OMP
3018 for (int i = 0; i < this->params.ngrid[0]; i++) {
3019 for (int j = 0; j < this->params.ngrid[1]; j++) {
3020 for (int k = 0; k < this->params.ngrid[2]; k++) {
3021 long long idx_grid = ret_grid_index(i, j, k);
3022
3023 std::complex<double> fa(field_a[idx_grid][0], field_a[idx_grid][1]);
3024 std::complex<double> fb(field_b[idx_grid][0], field_b[idx_grid][1]);
3025
3026 std::complex<double> pk_mode = fa * std::conj(fb);
3027 std::complex<double> sn_mode =
3028 shotnoise_amp * calc_shotnoise_aliasing(i, j, k);
3029
3030 // Apply grid corrections.
3031 double win_pk = calc_win_pk(i, j, k);
3032 double win_sn = calc_win_sn(i, j, k);
3033
3034 pk_mode /= win_pk;
3035 sn_mode /= win_sn;
3036
3037 pk_mode -= sn_mode;
3038
3039 this->twopt_3d[idx_grid][0] = pk_mode.real() / this->vol;
3040 this->twopt_3d[idx_grid][1] = pk_mode.imag() / this->vol;
3041 }
3042 }
3043 }
3044
3045 // Inverse Fourier transform.
3046 if (trvs::is_gpu_enabled()) {
3047#if defined(TRV_USE_HIP)
3048 trva::copy_complex_array_htod(
3049 this->twopt_3d, this->d_twopt_3d, this->params.nmesh
3050 );
3051 HIPFFT_EXEC(hipfftExecZ2Z(
3052 this->inv_transform_gpu, this->d_twopt_3d, this->d_twopt_3d,
3053 HIPFFT_BACKWARD
3054 ));
3055 trva::copy_complex_array_dtoh(
3056 this->d_twopt_3d, this->twopt_3d, this->params.nmesh
3057 );
3058#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
3059 if (trvs::is_gpu_single()) {
3060 trva::copy_complex_array_htod(
3061 this->twopt_3d, this->d_twopt_3d, this->params.nmesh
3062 );
3063 CUFFT_EXEC(cufftExecZ2Z(
3064 this->inv_transform_gpu, this->d_twopt_3d, this->d_twopt_3d,
3065 CUFFT_INVERSE
3066 ));
3067 trva::copy_complex_array_dtoh(
3068 this->d_twopt_3d, this->twopt_3d, this->params.nmesh
3069 );
3070 } else {
3071 trva::copy_complex_array_htod_mgpu(
3072 this->inv_transform_gpu, this->twopt_3d_desc, this->twopt_3d
3073 );
3074 CUFFT_EXEC(cufftXtExecDescriptor(
3075 this->inv_transform_gpu, this->twopt_3d_desc, this->twopt_3d_desc,
3076 CUFFT_INVERSE
3077 ));
3078 trva::copy_complex_array_dtoh_mgpu(
3079 this->inv_transform_gpu, this->twopt_3d_desc, this->twopt_3d
3080 );
3081 }
3082#endif // TRV_USE_HIP
3083 } else {
3084 if (this->plan_ini) {
3085 fftw_execute(this->inv_transform);
3086 } else {
3087 fftw_execute_dft(field_a.inv_transform, this->twopt_3d, this->twopt_3d);
3088 }
3089 }
3090 trvs::count_ifft += 1;
3091
3092 // Perform fine binning.
3093 // NOTE: Dynamically allocate owing to size.
3094 // CAVEAT: Discretionary choices such that 0 < r < 100k.
3095 const int n_sample = 1e5;
3096 const double dr_sample = 1.;
3097
3098 int* npairs_sample = new int[n_sample];
3099 double* r_sample = new double[n_sample];
3100 double* xi_sample_real = new double[n_sample];
3101 double* xi_sample_imag = new double[n_sample];
3102 std::complex<double>* xi_sample = new std::complex<double>[n_sample];
3103#ifdef TRV_USE_OMP
3104#pragma omp simd
3105#endif // TRV_USE_OMP
3106 for (int i = 0; i < n_sample; i++) {
3107 npairs_sample[i] = 0;
3108 r_sample[i] = 0.;
3109 xi_sample_real[i] = 0.;
3110 xi_sample_imag[i] = 0.;
3111 }
3112
3113 this->reset_stats();
3114
3115#ifdef TRV_USE_OMP
3116#pragma omp parallel for collapse(3)
3117#endif // TRV_USE_OMP
3118 for (int i = 0; i < this->params.ngrid[0]; i++) {
3119 for (int j = 0; j < this->params.ngrid[1]; j++) {
3120 for (int k = 0; k < this->params.ngrid[2]; k++) {
3121 long long idx_grid = ret_grid_index(i, j, k);
3122
3123 double rv[3];
3124 ret_grid_pos_vector(i, j, k, rv);
3125
3126 double r_ = trvm::get_vec3d_magnitude(rv);
3127
3128 int idx_r = int(r_ / dr_sample);
3129 if (0 <= idx_r && idx_r < n_sample) {
3130 std::complex<double> xi_pair(
3131 this->twopt_3d[idx_grid][0], this->twopt_3d[idx_grid][1]
3132 );
3133
3134 // Weight by reduced spherical harmonics.
3135 xi_pair *= ylm_a[idx_grid] * ylm_b[idx_grid];
3136
3137 double xi_pair_real = xi_pair.real();
3138 double xi_pair_imag = xi_pair.imag();
3139
3140 // Add contribution.
3141OMP_ATOMIC
3142 npairs_sample[idx_r]++;
3143OMP_ATOMIC
3144 r_sample[idx_r] += r_;
3145OMP_ATOMIC
3146 xi_sample_real[idx_r] += xi_pair_real;
3147OMP_ATOMIC
3148 xi_sample_imag[idx_r] += xi_pair_imag;
3149 }
3150 }
3151 }
3152 }
3153
3154 for (int i = 0; i < n_sample; i++) {
3155 xi_sample[i] = xi_sample_real[i] + trvm::M_I * xi_sample_imag[i];
3156 }
3157
3158 // Perform binning.
3159 for (int ibin = 0; ibin < rbinning.num_bins; ibin++) {
3160 double r_lower = rbinning.bin_edges[ibin];
3161 double r_upper = rbinning.bin_edges[ibin + 1];
3162 for (int i = 0; i < n_sample; i++) {
3163 double r_ = i * dr_sample;
3164 if (r_lower <= r_ && r_ < r_upper) {
3165 this->npairs[ibin] += npairs_sample[i];
3166 this->r[ibin] += r_sample[i];
3167 this->xi[ibin] += xi_sample[i];
3168 }
3169 }
3170
3171 if (this->npairs[ibin] != 0) {
3172 this->r[ibin] /= double(this->npairs[ibin]);
3173 this->xi[ibin] /= double(this->npairs[ibin]);
3174 } else {
3175 this->r[ibin] = rbinning.bin_centres[ibin];
3176 this->xi[ibin] = 0.;
3177 }
3178 }
3179
3180 // Apply normalisation factors.
3181 double norm_factors = 1 / this->vol_cell
3182 * std::pow(-1, this->params.ell1 + this->params.ell2);
3183
3184 for (int ibin = 0; ibin < rbinning.num_bins; ibin++) {
3185 if (this->npairs[ibin] != 0) {
3186 this->xi[ibin] *= norm_factors / double(this->npairs[ibin]);
3187 // this->npairs[ibin] /= 2; // reality condition
3188 }
3189 }
3190
3191 delete[] npairs_sample;
3192 delete[] r_sample;
3193 delete[] xi_sample_real;
3194 delete[] xi_sample_imag;
3195 delete[] xi_sample;
3196}
3197
3198std::complex<double> \
3199FieldStats::compute_uncoupled_shotnoise_for_bispec_per_bin(
3200 MeshField& field_a, MeshField& field_b,
3201 std::vector< std::complex<double> >& ylm_a,
3202 std::vector< std::complex<double> >& ylm_b,
3204 std::complex<double> shotnoise_amp,
3205 double k_a, double k_b
3206) {
3207 if (trvs::currTask == 0) {
3208 trvs::logger.debug(
3209 "Computing uncoupled shot noise for bispectrum "
3210 "at wavenumbers (%f, %f).",
3211 k_a, k_b
3212 );
3213 }
3214
3215 // Check mesh fields compatibility and reuse properties and methods of
3216 // the first mesh field.
3217 if (!this->if_fields_compatible(field_a, field_b)) {
3218 if (trvs::currTask == 0) {
3219 trvs::logger.error(
3220 "Input mesh fields have incompatible physical properties."
3221 );
3222 }
3224 "Input mesh fields have incompatible physical properties."
3225 );
3226 }
3227
3228 auto ret_grid_pos_vector = [&field_a](int i, int j, int k, double rvec[3]) {
3229 field_a.get_grid_pos_vector(i, j, k, rvec);
3230 };
3231
3232 this->compute_shotnoise_aliasing();
3233
3234 std::function<double(int, int, int)> calc_shotnoise_aliasing = [this](
3235 int i, int j, int k
3236 ) {
3237 long long idx_grid = ret_grid_index(i, j, k);
3238 return this->alias_sn[idx_grid];
3239 };
3240
3241 std::function<double(int, int, int)> calc_win_pk, calc_win_sn;
3242 int assignment_order = this->params.assignment_order;
3243 if (this->params.interlace == "true") {
3244 calc_win_pk = [&field_a, &field_b, &assignment_order](
3245 int i, int j, int k
3246 ) {
3247 return
3248 field_a.calc_assignment_window_in_fourier(i, j, k, assignment_order)
3249 * field_b.calc_assignment_window_in_fourier(i, j, k, assignment_order);
3250 };
3251 calc_win_sn = calc_win_pk;
3252 } else
3253 if (this->params.interlace == "false") {
3254#ifndef DBG_FLAG_NOAC
3255 calc_win_sn = calc_shotnoise_aliasing;
3256 calc_win_pk = calc_win_sn;
3257#else // !DBG_FLAG_NOAC
3258 calc_win_pk = [&field_a, &field_b, &assignment_order](
3259 int i, int j, int k
3260 ) {
3261 return
3262 field_a.calc_assignment_window_in_fourier(i, j, k, assignment_order)
3263 * field_b.calc_assignment_window_in_fourier(i, j, k, assignment_order);
3264 };
3265 calc_win_sn = calc_shotnoise_aliasing;
3266#endif // !DBG_FLAG_NOAC
3267 }
3268
3269 // Compute meshed statistics.
3270#ifdef TRV_USE_OMP
3271#pragma omp parallel for collapse(3)
3272#endif // TRV_USE_OMP
3273 for (int i = 0; i < this->params.ngrid[0]; i++) {
3274 for (int j = 0; j < this->params.ngrid[1]; j++) {
3275 for (int k = 0; k < this->params.ngrid[2]; k++) {
3276 long long idx_grid = ret_grid_index(i, j, k);
3277
3278 std::complex<double> fa(field_a[idx_grid][0], field_a[idx_grid][1]);
3279 std::complex<double> fb(field_b[idx_grid][0], field_b[idx_grid][1]);
3280
3281 std::complex<double> pk_mode = fa * std::conj(fb);
3282 std::complex<double> sn_mode =
3283 shotnoise_amp * calc_shotnoise_aliasing(i, j, k);
3284
3285 // Apply grid corrections.
3286 double win_pk = calc_win_pk(i, j, k);
3287 double win_sn = calc_win_sn(i, j, k);
3288
3289 pk_mode /= win_pk;
3290 sn_mode /= win_sn;
3291
3292 pk_mode -= sn_mode;
3293
3294 this->twopt_3d[idx_grid][0] = pk_mode.real() / this->vol;
3295 this->twopt_3d[idx_grid][1] = pk_mode.imag() / this->vol;
3296 }
3297 }
3298 }
3299
3300 // Inverse Fourier transform.
3301 if (trvs::is_gpu_enabled()) {
3302#if defined(TRV_USE_HIP)
3303 trva::copy_complex_array_htod(
3304 this->twopt_3d, this->d_twopt_3d, this->params.nmesh
3305 );
3306 HIPFFT_EXEC(hipfftExecZ2Z(
3307 this->inv_transform_gpu, this->d_twopt_3d, this->d_twopt_3d,
3308 HIPFFT_BACKWARD
3309 ));
3310 trva::copy_complex_array_dtoh(
3311 this->d_twopt_3d, this->twopt_3d, this->params.nmesh
3312 );
3313#elif defined(TRV_USE_CUDA) // !TRV_USE_HIP && TRV_USE_CUDA
3314 if (trvs::is_gpu_single()) {
3315 trva::copy_complex_array_htod(
3316 this->twopt_3d, this->d_twopt_3d, this->params.nmesh
3317 );
3318 CUFFT_EXEC(cufftExecZ2Z(
3319 this->inv_transform_gpu, this->d_twopt_3d, this->d_twopt_3d,
3320 CUFFT_INVERSE
3321 ));
3322 trva::copy_complex_array_dtoh(
3323 this->d_twopt_3d, this->twopt_3d, this->params.nmesh
3324 );
3325 } else {
3326 trva::copy_complex_array_htod_mgpu(
3327 this->inv_transform_gpu, this->twopt_3d_desc, this->twopt_3d
3328 );
3329 CUFFT_EXEC(cufftXtExecDescriptor(
3330 this->inv_transform_gpu, this->twopt_3d_desc, this->twopt_3d_desc,
3331 CUFFT_INVERSE
3332 ));
3333 trva::copy_complex_array_dtoh_mgpu(
3334 this->inv_transform_gpu, this->twopt_3d_desc, this->twopt_3d
3335 );
3336 }
3337#endif // TRV_USE_HIP
3338 } else {
3339 if (this->plan_ini) {
3340 fftw_execute(this->inv_transform);
3341 } else {
3342 fftw_execute_dft(field_a.inv_transform, this->twopt_3d, this->twopt_3d);
3343 }
3344 }
3345 trvs::count_ifft += 1;
3346
3347 // Weight by spherical Bessel functions and harmonics before summing
3348 // over the configuration-space grids.
3349 double S_ij_k_real = 0., S_ij_k_imag = 0.;
3350
3351#ifdef TRV_USE_OMP
3352#pragma omp parallel reduction(+:S_ij_k_real, S_ij_k_imag)
3353#endif // TRV_USE_OMP
3354{
3355 // Create thread-private copies of the spherical Bessel function calculator.
3356 trvm::SphericalBesselCalculator sj_a_thread(sj_a);
3357 trvm::SphericalBesselCalculator sj_b_thread(sj_b);
3358
3359#ifdef TRV_USE_OMP
3360#pragma omp for collapse(3)
3361#endif // TRV_USE_OMP
3362 for (int i = 0; i < this->params.ngrid[0]; i++) {
3363 for (int j = 0; j < this->params.ngrid[1]; j++) {
3364 for (int k = 0; k < this->params.ngrid[2]; k++) {
3365 long long idx_grid = ret_grid_index(i, j, k);
3366
3367 double rv[3];
3368 ret_grid_pos_vector(i, j, k, rv);
3369
3370 double r_ = trvm::get_vec3d_magnitude(rv);
3371
3372 double ja = sj_a_thread.eval(k_a * r_);
3373 double jb = sj_b_thread.eval(k_b * r_);
3374
3375 std::complex<double> S_ij_k_3d(
3376 this->twopt_3d[idx_grid][0], this->twopt_3d[idx_grid][1]
3377 );
3378
3379 S_ij_k_3d *= ja * jb * ylm_a[idx_grid] * ylm_b[idx_grid];
3380
3381 double S_ij_k_3d_real = S_ij_k_3d.real();
3382 double S_ij_k_3d_imag = S_ij_k_3d.imag();
3383
3384 S_ij_k_real += S_ij_k_3d_real;
3385 S_ij_k_imag += S_ij_k_3d_imag;
3386 }
3387 }
3388 }
3389}
3390
3391 std::complex<double> S_ij_k(S_ij_k_real, S_ij_k_imag);
3392
3393 S_ij_k *= this->vol_cell;
3394
3395 return S_ij_k;
3396}
3397
3398
3399// -----------------------------------------------------------------------
3400// Sampling corrections
3401// -----------------------------------------------------------------------
3402
3403std::function<double(int, int, int)> FieldStats::ret_calc_shotnoise_aliasing()
3404{
3405 // If interlacing is enabled, use the isotropic approximation.
3406 if (this->params.interlace == "true") {
3407 return [this](int i, int j, int k) {
3408 return calc_shotnoise_aliasing_interlaced_isoapprox(
3409 i, j, k, this->params.assignment_order
3410 );
3411 };
3412 }
3413
3414 if (this->params.assignment == "ngp") {
3415 return [this](int i, int j, int k) {
3416 return calc_shotnoise_aliasing_ngp(i, j, k);
3417 };
3418 }
3419 if (this->params.assignment == "cic") {
3420 return [this](int i, int j, int k) {
3421 return calc_shotnoise_aliasing_cic(i, j, k);
3422 };
3423 }
3424 if (this->params.assignment == "tsc") {
3425 return [this](int i, int j, int k) {
3426 return calc_shotnoise_aliasing_tsc(i, j, k);
3427 };
3428 }
3429 if (this->params.assignment == "pcs") {
3430 return [this](int i, int j, int k) {
3431 return calc_shotnoise_aliasing_pcs(i, j, k);
3432 };
3433 }
3434
3435 if (trvs::currTask == 0) {
3437 "Invalid assignment scheme: '%s'.", this->params.assignment.c_str()
3438 );
3439 }
3440 throw trvs::InvalidParameterError(
3441 "Invalid assignment scheme: '%s'.", this->params.assignment.c_str()
3442 );
3443}
3444
3445void FieldStats::get_shotnoise_aliasing_sin2(
3446 int i, int j, int k, double& sx2, double& sy2, double& sz2
3447) {
3448 this->shift_grid_indices_fourier(i, j, k);
3449
3450 double u_x = M_PI * i / double(this->params.ngrid[0]);
3451 double u_y = M_PI * j / double(this->params.ngrid[1]);
3452 double u_z = M_PI * k / double(this->params.ngrid[2]);
3453
3454 sx2 = (i != 0) ? std::sin(u_x) * std::sin(u_x) : 0.;
3455 sy2 = (j != 0) ? std::sin(u_y) * std::sin(u_y) : 0.;
3456 sz2 = (k != 0) ? std::sin(u_z) * std::sin(u_z) : 0.;
3457}
3458
3459void FieldStats::get_shotnoise_aliasing_sin2_cos2_isoapprox(
3460 int i, int j, int k, double& s2h, double& c2h
3461) {
3462 this->shift_grid_indices_fourier(i, j, k);
3463
3464 double u_x = M_PI * i / double(this->params.ngrid[0]);
3465 double u_y = M_PI * j / double(this->params.ngrid[1]);
3466 double u_z = M_PI * k / double(this->params.ngrid[2]);
3467
3468 double uhalf = std::sqrt(u_x * u_x + u_y * u_y + u_z * u_z) / 2.;
3469
3470 s2h = std::sin(uhalf) * std::sin(uhalf);
3471 c2h = std::cos(uhalf) * std::cos(uhalf);
3472}
3473
3474// NOTE: Pure by coincidence.
3475PURE double FieldStats::calc_shotnoise_aliasing_ngp(int i, int j, int k) {
3476 return 1.;
3477}
3478
3479double FieldStats::calc_shotnoise_aliasing_cic(int i, int j, int k) {
3480 double sx2, sy2, sz2;
3481 this->get_shotnoise_aliasing_sin2(i, j, k, sx2, sy2, sz2);
3482
3483 return (1. - 2./3. * sx2) * (1. - 2./3. * sy2) * (1. - 2./3. * sz2);
3484}
3485
3486double FieldStats::calc_shotnoise_aliasing_tsc(int i, int j, int k) {
3487 double sx2, sy2, sz2;
3488 this->get_shotnoise_aliasing_sin2(i, j, k, sx2, sy2, sz2);
3489
3490 return (1. - sx2 + 2./15. * sx2 * sx2)
3491 * (1. - sy2 + 2./15. * sy2 * sy2)
3492 * (1. - sz2 + 2./15. * sz2 * sz2);
3493}
3494
3495double FieldStats::calc_shotnoise_aliasing_pcs(int i, int j, int k) {
3496 double sx2, sy2, sz2;
3497 this->get_shotnoise_aliasing_sin2(i, j, k, sx2, sy2, sz2);
3498
3499 return (1. - 4./3. * sx2 + 2./5. * sx2 * sx2 - 4./315. * sx2 * sx2 * sx2)
3500 * (1. - 4./3. * sy2 + 2./5. * sy2 * sy2 - 4./315. * sy2 * sy2 * sy2)
3501 * (1. - 4./3. * sz2 + 2./5. * sz2 * sz2 - 4./315. * sz2 * sz2 * sz2);
3502}
3503
3504double FieldStats::calc_shotnoise_aliasing_interlaced_isoapprox(
3505 int i, int j, int k, int order
3506) {
3507 double s2h, c2h;
3508 this->get_shotnoise_aliasing_sin2_cos2_isoapprox(i, j, k, s2h, c2h);
3509
3510 double cc = std::pow(c2h, order);
3511
3512 double cs = 1.;
3513 // if (order == 1) {
3514 // cs = 1.;
3515 // } else
3516 if (order == 2) {
3517 cs = 1. - 2./3. * s2h;
3518 } else
3519 if (order == 3) {
3520 cs = 1. - s2h + 2./15. * s2h * s2h;
3521 } else
3522 if (order == 4) {
3523 cs = 1. - 4./3. * s2h + 2./5. * s2h * s2h - 4./315. * s2h * s2h * s2h;
3524 }
3525
3526 return cc * cs;
3527}
3528
3529void FieldStats::compute_shotnoise_aliasing() {
3530 if (this->alias_ini) {return;} // if computed already
3531
3532 if (trvs::currTask == 0) {
3534 "Computing shot noise aliasing function in Fourier space "
3535 "for assignment order %d.",
3536 this->params.assignment_order
3537 );
3538 }
3539
3540 this->alias_sn.resize(this->params.nmesh, 0.); // initialise
3541
3542 trvs::count_rgrid += 1;
3543 trvs::count_grid += .5;
3545 trvs::gbytesMem += trvs::size_in_gb<double>(this->params.nmesh);
3547
3548 std::function<double(int, int, int)> calc_shotnoise_aliasing =
3549 this->ret_calc_shotnoise_aliasing();
3550
3551#ifdef TRV_USE_OMP
3552#pragma omp parallel for collapse(3)
3553#endif // TRV_USE_OMP
3554 for (int i = 0; i < this->params.ngrid[0]; i++) {
3555 for (int j = 0; j < this->params.ngrid[1]; j++) {
3556 for (int k = 0; k < this->params.ngrid[2]; k++) {
3557 long long idx_grid = ret_grid_index(i, j, k);
3558 this->alias_sn[idx_grid] = calc_shotnoise_aliasing(i, j, k);
3559 }
3560 }
3561 }
3562
3563 this->alias_ini = true; // set aliasing flag
3564}
3565
3566} // namespace trv
Isotropic coordinate binning.
Definition dataobjs.hpp:58
std::vector< double > bin_edges
bin edges
Definition dataobjs.hpp:65
std::vector< double > bin_centres
bin centres
Definition dataobjs.hpp:66
std::string space
coordinate space
Definition dataobjs.hpp:60
double bin_max
highest bin edge
Definition dataobjs.hpp:63
int num_bins
number of bins
Definition dataobjs.hpp:64
std::vector< std::complex< double > > sn
shot-noise power in bins
Definition field.hpp:589
void reset_stats()
Reset two-point statistics.
Definition field.cpp:2248
std::vector< std::complex< double > > pk
pseudo power spectrum in bins
Definition field.hpp:591
std::vector< double > r
Definition field.hpp:587
void compute_uncoupled_shotnoise_for_3pcf(MeshField &field_a, MeshField &field_b, std::vector< std::complex< double > > &ylm_a, std::vector< std::complex< double > > &ylm_b, std::complex< double > shotnoise_amp, trv::Binning &rbinning)
Compute binned uncoupled three-point correlation function shot noise.
Definition field.cpp:2947
~FieldStats()
Destruct two-point statistics.
Definition field.cpp:2193
std::vector< int > nmodes
number of wavevector modes in bins
Definition field.hpp:584
FieldStats(trv::ParameterSet &params, bool plan_ini=true)
Construct pseudo two-point statistics.
Definition field.cpp:2076
std::vector< double > k
average wavenumber in bins
Definition field.hpp:586
std::vector< std::complex< double > > xi
pseudo two-point correlation function in bins
Definition field.hpp:593
std::vector< int > npairs
number of separation pairs in bins
Definition field.hpp:585
void compute_ylm_wgtd_2pt_stats_in_fourier(MeshField &field_a, MeshField &field_b, std::complex< double > shotnoise_amp, int ell, int m, trv::Binning &kbinning)
Compute binned two-point statistics in Fourier space.
Definition field.cpp:2511
trv::BinnedVectors record_binned_vectors(trv::Binning &binning, const std::string &save_file)
Record binned vectors given a binning scheme.
Definition field.cpp:2317
void compute_ylm_wgtd_2pt_stats_in_config(MeshField &field_a, MeshField &field_b, std::complex< double > shotnoise_amp, int ell, int m, trv::Binning &rbinning)
Compute binned two-point statistics in configuration space.
Definition field.cpp:2705
Discretely sampled field on a mesh grid from particle catalogues.
Definition field.hpp:83
trv::ParameterSet params
parameter set
Definition field.hpp:85
void inv_fourier_transform()
Inverse Fourier transform the (FFT-transformed) field.
Definition field.cpp:1657
void inv_fourier_transform_sjl_ylm_wgtd_field(MeshField &field_fourier, std::vector< std::complex< double > > &ylm, trvm::SphericalBesselCalculator &sjl, double r)
Inverse Fourier transform a field weighted by the spherical Bessel function and reduced spherical ha...
Definition field.cpp:1908
std::string name
field name
Definition field.hpp:86
void assign_weighted_field_to_mesh(ParticleCatalogue &particles, fftw_complex *weights)
Assign a weighted field to a mesh by interpolation scheme.
Definition field.cpp:569
double calc_grid_based_powlaw_norm(ParticleCatalogue &particles, int order)
Definition field.cpp:2017
void reset_density_field()
(Re-)initialise the complex field (and its shadow) on mesh.
Definition field.cpp:500
void inv_fourier_transform_ylm_wgtd_field_band_limited(MeshField &field_fourier, std::vector< std::complex< double > > &ylm, double k_band, double dk_band, double &k_eff, int &nmodes)
Inverse Fourier transform a field weighted by the reduced spherical harmonics restricted to a wavenu...
Definition field.cpp:1792
double dr[3]
grid size in each dimension
Definition field.hpp:88
void fourier_transform()
Fourier transform the field.
Definition field.cpp:1496
void compute_ylm_wgtd_field(ParticleCatalogue &particles_data, ParticleCatalogue &particles_rand, LineOfSight *los_data, LineOfSight *los_rand, double alpha, int ell, int m)
Compute the weighted field (fluctuations) further weighted by the reduced spherical harmonics.
Definition field.cpp:1246
void compute_unweighted_field(ParticleCatalogue &particles)
Compute the unweighted field.
Definition field.cpp:1208
void apply_wide_angle_pow_law_kernel()
Apply wide-angle correction kernel in configuration space.
Definition field.cpp:1727
void compute_ylm_wgtd_quad_field(ParticleCatalogue &particles_data, ParticleCatalogue &particles_rand, LineOfSight *los_data, LineOfSight *los_rand, double alpha, int ell, int m)
Compute the quadratic weighted field (fluctuations) further weighted by the reduced spherical harmoni...
Definition field.cpp:1364
fftw_complex * field
complex field on mesh
Definition field.hpp:87
MeshField(trv::ParameterSet &params, bool plan_ini=true, const std::string &name="mesh-field")
Construct the mesh field.
Definition field.cpp:45
void compute_unweighted_field_fluctuations_insitu(ParticleCatalogue &particles)
Compute the unweighted field fluctuations.
Definition field.cpp:1229
double dk[3]
fundamental wavenumber in each dimension
Definition field.hpp:89
double vol
mesh volume
Definition field.hpp:90
~MeshField()
Destruct the mesh field.
Definition field.cpp:434
double vol_cell
mesh grid cell volume
Definition field.hpp:91
void apply_assignment_compensation()
Apply compensation needed after Fourier transform for assignment schemes.
Definition field.cpp:1764
const fftw_complex & operator[](long long gid)
Return mesh field grid cell value.
Definition field.cpp:524
Parameter set.
long long nmesh
number of mesh grid cells
std::string fftw_scheme
FFTW scheme: {"estimate", "measure" (default), "patient"}.
double volume
box volume (in Mpc^3/h^3)
std::string fftw_wisdom_file_b
backward-transform wisdom file path
std::string interlace
interlacing switch: {"true"/"on", "false"/"off" (default)}
int assignment_order
order of the assignment scheme
std::string fftw_wisdom_file_f
derived FFTW wisdom file paths
int ngrid[3]
grid cell number in each dimension
double boxsize[3]
box size (in Mpc/h) in each dimension
std::string use_fftw_wisdom
use FFTW wisdom: {"false" (default), <path-to-dir>}
Particle catalogue.
Definition particles.hpp:78
double pos_min[3]
minimum particle coordinates
Definition particles.hpp:88
int ntotal
total number of particles
Definition particles.hpp:84
double pos_max[3]
maximum particle coordinates
Definition particles.hpp:89
Interpolated spherical Bessel function of the first kind.
Definition maths.hpp:268
double eval(double x)
Evaluate the interpolated function.
Definition maths.cpp:368
static std::complex< double > calc_reduced_spherical_harmonic(const int ell, const int m, double pos[3])
Calculate the reduced spherical harmonic.
Definition maths.cpp:172
Exception raised when the data to be operated on are invalid.
Definition monitor.hpp:755
Exception raised when parameters are invalid.
Definition monitor.hpp:731
void info(const char *fmt_string,...)
Emit a information-level message.
Definition monitor.cpp:467
void debug(const char *fmt_string,...)
Emit a debugging-level message.
Definition monitor.cpp:441
void error(const char *fmt_string,...)
Emit a warning-level message.
Definition monitor.cpp:493
Mesh field (with one-point statistics) and pseudo two-point statistics.
std::vector< int > get_sorted_indices(std::vector< int > sorting_vector)
Get the sorted indices.
Definition arrayops.cpp:245
void print_binned_vectors_to_file(std::FILE *fileptr, trv::ParameterSet &params, trv::BinnedVectors &binned_vectors)
Print binned vectors to a file.
Definition io.cpp:305
double get_vec3d_magnitude(std::vector< double > vec)
Return the magnitude of a 3-d vector.
Definition maths.cpp:53
const std::complex< double > M_I
imaginary unit
double gbytesMem
current memory usage in gibibytes
Definition monitor.cpp:94
void update_maxmem(bool gpu=false)
Update the maximum memory usage estimate.
Definition monitor.cpp:165
double size_in_gb(long long num)
Return size in gibibytes.
Definition monitor.hpp:281
std::vector< int > get_gpu_ids()
Get the indices of GPUs available for use.
Definition monitor.cpp:284
bool fftw_wisdom_b_imported
wisdom import status for backward transform
Definition monitor.cpp:111
bool is_gpu_enabled()
Check if GPU mode is enabled.
Definition monitor.cpp:297
bool fftw_wisdom_f_imported
wisdom import status for forward transform
Definition monitor.cpp:110
Logger logger
default logger (at NSET logging level)
float count_grid
number of grids
Definition monitor.cpp:102
double gbytesMemGPU
current (GPU) memory usage in gibibytes
Definition monitor.cpp:97
int currTask
current task
Definition monitor.cpp:92
bool is_gpu_single()
Check if in single-GPU mode.
Definition monitor.cpp:322
int count_fft
number of FFTs
Definition monitor.cpp:107
int count_cgrid
number of 3-d complex grids
Definition monitor.cpp:101
int count_rgrid
number of 3-d real grids
Definition monitor.cpp:100
int count_ifft
number of IFFTs
Definition monitor.cpp:108
void update_maxcntgrid()
Update the maximum 3-d grid counts.
Definition monitor.cpp:177
Binned vectors.
Definition dataobjs.hpp:166
std::vector< double > vecy
y-components
Definition dataobjs.hpp:173
std::vector< double > vecz
z-components
Definition dataobjs.hpp:174
int count
number of vectors
Definition dataobjs.hpp:167
int num_bins
number of bins
Definition dataobjs.hpp:168
std::vector< double > upper_edges
upper bin edges
Definition dataobjs.hpp:171
std::vector< double > lower_edges
lower bin edges
Definition dataobjs.hpp:170
std::vector< double > vecx
x-components
Definition dataobjs.hpp:172
std::vector< int > indices
bin indices
Definition dataobjs.hpp:169
Line-of-sight vector.
Definition dataobjs.hpp:186
double pos[3]
3-d position vector
Definition dataobjs.hpp:187