103 std::vector<double> sample_pts,
double kr_c,
bool lowring,
107 if (sample_pts.size() < 2) {
109 "The number of sample points must be at least 2."
114 "The sample points are not logarithmically spaced."
119 this->
nsamp = sample_pts.size();
121 std::log(sample_pts.back() / sample_pts.front()) / (this->
nsamp - 1);
123 this->extrap = extrap;
125 if (this->
nsamp % 2 != 0) {
127 "The number of sample points must be even for extrapolation."
131 2, std::ceil(std::log2(extrap_exp * this->
nsamp))
135 "The sample size expansion factor results in a shrunken sample size."
139 this->n_ext = (this->
nsamp_trans - this->nsamp) / 2;
141 this->
pre_sampts, this->n_ext, this->pre_sampts_extrap
164 double kr_aprod = this->
pivot * std::exp(- this->
logres);
167 for (
int j = 0; j < this->
nsamp; j++) {
172 this->post_sampts_extrap.resize(this->
nsamp_trans);
174 this->post_sampts_extrap[j] =
175 kr_aprod / this->
pre_sampts[this->nsamp_trans - j - 1];
194 CUDA_EXEC(cudaStreamCreate(&this->custream));
198 this->pre_buffer = fftw_alloc_complex(this->
nsamp_trans);
201#if defined(TRV_USE_HIP)
202 HIPFFT_EXEC(hipfftPlan1d(
207 &this->d_buffer,
sizeof(fft_double_complex) * this->
nsamp_trans
209#elif defined(TRV_USE_CUDA)
210 CUFFT_EXEC(cufftCreate(&this->plan_gpu));
213 CUFFT_EXEC(cufftSetStream(this->plan_gpu, this->custream));
216 std::size_t workspace_sizes[gpus.size()];
217 CUFFT_EXEC(cufftMakePlan1d(
218 this->plan_gpu, this->nsamp_trans,
219 CUFFT_Z2Z, 1, workspace_sizes
222 CUDA_EXEC(cudaMalloc(
223 &this->d_buffer,
sizeof(fft_double_complex) * this->nsamp_trans
227 this->pre_plan = fftw_plan_dft_1d(
228 this->
nsamp_trans, this->pre_buffer, this->pre_buffer,
229 FFTW_FORWARD, FFTW_ESTIMATE
233 this->post_buffer = fftw_alloc_complex(this->
nsamp_trans);
237 this->post_plan = fftw_plan_dft_1d(
238 this->
nsamp_trans, this->post_buffer, this->post_buffer,
239 FFTW_FORWARD, FFTW_ESTIMATE
243 this->plan_init =
true;
289 double mu = this->
order;
290 double q = this->
bias;
293 double kr_c = this->
pivot;
295 if (N_trans <= 0 || dL <= 0. || kr_c <= 0.) {
296 throw std::runtime_error(
297 "This instance of trv::maths::HankelTransform has not been "
298 "initialised with `initialise`."
302 double x_p = (mu + 1. + q)/2.;
303 double x_m = (mu + 1. - q)/2.;
304 double x_eq = (mu + 1.)/2.;
306 double y = M_PI / (N_trans * dL);
308 double t = -2. * y * std::log(kr_c/2.);
312 std::vector< std::complex<double> > u(N_trans);
314 for (
int m = 0; m <= N_trans/2; m++) {
321 double qln2 = q * std::log(2.);
322 for (
int m = 0; m <= N_trans/2; m++) {
329 std::exp(qln2 + lnr_p - lnr_m), m*t + phi_p + phi_m
334 for (
int m = N_trans/2 + 1; m < N_trans; m++) {
335 u[m] = conj(u[N_trans - m]);
338 if (N_trans % 2 == 0) {
340 u[N_trans/2] = u[N_trans/2].real() +
trvm::M_I * 0.;
347 std::complex<double>* a, std::complex<double>* b
353 if (this->kernel.empty() || N <= 2 || N_trans <= 2) {
354 throw std::runtime_error(
355 "This instance of trv::maths::HankelTransform has not been "
356 "initialised with `initialise`."
362 for (
int j = 0; j < N_trans; j++) {
363 this->pre_buffer[j][0] = (a[j]).real();
364 this->pre_buffer[j][1] = (a[j]).imag();
365 this->post_buffer[j][0] = 0.;
366 this->post_buffer[j][1] = 0.;
370 std::vector<double> a_vec(N);
371 for (
int j = 0; j < N; j++) {
372 a_vec[j] = a[j].real();
375 std::vector<double> a_trans_vec(N_trans);
376 switch (this->extrap) {
381 a_vec, this->n_ext, 0., 0., a_trans_vec
386 a_vec, this->n_ext, a_vec.front(), a_vec.back(), a_trans_vec
399 for (
int j = 0; j < N_trans; j++) {
400 this->pre_buffer[j][0] = a_trans_vec[j];
401 this->pre_buffer[j][1] = 0.;
402 this->post_buffer[j][0] = 0.;
403 this->post_buffer[j][1] = 0.;
409#if defined(TRV_USE_HIP)
410 trva::copy_complex_array_htod(
411 this->pre_buffer, this->d_buffer, this->
nsamp_trans
413 HIPFFT_EXEC(hipfftExecZ2Z(
414 this->plan_gpu, this->d_buffer, this->d_buffer,
417 trva::copy_complex_array_dtoh(
418 this->d_buffer, this->pre_buffer, this->
nsamp_trans
420#elif defined(TRV_USE_CUDA)
421 trva::copy_complex_array_htod(
422 this->pre_buffer, this->d_buffer, this->
nsamp_trans
424 CUFFT_EXEC(cufftXtExec(
425 this->plan_gpu, this->d_buffer, this->d_buffer,
428 trva::copy_complex_array_dtoh(
429 this->d_buffer, this->pre_buffer, this->
nsamp_trans
433 fftw_execute(this->pre_plan);
436 for (
int m = 0; m < N_trans; m++) {
438 std::complex<double> a_(this->pre_buffer[m][0], this->pre_buffer[m][1]);
439 std::complex<double> b_ = a_ * this->kernel[m] / double(N_trans);
440 this->post_buffer[m][0] = b_.real();
441 this->post_buffer[m][1] = b_.imag();
445#if defined(TRV_USE_HIP)
446 trva::copy_complex_array_htod(
447 this->post_buffer, this->d_buffer, this->
nsamp_trans
449 HIPFFT_EXEC(hipfftExecZ2Z(
450 this->plan_gpu, this->d_buffer, this->d_buffer,
453 trva::copy_complex_array_dtoh(
454 this->d_buffer, this->post_buffer, this->
nsamp_trans
456#elif defined(TRV_USE_CUDA)
457 trva::copy_complex_array_htod(
458 this->post_buffer, this->d_buffer, this->
nsamp_trans
460 CUFFT_EXEC(cufftXtExec(
461 this->plan_gpu, this->d_buffer, this->d_buffer,
464 trva::copy_complex_array_dtoh(
465 this->d_buffer, this->post_buffer, this->
nsamp_trans
469 fftw_execute(this->post_plan);
473 for (
int j = 0; j < N; j++) {
474 b[j] = std::complex<double>(
475 this->post_buffer[j + this->n_ext][0],
476 this->post_buffer[j + this->n_ext][1]