-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathps3d.cpp
273 lines (234 loc) · 10.1 KB
/
ps3d.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
#include <cmath>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <DistributionMapping.H>
#include <MultiFab.H>
#include <Geometry.H>
#include <BLFort.H>
#include <MakeFFTWBoxes.H>
BL_FORT_PROC_DECL(FFT_3D, fft_3d) (
const Real* mf_fft_in,
const int* lo,
const int* hi,
const int* domain_size_int,
const Real* dx,
const int* comm,
const intptr_t* alloc_local,
const Real* mf_fft_out_real,
const Real* mf_fft_out_imag);
BL_FORT_PROC_DECL(CALC_PS3D, calc_ps3d) (
const Real* mf_fft_out_real,
const Real* mf_fft_out_imag,
const int* lo,
const int* hi,
const int* num_ghosts,
const int* num_bins,
const Real* k_bin_edges,
const Real* domain_length,
const int* domain_grid_length,
const int* k_bin_count,
const Real* k_bin_power_weighted_k_sum,
const Real* k_bin_power_sum);
BL_FORT_PROC_DECL(CIC_DECONVOLVE, cic_deconvolve) (
const Real* dm_density_real,
const Real* dm_density_imag,
const int* lo,
const int* hi,
const int* domain_size);
void ps3d (const MultiFab& mf,
const Geometry &geom,
const unsigned int nStep,
const std::string field_name,
bool CIC_deconvolve,
bool already_in_overdensity_units)
{
/*
* We can do pencil FFTs on single processors (i.e., without MPI) because
* the pencil memory footprint is small. However this is not true for the
* grid-wide 3-D FFTs. For these we must use the distributed memory MPI
* version of FFTW.
*/
// First query FFTW to get the slab size that it prefers for the domain
// decomposition. We'll then construct boxes based on this distribution.
BoxList bl_fft;
intptr_t alloc_local;
MakeFFTWBoxes(geom, bl_fft, alloc_local);
// Now that we have the domain decomposition that FFTW wants to use, we set
// up a DM which places exactly one Box on every process (which FFTW
// implicitly assumes we will do; it has no notion of distributed
// iterators). Presumably any of the supported distribution mapping
// strategies in BoxLib will place one Box per process if there are exactly
// as many Boxes as processors. But I don't know this for sure, so we
// enforce it by hand.
Array<int> dm_one_box_per_process_vector(bl_fft.size()+1);
for (unsigned int i = 0; i < bl_fft.size(); ++i)
dm_one_box_per_process_vector[i] = i;
dm_one_box_per_process_vector[bl_fft.size()] = ParallelDescriptor::MyProc();
DistributionMapping dm_one_box_per_process(dm_one_box_per_process_vector);
BoxArray ba_fft(bl_fft);
MultiFab mf_fft(ba_fft, 1, 0, dm_one_box_per_process);
mf_fft.copy(mf);
const Box& domain = geom.Domain();
const long num_cells = domain.numPts();
MultiFab overmf(mf_fft.boxArray(), 1, 0, dm_one_box_per_process);
overmf.copy(mf_fft);
if (!already_in_overdensity_units) {
const Real mean_value = mf.norm1() / Real(num_cells);
overmf.mult(1.0/mean_value);
overmf.plus(-1.0, 0, 1);
}
const IntVect& domain_size_IV = domain.size();
const int* const domain_size_int = domain_size_IV.getVect();
const Real dx = (geom.ProbHi(0) - geom.ProbLo(0)) / Real(domain.length(0));
// The following section is devoted to the case that there are more
// processes than there are slabs for FFTW to distribute (i.e., n_procs >
// grid_nz). Since the calls to MPI FFTW are collective, but not all
// processes will make the call (since some won't have any slabs to work
// on), we confine the "active" processes to a subcommunicator during the
// DFT.
// Get list of processes that have a slab to work on.
int num_my_boxes = 0;
for (MFIter mfi(overmf); mfi.isValid(); ++mfi) {
num_my_boxes++;
break;
}
Array<int> proc_box_count(ParallelDescriptor::NProcs());
MPI_Allgather(&num_my_boxes, 1, MPI_INT, &proc_box_count[0], 1, MPI_INT, ParallelDescriptor::Communicator());
Array<int> procs_with_boxes;
for (unsigned int i = 0; i < ParallelDescriptor::NProcs(); ++i) {
if (proc_box_count[i] > 0) procs_with_boxes.push_back(i);
}
// Build subgroup and subcommunicator for those processes which will
// participate in the DFT.
MPI_Group parent_group;
MPI_Comm_group(ParallelDescriptor::Communicator(), &parent_group);
MPI_Group fftw_group;
MPI_Group_incl(parent_group, procs_with_boxes.size(), &procs_with_boxes[0], &fftw_group);
MPI_Comm fftw_comm;
MPI_Comm_create(ParallelDescriptor::Communicator(), fftw_group, &fftw_comm);
const int comm = MPI_Comm_c2f(fftw_comm);
// Separate MultiFabs to save real and imaginary components of complex
// output from DFT.
MultiFab mf_fft_out_real(ba_fft, 1, 0, dm_one_box_per_process);
MultiFab mf_fft_out_imag(ba_fft, 1, 0, dm_one_box_per_process);
// Do the DFT. Every process has either 1 or 0 boxes to work on, so even
// though we use a generic MFIter, it will only iterate one time (or zero).
for (MFIter mfi(overmf); mfi.isValid(); ++mfi) {
const Box bx = mfi.validbox();
BL_FORT_PROC_CALL(FFT_3D, fft_3d) (
(overmf)[mfi].dataPtr(),
bx.loVect(),
bx.hiVect(),
domain_size_int,
&dx,
&comm,
&alloc_local,
(mf_fft_out_real)[mfi].dataPtr(),
(mf_fft_out_imag)[mfi].dataPtr());
}
MPI_Group_free(&fftw_group);
MPI_Group_free(&parent_group);
if (MPI_COMM_NULL != fftw_comm) MPI_Comm_free(&fftw_comm);
if (CIC_deconvolve) {
// Do CIC deconvolution on k-space dark matter data.
const Box& problem_domain = geom.Domain();
Array<int> domain_size(3);
for (unsigned int i = 0; i < 3; ++i) domain_size[i] = problem_domain.length(i);
for (MFIter mfi(mf_fft_out_real); mfi.isValid(); ++mfi) {
const Box& bx = mfi.validbox();
BL_FORT_PROC_CALL(CIC_DECONVOLVE, cic_deconvolve) (
(mf_fft_out_real)[mfi].dataPtr(),
(mf_fft_out_imag)[mfi].dataPtr(),
bx.loVect(),
bx.hiVect(),
&domain_size[0]);
}
}
// Now calculate 3-D power spectrum P(k).
const int n_min = std::min(domain.length(0), std::min(domain.length(1), domain.length(2)));
const int i_nyq = n_min / 2;
const Real pi = 3.1415927;
const Real k_fund = (2.0 * pi) / geom.ProbLength(0);
const Real k_nyquist = k_fund * Real(n_min)/2.0;
// bin bounds
const Real k_min = k_fund;
const Real k_max = k_fund * Real(i_nyq);
// Chose number of linear bins at low end.
const int num_lin_bins = 10;
const Real k_lin_edge = k_min + Real(num_lin_bins) * k_fund;
// Figure out log spacing.
// Choose dlogk such that the spacing between i = 10 and 11 looks like the
// linear spacing would on log scale.
const Real dlogk = log10( k_lin_edge + k_fund ) - log10( k_lin_edge );
const int num_log_bins = ( log10(k_max) - log10(k_lin_edge) ) / dlogk;
const Real log_k_lin_edge = log10(k_lin_edge);
const int k_num_bins = num_lin_bins + num_log_bins;
const int k_num_edges = k_num_bins + 1;
std::vector<Real> k_bin_edges(k_num_edges);
int i, ki;
for (i = 0; i < k_num_edges; ++i) {
if (i < num_lin_bins) {
k_bin_edges[i] = k_min + k_fund * i;
} else {
ki = i - num_lin_bins;
k_bin_edges[i] = pow(10.0, log_k_lin_edge + dlogk * ki);
}
}
if (k_min < 0.0 || k_max > k_nyquist)
BoxLib::Error("Bad k bin edge values.");
const int num_ghosts = mf_fft_out_real.nGrow();
const Real domain_length = geom.ProbLength(0);
std::vector<int> k_bin_count (k_num_bins);
std::vector<Real> k_bin_power_weighted_k_sum (k_num_bins);
std::vector<Real> k_bin_power_sum (k_num_bins);
for (unsigned int i = 0; i < k_num_bins; ++i) {
k_bin_count[i] = 0;
k_bin_power_weighted_k_sum[i] = 0.0;
k_bin_power_sum[i] = 0.0;
}
const int domain_grid_length = domain.length(0);
for (MFIter mfi(mf_fft_out_real); mfi.isValid(); ++mfi) {
const Box& bx = mfi.validbox();
std::vector<int> k_bin_count_per_box (k_num_bins);
std::vector<Real> k_bin_power_weighted_k_sum_per_box (k_num_bins);
std::vector<Real> k_bin_power_sum_per_box (k_num_bins);
BL_FORT_PROC_CALL(CALC_PS3D, calc_ps3d) (
(mf_fft_out_real)[mfi].dataPtr(),
(mf_fft_out_imag)[mfi].dataPtr(),
bx.loVect(),
bx.hiVect(),
&num_ghosts,
&k_num_bins,
&k_bin_edges[0],
&domain_length,
&domain_grid_length,
&k_bin_count[0],
&k_bin_power_weighted_k_sum[0],
&k_bin_power_sum[0]);
for (unsigned int i = 0; i < k_num_bins; ++i) {
k_bin_count[i] += k_bin_count_per_box[i];
k_bin_power_weighted_k_sum[i] += k_bin_power_weighted_k_sum_per_box[i];
k_bin_power_sum[i] += k_bin_power_sum_per_box[i];
}
}
ParallelDescriptor::ReduceIntSum (&k_bin_count[0], k_num_bins);
ParallelDescriptor::ReduceRealSum(&k_bin_power_weighted_k_sum[0], k_num_bins);
ParallelDescriptor::ReduceRealSum(&k_bin_power_sum[0], k_num_bins);
if (ParallelDescriptor::IOProcessor()) {
std::ofstream power_spectrum_file;
std::stringstream filename;
filename << field_name << "_ps3d_nstep_" << std::setfill('0') << std::setw(5) << nStep;
power_spectrum_file.open(filename.str().c_str());
power_spectrum_file << std::scientific;
for (unsigned int i = 0; i < k_num_bins; ++i) {
power_spectrum_file << std::setw(25) << k_bin_edges[i]
<< std::setw(15) << k_bin_count[i]
<< std::setw(25) << k_bin_power_weighted_k_sum[i]/k_bin_power_sum[i]
<< std::setw(15) << k_bin_power_sum[i]/Real(k_bin_count[i])
<< std::endl;
}
power_spectrum_file.close();
}
return;
}