Actual source code: petsclandau.h
1: #ifndef PETSCLANDAU_H
2: #define PETSCLANDAU_H
4: #include <petscdmplex.h>
5: #include <petscts.h>
7: PETSC_EXTERN PetscErrorCode DMPlexLandauPrintNorms(Vec, PetscInt);
8: PETSC_EXTERN PetscErrorCode DMPlexLandauCreateVelocitySpace(MPI_Comm, PetscInt, const char[], Vec *, Mat *, DM *);
9: PETSC_EXTERN PetscErrorCode DMPlexLandauDestroyVelocitySpace(DM *);
10: PETSC_EXTERN PetscErrorCode DMPlexLandauAccess(DM, Vec, PetscErrorCode (*)(DM, Vec, PetscInt, PetscInt, PetscInt, void *), void *);
11: PETSC_EXTERN PetscErrorCode DMPlexLandauAddMaxwellians(DM, Vec, PetscReal, PetscReal[], PetscReal[], PetscInt, PetscInt, PetscInt, void *);
12: PETSC_EXTERN PetscErrorCode DMPlexLandauCreateMassMatrix(DM dm, Mat *Amat);
13: PETSC_EXTERN PetscErrorCode DMPlexLandauIFunction(TS, PetscReal, Vec, Vec, Vec, void *);
14: PETSC_EXTERN PetscErrorCode DMPlexLandauIJacobian(TS, PetscReal, Vec, Vec, PetscReal, Mat, Mat, void *);
16: typedef int LandauIdx;
18: /* the Fokker-Planck-Landau context */
19: #if !defined(LANDAU_MAX_SPECIES)
20: #if defined(PETSC_USE_DMLANDAU_2D)
21: #define LANDAU_MAX_SPECIES 10
22: #define LANDAU_MAX_GRIDS 3
23: #else
24: #define LANDAU_MAX_SPECIES 10
25: #define LANDAU_MAX_GRIDS 3
26: #endif
27: #else
28: #define LANDAU_MAX_GRIDS 3
29: #endif
31: #if !defined(LANDAU_MAX_Q)
32: #if defined(LANDAU_MAX_NQ)
33: #error "LANDAU_MAX_NQ but not LANDAU_MAX_Q. Use -DLANDAU_MAX_Q=4 for Q3 elements"
34: #endif
35: #if defined(PETSC_USE_DMLANDAU_2D)
36: #define LANDAU_MAX_Q 5
37: #else
38: // 3D CUDA fails with > 3 (KK-CUDA is OK)
39: #define LANDAU_MAX_Q 3
40: #endif
41: #else
42: #undef LANDAU_MAX_NQ
43: #endif
45: #if defined(PETSC_USE_DMLANDAU_2D)
46: #define LANDAU_MAX_Q_FACE LANDAU_MAX_Q
47: #define LANDAU_MAX_NQ (LANDAU_MAX_Q * LANDAU_MAX_Q)
48: #define LANDAU_MAX_BATCH_SZ 1024
49: #define LANDAU_DIM 2
50: #else
51: #define LANDAU_MAX_Q_FACE (LANDAU_MAX_Q * LANDAU_MAX_Q)
52: #define LANDAU_MAX_NQ (LANDAU_MAX_Q * LANDAU_MAX_Q * LANDAU_MAX_Q)
53: #define LANDAU_MAX_BATCH_SZ 64
54: #define LANDAU_DIM 3
55: #endif
57: typedef enum {
58: LANDAU_CUDA,
59: LANDAU_KOKKOS,
60: LANDAU_CPU
61: } LandauDeviceType;
63: // static data - will be "device" data
64: typedef struct {
65: void *invJ; // nip*dim*dim
66: void *D; // nq*nb*dim
67: void *B; // nq*nb
68: void *alpha; // ns
69: void *beta; // ns
70: void *invMass; // ns
71: void *w; // nip
72: void *x; // nip
73: void *y; // nip
74: void *z; // nip
75: void *Eq_m; // ns - dynamic
76: void *f; // nip*Nf - dynamic (IP)
77: void *dfdx; // nip*Nf - dynamic (IP)
78: void *dfdy; // nip*Nf - dynamic (IP)
79: void *dfdz; // nip*Nf - dynamic (IP)
80: int dim_, ns_, nip_, nq_, nb_;
81: void *NCells; // remove and use elem_offset - TODO
82: void *species_offset; // for each grid, but same for all batched vertices
83: void *mat_offset; // for each grid, but same for all batched vertices
84: void *elem_offset; // for each grid, but same for all batched vertices
85: void *ip_offset; // for each grid, but same for all batched vertices
86: void *ipf_offset; // for each grid, but same for all batched vertices
87: void *ipfdf_data; // for each grid, but same for all batched vertices
88: void *maps; // for each grid, but same for all batched vertices
89: // COO
90: void *coo_elem_offsets;
91: void *coo_elem_point_offsets;
92: void *coo_elem_fullNb;
93: void *coo_vals;
94: LandauIdx coo_n_cellsTot;
95: LandauIdx coo_size;
96: LandauIdx coo_max_fullnb;
97: } LandauStaticData;
99: typedef enum {
100: LANDAU_EX2_TSSOLVE,
101: LANDAU_MATRIX_TOTAL,
102: LANDAU_OPERATOR,
103: LANDAU_JACOBIAN_COUNT,
104: LANDAU_JACOBIAN,
105: LANDAU_MASS,
106: LANDAU_F_DF,
107: LANDAU_KERNEL,
108: KSP_FACTOR,
109: KSP_SOLVE,
110: LANDAU_NUM_TIMERS
111: } LandauOMPTimers;
113: typedef struct {
114: PetscBool interpolate; /* Generate intermediate mesh elements */
115: PetscBool gpu_assembly;
116: MPI_Comm comm; /* global communicator to use for errors and diagnostics */
117: double times[LANDAU_NUM_TIMERS];
118: PetscBool use_matrix_mass;
119: /* FE */
120: PetscFE fe[LANDAU_MAX_SPECIES];
121: /* geometry */
122: PetscReal radius[LANDAU_MAX_GRIDS];
123: PetscReal re_radius; /* RE: radius of refinement along v_perp=0, z>0 */
124: PetscReal vperp0_radius1; /* RE: radius of refinement along v_perp=0 */
125: PetscReal vperp0_radius2; /* RE: radius of refinement along v_perp=0 after origin AMR refinement */
126: PetscBool sphere; // not used
127: PetscBool inflate; // not used
128: PetscReal i_radius[LANDAU_MAX_GRIDS]; // not used
129: PetscReal e_radius; // not used
130: PetscInt num_sections; // not used
131: PetscInt cells0[3];
132: /* AMR */
133: PetscBool use_p4est;
134: PetscInt numRERefine; /* RE: refinement along v_perp=0, z > 0 */
135: PetscInt nZRefine1; /* RE: origin refinement after v_perp=0 refinement */
136: PetscInt nZRefine2; /* RE: origin refinement after origin AMR refinement */
137: PetscInt numAMRRefine[LANDAU_MAX_GRIDS]; /* normal AMR - refine from origin */
138: PetscInt postAMRRefine[LANDAU_MAX_GRIDS]; /* uniform refinement of AMR */
139: /* relativistic */
140: PetscBool use_energy_tensor_trick;
141: PetscBool use_relativistic_corrections;
142: /* physics */
143: PetscReal thermal_temps[LANDAU_MAX_SPECIES];
144: PetscReal masses[LANDAU_MAX_SPECIES]; /* mass of each species */
145: PetscReal charges[LANDAU_MAX_SPECIES]; /* charge of each species */
146: PetscReal n[LANDAU_MAX_SPECIES]; /* number density of each species */
147: PetscReal m_0; /* reference mass */
148: PetscReal v_0; /* reference velocity */
149: PetscReal n_0; /* reference number density */
150: PetscReal t_0; /* reference time */
151: PetscReal Ez;
152: PetscReal epsilon0;
153: PetscReal k;
154: PetscReal lnLam;
155: PetscReal electronShift;
156: PetscInt num_species;
157: PetscInt num_grids;
158: PetscInt species_offset[LANDAU_MAX_GRIDS + 1]; // for each grid, but same for all batched vertices
159: PetscInt mat_offset[LANDAU_MAX_GRIDS + 1]; // for each grid, but same for all batched vertices
160: // batching
161: PetscBool jacobian_field_major_order; // this could be a type but lets not get pedantic
162: VecScatter plex_batch;
163: Vec work_vec;
164: IS batch_is;
165: PetscErrorCode (*seqaij_mult)(Mat, Vec, Vec);
166: PetscErrorCode (*seqaij_multtranspose)(Mat, Vec, Vec);
167: PetscErrorCode (*seqaij_solve)(Mat, Vec, Vec);
168: PetscErrorCode (*seqaij_getdiagonal)(Mat, Vec);
169: /* COO */
170: PetscBool coo_assembly;
171: /* cache */
172: Mat J;
173: Mat M;
174: Vec X;
175: /* derived type */
176: void *data;
177: /* computing */
178: LandauDeviceType deviceType;
179: DM pack;
180: DM plex[LANDAU_MAX_GRIDS];
181: LandauStaticData SData_d; /* static geometric data on device */
182: /* diagnostics */
183: PetscInt verbose;
184: PetscLogEvent events[20];
185: PetscLogStage stage;
186: PetscObjectState norm_state;
187: PetscInt batch_sz;
188: PetscInt batch_view_idx;
189: } LandauCtx;
191: #define LANDAU_SPECIES_MAJOR
192: #if !defined(LANDAU_SPECIES_MAJOR)
193: #define LAND_PACK_IDX(_b, _g) (_b * ctx->num_grids + _g)
194: #define LAND_MOFFSET(_b, _g, _nbch, _ngrid, _mat_off) (_b * _mat_off[_ngrid] + _mat_off[_g])
195: #else
196: #define LAND_PACK_IDX(_b, _g) (_g * ctx->batch_sz + _b)
197: #define LAND_MOFFSET(_b, _g, _nbch, _ngrid, _mat_off) (_nbch * _mat_off[_g] + _b * (_mat_off[_g + 1] - _mat_off[_g]))
198: #endif
200: typedef struct {
201: PetscReal scale;
202: LandauIdx gid; // Landau matrix index (<10,000)
203: } pointInterpolationP4est;
204: typedef struct _lP4estVertexMaps {
205: LandauIdx (*gIdx)[LANDAU_MAX_SPECIES][LANDAU_MAX_NQ]; // #elems * LANDAU_MAX_NQ (spoof for max , Nb) on device,
206: LandauIdx num_elements;
207: LandauIdx num_reduced;
208: LandauIdx num_face; // (Q or Q^2 for 3D)
209: LandauDeviceType deviceType;
210: PetscInt Nf;
211: pointInterpolationP4est (*c_maps)[LANDAU_MAX_Q_FACE];
212: struct _lP4estVertexMaps *d_self;
213: void *vp1, *vp2, *vp3;
214: PetscInt numgrids;
215: } P4estVertexMaps;
217: PETSC_EXTERN PetscErrorCode LandauCreateColoring(Mat, DM, PetscContainer *);
218: #if defined(PETSC_HAVE_CUDA)
219: PETSC_EXTERN PetscErrorCode LandauCUDAJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat);
220: PETSC_EXTERN PetscErrorCode LandauCUDACreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt);
221: PETSC_EXTERN PetscErrorCode LandauCUDADestroyMatMaps(P4estVertexMaps *, PetscInt);
222: PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], LandauStaticData *);
223: PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataClear(LandauStaticData *);
224: #endif
225: #if defined(PETSC_HAVE_KOKKOS)
226: PETSC_EXTERN PetscErrorCode LandauKokkosJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat);
227: PETSC_EXTERN PetscErrorCode LandauKokkosCreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt);
228: PETSC_EXTERN PetscErrorCode LandauKokkosDestroyMatMaps(P4estVertexMaps *, PetscInt);
229: PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], LandauStaticData *);
230: PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataClear(LandauStaticData *);
231: #endif
233: #endif /* PETSCLANDAU_H */