Actual source code: feopencl.c
1: #include <petsc/private/petscfeimpl.h>
3: #if defined(PETSC_HAVE_OPENCL)
5: static PetscErrorCode PetscFEDestroy_OpenCL(PetscFE fem)
6: {
7: PetscFE_OpenCL *ocl = (PetscFE_OpenCL *)fem->data;
9: clReleaseCommandQueue(ocl->queue_id);
10: ocl->queue_id = 0;
11: clReleaseContext(ocl->ctx_id);
12: ocl->ctx_id = 0;
13: PetscFree(ocl);
14: return 0;
15: }
17: #define PetscCallSTR(err) \
18: do { \
19: err; \
20: string_tail += count; \
22: } while (0)
23: enum {
24: LAPLACIAN = 0,
25: ELASTICITY = 1
26: };
28: /* NOTE: This is now broken for vector problems. Must redo loops to respect vector basis elements */
29: /* dim Number of spatial dimensions: 2 */
30: /* N_b Number of basis functions: generated */
31: /* N_{bt} Number of total basis functions: N_b * N_{comp} */
32: /* N_q Number of quadrature points: generated */
33: /* N_{bs} Number of block cells LCM(N_b, N_q) */
34: /* N_{bst} Number of block cell components LCM(N_{bt}, N_q) */
35: /* N_{bl} Number of concurrent blocks generated */
36: /* N_t Number of threads: N_{bl} * N_{bs} */
37: /* N_{cbc} Number of concurrent basis cells: N_{bl} * N_q */
38: /* N_{cqc} Number of concurrent quadrature cells: N_{bl} * N_b */
39: /* N_{sbc} Number of serial basis cells: N_{bs} / N_q */
40: /* N_{sqc} Number of serial quadrature cells: N_{bs} / N_b */
41: /* N_{cb} Number of serial cell batches: input */
42: /* N_c Number of total cells: N_{cb}*N_{t}/N_{comp} */
43: static PetscErrorCode PetscFEOpenCLGenerateIntegrationCode(PetscFE fem, char **string_buffer, PetscInt buffer_length, PetscBool useAux, PetscInt N_bl)
44: {
45: PetscFE_OpenCL *ocl = (PetscFE_OpenCL *)fem->data;
46: PetscQuadrature q;
47: char *string_tail = *string_buffer;
48: char *end_of_buffer = *string_buffer + buffer_length;
49: char float_str[] = "float", double_str[] = "double";
50: char *numeric_str = &(float_str[0]);
51: PetscInt op = ocl->op;
52: PetscBool useField = PETSC_FALSE;
53: PetscBool useFieldDer = PETSC_TRUE;
54: PetscBool useFieldAux = useAux;
55: PetscBool useFieldDerAux = PETSC_FALSE;
56: PetscBool useF0 = PETSC_TRUE;
57: PetscBool useF1 = PETSC_TRUE;
58: const PetscReal *points, *weights;
59: PetscTabulation T;
60: PetscInt dim, qNc, N_b, N_c, N_q, N_t, p, d, b, c;
61: size_t count;
63: PetscFEGetSpatialDimension(fem, &dim);
64: PetscFEGetDimension(fem, &N_b);
65: PetscFEGetNumComponents(fem, &N_c);
66: PetscFEGetQuadrature(fem, &q);
67: PetscQuadratureGetData(q, NULL, &qNc, &N_q, &points, &weights);
69: N_t = N_b * N_c * N_q * N_bl;
70: /* Enable device extension for double precision */
71: if (ocl->realType == PETSC_DOUBLE) {
72: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
73: "#if defined(cl_khr_fp64)\n"
74: "# pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
75: "#elif defined(cl_amd_fp64)\n"
76: "# pragma OPENCL EXTENSION cl_amd_fp64: enable\n"
77: "#endif\n",
78: &count));
79: numeric_str = &(double_str[0]);
80: }
81: /* Kernel API */
82: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
83: "\n"
84: "__kernel void integrateElementQuadrature(int N_cb, __global %s *coefficients, __global %s *coefficientsAux, __global %s *jacobianInverses, __global %s *jacobianDeterminants, __global %s *elemVec)\n"
85: "{\n",
86: &count, numeric_str, numeric_str, numeric_str, numeric_str, numeric_str));
87: /* Quadrature */
88: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
89: " /* Quadrature points\n"
90: " - (x1,y1,x2,y2,...) */\n"
91: " const %s points[%d] = {\n",
92: &count, numeric_str, N_q * dim));
93: for (p = 0; p < N_q; ++p) {
94: for (d = 0; d < dim; ++d) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g,\n", &count, points[p * dim + d]);
95: }
96: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count);
97: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
98: " /* Quadrature weights\n"
99: " - (v1,v2,...) */\n"
100: " const %s weights[%d] = {\n",
101: &count, numeric_str, N_q));
102: for (p = 0; p < N_q; ++p) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g,\n", &count, weights[p]);
103: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count);
104: /* Basis Functions */
105: PetscFEGetCellTabulation(fem, 1, &T);
106: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
107: " /* Nodal basis function evaluations\n"
108: " - basis component is fastest varying, the basis function, then point */\n"
109: " const %s Basis[%d] = {\n",
110: &count, numeric_str, N_q * N_b * N_c));
111: for (p = 0; p < N_q; ++p) {
112: for (b = 0; b < N_b; ++b) {
113: for (c = 0; c < N_c; ++c) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g,\n", &count, T->T[0][(p * N_b + b) * N_c + c]);
114: }
115: }
116: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count);
117: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
118: "\n"
119: " /* Nodal basis function derivative evaluations,\n"
120: " - derivative direction is fastest varying, then basis component, then basis function, then point */\n"
121: " const %s%d BasisDerivatives[%d] = {\n",
122: &count, numeric_str, dim, N_q * N_b * N_c));
123: for (p = 0; p < N_q; ++p) {
124: for (b = 0; b < N_b; ++b) {
125: for (c = 0; c < N_c; ++c) {
126: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "(%s%d)(", &count, numeric_str, dim);
127: for (d = 0; d < dim; ++d) {
128: if (d > 0) {
129: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, ", %g", &count, T->T[1][((p * N_b + b) * dim + d) * N_c + c]);
130: } else {
131: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g", &count, T->T[1][((p * N_b + b) * dim + d) * N_c + c]);
132: }
133: }
134: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "),\n", &count);
135: }
136: }
137: }
138: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count);
139: /* Sizes */
140: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
141: " const int dim = %d; // The spatial dimension\n"
142: " const int N_bl = %d; // The number of concurrent blocks\n"
143: " const int N_b = %d; // The number of basis functions\n"
144: " const int N_comp = %d; // The number of basis function components\n"
145: " const int N_bt = N_b*N_comp; // The total number of scalar basis functions\n"
146: " const int N_q = %d; // The number of quadrature points\n"
147: " const int N_bst = N_bt*N_q; // The block size, LCM(N_b*N_comp, N_q), Notice that a block is not processed simultaneously\n"
148: " const int N_t = N_bst*N_bl; // The number of threads, N_bst * N_bl\n"
149: " const int N_bc = N_t/N_comp; // The number of cells per batch (N_b*N_q*N_bl)\n"
150: " const int N_sbc = N_bst / (N_q * N_comp);\n"
151: " const int N_sqc = N_bst / N_bt;\n"
152: " /*const int N_c = N_cb * N_bc;*/\n"
153: "\n"
154: " /* Calculated indices */\n"
155: " /*const int tidx = get_local_id(0) + get_local_size(0)*get_local_id(1);*/\n"
156: " const int tidx = get_local_id(0);\n"
157: " const int blidx = tidx / N_bst; // Block number for this thread\n"
158: " const int bidx = tidx %% N_bt; // Basis function mapped to this thread\n"
159: " const int cidx = tidx %% N_comp; // Basis component mapped to this thread\n"
160: " const int qidx = tidx %% N_q; // Quadrature point mapped to this thread\n"
161: " const int blbidx = tidx %% N_q + blidx*N_q; // Cell mapped to this thread in the basis phase\n"
162: " const int blqidx = tidx %% N_b + blidx*N_b; // Cell mapped to this thread in the quadrature phase\n"
163: " const int gidx = get_group_id(1)*get_num_groups(0) + get_group_id(0);\n"
164: " const int Goffset = gidx*N_cb*N_bc;\n",
165: &count, dim, N_bl, N_b, N_c, N_q));
166: /* Local memory */
167: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
168: "\n"
169: " /* Quadrature data */\n"
170: " %s w; // $w_q$, Quadrature weight at $x_q$\n"
171: " __local %s phi_i[%d]; //[N_bt*N_q]; // $\\phi_i(x_q)$, Value of the basis function $i$ at $x_q$\n"
172: " __local %s%d phiDer_i[%d]; //[N_bt*N_q]; // $\\frac{\\partial\\phi_i(x_q)}{\\partial x_d}$, Value of the derivative of basis function $i$ in direction $x_d$ at $x_q$\n"
173: " /* Geometric data */\n"
174: " __local %s detJ[%d]; //[N_t]; // $|J(x_q)|$, Jacobian determinant at $x_q$\n"
175: " __local %s invJ[%d];//[N_t*dim*dim]; // $J^{-1}(x_q)$, Jacobian inverse at $x_q$\n",
176: &count, numeric_str, numeric_str, N_b * N_c * N_q, numeric_str, dim, N_b * N_c * N_q, numeric_str, N_t, numeric_str, N_t * dim * dim));
177: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
178: " /* FEM data */\n"
179: " __local %s u_i[%d]; //[N_t*N_bt]; // Coefficients $u_i$ of the field $u|_{\\mathcal{T}} = \\sum_i u_i \\phi_i$\n",
180: &count, numeric_str, N_t * N_b * N_c));
181: if (useAux) {
182: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " __local %s a_i[%d]; //[N_t]; // Coefficients $a_i$ of the auxiliary field $a|_{\\mathcal{T}} = \\sum_i a_i \\phi^R_i$\n", &count, numeric_str, N_t);
183: }
184: if (useF0) {
185: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
186: " /* Intermediate calculations */\n"
187: " __local %s f_0[%d]; //[N_t*N_sqc]; // $f_0(u(x_q), \\nabla u(x_q)) |J(x_q)| w_q$\n",
188: &count, numeric_str, N_t * N_q));
189: }
190: if (useF1) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " __local %s%d f_1[%d]; //[N_t*N_sqc]; // $f_1(u(x_q), \\nabla u(x_q)) |J(x_q)| w_q$\n", &count, numeric_str, dim, N_t * N_q);
191: /* TODO: If using elasticity, put in mu/lambda coefficients */
192: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
193: " /* Output data */\n"
194: " %s e_i; // Coefficient $e_i$ of the residual\n\n",
195: &count, numeric_str));
196: /* One-time loads */
197: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
198: " /* These should be generated inline */\n"
199: " /* Load quadrature weights */\n"
200: " w = weights[qidx];\n"
201: " /* Load basis tabulation \\phi_i for this cell */\n"
202: " if (tidx < N_bt*N_q) {\n"
203: " phi_i[tidx] = Basis[tidx];\n"
204: " phiDer_i[tidx] = BasisDerivatives[tidx];\n"
205: " }\n\n",
206: &count));
207: /* Batch loads */
208: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
209: " for (int batch = 0; batch < N_cb; ++batch) {\n"
210: " /* Load geometry */\n"
211: " detJ[tidx] = jacobianDeterminants[Goffset+batch*N_bc+tidx];\n"
212: " for (int n = 0; n < dim*dim; ++n) {\n"
213: " const int offset = n*N_t;\n"
214: " invJ[offset+tidx] = jacobianInverses[(Goffset+batch*N_bc)*dim*dim+offset+tidx];\n"
215: " }\n"
216: " /* Load coefficients u_i for this cell */\n"
217: " for (int n = 0; n < N_bt; ++n) {\n"
218: " const int offset = n*N_t;\n"
219: " u_i[offset+tidx] = coefficients[(Goffset*N_bt)+batch*N_t*N_b+offset+tidx];\n"
220: " }\n",
221: &count));
222: if (useAux) {
223: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
224: " /* Load coefficients a_i for this cell */\n"
225: " /* TODO: This should not be N_t here, it should be N_bc*N_comp_aux */\n"
226: " a_i[tidx] = coefficientsAux[Goffset+batch*N_t+tidx];\n",
227: &count));
228: }
229: /* Quadrature phase */
230: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
231: " barrier(CLK_LOCAL_MEM_FENCE);\n"
232: "\n"
233: " /* Map coefficients to values at quadrature points */\n"
234: " for (int c = 0; c < N_sqc; ++c) {\n"
235: " const int cell = c*N_bl*N_b + blqidx;\n"
236: " const int fidx = (cell*N_q + qidx)*N_comp + cidx;\n",
237: &count));
238: if (useField) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " %s u[%d]; //[N_comp]; // $u(x_q)$, Value of the field at $x_q$\n", &count, numeric_str, N_c);
239: if (useFieldDer) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " %s%d gradU[%d]; //[N_comp]; // $\\nabla u(x_q)$, Value of the field gradient at $x_q$\n", &count, numeric_str, dim, N_c);
240: if (useFieldAux) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " %s a[%d]; //[1]; // $a(x_q)$, Value of the auxiliary fields at $x_q$\n", &count, numeric_str, 1);
241: if (useFieldDerAux) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " %s%d gradA[%d]; //[1]; // $\\nabla a(x_q)$, Value of the auxiliary field gradient at $x_q$\n", &count, numeric_str, dim, 1);
242: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
243: "\n"
244: " for (int comp = 0; comp < N_comp; ++comp) {\n",
245: &count));
246: if (useField) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " u[comp] = 0.0;\n", &count);
247: if (useFieldDer) {
248: switch (dim) {
249: case 1:
250: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " gradU[comp].x = 0.0;\n", &count);
251: break;
252: case 2:
253: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " gradU[comp].x = 0.0; gradU[comp].y = 0.0;\n", &count);
254: break;
255: case 3:
256: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " gradU[comp].x = 0.0; gradU[comp].y = 0.0; gradU[comp].z = 0.0;\n", &count);
257: break;
258: }
259: }
260: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " }\n", &count);
261: if (useFieldAux) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " a[0] = 0.0;\n", &count);
262: if (useFieldDerAux) {
263: switch (dim) {
264: case 1:
265: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " gradA[0].x = 0.0;\n", &count);
266: break;
267: case 2:
268: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " gradA[0].x = 0.0; gradA[0].y = 0.0;\n", &count);
269: break;
270: case 3:
271: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " gradA[0].x = 0.0; gradA[0].y = 0.0; gradA[0].z = 0.0;\n", &count);
272: break;
273: }
274: }
275: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
276: " /* Get field and derivatives at this quadrature point */\n"
277: " for (int i = 0; i < N_b; ++i) {\n"
278: " for (int comp = 0; comp < N_comp; ++comp) {\n"
279: " const int b = i*N_comp+comp;\n"
280: " const int pidx = qidx*N_bt + b;\n"
281: " const int uidx = cell*N_bt + b;\n"
282: " %s%d realSpaceDer;\n\n",
283: &count, numeric_str, dim));
284: if (useField) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " u[comp] += u_i[uidx]*phi_i[pidx];\n", &count);
285: if (useFieldDer) {
286: switch (dim) {
287: case 2:
288: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
289: " realSpaceDer.x = invJ[cell*dim*dim+0*dim+0]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y;\n"
290: " gradU[comp].x += u_i[uidx]*realSpaceDer.x;\n"
291: " realSpaceDer.y = invJ[cell*dim*dim+0*dim+1]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y;\n"
292: " gradU[comp].y += u_i[uidx]*realSpaceDer.y;\n",
293: &count));
294: break;
295: case 3:
296: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
297: " realSpaceDer.x = invJ[cell*dim*dim+0*dim+0]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+0]*phiDer_i[pidx].z;\n"
298: " gradU[comp].x += u_i[uidx]*realSpaceDer.x;\n"
299: " realSpaceDer.y = invJ[cell*dim*dim+0*dim+1]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+1]*phiDer_i[pidx].z;\n"
300: " gradU[comp].y += u_i[uidx]*realSpaceDer.y;\n"
301: " realSpaceDer.z = invJ[cell*dim*dim+0*dim+2]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+2]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+2]*phiDer_i[pidx].z;\n"
302: " gradU[comp].z += u_i[uidx]*realSpaceDer.z;\n",
303: &count));
304: break;
305: }
306: }
307: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
308: " }\n"
309: " }\n",
310: &count));
311: if (useFieldAux) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " a[0] += a_i[cell];\n", &count);
312: /* Calculate residual at quadrature points: Should be generated by an weak form egine */
313: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " /* Process values at quadrature points */\n", &count);
314: switch (op) {
315: case LAPLACIAN:
316: if (useF0) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " f_0[fidx] = 4.0;\n", &count);
317: if (useF1) {
318: if (useAux) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " f_1[fidx] = a[0]*gradU[cidx];\n", &count);
319: else PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " f_1[fidx] = gradU[cidx];\n", &count);
320: }
321: break;
322: case ELASTICITY:
323: if (useF0) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " f_0[fidx] = 4.0;\n", &count);
324: if (useF1) {
325: switch (dim) {
326: case 2:
327: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
328: " switch (cidx) {\n"
329: " case 0:\n"
330: " f_1[fidx].x = lambda*(gradU[0].x + gradU[1].y) + mu*(gradU[0].x + gradU[0].x);\n"
331: " f_1[fidx].y = lambda*(gradU[0].x + gradU[1].y) + mu*(gradU[0].y + gradU[1].x);\n"
332: " break;\n"
333: " case 1:\n"
334: " f_1[fidx].x = lambda*(gradU[0].x + gradU[1].y) + mu*(gradU[1].x + gradU[0].y);\n"
335: " f_1[fidx].y = lambda*(gradU[0].x + gradU[1].y) + mu*(gradU[1].y + gradU[1].y);\n"
336: " }\n",
337: &count));
338: break;
339: case 3:
340: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
341: " switch (cidx) {\n"
342: " case 0:\n"
343: " f_1[fidx].x = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[0].x + gradU[0].x);\n"
344: " f_1[fidx].y = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[0].y + gradU[1].x);\n"
345: " f_1[fidx].z = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[0].z + gradU[2].x);\n"
346: " break;\n"
347: " case 1:\n"
348: " f_1[fidx].x = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[1].x + gradU[0].y);\n"
349: " f_1[fidx].y = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[1].y + gradU[1].y);\n"
350: " f_1[fidx].z = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[1].y + gradU[2].y);\n"
351: " break;\n"
352: " case 2:\n"
353: " f_1[fidx].x = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[2].x + gradU[0].z);\n"
354: " f_1[fidx].y = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[2].y + gradU[1].z);\n"
355: " f_1[fidx].z = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[2].y + gradU[2].z);\n"
356: " }\n",
357: &count));
358: break;
359: }
360: }
361: break;
362: default:
363: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PDE operator %d is not supported", op);
364: }
365: if (useF0) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " f_0[fidx] *= detJ[cell]*w;\n", &count);
366: if (useF1) {
367: switch (dim) {
368: case 1:
369: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " f_1[fidx].x *= detJ[cell]*w;\n", &count);
370: break;
371: case 2:
372: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " f_1[fidx].x *= detJ[cell]*w; f_1[fidx].y *= detJ[cell]*w;\n", &count);
373: break;
374: case 3:
375: PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " f_1[fidx].x *= detJ[cell]*w; f_1[fidx].y *= detJ[cell]*w; f_1[fidx].z *= detJ[cell]*w;\n", &count);
376: break;
377: }
378: }
379: /* Thread transpose */
380: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
381: " }\n\n"
382: " /* ==== TRANSPOSE THREADS ==== */\n"
383: " barrier(CLK_LOCAL_MEM_FENCE);\n\n",
384: &count));
385: /* Basis phase */
386: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
387: " /* Map values at quadrature points to coefficients */\n"
388: " for (int c = 0; c < N_sbc; ++c) {\n"
389: " const int cell = c*N_bl*N_q + blbidx; /* Cell number in batch */\n"
390: "\n"
391: " e_i = 0.0;\n"
392: " for (int q = 0; q < N_q; ++q) {\n"
393: " const int pidx = q*N_bt + bidx;\n"
394: " const int fidx = (cell*N_q + q)*N_comp + cidx;\n"
395: " %s%d realSpaceDer;\n\n",
396: &count, numeric_str, dim));
398: if (useF0) PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " e_i += phi_i[pidx]*f_0[fidx];\n", &count);
399: if (useF1) {
400: switch (dim) {
401: case 2:
402: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
403: " realSpaceDer.x = invJ[cell*dim*dim+0*dim+0]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y;\n"
404: " e_i += realSpaceDer.x*f_1[fidx].x;\n"
405: " realSpaceDer.y = invJ[cell*dim*dim+0*dim+1]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y;\n"
406: " e_i += realSpaceDer.y*f_1[fidx].y;\n",
407: &count));
408: break;
409: case 3:
410: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
411: " realSpaceDer.x = invJ[cell*dim*dim+0*dim+0]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+0]*phiDer_i[pidx].z;\n"
412: " e_i += realSpaceDer.x*f_1[fidx].x;\n"
413: " realSpaceDer.y = invJ[cell*dim*dim+0*dim+1]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+1]*phiDer_i[pidx].z;\n"
414: " e_i += realSpaceDer.y*f_1[fidx].y;\n"
415: " realSpaceDer.z = invJ[cell*dim*dim+0*dim+2]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+2]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+2]*phiDer_i[pidx].z;\n"
416: " e_i += realSpaceDer.z*f_1[fidx].z;\n",
417: &count));
418: break;
419: }
420: }
421: PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail,
422: " }\n"
423: " /* Write element vector for N_{cbc} cells at a time */\n"
424: " elemVec[(Goffset + batch*N_bc + c*N_bl*N_q)*N_bt + tidx] = e_i;\n"
425: " }\n"
426: " /* ==== Could do one write per batch ==== */\n"
427: " }\n"
428: " return;\n"
429: "}\n",
430: &count));
431: return 0;
432: }
434: static PetscErrorCode PetscFEOpenCLGetIntegrationKernel(PetscFE fem, PetscBool useAux, cl_program *ocl_prog, cl_kernel *ocl_kernel)
435: {
436: PetscFE_OpenCL *ocl = (PetscFE_OpenCL *)fem->data;
437: PetscInt dim, N_bl;
438: PetscBool flg;
439: char *buffer;
440: size_t len;
441: char errMsg[8192];
442: cl_int err;
444: PetscFEGetSpatialDimension(fem, &dim);
445: PetscMalloc1(8192, &buffer);
446: PetscFEGetTileSizes(fem, NULL, &N_bl, NULL, NULL);
447: PetscFEOpenCLGenerateIntegrationCode(fem, &buffer, 8192, useAux, N_bl);
448: PetscOptionsHasName(((PetscObject)fem)->options, ((PetscObject)fem)->prefix, "-petscfe_opencl_kernel_print", &flg);
449: if (flg) PetscPrintf(PetscObjectComm((PetscObject)fem), "OpenCL FE Integration Kernel:\n%s\n", buffer);
450: PetscStrlen(buffer, &len);
451: *ocl_prog = clCreateProgramWithSource(ocl->ctx_id, 1, (const char **)&buffer, &len, &err);
452: err;
453: err = clBuildProgram(*ocl_prog, 0, NULL, NULL, NULL, NULL);
454: if (err != CL_SUCCESS) {
455: err = clGetProgramBuildInfo(*ocl_prog, ocl->dev_id, CL_PROGRAM_BUILD_LOG, 8192 * sizeof(char), &errMsg, NULL);
456: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Build failed! Log:\n %s", errMsg);
457: }
458: PetscFree(buffer);
459: *ocl_kernel = clCreateKernel(*ocl_prog, "integrateElementQuadrature", &err);
460: return 0;
461: }
463: static PetscErrorCode PetscFEOpenCLCalculateGrid(PetscFE fem, PetscInt N, PetscInt blockSize, size_t *x, size_t *y, size_t *z)
464: {
465: const PetscInt Nblocks = N / blockSize;
468: *z = 1;
469: *y = 1;
470: for (*x = (size_t)(PetscSqrtReal(Nblocks) + 0.5); *x > 0; --*x) {
471: *y = Nblocks / *x;
472: if (*x * *y == (size_t)Nblocks) break;
473: }
475: return 0;
476: }
478: static PetscErrorCode PetscFEOpenCLLogResidual(PetscFE fem, PetscLogDouble time, PetscLogDouble flops)
479: {
480: PetscFE_OpenCL *ocl = (PetscFE_OpenCL *)fem->data;
481: PetscStageLog stageLog;
482: PetscEventPerfLog eventLog = NULL;
483: int stage;
485: PetscLogGetStageLog(&stageLog);
486: PetscStageLogGetCurrent(stageLog, &stage);
487: PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog);
488: /* Log performance info */
489: eventLog->eventInfo[ocl->residualEvent].count++;
490: eventLog->eventInfo[ocl->residualEvent].time += time;
491: eventLog->eventInfo[ocl->residualEvent].flops += flops;
492: return 0;
493: }
495: static PetscErrorCode PetscFEIntegrateResidual_OpenCL(PetscDS prob, PetscFormKey key, PetscInt Ne, PetscFEGeom *cgeom, const PetscScalar coefficients[], const PetscScalar coefficients_t[], PetscDS probAux, const PetscScalar coefficientsAux[], PetscReal t, PetscScalar elemVec[])
496: {
497: /* Nbc = batchSize */
498: PetscFE fem;
499: PetscFE_OpenCL *ocl;
500: PetscPointFunc f0_func;
501: PetscPointFunc f1_func;
502: PetscQuadrature q;
503: PetscInt dim, qNc;
504: PetscInt N_b; /* The number of basis functions */
505: PetscInt N_comp; /* The number of basis function components */
506: PetscInt N_bt; /* The total number of scalar basis functions */
507: PetscInt N_q; /* The number of quadrature points */
508: PetscInt N_bst; /* The block size, LCM(N_bt, N_q), Notice that a block is not process simultaneously */
509: PetscInt N_t; /* The number of threads, N_bst * N_bl */
510: PetscInt N_bl; /* The number of blocks */
511: PetscInt N_bc; /* The batch size, N_bl*N_q*N_b */
512: PetscInt N_cb; /* The number of batches */
513: const PetscInt field = key.field;
514: PetscInt numFlops, f0Flops = 0, f1Flops = 0;
515: PetscBool useAux = probAux ? PETSC_TRUE : PETSC_FALSE;
516: PetscBool useField = PETSC_FALSE;
517: PetscBool useFieldDer = PETSC_TRUE;
518: PetscBool useF0 = PETSC_TRUE;
519: PetscBool useF1 = PETSC_TRUE;
520: /* OpenCL variables */
521: cl_program ocl_prog;
522: cl_kernel ocl_kernel;
523: cl_event ocl_ev; /* The event for tracking kernel execution */
524: cl_ulong ns_start; /* Nanoseconds counter on GPU at kernel start */
525: cl_ulong ns_end; /* Nanoseconds counter on GPU at kernel stop */
526: cl_mem o_jacobianInverses, o_jacobianDeterminants;
527: cl_mem o_coefficients, o_coefficientsAux, o_elemVec;
528: float *f_coeff = NULL, *f_coeffAux = NULL, *f_invJ = NULL, *f_detJ = NULL;
529: double *d_coeff = NULL, *d_coeffAux = NULL, *d_invJ = NULL, *d_detJ = NULL;
530: PetscReal *r_invJ = NULL, *r_detJ = NULL;
531: void *oclCoeff, *oclCoeffAux, *oclInvJ, *oclDetJ;
532: size_t local_work_size[3], global_work_size[3];
533: size_t realSize, x, y, z;
534: const PetscReal *points, *weights;
535: int err;
537: PetscDSGetDiscretization(prob, field, (PetscObject *)&fem);
538: ocl = (PetscFE_OpenCL *)fem->data;
539: if (!Ne) {
540: PetscFEOpenCLLogResidual(fem, 0.0, 0.0);
541: return 0;
542: }
543: PetscFEGetSpatialDimension(fem, &dim);
544: PetscFEGetQuadrature(fem, &q);
545: PetscQuadratureGetData(q, NULL, &qNc, &N_q, &points, &weights);
547: PetscFEGetDimension(fem, &N_b);
548: PetscFEGetNumComponents(fem, &N_comp);
549: PetscDSGetResidual(prob, field, &f0_func, &f1_func);
550: PetscFEGetTileSizes(fem, NULL, &N_bl, &N_bc, &N_cb);
551: N_bt = N_b * N_comp;
552: N_bst = N_bt * N_q;
553: N_t = N_bst * N_bl;
555: /* Calculate layout */
556: if (Ne % (N_cb * N_bc)) { /* Remainder cells */
557: PetscFEIntegrateResidual_Basic(prob, key, Ne, cgeom, coefficients, coefficients_t, probAux, coefficientsAux, t, elemVec);
558: return 0;
559: }
560: PetscFEOpenCLCalculateGrid(fem, Ne, N_cb * N_bc, &x, &y, &z);
561: local_work_size[0] = N_bc * N_comp;
562: local_work_size[1] = 1;
563: local_work_size[2] = 1;
564: global_work_size[0] = x * local_work_size[0];
565: global_work_size[1] = y * local_work_size[1];
566: global_work_size[2] = z * local_work_size[2];
567: PetscInfo(fem, "GPU layout grid(%zu,%zu,%zu) block(%zu,%zu,%zu) with %d batches\n", x, y, z, local_work_size[0], local_work_size[1], local_work_size[2], N_cb);
568: PetscInfo(fem, " N_t: %d, N_cb: %d\n", N_t, N_cb);
569: /* Generate code */
570: if (probAux) {
571: PetscSpace P;
572: PetscInt NfAux, order, f;
574: PetscDSGetNumFields(probAux, &NfAux);
575: for (f = 0; f < NfAux; ++f) {
576: PetscFE feAux;
578: PetscDSGetDiscretization(probAux, f, (PetscObject *)&feAux);
579: PetscFEGetBasisSpace(feAux, &P);
580: PetscSpaceGetDegree(P, &order, NULL);
582: }
583: }
584: PetscFEOpenCLGetIntegrationKernel(fem, useAux, &ocl_prog, &ocl_kernel);
585: /* Create buffers on the device and send data over */
586: PetscDataTypeGetSize(ocl->realType, &realSize);
588: if (sizeof(PetscReal) != realSize) {
589: switch (ocl->realType) {
590: case PETSC_FLOAT: {
591: PetscInt c, b, d;
593: PetscMalloc4(Ne * N_bt, &f_coeff, Ne, &f_coeffAux, Ne * dim * dim, &f_invJ, Ne, &f_detJ);
594: for (c = 0; c < Ne; ++c) {
595: f_detJ[c] = (float)cgeom->detJ[c];
596: for (d = 0; d < dim * dim; ++d) f_invJ[c * dim * dim + d] = (float)cgeom->invJ[c * dim * dim + d];
597: for (b = 0; b < N_bt; ++b) f_coeff[c * N_bt + b] = (float)coefficients[c * N_bt + b];
598: }
599: if (coefficientsAux) { /* Assume P0 */
600: for (c = 0; c < Ne; ++c) f_coeffAux[c] = (float)coefficientsAux[c];
601: }
602: oclCoeff = (void *)f_coeff;
603: if (coefficientsAux) {
604: oclCoeffAux = (void *)f_coeffAux;
605: } else {
606: oclCoeffAux = NULL;
607: }
608: oclInvJ = (void *)f_invJ;
609: oclDetJ = (void *)f_detJ;
610: } break;
611: case PETSC_DOUBLE: {
612: PetscInt c, b, d;
614: PetscMalloc4(Ne * N_bt, &d_coeff, Ne, &d_coeffAux, Ne * dim * dim, &d_invJ, Ne, &d_detJ);
615: for (c = 0; c < Ne; ++c) {
616: d_detJ[c] = (double)cgeom->detJ[c];
617: for (d = 0; d < dim * dim; ++d) d_invJ[c * dim * dim + d] = (double)cgeom->invJ[c * dim * dim + d];
618: for (b = 0; b < N_bt; ++b) d_coeff[c * N_bt + b] = (double)coefficients[c * N_bt + b];
619: }
620: if (coefficientsAux) { /* Assume P0 */
621: for (c = 0; c < Ne; ++c) d_coeffAux[c] = (double)coefficientsAux[c];
622: }
623: oclCoeff = (void *)d_coeff;
624: if (coefficientsAux) {
625: oclCoeffAux = (void *)d_coeffAux;
626: } else {
627: oclCoeffAux = NULL;
628: }
629: oclInvJ = (void *)d_invJ;
630: oclDetJ = (void *)d_detJ;
631: } break;
632: default:
633: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unsupported PETSc type %d", ocl->realType);
634: }
635: } else {
636: PetscInt c, d;
638: PetscMalloc2(Ne * dim * dim, &r_invJ, Ne, &r_detJ);
639: for (c = 0; c < Ne; ++c) {
640: r_detJ[c] = cgeom->detJ[c];
641: for (d = 0; d < dim * dim; ++d) r_invJ[c * dim * dim + d] = cgeom->invJ[c * dim * dim + d];
642: }
643: oclCoeff = (void *)coefficients;
644: oclCoeffAux = (void *)coefficientsAux;
645: oclInvJ = (void *)r_invJ;
646: oclDetJ = (void *)r_detJ;
647: }
648: o_coefficients = clCreateBuffer(ocl->ctx_id, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Ne * N_bt * realSize, oclCoeff, &err);
649: if (coefficientsAux) {
650: o_coefficientsAux = clCreateBuffer(ocl->ctx_id, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Ne * realSize, oclCoeffAux, &err);
651: } else {
652: o_coefficientsAux = clCreateBuffer(ocl->ctx_id, CL_MEM_READ_ONLY, Ne * realSize, oclCoeffAux, &err);
653: }
654: o_jacobianInverses = clCreateBuffer(ocl->ctx_id, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Ne * dim * dim * realSize, oclInvJ, &err);
655: o_jacobianDeterminants = clCreateBuffer(ocl->ctx_id, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Ne * realSize, oclDetJ, &err);
656: o_elemVec = clCreateBuffer(ocl->ctx_id, CL_MEM_WRITE_ONLY, Ne * N_bt * realSize, NULL, &err);
657: /* Kernel launch */
658: clSetKernelArg(ocl_kernel, 0, sizeof(cl_int), (void *)&N_cb);
659: clSetKernelArg(ocl_kernel, 1, sizeof(cl_mem), (void *)&o_coefficients);
660: clSetKernelArg(ocl_kernel, 2, sizeof(cl_mem), (void *)&o_coefficientsAux);
661: clSetKernelArg(ocl_kernel, 3, sizeof(cl_mem), (void *)&o_jacobianInverses);
662: clSetKernelArg(ocl_kernel, 4, sizeof(cl_mem), (void *)&o_jacobianDeterminants);
663: clSetKernelArg(ocl_kernel, 5, sizeof(cl_mem), (void *)&o_elemVec);
664: clEnqueueNDRangeKernel(ocl->queue_id, ocl_kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &ocl_ev);
665: /* Read data back from device */
666: if (sizeof(PetscReal) != realSize) {
667: switch (ocl->realType) {
668: case PETSC_FLOAT: {
669: float *elem;
670: PetscInt c, b;
672: PetscFree4(f_coeff, f_coeffAux, f_invJ, f_detJ);
673: PetscMalloc1(Ne * N_bt, &elem);
674: clEnqueueReadBuffer(ocl->queue_id, o_elemVec, CL_TRUE, 0, Ne * N_bt * realSize, elem, 0, NULL, NULL);
675: for (c = 0; c < Ne; ++c) {
676: for (b = 0; b < N_bt; ++b) elemVec[c * N_bt + b] = (PetscScalar)elem[c * N_bt + b];
677: }
678: PetscFree(elem);
679: } break;
680: case PETSC_DOUBLE: {
681: double *elem;
682: PetscInt c, b;
684: PetscFree4(d_coeff, d_coeffAux, d_invJ, d_detJ);
685: PetscMalloc1(Ne * N_bt, &elem);
686: clEnqueueReadBuffer(ocl->queue_id, o_elemVec, CL_TRUE, 0, Ne * N_bt * realSize, elem, 0, NULL, NULL);
687: for (c = 0; c < Ne; ++c) {
688: for (b = 0; b < N_bt; ++b) elemVec[c * N_bt + b] = (PetscScalar)elem[c * N_bt + b];
689: }
690: PetscFree(elem);
691: } break;
692: default:
693: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unsupported PETSc type %d", ocl->realType);
694: }
695: } else {
696: PetscFree2(r_invJ, r_detJ);
697: clEnqueueReadBuffer(ocl->queue_id, o_elemVec, CL_TRUE, 0, Ne * N_bt * realSize, elemVec, 0, NULL, NULL);
698: }
699: /* Log performance */
700: clGetEventProfilingInfo(ocl_ev, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ns_start, NULL);
701: clGetEventProfilingInfo(ocl_ev, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ns_end, NULL);
702: f0Flops = 0;
703: switch (ocl->op) {
704: case LAPLACIAN:
705: f1Flops = useAux ? dim : 0;
706: break;
707: case ELASTICITY:
708: f1Flops = 2 * dim * dim;
709: break;
710: }
711: numFlops = Ne * (N_q * (N_b * N_comp * ((useField ? 2 : 0) + (useFieldDer ? 2 * dim * (dim + 1) : 0))
712: /*+
713: N_ba*N_compa*((useFieldAux ? 2 : 0) + (useFieldDerAux ? 2*dim*(dim + 1) : 0))*/
714: + N_comp * ((useF0 ? f0Flops + 2 : 0) + (useF1 ? f1Flops + 2 * dim : 0))) +
715: N_b * ((useF0 ? 2 : 0) + (useF1 ? 2 * dim * (dim + 1) : 0)));
716: PetscFEOpenCLLogResidual(fem, (ns_end - ns_start) * 1.0e-9, numFlops);
717: /* Cleanup */
718: clReleaseMemObject(o_coefficients);
719: clReleaseMemObject(o_coefficientsAux);
720: clReleaseMemObject(o_jacobianInverses);
721: clReleaseMemObject(o_jacobianDeterminants);
722: clReleaseMemObject(o_elemVec);
723: clReleaseKernel(ocl_kernel);
724: clReleaseProgram(ocl_prog);
725: return 0;
726: }
728: PETSC_INTERN PetscErrorCode PetscFESetUp_Basic(PetscFE);
729: PETSC_INTERN PetscErrorCode PetscFECreateTabulation_Basic(PetscFE, PetscInt, const PetscReal[], PetscInt, PetscTabulation);
731: static PetscErrorCode PetscFEInitialize_OpenCL(PetscFE fem)
732: {
733: fem->ops->setfromoptions = NULL;
734: fem->ops->setup = PetscFESetUp_Basic;
735: fem->ops->view = NULL;
736: fem->ops->destroy = PetscFEDestroy_OpenCL;
737: fem->ops->getdimension = PetscFEGetDimension_Basic;
738: fem->ops->createtabulation = PetscFECreateTabulation_Basic;
739: fem->ops->integrateresidual = PetscFEIntegrateResidual_OpenCL;
740: fem->ops->integratebdresidual = NULL /* PetscFEIntegrateBdResidual_OpenCL */;
741: fem->ops->integratejacobianaction = NULL /* PetscFEIntegrateJacobianAction_OpenCL */;
742: fem->ops->integratejacobian = PetscFEIntegrateJacobian_Basic;
743: return 0;
744: }
746: /*MC
747: PETSCFEOPENCL = "opencl" - A `PetscFEType` that integrates using a vectorized OpenCL implementation
749: Level: intermediate
751: .seealso: `PetscFEType`, `PetscFECreate()`, `PetscFESetType()`
752: M*/
754: PETSC_EXTERN PetscErrorCode PetscFECreate_OpenCL(PetscFE fem)
755: {
756: PetscFE_OpenCL *ocl;
757: cl_uint num_platforms;
758: cl_platform_id platform_ids[42];
759: cl_uint num_devices;
760: cl_device_id device_ids[42];
761: cl_int err;
764: PetscNew(&ocl);
765: fem->data = ocl;
767: /* Init Platform */
768: clGetPlatformIDs(42, platform_ids, &num_platforms);
770: ocl->pf_id = platform_ids[0];
771: /* Init Device */
772: clGetDeviceIDs(ocl->pf_id, CL_DEVICE_TYPE_ALL, 42, device_ids, &num_devices);
774: ocl->dev_id = device_ids[0];
775: /* Create context with one command queue */
776: ocl->ctx_id = clCreateContext(0, 1, &(ocl->dev_id), NULL, NULL, &err);
777: err;
778: ocl->queue_id = clCreateCommandQueue(ocl->ctx_id, ocl->dev_id, CL_QUEUE_PROFILING_ENABLE, &err);
779: err;
780: /* Types */
781: ocl->realType = PETSC_FLOAT;
782: /* Register events */
783: PetscLogEventRegister("OpenCL FEResidual", PETSCFE_CLASSID, &ocl->residualEvent);
784: /* Equation handling */
785: ocl->op = LAPLACIAN;
787: PetscFEInitialize_OpenCL(fem);
788: return 0;
789: }
791: /*@
792: PetscFEOpenCLSetRealType - Set the scalar type for running on the OpenCL accelerator
794: Input Parameters:
795: + fem - The `PetscFE`
796: - realType - The scalar type
798: Level: developer
800: .seealso: `PetscFE`, `PetscFEOpenCLGetRealType()`
801: @*/
802: PetscErrorCode PetscFEOpenCLSetRealType(PetscFE fem, PetscDataType realType)
803: {
804: PetscFE_OpenCL *ocl = (PetscFE_OpenCL *)fem->data;
807: ocl->realType = realType;
808: return 0;
809: }
811: /*@
812: PetscFEOpenCLGetRealType - Get the scalar type for running on the OpenCL accelerator
814: Input Parameter:
815: . fem - The `PetscFE`
817: Output Parameter:
818: . realType - The scalar type
820: Level: developer
822: .seealso: `PetscFE`, `PetscFEOpenCLSetRealType()`
823: @*/
824: PetscErrorCode PetscFEOpenCLGetRealType(PetscFE fem, PetscDataType *realType)
825: {
826: PetscFE_OpenCL *ocl = (PetscFE_OpenCL *)fem->data;
830: *realType = ocl->realType;
831: return 0;
832: }
834: #endif /* PETSC_HAVE_OPENCL */