Actual source code: ex226.c
1: static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
3: #include <petscmat.h>
5: /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
6: int global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
7: {
8: return i + j * m + k * m * n;
9: }
11: int main(int argc, char **argv)
12: {
13: Mat A, B, C, PtAP, PtAP_copy, PtAP_squared;
14: PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
15: PetscScalar v;
16: PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE;
17: char stencil[PETSC_MAX_PATH_LEN];
18: #if defined(PETSC_USE_LOG)
19: PetscLogStage fullMatMatMultStage;
20: #endif
23: PetscInitialize(&argc, &argv, (char *)0, help);
24: PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
25: PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
26: PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL);
27: PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view);
28: PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL);
30: /* Create a aij matrix A */
31: M = N = m * n * o;
32: MatCreate(PETSC_COMM_WORLD, &A);
33: MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N);
34: MatSetType(A, MATAIJ);
35: MatSetFromOptions(A);
37: /* Consistency checks */
40: /************ 2D stencils ***************/
41: PetscStrcmp(stencil, "2d5point", &equal);
42: if (equal) { /* 5-point stencil, 2D */
43: MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL);
44: MatSeqAIJSetPreallocation(A, 5, NULL);
45: MatGetOwnershipRange(A, &Istart, &Iend);
46: for (Ii = Istart; Ii < Iend; Ii++) {
47: v = -1.0;
48: k = Ii / (m * n);
49: j = (Ii - k * m * n) / m;
50: i = (Ii - k * m * n - j * m);
51: if (i > 0) {
52: J = global_index(i - 1, j, k, m, n);
53: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
54: }
55: if (i < m - 1) {
56: J = global_index(i + 1, j, k, m, n);
57: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
58: }
59: if (j > 0) {
60: J = global_index(i, j - 1, k, m, n);
61: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
62: }
63: if (j < n - 1) {
64: J = global_index(i, j + 1, k, m, n);
65: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
66: }
67: v = 4.0;
68: MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
69: }
70: }
71: PetscStrcmp(stencil, "2d9point", &equal);
72: if (equal) { /* 9-point stencil, 2D */
73: MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL);
74: MatSeqAIJSetPreallocation(A, 9, NULL);
75: MatGetOwnershipRange(A, &Istart, &Iend);
76: for (Ii = Istart; Ii < Iend; Ii++) {
77: v = -1.0;
78: k = Ii / (m * n);
79: j = (Ii - k * m * n) / m;
80: i = (Ii - k * m * n - j * m);
81: if (i > 0) {
82: J = global_index(i - 1, j, k, m, n);
83: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
84: }
85: if (i > 0 && j > 0) {
86: J = global_index(i - 1, j - 1, k, m, n);
87: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
88: }
89: if (j > 0) {
90: J = global_index(i, j - 1, k, m, n);
91: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
92: }
93: if (i < m - 1 && j > 0) {
94: J = global_index(i + 1, j - 1, k, m, n);
95: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
96: }
97: if (i < m - 1) {
98: J = global_index(i + 1, j, k, m, n);
99: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
100: }
101: if (i < m - 1 && j < n - 1) {
102: J = global_index(i + 1, j + 1, k, m, n);
103: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
104: }
105: if (j < n - 1) {
106: J = global_index(i, j + 1, k, m, n);
107: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
108: }
109: if (i > 0 && j < n - 1) {
110: J = global_index(i - 1, j + 1, k, m, n);
111: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
112: }
113: v = 8.0;
114: MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
115: }
116: }
117: PetscStrcmp(stencil, "2d9point2", &equal);
118: if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
119: MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL);
120: MatSeqAIJSetPreallocation(A, 9, NULL);
121: MatGetOwnershipRange(A, &Istart, &Iend);
122: for (Ii = Istart; Ii < Iend; Ii++) {
123: v = -1.0;
124: k = Ii / (m * n);
125: j = (Ii - k * m * n) / m;
126: i = (Ii - k * m * n - j * m);
127: if (i > 0) {
128: J = global_index(i - 1, j, k, m, n);
129: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
130: }
131: if (i > 1) {
132: J = global_index(i - 2, j, k, m, n);
133: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
134: }
135: if (i < m - 1) {
136: J = global_index(i + 1, j, k, m, n);
137: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
138: }
139: if (i < m - 2) {
140: J = global_index(i + 2, j, k, m, n);
141: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
142: }
143: if (j > 0) {
144: J = global_index(i, j - 1, k, m, n);
145: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
146: }
147: if (j > 1) {
148: J = global_index(i, j - 2, k, m, n);
149: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
150: }
151: if (j < n - 1) {
152: J = global_index(i, j + 1, k, m, n);
153: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
154: }
155: if (j < n - 2) {
156: J = global_index(i, j + 2, k, m, n);
157: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
158: }
159: v = 8.0;
160: MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
161: }
162: }
163: PetscStrcmp(stencil, "2d13point", &equal);
164: if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
165: MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL);
166: MatSeqAIJSetPreallocation(A, 13, NULL);
167: MatGetOwnershipRange(A, &Istart, &Iend);
168: for (Ii = Istart; Ii < Iend; Ii++) {
169: v = -1.0;
170: k = Ii / (m * n);
171: j = (Ii - k * m * n) / m;
172: i = (Ii - k * m * n - j * m);
173: if (i > 0) {
174: J = global_index(i - 1, j, k, m, n);
175: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
176: }
177: if (i > 1) {
178: J = global_index(i - 2, j, k, m, n);
179: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
180: }
181: if (i > 2) {
182: J = global_index(i - 3, j, k, m, n);
183: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
184: }
185: if (i < m - 1) {
186: J = global_index(i + 1, j, k, m, n);
187: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
188: }
189: if (i < m - 2) {
190: J = global_index(i + 2, j, k, m, n);
191: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
192: }
193: if (i < m - 3) {
194: J = global_index(i + 3, j, k, m, n);
195: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
196: }
197: if (j > 0) {
198: J = global_index(i, j - 1, k, m, n);
199: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
200: }
201: if (j > 1) {
202: J = global_index(i, j - 2, k, m, n);
203: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
204: }
205: if (j > 2) {
206: J = global_index(i, j - 3, k, m, n);
207: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
208: }
209: if (j < n - 1) {
210: J = global_index(i, j + 1, k, m, n);
211: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
212: }
213: if (j < n - 2) {
214: J = global_index(i, j + 2, k, m, n);
215: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
216: }
217: if (j < n - 3) {
218: J = global_index(i, j + 3, k, m, n);
219: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
220: }
221: v = 12.0;
222: MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
223: }
224: }
225: /************ 3D stencils ***************/
226: PetscStrcmp(stencil, "3d7point", &equal);
227: if (equal) { /* 7-point stencil, 3D */
228: MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL);
229: MatSeqAIJSetPreallocation(A, 7, NULL);
230: MatGetOwnershipRange(A, &Istart, &Iend);
231: for (Ii = Istart; Ii < Iend; Ii++) {
232: v = -1.0;
233: k = Ii / (m * n);
234: j = (Ii - k * m * n) / m;
235: i = (Ii - k * m * n - j * m);
236: if (i > 0) {
237: J = global_index(i - 1, j, k, m, n);
238: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
239: }
240: if (i < m - 1) {
241: J = global_index(i + 1, j, k, m, n);
242: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
243: }
244: if (j > 0) {
245: J = global_index(i, j - 1, k, m, n);
246: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
247: }
248: if (j < n - 1) {
249: J = global_index(i, j + 1, k, m, n);
250: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
251: }
252: if (k > 0) {
253: J = global_index(i, j, k - 1, m, n);
254: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
255: }
256: if (k < o - 1) {
257: J = global_index(i, j, k + 1, m, n);
258: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
259: }
260: v = 6.0;
261: MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
262: }
263: }
264: PetscStrcmp(stencil, "3d13point", &equal);
265: if (equal) { /* 13-point stencil, 3D */
266: MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL);
267: MatSeqAIJSetPreallocation(A, 13, NULL);
268: MatGetOwnershipRange(A, &Istart, &Iend);
269: for (Ii = Istart; Ii < Iend; Ii++) {
270: v = -1.0;
271: k = Ii / (m * n);
272: j = (Ii - k * m * n) / m;
273: i = (Ii - k * m * n - j * m);
274: if (i > 0) {
275: J = global_index(i - 1, j, k, m, n);
276: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
277: }
278: if (i > 1) {
279: J = global_index(i - 2, j, k, m, n);
280: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
281: }
282: if (i < m - 1) {
283: J = global_index(i + 1, j, k, m, n);
284: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
285: }
286: if (i < m - 2) {
287: J = global_index(i + 2, j, k, m, n);
288: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
289: }
290: if (j > 0) {
291: J = global_index(i, j - 1, k, m, n);
292: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
293: }
294: if (j > 1) {
295: J = global_index(i, j - 2, k, m, n);
296: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
297: }
298: if (j < n - 1) {
299: J = global_index(i, j + 1, k, m, n);
300: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
301: }
302: if (j < n - 2) {
303: J = global_index(i, j + 2, k, m, n);
304: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
305: }
306: if (k > 0) {
307: J = global_index(i, j, k - 1, m, n);
308: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
309: }
310: if (k > 1) {
311: J = global_index(i, j, k - 2, m, n);
312: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
313: }
314: if (k < o - 1) {
315: J = global_index(i, j, k + 1, m, n);
316: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
317: }
318: if (k < o - 2) {
319: J = global_index(i, j, k + 2, m, n);
320: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
321: }
322: v = 12.0;
323: MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
324: }
325: }
326: PetscStrcmp(stencil, "3d19point", &equal);
327: if (equal) { /* 19-point stencil, 3D */
328: MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL);
329: MatSeqAIJSetPreallocation(A, 19, NULL);
330: MatGetOwnershipRange(A, &Istart, &Iend);
331: for (Ii = Istart; Ii < Iend; Ii++) {
332: v = -1.0;
333: k = Ii / (m * n);
334: j = (Ii - k * m * n) / m;
335: i = (Ii - k * m * n - j * m);
336: /* one hop */
337: if (i > 0) {
338: J = global_index(i - 1, j, k, m, n);
339: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
340: }
341: if (i < m - 1) {
342: J = global_index(i + 1, j, k, m, n);
343: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
344: }
345: if (j > 0) {
346: J = global_index(i, j - 1, k, m, n);
347: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
348: }
349: if (j < n - 1) {
350: J = global_index(i, j + 1, k, m, n);
351: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
352: }
353: if (k > 0) {
354: J = global_index(i, j, k - 1, m, n);
355: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
356: }
357: if (k < o - 1) {
358: J = global_index(i, j, k + 1, m, n);
359: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
360: }
361: /* two hops */
362: if (i > 0 && j > 0) {
363: J = global_index(i - 1, j - 1, k, m, n);
364: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
365: }
366: if (i > 0 && k > 0) {
367: J = global_index(i - 1, j, k - 1, m, n);
368: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
369: }
370: if (i > 0 && j < n - 1) {
371: J = global_index(i - 1, j + 1, k, m, n);
372: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
373: }
374: if (i > 0 && k < o - 1) {
375: J = global_index(i - 1, j, k + 1, m, n);
376: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
377: }
378: if (i < m - 1 && j > 0) {
379: J = global_index(i + 1, j - 1, k, m, n);
380: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
381: }
382: if (i < m - 1 && k > 0) {
383: J = global_index(i + 1, j, k - 1, m, n);
384: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
385: }
386: if (i < m - 1 && j < n - 1) {
387: J = global_index(i + 1, j + 1, k, m, n);
388: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
389: }
390: if (i < m - 1 && k < o - 1) {
391: J = global_index(i + 1, j, k + 1, m, n);
392: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
393: }
394: if (j > 0 && k > 0) {
395: J = global_index(i, j - 1, k - 1, m, n);
396: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
397: }
398: if (j > 0 && k < o - 1) {
399: J = global_index(i, j - 1, k + 1, m, n);
400: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
401: }
402: if (j < n - 1 && k > 0) {
403: J = global_index(i, j + 1, k - 1, m, n);
404: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
405: }
406: if (j < n - 1 && k < o - 1) {
407: J = global_index(i, j + 1, k + 1, m, n);
408: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
409: }
410: v = 18.0;
411: MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
412: }
413: }
414: PetscStrcmp(stencil, "3d27point", &equal);
415: if (equal) { /* 27-point stencil, 3D */
416: MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL);
417: MatSeqAIJSetPreallocation(A, 27, NULL);
418: MatGetOwnershipRange(A, &Istart, &Iend);
419: for (Ii = Istart; Ii < Iend; Ii++) {
420: v = -1.0;
421: k = Ii / (m * n);
422: j = (Ii - k * m * n) / m;
423: i = (Ii - k * m * n - j * m);
424: if (k > 0) {
425: if (j > 0) {
426: if (i > 0) {
427: J = global_index(i - 1, j - 1, k - 1, m, n);
428: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
429: }
430: J = global_index(i, j - 1, k - 1, m, n);
431: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
432: if (i < m - 1) {
433: J = global_index(i + 1, j - 1, k - 1, m, n);
434: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
435: }
436: }
437: {
438: if (i > 0) {
439: J = global_index(i - 1, j, k - 1, m, n);
440: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
441: }
442: J = global_index(i, j, k - 1, m, n);
443: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
444: if (i < m - 1) {
445: J = global_index(i + 1, j, k - 1, m, n);
446: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
447: }
448: }
449: if (j < n - 1) {
450: if (i > 0) {
451: J = global_index(i - 1, j + 1, k - 1, m, n);
452: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
453: }
454: J = global_index(i, j + 1, k - 1, m, n);
455: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
456: if (i < m - 1) {
457: J = global_index(i + 1, j + 1, k - 1, m, n);
458: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
459: }
460: }
461: }
462: {
463: if (j > 0) {
464: if (i > 0) {
465: J = global_index(i - 1, j - 1, k, m, n);
466: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
467: }
468: J = global_index(i, j - 1, k, m, n);
469: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
470: if (i < m - 1) {
471: J = global_index(i + 1, j - 1, k, m, n);
472: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
473: }
474: }
475: {
476: if (i > 0) {
477: J = global_index(i - 1, j, k, m, n);
478: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
479: }
480: J = global_index(i, j, k, m, n);
481: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
482: if (i < m - 1) {
483: J = global_index(i + 1, j, k, m, n);
484: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
485: }
486: }
487: if (j < n - 1) {
488: if (i > 0) {
489: J = global_index(i - 1, j + 1, k, m, n);
490: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
491: }
492: J = global_index(i, j + 1, k, m, n);
493: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
494: if (i < m - 1) {
495: J = global_index(i + 1, j + 1, k, m, n);
496: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
497: }
498: }
499: }
500: if (k < o - 1) {
501: if (j > 0) {
502: if (i > 0) {
503: J = global_index(i - 1, j - 1, k + 1, m, n);
504: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
505: }
506: J = global_index(i, j - 1, k + 1, m, n);
507: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
508: if (i < m - 1) {
509: J = global_index(i + 1, j - 1, k + 1, m, n);
510: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
511: }
512: }
513: {
514: if (i > 0) {
515: J = global_index(i - 1, j, k + 1, m, n);
516: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
517: }
518: J = global_index(i, j, k + 1, m, n);
519: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
520: if (i < m - 1) {
521: J = global_index(i + 1, j, k + 1, m, n);
522: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
523: }
524: }
525: if (j < n - 1) {
526: if (i > 0) {
527: J = global_index(i - 1, j + 1, k + 1, m, n);
528: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
529: }
530: J = global_index(i, j + 1, k + 1, m, n);
531: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
532: if (i < m - 1) {
533: J = global_index(i + 1, j + 1, k + 1, m, n);
534: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
535: }
536: }
537: }
538: v = 26.0;
539: MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
540: }
541: }
542: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
543: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
545: /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
546: MatDuplicate(A, MAT_COPY_VALUES, &B);
548: PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage);
550: /* Test C = A*B */
551: PetscLogStagePush(fullMatMatMultStage);
552: MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C);
554: /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */
555: MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP);
556: MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy);
557: MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP_squared);
559: MatView(C, PETSC_VIEWER_STDOUT_WORLD);
560: MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD);
562: MatDestroy(&PtAP_squared);
563: MatDestroy(&PtAP_copy);
564: MatDestroy(&PtAP);
565: MatDestroy(&C);
566: MatDestroy(&B);
567: MatDestroy(&A);
568: PetscFinalize();
569: return 0;
570: }
572: /*TEST
574: test:
575: suffix: 1
576: nsize: 1
577: args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
579: test:
580: suffix: 2
581: nsize: 1
582: args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
584: test:
585: suffix: 3
586: nsize: 4
587: args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
589: TEST*/