Actual source code: ex226.c

  1: static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";

  3: #include <petscmat.h>

  5: /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
  6: int global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
  7: {
  8:   return i + j * m + k * m * n;
  9: }

 11: int main(int argc, char **argv)
 12: {
 13:   Mat         A, B, C, PtAP, PtAP_copy, PtAP_squared;
 14:   PetscInt    i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
 15:   PetscScalar v;
 16:   PetscBool   equal = PETSC_FALSE, mat_view = PETSC_FALSE;
 17:   char        stencil[PETSC_MAX_PATH_LEN];
 18: #if defined(PETSC_USE_LOG)
 19:   PetscLogStage fullMatMatMultStage;
 20: #endif

 23:   PetscInitialize(&argc, &argv, (char *)0, help);
 24:   PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
 25:   PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
 26:   PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL);
 27:   PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view);
 28:   PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL);

 30:   /* Create a aij matrix A */
 31:   M = N = m * n * o;
 32:   MatCreate(PETSC_COMM_WORLD, &A);
 33:   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N);
 34:   MatSetType(A, MATAIJ);
 35:   MatSetFromOptions(A);

 37:   /* Consistency checks */

 40:   /************ 2D stencils ***************/
 41:   PetscStrcmp(stencil, "2d5point", &equal);
 42:   if (equal) { /* 5-point stencil, 2D */
 43:     MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL);
 44:     MatSeqAIJSetPreallocation(A, 5, NULL);
 45:     MatGetOwnershipRange(A, &Istart, &Iend);
 46:     for (Ii = Istart; Ii < Iend; Ii++) {
 47:       v = -1.0;
 48:       k = Ii / (m * n);
 49:       j = (Ii - k * m * n) / m;
 50:       i = (Ii - k * m * n - j * m);
 51:       if (i > 0) {
 52:         J = global_index(i - 1, j, k, m, n);
 53:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
 54:       }
 55:       if (i < m - 1) {
 56:         J = global_index(i + 1, j, k, m, n);
 57:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
 58:       }
 59:       if (j > 0) {
 60:         J = global_index(i, j - 1, k, m, n);
 61:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
 62:       }
 63:       if (j < n - 1) {
 64:         J = global_index(i, j + 1, k, m, n);
 65:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
 66:       }
 67:       v = 4.0;
 68:       MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
 69:     }
 70:   }
 71:   PetscStrcmp(stencil, "2d9point", &equal);
 72:   if (equal) { /* 9-point stencil, 2D */
 73:     MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL);
 74:     MatSeqAIJSetPreallocation(A, 9, NULL);
 75:     MatGetOwnershipRange(A, &Istart, &Iend);
 76:     for (Ii = Istart; Ii < Iend; Ii++) {
 77:       v = -1.0;
 78:       k = Ii / (m * n);
 79:       j = (Ii - k * m * n) / m;
 80:       i = (Ii - k * m * n - j * m);
 81:       if (i > 0) {
 82:         J = global_index(i - 1, j, k, m, n);
 83:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
 84:       }
 85:       if (i > 0 && j > 0) {
 86:         J = global_index(i - 1, j - 1, k, m, n);
 87:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
 88:       }
 89:       if (j > 0) {
 90:         J = global_index(i, j - 1, k, m, n);
 91:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
 92:       }
 93:       if (i < m - 1 && j > 0) {
 94:         J = global_index(i + 1, j - 1, k, m, n);
 95:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
 96:       }
 97:       if (i < m - 1) {
 98:         J = global_index(i + 1, j, k, m, n);
 99:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
100:       }
101:       if (i < m - 1 && j < n - 1) {
102:         J = global_index(i + 1, j + 1, k, m, n);
103:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
104:       }
105:       if (j < n - 1) {
106:         J = global_index(i, j + 1, k, m, n);
107:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
108:       }
109:       if (i > 0 && j < n - 1) {
110:         J = global_index(i - 1, j + 1, k, m, n);
111:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
112:       }
113:       v = 8.0;
114:       MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
115:     }
116:   }
117:   PetscStrcmp(stencil, "2d9point2", &equal);
118:   if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
119:     MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL);
120:     MatSeqAIJSetPreallocation(A, 9, NULL);
121:     MatGetOwnershipRange(A, &Istart, &Iend);
122:     for (Ii = Istart; Ii < Iend; Ii++) {
123:       v = -1.0;
124:       k = Ii / (m * n);
125:       j = (Ii - k * m * n) / m;
126:       i = (Ii - k * m * n - j * m);
127:       if (i > 0) {
128:         J = global_index(i - 1, j, k, m, n);
129:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
130:       }
131:       if (i > 1) {
132:         J = global_index(i - 2, j, k, m, n);
133:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
134:       }
135:       if (i < m - 1) {
136:         J = global_index(i + 1, j, k, m, n);
137:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
138:       }
139:       if (i < m - 2) {
140:         J = global_index(i + 2, j, k, m, n);
141:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
142:       }
143:       if (j > 0) {
144:         J = global_index(i, j - 1, k, m, n);
145:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
146:       }
147:       if (j > 1) {
148:         J = global_index(i, j - 2, k, m, n);
149:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
150:       }
151:       if (j < n - 1) {
152:         J = global_index(i, j + 1, k, m, n);
153:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
154:       }
155:       if (j < n - 2) {
156:         J = global_index(i, j + 2, k, m, n);
157:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
158:       }
159:       v = 8.0;
160:       MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
161:     }
162:   }
163:   PetscStrcmp(stencil, "2d13point", &equal);
164:   if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
165:     MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL);
166:     MatSeqAIJSetPreallocation(A, 13, NULL);
167:     MatGetOwnershipRange(A, &Istart, &Iend);
168:     for (Ii = Istart; Ii < Iend; Ii++) {
169:       v = -1.0;
170:       k = Ii / (m * n);
171:       j = (Ii - k * m * n) / m;
172:       i = (Ii - k * m * n - j * m);
173:       if (i > 0) {
174:         J = global_index(i - 1, j, k, m, n);
175:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
176:       }
177:       if (i > 1) {
178:         J = global_index(i - 2, j, k, m, n);
179:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
180:       }
181:       if (i > 2) {
182:         J = global_index(i - 3, j, k, m, n);
183:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
184:       }
185:       if (i < m - 1) {
186:         J = global_index(i + 1, j, k, m, n);
187:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
188:       }
189:       if (i < m - 2) {
190:         J = global_index(i + 2, j, k, m, n);
191:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
192:       }
193:       if (i < m - 3) {
194:         J = global_index(i + 3, j, k, m, n);
195:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
196:       }
197:       if (j > 0) {
198:         J = global_index(i, j - 1, k, m, n);
199:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
200:       }
201:       if (j > 1) {
202:         J = global_index(i, j - 2, k, m, n);
203:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
204:       }
205:       if (j > 2) {
206:         J = global_index(i, j - 3, k, m, n);
207:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
208:       }
209:       if (j < n - 1) {
210:         J = global_index(i, j + 1, k, m, n);
211:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
212:       }
213:       if (j < n - 2) {
214:         J = global_index(i, j + 2, k, m, n);
215:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
216:       }
217:       if (j < n - 3) {
218:         J = global_index(i, j + 3, k, m, n);
219:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
220:       }
221:       v = 12.0;
222:       MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
223:     }
224:   }
225:   /************ 3D stencils ***************/
226:   PetscStrcmp(stencil, "3d7point", &equal);
227:   if (equal) { /* 7-point stencil, 3D */
228:     MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL);
229:     MatSeqAIJSetPreallocation(A, 7, NULL);
230:     MatGetOwnershipRange(A, &Istart, &Iend);
231:     for (Ii = Istart; Ii < Iend; Ii++) {
232:       v = -1.0;
233:       k = Ii / (m * n);
234:       j = (Ii - k * m * n) / m;
235:       i = (Ii - k * m * n - j * m);
236:       if (i > 0) {
237:         J = global_index(i - 1, j, k, m, n);
238:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
239:       }
240:       if (i < m - 1) {
241:         J = global_index(i + 1, j, k, m, n);
242:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
243:       }
244:       if (j > 0) {
245:         J = global_index(i, j - 1, k, m, n);
246:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
247:       }
248:       if (j < n - 1) {
249:         J = global_index(i, j + 1, k, m, n);
250:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
251:       }
252:       if (k > 0) {
253:         J = global_index(i, j, k - 1, m, n);
254:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
255:       }
256:       if (k < o - 1) {
257:         J = global_index(i, j, k + 1, m, n);
258:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
259:       }
260:       v = 6.0;
261:       MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
262:     }
263:   }
264:   PetscStrcmp(stencil, "3d13point", &equal);
265:   if (equal) { /* 13-point stencil, 3D */
266:     MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL);
267:     MatSeqAIJSetPreallocation(A, 13, NULL);
268:     MatGetOwnershipRange(A, &Istart, &Iend);
269:     for (Ii = Istart; Ii < Iend; Ii++) {
270:       v = -1.0;
271:       k = Ii / (m * n);
272:       j = (Ii - k * m * n) / m;
273:       i = (Ii - k * m * n - j * m);
274:       if (i > 0) {
275:         J = global_index(i - 1, j, k, m, n);
276:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
277:       }
278:       if (i > 1) {
279:         J = global_index(i - 2, j, k, m, n);
280:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
281:       }
282:       if (i < m - 1) {
283:         J = global_index(i + 1, j, k, m, n);
284:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
285:       }
286:       if (i < m - 2) {
287:         J = global_index(i + 2, j, k, m, n);
288:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
289:       }
290:       if (j > 0) {
291:         J = global_index(i, j - 1, k, m, n);
292:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
293:       }
294:       if (j > 1) {
295:         J = global_index(i, j - 2, k, m, n);
296:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
297:       }
298:       if (j < n - 1) {
299:         J = global_index(i, j + 1, k, m, n);
300:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
301:       }
302:       if (j < n - 2) {
303:         J = global_index(i, j + 2, k, m, n);
304:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
305:       }
306:       if (k > 0) {
307:         J = global_index(i, j, k - 1, m, n);
308:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
309:       }
310:       if (k > 1) {
311:         J = global_index(i, j, k - 2, m, n);
312:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
313:       }
314:       if (k < o - 1) {
315:         J = global_index(i, j, k + 1, m, n);
316:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
317:       }
318:       if (k < o - 2) {
319:         J = global_index(i, j, k + 2, m, n);
320:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
321:       }
322:       v = 12.0;
323:       MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
324:     }
325:   }
326:   PetscStrcmp(stencil, "3d19point", &equal);
327:   if (equal) { /* 19-point stencil, 3D */
328:     MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL);
329:     MatSeqAIJSetPreallocation(A, 19, NULL);
330:     MatGetOwnershipRange(A, &Istart, &Iend);
331:     for (Ii = Istart; Ii < Iend; Ii++) {
332:       v = -1.0;
333:       k = Ii / (m * n);
334:       j = (Ii - k * m * n) / m;
335:       i = (Ii - k * m * n - j * m);
336:       /* one hop */
337:       if (i > 0) {
338:         J = global_index(i - 1, j, k, m, n);
339:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
340:       }
341:       if (i < m - 1) {
342:         J = global_index(i + 1, j, k, m, n);
343:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
344:       }
345:       if (j > 0) {
346:         J = global_index(i, j - 1, k, m, n);
347:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
348:       }
349:       if (j < n - 1) {
350:         J = global_index(i, j + 1, k, m, n);
351:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
352:       }
353:       if (k > 0) {
354:         J = global_index(i, j, k - 1, m, n);
355:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
356:       }
357:       if (k < o - 1) {
358:         J = global_index(i, j, k + 1, m, n);
359:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
360:       }
361:       /* two hops */
362:       if (i > 0 && j > 0) {
363:         J = global_index(i - 1, j - 1, k, m, n);
364:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
365:       }
366:       if (i > 0 && k > 0) {
367:         J = global_index(i - 1, j, k - 1, m, n);
368:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
369:       }
370:       if (i > 0 && j < n - 1) {
371:         J = global_index(i - 1, j + 1, k, m, n);
372:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
373:       }
374:       if (i > 0 && k < o - 1) {
375:         J = global_index(i - 1, j, k + 1, m, n);
376:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
377:       }
378:       if (i < m - 1 && j > 0) {
379:         J = global_index(i + 1, j - 1, k, m, n);
380:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
381:       }
382:       if (i < m - 1 && k > 0) {
383:         J = global_index(i + 1, j, k - 1, m, n);
384:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
385:       }
386:       if (i < m - 1 && j < n - 1) {
387:         J = global_index(i + 1, j + 1, k, m, n);
388:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
389:       }
390:       if (i < m - 1 && k < o - 1) {
391:         J = global_index(i + 1, j, k + 1, m, n);
392:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
393:       }
394:       if (j > 0 && k > 0) {
395:         J = global_index(i, j - 1, k - 1, m, n);
396:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
397:       }
398:       if (j > 0 && k < o - 1) {
399:         J = global_index(i, j - 1, k + 1, m, n);
400:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
401:       }
402:       if (j < n - 1 && k > 0) {
403:         J = global_index(i, j + 1, k - 1, m, n);
404:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
405:       }
406:       if (j < n - 1 && k < o - 1) {
407:         J = global_index(i, j + 1, k + 1, m, n);
408:         MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
409:       }
410:       v = 18.0;
411:       MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
412:     }
413:   }
414:   PetscStrcmp(stencil, "3d27point", &equal);
415:   if (equal) { /* 27-point stencil, 3D */
416:     MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL);
417:     MatSeqAIJSetPreallocation(A, 27, NULL);
418:     MatGetOwnershipRange(A, &Istart, &Iend);
419:     for (Ii = Istart; Ii < Iend; Ii++) {
420:       v = -1.0;
421:       k = Ii / (m * n);
422:       j = (Ii - k * m * n) / m;
423:       i = (Ii - k * m * n - j * m);
424:       if (k > 0) {
425:         if (j > 0) {
426:           if (i > 0) {
427:             J = global_index(i - 1, j - 1, k - 1, m, n);
428:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
429:           }
430:           J = global_index(i, j - 1, k - 1, m, n);
431:           MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
432:           if (i < m - 1) {
433:             J = global_index(i + 1, j - 1, k - 1, m, n);
434:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
435:           }
436:         }
437:         {
438:           if (i > 0) {
439:             J = global_index(i - 1, j, k - 1, m, n);
440:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
441:           }
442:           J = global_index(i, j, k - 1, m, n);
443:           MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
444:           if (i < m - 1) {
445:             J = global_index(i + 1, j, k - 1, m, n);
446:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
447:           }
448:         }
449:         if (j < n - 1) {
450:           if (i > 0) {
451:             J = global_index(i - 1, j + 1, k - 1, m, n);
452:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
453:           }
454:           J = global_index(i, j + 1, k - 1, m, n);
455:           MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
456:           if (i < m - 1) {
457:             J = global_index(i + 1, j + 1, k - 1, m, n);
458:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
459:           }
460:         }
461:       }
462:       {
463:         if (j > 0) {
464:           if (i > 0) {
465:             J = global_index(i - 1, j - 1, k, m, n);
466:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
467:           }
468:           J = global_index(i, j - 1, k, m, n);
469:           MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
470:           if (i < m - 1) {
471:             J = global_index(i + 1, j - 1, k, m, n);
472:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
473:           }
474:         }
475:         {
476:           if (i > 0) {
477:             J = global_index(i - 1, j, k, m, n);
478:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
479:           }
480:           J = global_index(i, j, k, m, n);
481:           MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
482:           if (i < m - 1) {
483:             J = global_index(i + 1, j, k, m, n);
484:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
485:           }
486:         }
487:         if (j < n - 1) {
488:           if (i > 0) {
489:             J = global_index(i - 1, j + 1, k, m, n);
490:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
491:           }
492:           J = global_index(i, j + 1, k, m, n);
493:           MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
494:           if (i < m - 1) {
495:             J = global_index(i + 1, j + 1, k, m, n);
496:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
497:           }
498:         }
499:       }
500:       if (k < o - 1) {
501:         if (j > 0) {
502:           if (i > 0) {
503:             J = global_index(i - 1, j - 1, k + 1, m, n);
504:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
505:           }
506:           J = global_index(i, j - 1, k + 1, m, n);
507:           MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
508:           if (i < m - 1) {
509:             J = global_index(i + 1, j - 1, k + 1, m, n);
510:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
511:           }
512:         }
513:         {
514:           if (i > 0) {
515:             J = global_index(i - 1, j, k + 1, m, n);
516:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
517:           }
518:           J = global_index(i, j, k + 1, m, n);
519:           MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
520:           if (i < m - 1) {
521:             J = global_index(i + 1, j, k + 1, m, n);
522:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
523:           }
524:         }
525:         if (j < n - 1) {
526:           if (i > 0) {
527:             J = global_index(i - 1, j + 1, k + 1, m, n);
528:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
529:           }
530:           J = global_index(i, j + 1, k + 1, m, n);
531:           MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
532:           if (i < m - 1) {
533:             J = global_index(i + 1, j + 1, k + 1, m, n);
534:             MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
535:           }
536:         }
537:       }
538:       v = 26.0;
539:       MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
540:     }
541:   }
542:   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
543:   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);

545:   /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
546:   MatDuplicate(A, MAT_COPY_VALUES, &B);

548:   PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage);

550:   /* Test C = A*B */
551:   PetscLogStagePush(fullMatMatMultStage);
552:   MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C);

554:   /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
555:   MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP);
556:   MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy);
557:   MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP_squared);

559:   MatView(C, PETSC_VIEWER_STDOUT_WORLD);
560:   MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD);

562:   MatDestroy(&PtAP_squared);
563:   MatDestroy(&PtAP_copy);
564:   MatDestroy(&PtAP);
565:   MatDestroy(&C);
566:   MatDestroy(&B);
567:   MatDestroy(&A);
568:   PetscFinalize();
569:   return 0;
570: }

572: /*TEST

574:  test:
575:       suffix: 1
576:       nsize: 1
577:       args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted

579:  test:
580:        suffix: 2
581:        nsize: 1
582:        args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge

584:  test:
585:       suffix: 3
586:       nsize: 4
587:       args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi

589: TEST*/