Actual source code: ex66.c
1: static char help[] = "Tests MATH2OPUS\n\n";
3: #include <petscmat.h>
4: #include <petscsf.h>
6: static PetscScalar GenEntry_Symm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
7: {
8: PetscInt d;
9: PetscReal clength = sdim == 3 ? 0.2 : 0.1;
10: PetscReal dist, diff = 0.0;
12: for (d = 0; d < sdim; d++) diff += (x[d] - y[d]) * (x[d] - y[d]);
13: dist = PetscSqrtReal(diff);
14: return PetscExpReal(-dist / clength);
15: }
17: static PetscScalar GenEntry_Unsymm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
18: {
19: PetscInt d;
20: PetscReal clength = sdim == 3 ? 0.2 : 0.1;
21: PetscReal dist, diff = 0.0, nx = 0.0, ny = 0.0;
23: for (d = 0; d < sdim; d++) nx += x[d] * x[d];
24: for (d = 0; d < sdim; d++) ny += y[d] * y[d];
25: for (d = 0; d < sdim; d++) diff += (x[d] - y[d]) * (x[d] - y[d]);
26: dist = PetscSqrtReal(diff);
27: return nx > ny ? PetscExpReal(-dist / clength) : PetscExpReal(-dist / clength) + 1.;
28: }
30: int main(int argc, char **argv)
31: {
32: Mat A, B, C, D;
33: Vec v, x, y, Ax, Ay, Bx, By;
34: PetscRandom r;
35: PetscLayout map;
36: PetscScalar *Adata = NULL, *Cdata = NULL, scale = 1.0;
37: PetscReal *coords, nA, nD, nB, err, nX, norms[3];
38: PetscInt N, n = 64, dim = 1, i, j, nrhs = 11, lda = 0, ldc = 0, ldu = 0, nlr = 7, nt, ntrials = 2;
39: PetscMPIInt size, rank;
40: PetscBool testlayout = PETSC_FALSE, flg, symm = PETSC_FALSE, Asymm = PETSC_TRUE, kernel = PETSC_TRUE;
41: PetscBool checkexpl = PETSC_FALSE, agpu = PETSC_FALSE, bgpu = PETSC_FALSE, cgpu = PETSC_FALSE, flgglob;
42: PetscBool testtrans, testnorm, randommat = PETSC_TRUE, testorthog, testcompress, testhlru;
43: void (*approxnormfunc)(void);
44: void (*Anormfunc)(void);
46: #if defined(PETSC_HAVE_MPI_INIT_THREAD)
47: PETSC_MPI_THREAD_REQUIRED = MPI_THREAD_MULTIPLE;
48: #endif
50: PetscInitialize(&argc, &argv, (char *)0, help);
51: PetscOptionsGetInt(NULL, NULL, "-ng", &N, &flgglob);
52: PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
53: PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL);
54: PetscOptionsGetInt(NULL, NULL, "-dim", &dim, NULL);
55: PetscOptionsGetInt(NULL, NULL, "-lda", &lda, NULL);
56: PetscOptionsGetInt(NULL, NULL, "-ldc", &ldc, NULL);
57: PetscOptionsGetInt(NULL, NULL, "-nlr", &nlr, NULL);
58: PetscOptionsGetInt(NULL, NULL, "-ldu", &ldu, NULL);
59: PetscOptionsGetInt(NULL, NULL, "-matmattrials", &ntrials, NULL);
60: PetscOptionsGetBool(NULL, NULL, "-randommat", &randommat, NULL);
61: if (!flgglob) PetscOptionsGetBool(NULL, NULL, "-testlayout", &testlayout, NULL);
62: PetscOptionsGetBool(NULL, NULL, "-Asymm", &Asymm, NULL);
63: PetscOptionsGetBool(NULL, NULL, "-symm", &symm, NULL);
64: PetscOptionsGetBool(NULL, NULL, "-kernel", &kernel, NULL);
65: PetscOptionsGetBool(NULL, NULL, "-checkexpl", &checkexpl, NULL);
66: PetscOptionsGetBool(NULL, NULL, "-agpu", &agpu, NULL);
67: PetscOptionsGetBool(NULL, NULL, "-bgpu", &bgpu, NULL);
68: PetscOptionsGetBool(NULL, NULL, "-cgpu", &cgpu, NULL);
69: PetscOptionsGetScalar(NULL, NULL, "-scale", &scale, NULL);
70: if (!Asymm) symm = PETSC_FALSE;
72: MPI_Comm_size(PETSC_COMM_WORLD, &size);
74: /* Disable tests for unimplemented variants */
75: testtrans = (PetscBool)(size == 1 || symm);
76: testnorm = (PetscBool)(size == 1 || symm);
77: testorthog = (PetscBool)(size == 1 || symm);
78: testcompress = (PetscBool)(size == 1 || symm);
79: testhlru = (PetscBool)(size == 1);
81: MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
82: PetscLayoutCreate(PETSC_COMM_WORLD, &map);
83: if (testlayout) {
84: if (rank % 2) n = PetscMax(2 * n - 5 * rank, 0);
85: else n = 2 * n + rank;
86: }
87: if (!flgglob) {
88: PetscLayoutSetLocalSize(map, n);
89: PetscLayoutSetUp(map);
90: PetscLayoutGetSize(map, &N);
91: } else {
92: PetscLayoutSetSize(map, N);
93: PetscLayoutSetUp(map);
94: PetscLayoutGetLocalSize(map, &n);
95: }
96: PetscLayoutDestroy(&map);
98: if (lda) PetscMalloc1(N * (n + lda), &Adata);
99: MatCreateDense(PETSC_COMM_WORLD, n, n, N, N, Adata, &A);
100: MatDenseSetLDA(A, n + lda);
102: /* Create random points; these are replicated in order to populate a dense matrix and to compare sequential and dense runs
103: The constructor for MATH2OPUS can take as input the distributed coordinates and replicates them internally in case
104: a kernel construction is requested */
105: PetscRandomCreate(PETSC_COMM_WORLD, &r);
106: PetscRandomSetFromOptions(r);
107: PetscRandomSetSeed(r, 123456);
108: PetscRandomSeed(r);
109: PetscMalloc1(N * dim, &coords);
110: PetscRandomGetValuesReal(r, N * dim, coords);
111: PetscRandomDestroy(&r);
113: if (kernel || !randommat) {
114: MatH2OpusKernel k = Asymm ? GenEntry_Symm : GenEntry_Unsymm;
115: PetscInt ist, ien;
117: MatGetOwnershipRange(A, &ist, &ien);
118: for (i = ist; i < ien; i++) {
119: for (j = 0; j < N; j++) MatSetValue(A, i, j, (*k)(dim, coords + i * dim, coords + j * dim, NULL), INSERT_VALUES);
120: }
121: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
122: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
123: if (kernel) {
124: MatCreateH2OpusFromKernel(PETSC_COMM_WORLD, n, n, N, N, dim, coords + ist * dim, PETSC_TRUE, k, NULL, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, &B);
125: } else {
126: MatCreateH2OpusFromMat(A, dim, coords + ist * dim, PETSC_TRUE, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, &B);
127: }
128: } else {
129: PetscInt ist;
131: MatGetOwnershipRange(A, &ist, NULL);
132: MatSetRandom(A, NULL);
133: if (Asymm) {
134: MatTranspose(A, MAT_INITIAL_MATRIX, &B);
135: MatAXPY(A, 1.0, B, SAME_NONZERO_PATTERN);
136: MatDestroy(&B);
137: MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE);
138: }
139: MatCreateH2OpusFromMat(A, dim, coords + ist * dim, PETSC_TRUE, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, &B);
140: }
141: PetscFree(coords);
142: if (agpu) MatConvert(A, MATDENSECUDA, MAT_INPLACE_MATRIX, &A);
143: MatViewFromOptions(A, NULL, "-A_view");
145: MatSetOption(B, MAT_SYMMETRIC, symm);
147: /* assemble the H-matrix */
148: MatBindToCPU(B, (PetscBool)!bgpu);
149: MatSetFromOptions(B);
150: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
151: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
152: MatViewFromOptions(B, NULL, "-B_view");
154: /* Test MatScale */
155: MatScale(A, scale);
156: MatScale(B, scale);
158: /* Test MatMult */
159: MatCreateVecs(A, &Ax, &Ay);
160: MatCreateVecs(B, &Bx, &By);
161: VecSetRandom(Ax, NULL);
162: VecCopy(Ax, Bx);
163: MatMult(A, Ax, Ay);
164: MatMult(B, Bx, By);
165: VecViewFromOptions(Ay, NULL, "-mult_vec_view");
166: VecViewFromOptions(By, NULL, "-mult_vec_view");
167: VecNorm(Ay, NORM_INFINITY, &nX);
168: VecAXPY(Ay, -1.0, By);
169: VecViewFromOptions(Ay, NULL, "-mult_vec_view");
170: VecNorm(Ay, NORM_INFINITY, &err);
171: PetscPrintf(PETSC_COMM_WORLD, "MatMult err %g\n", err / nX);
172: VecScale(By, -1.0);
173: MatMultAdd(B, Bx, By, By);
174: VecNorm(By, NORM_INFINITY, &err);
175: VecViewFromOptions(By, NULL, "-mult_vec_view");
176: if (err > 10. * PETSC_SMALL) PetscPrintf(PETSC_COMM_WORLD, "MatMultAdd err %g\n", err);
178: /* Test MatNorm */
179: MatNorm(A, NORM_INFINITY, &norms[0]);
180: MatNorm(A, NORM_1, &norms[1]);
181: norms[2] = -1.; /* NORM_2 not supported */
182: PetscPrintf(PETSC_COMM_WORLD, "A Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n", (double)norms[0], (double)norms[1], (double)norms[2]);
183: MatGetOperation(A, MATOP_NORM, &Anormfunc);
184: MatGetOperation(B, MATOP_NORM, &approxnormfunc);
185: MatSetOperation(A, MATOP_NORM, approxnormfunc);
186: MatNorm(A, NORM_INFINITY, &norms[0]);
187: MatNorm(A, NORM_1, &norms[1]);
188: MatNorm(A, NORM_2, &norms[2]);
189: PetscPrintf(PETSC_COMM_WORLD, "A Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n", (double)norms[0], (double)norms[1], (double)norms[2]);
190: if (testnorm) {
191: MatNorm(B, NORM_INFINITY, &norms[0]);
192: MatNorm(B, NORM_1, &norms[1]);
193: MatNorm(B, NORM_2, &norms[2]);
194: } else {
195: norms[0] = -1.;
196: norms[1] = -1.;
197: norms[2] = -1.;
198: }
199: PetscPrintf(PETSC_COMM_WORLD, "B Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n", (double)norms[0], (double)norms[1], (double)norms[2]);
200: MatSetOperation(A, MATOP_NORM, Anormfunc);
202: /* Test MatDuplicate */
203: MatDuplicate(B, MAT_COPY_VALUES, &D);
204: MatSetOption(D, MAT_SYMMETRIC, symm);
205: MatMultEqual(B, D, 10, &flg);
206: if (!flg) PetscPrintf(PETSC_COMM_WORLD, "MatMult error after MatDuplicate\n");
207: if (testtrans) {
208: MatMultTransposeEqual(B, D, 10, &flg);
209: if (!flg) PetscPrintf(PETSC_COMM_WORLD, "MatMultTranspose error after MatDuplicate\n");
210: }
211: MatDestroy(&D);
213: if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
214: VecSetRandom(Ay, NULL);
215: VecCopy(Ay, By);
216: MatMultTranspose(A, Ay, Ax);
217: MatMultTranspose(B, By, Bx);
218: VecViewFromOptions(Ax, NULL, "-multtrans_vec_view");
219: VecViewFromOptions(Bx, NULL, "-multtrans_vec_view");
220: VecNorm(Ax, NORM_INFINITY, &nX);
221: VecAXPY(Ax, -1.0, Bx);
222: VecViewFromOptions(Ax, NULL, "-multtrans_vec_view");
223: VecNorm(Ax, NORM_INFINITY, &err);
224: PetscPrintf(PETSC_COMM_WORLD, "MatMultTranspose err %g\n", err / nX);
225: VecScale(Bx, -1.0);
226: MatMultTransposeAdd(B, By, Bx, Bx);
227: VecNorm(Bx, NORM_INFINITY, &err);
228: if (err > 10. * PETSC_SMALL) PetscPrintf(PETSC_COMM_WORLD, "MatMultTransposeAdd err %g\n", err);
229: }
230: VecDestroy(&Ax);
231: VecDestroy(&Ay);
232: VecDestroy(&Bx);
233: VecDestroy(&By);
235: /* Test MatMatMult */
236: if (ldc) PetscMalloc1(nrhs * (n + ldc), &Cdata);
237: MatCreateDense(PETSC_COMM_WORLD, n, PETSC_DECIDE, N, nrhs, Cdata, &C);
238: MatDenseSetLDA(C, n + ldc);
239: MatSetRandom(C, NULL);
240: if (cgpu) MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C);
241: for (nt = 0; nt < ntrials; nt++) {
242: MatMatMult(B, C, nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &D);
243: MatViewFromOptions(D, NULL, "-bc_view");
244: PetscObjectBaseTypeCompareAny((PetscObject)D, &flg, MATSEQDENSE, MATMPIDENSE, "");
245: if (flg) {
246: MatCreateVecs(B, &x, &y);
247: MatCreateVecs(D, NULL, &v);
248: for (i = 0; i < nrhs; i++) {
249: MatGetColumnVector(D, v, i);
250: MatGetColumnVector(C, x, i);
251: MatMult(B, x, y);
252: VecAXPY(y, -1.0, v);
253: VecNorm(y, NORM_INFINITY, &err);
254: if (err > 10. * PETSC_SMALL) PetscPrintf(PETSC_COMM_WORLD, "MatMat err %" PetscInt_FMT " %g\n", i, err);
255: }
256: VecDestroy(&y);
257: VecDestroy(&x);
258: VecDestroy(&v);
259: }
260: }
261: MatDestroy(&D);
263: /* Test MatTransposeMatMult */
264: if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
265: for (nt = 0; nt < ntrials; nt++) {
266: MatTransposeMatMult(B, C, nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &D);
267: MatViewFromOptions(D, NULL, "-btc_view");
268: PetscObjectBaseTypeCompareAny((PetscObject)D, &flg, MATSEQDENSE, MATMPIDENSE, "");
269: if (flg) {
270: MatCreateVecs(B, &y, &x);
271: MatCreateVecs(D, NULL, &v);
272: for (i = 0; i < nrhs; i++) {
273: MatGetColumnVector(D, v, i);
274: MatGetColumnVector(C, x, i);
275: MatMultTranspose(B, x, y);
276: VecAXPY(y, -1.0, v);
277: VecNorm(y, NORM_INFINITY, &err);
278: if (err > 10. * PETSC_SMALL) PetscPrintf(PETSC_COMM_WORLD, "MatTransMat err %" PetscInt_FMT " %g\n", i, err);
279: }
280: VecDestroy(&y);
281: VecDestroy(&x);
282: VecDestroy(&v);
283: }
284: }
285: MatDestroy(&D);
286: }
288: /* Test basis orthogonalization */
289: if (testorthog) {
290: MatDuplicate(B, MAT_COPY_VALUES, &D);
291: MatSetOption(D, MAT_SYMMETRIC, symm);
292: MatH2OpusOrthogonalize(D);
293: MatMultEqual(B, D, 10, &flg);
294: if (!flg) PetscPrintf(PETSC_COMM_WORLD, "MatMult error after basis ortogonalization\n");
295: MatDestroy(&D);
296: }
298: /* Test matrix compression */
299: if (testcompress) {
300: MatDuplicate(B, MAT_COPY_VALUES, &D);
301: MatSetOption(D, MAT_SYMMETRIC, symm);
302: MatH2OpusCompress(D, PETSC_SMALL);
303: MatDestroy(&D);
304: }
306: /* Test low-rank update */
307: if (testhlru) {
308: Mat U, V;
309: PetscScalar *Udata = NULL, *Vdata = NULL;
311: if (ldu) {
312: PetscMalloc1(nlr * (n + ldu), &Udata);
313: PetscMalloc1(nlr * (n + ldu + 2), &Vdata);
314: }
315: MatDuplicate(B, MAT_COPY_VALUES, &D);
316: MatCreateDense(PetscObjectComm((PetscObject)D), n, PETSC_DECIDE, N, nlr, Udata, &U);
317: MatDenseSetLDA(U, n + ldu);
318: MatCreateDense(PetscObjectComm((PetscObject)D), n, PETSC_DECIDE, N, nlr, Vdata, &V);
319: if (ldu) MatDenseSetLDA(V, n + ldu + 2);
320: MatSetRandom(U, NULL);
321: MatSetRandom(V, NULL);
322: MatH2OpusLowRankUpdate(D, U, V, 0.5);
323: MatH2OpusLowRankUpdate(D, U, V, -0.5);
324: MatMultEqual(B, D, 10, &flg);
325: if (!flg) PetscPrintf(PETSC_COMM_WORLD, "MatMult error after low-rank update\n");
326: MatDestroy(&D);
327: MatDestroy(&U);
328: PetscFree(Udata);
329: MatDestroy(&V);
330: PetscFree(Vdata);
331: }
333: /* check explicit operator */
334: if (checkexpl) {
335: Mat Be, Bet;
337: MatComputeOperator(B, MATDENSE, &D);
338: MatDuplicate(D, MAT_COPY_VALUES, &Be);
339: MatNorm(D, NORM_FROBENIUS, &nB);
340: MatViewFromOptions(D, NULL, "-expl_view");
341: MatAXPY(D, -1.0, A, SAME_NONZERO_PATTERN);
342: MatViewFromOptions(D, NULL, "-diff_view");
343: MatNorm(D, NORM_FROBENIUS, &nD);
344: MatNorm(A, NORM_FROBENIUS, &nA);
345: PetscPrintf(PETSC_COMM_WORLD, "Approximation error %g (%g / %g, %g)\n", nD / nA, nD, nA, nB);
346: MatDestroy(&D);
348: if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
349: MatTranspose(A, MAT_INPLACE_MATRIX, &A);
350: MatComputeOperatorTranspose(B, MATDENSE, &D);
351: MatDuplicate(D, MAT_COPY_VALUES, &Bet);
352: MatNorm(D, NORM_FROBENIUS, &nB);
353: MatViewFromOptions(D, NULL, "-expl_trans_view");
354: MatAXPY(D, -1.0, A, SAME_NONZERO_PATTERN);
355: MatViewFromOptions(D, NULL, "-diff_trans_view");
356: MatNorm(D, NORM_FROBENIUS, &nD);
357: MatNorm(A, NORM_FROBENIUS, &nA);
358: PetscPrintf(PETSC_COMM_WORLD, "Approximation error transpose %g (%g / %g, %g)\n", nD / nA, nD, nA, nB);
359: MatDestroy(&D);
361: MatTranspose(Bet, MAT_INPLACE_MATRIX, &Bet);
362: MatAXPY(Be, -1.0, Bet, SAME_NONZERO_PATTERN);
363: MatViewFromOptions(Be, NULL, "-diff_expl_view");
364: MatNorm(Be, NORM_FROBENIUS, &nB);
365: PetscPrintf(PETSC_COMM_WORLD, "Approximation error B - (B^T)^T %g\n", nB);
366: MatDestroy(&Be);
367: MatDestroy(&Bet);
368: }
369: }
370: MatDestroy(&A);
371: MatDestroy(&B);
372: MatDestroy(&C);
373: PetscFree(Cdata);
374: PetscFree(Adata);
375: PetscFinalize();
376: return 0;
377: }
379: /*TEST
381: build:
382: requires: h2opus
384: #tests from kernel
385: test:
386: requires: h2opus
387: nsize: 1
388: suffix: 1
389: args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 0
391: test:
392: requires: h2opus
393: nsize: 1
394: suffix: 1_ld
395: output_file: output/ex66_1.out
396: args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -ldu 17 -symm 0 -checkexpl -bgpu 0
398: test:
399: requires: h2opus cuda
400: nsize: 1
401: suffix: 1_cuda
402: output_file: output/ex66_1.out
403: args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 1
405: test:
406: requires: h2opus cuda
407: nsize: 1
408: suffix: 1_cuda_ld
409: output_file: output/ex66_1.out
410: args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -ldu 17 -symm 0 -checkexpl -bgpu 1
412: test:
413: requires: h2opus
414: nsize: {{2 3}}
415: suffix: 1_par
416: args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu 0 -cgpu 0
418: test:
419: requires: h2opus cuda
420: nsize: {{2 3}}
421: suffix: 1_par_cuda
422: args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu {{0 1}} -cgpu {{0 1}}
423: output_file: output/ex66_1_par.out
425: #tests from matrix sampling (parallel or unsymmetric not supported)
426: test:
427: requires: h2opus
428: nsize: 1
429: suffix: 2
430: args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu 0
432: test:
433: requires: h2opus cuda
434: nsize: 1
435: suffix: 2_cuda
436: output_file: output/ex66_2.out
437: args: -n {{17 29}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu {{0 1}} -agpu {{0 1}}
439: #tests view operation
440: test:
441: requires: h2opus !cuda
442: filter: grep -v " MPI process" | grep -v "\[" | grep -v "\]"
443: nsize: {{1 2 3}}
444: suffix: view
445: args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2
447: test:
448: requires: h2opus cuda
449: filter: grep -v " MPI process" | grep -v "\[" | grep -v "\]"
450: nsize: {{1 2 3}}
451: suffix: view_cuda
452: args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -bgpu -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2
454: TEST*/