Actual source code: ex237.c
/* Help text handed to PetscInitialize() in main() */
1: static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n";
3: /*
4: See the paper below for more information
6: "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
7: P. Jolivet, J. E. Roman, and S. Zampini (2020).
8: */
10: #include <petsc.h>
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  #include <mkl.h>
  /*
    PetscCallMKLSparse - calls an MKL sparse routine and raises a PETSc error if it fails

    func - name of the MKL routine
    args - parenthesized argument list forwarded verbatim to func

    The routine name is pushed on the PETSc stack for the duration of the call.
    Any status other than SPARSE_STATUS_SUCCESS is turned into PETSC_ERR_LIB, so
    the macro may only be used inside a function returning a PetscErrorCode (or int).
  */
  #define PetscCallMKLSparse(func, args) \
    do { \
      sparse_status_t petsc_mkl_status_; \
      PetscStackPushExternal(#func); \
      petsc_mkl_status_ = func args; \
      PetscStackPop; \
      PetscCheck(petsc_mkl_status_ == SPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)petsc_mkl_status_); \
    } while (0)
#else
  /* Without MKL the arguments are discarded and any use is reported as unsupported */
  #define PetscCallMKLSparse(func, args) \
    do { \
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \
    } while (0)
#endif
29: int main(int argc, char **argv)
30: {
31: Mat A, C, D, E;
32: PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5;
33: PetscViewer viewer;
34: PetscInt bs[10], N[8];
35: char *type[10];
36: PetscMPIInt size;
37: PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl;
38: char file[PETSC_MAX_PATH_LEN];
41: PetscInitialize(&argc, &argv, NULL, help);
42: MPI_Comm_size(PETSC_COMM_WORLD, &size);
44: PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg);
46: PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL);
47: PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg);
48: if (!flg) {
49: nbs = 1;
50: bs[0] = 1;
51: }
52: PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg);
53: if (!flg) {
54: nN = 8;
55: N[0] = 1;
56: N[1] = 2;
57: N[2] = 4;
58: N[3] = 8;
59: N[4] = 16;
60: N[5] = 32;
61: N[6] = 64;
62: N[7] = 128;
63: }
64: PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg);
65: if (!flg) {
66: ntype = 1;
67: PetscStrallocpy(MATSEQAIJ, &type[0]);
68: }
69: PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL);
70: PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL);
71: PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL);
72: for (PetscInt j = 0; j < nbs; ++j) {
73: PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer);
74: MatCreate(PETSC_COMM_WORLD, &A);
75: MatSetFromOptions(A);
76: MatLoad(A, viewer);
77: PetscViewerDestroy(&viewer);
78: PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");
80: MatGetSize(A, &m, &M);
81: if (m == M) {
82: Mat oA;
83: MatTranspose(A, MAT_INITIAL_MATRIX, &oA);
84: MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN);
85: MatDestroy(&oA);
86: }
87: MatGetLocalSize(A, &m, NULL);
88: MatGetSize(A, &M, NULL);
89: if (bs[j] > 1) {
90: Mat T, Tt, B;
91: const PetscScalar *ptr;
92: PetscScalar *val, *Aa;
93: const PetscInt *Ai, *Aj;
94: PetscInt An, i, k;
95: PetscBool done;
97: MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T);
98: MatSetRandom(T, NULL);
99: MatTranspose(T, MAT_INITIAL_MATRIX, &Tt);
100: MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN);
101: MatDestroy(&Tt);
102: MatDenseGetArrayRead(T, &ptr);
103: MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);
105: MatSeqAIJGetArray(A, &Aa);
106: MatCreate(PETSC_COMM_WORLD, &B);
107: MatSetType(B, MATSEQBAIJ);
108: MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE);
109: PetscMalloc1(Ai[An] * bs[j] * bs[j], &val);
110: for (i = 0; i < Ai[An]; ++i)
111: for (k = 0; k < bs[j] * bs[j]; ++k) val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
112: MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE);
113: MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val);
114: PetscFree(val);
115: MatSeqAIJRestoreArray(A, &Aa);
116: MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);
117: MatDenseRestoreArrayRead(T, &ptr);
118: MatDestroy(&T);
119: MatDestroy(&A);
120: A = B;
121: }
122: /* reconvert back to SeqAIJ before converting to the desired type later */
123: if (!convert) E = A;
124: MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E);
125: MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE);
126: for (PetscInt i = 0; i < ntype; ++i) {
127: char *tmp;
128: PetscInt *ia_ptr, *ja_ptr, k;
129: PetscScalar *a_ptr;
130: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
131: struct matrix_descr descr;
132: sparse_matrix_t spr;
133: descr.type = SPARSE_MATRIX_TYPE_GENERAL;
134: descr.diag = SPARSE_DIAG_NON_UNIT;
135: #endif
136: if (convert) MatDestroy(&A);
137: PetscStrstr(type[i], "mkl", &tmp);
138: if (tmp) {
139: size_t mlen, tlen;
140: char base[256];
142: mkl = PETSC_TRUE;
143: PetscStrlen(tmp, &mlen);
144: PetscStrlen(type[i], &tlen);
145: PetscStrncpy(base, type[i], tlen - mlen + 1);
146: MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);
147: } else {
148: mkl = PETSC_FALSE;
149: PetscStrstr(type[i], "maij", &tmp);
150: if (!tmp) {
151: MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);
152: } else {
153: MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);
154: maij = PETSC_TRUE;
155: }
156: }
157: PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");
158: if (mkl) {
159: const PetscInt *Ai, *Aj;
160: PetscInt An;
161: PetscBool done;
163: PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "");
165: PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);
166: MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);
168: PetscMalloc1(An + 1, &ia_ptr);
169: PetscMalloc1(Ai[An], &ja_ptr);
170: if (flg) { /* SeqAIJ */
171: for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
172: for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
173: MatSeqAIJGetArray(A, &a_ptr);
174: PetscCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
175: } else {
176: PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg);
177: if (flg) {
178: for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
179: for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
180: MatSeqBAIJGetArray(A, &a_ptr);
181: PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
182: } else {
183: PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg);
184: if (flg) {
185: for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
186: for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
187: MatSeqSBAIJGetArray(A, &a_ptr);
188: PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
189: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
190: descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
191: descr.mode = SPARSE_FILL_MODE_UPPER;
192: descr.diag = SPARSE_DIAG_NON_UNIT;
193: #endif
194: }
195: }
196: }
197: PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);
198: MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);
199: }
201: MatViewFromOptions(A, NULL, "-A_view");
203: for (k = 0; k < nN; ++k) {
204: MatType Atype, Ctype;
205: PetscInt AM, AN, CM, CN, t;
206: #if defined(PETSC_USE_LOG)
207: PetscLogStage stage, tstage;
208: char stage_s[256];
209: #endif
211: MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C);
212: MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D);
213: MatSetRandom(C, NULL);
214: if (cuda) { /* convert to GPU if needed */
215: MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C);
216: MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D);
217: }
218: if (mkl) {
219: if (N[k] > 1) PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
220: else PetscCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
221: PetscCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
222: PetscCallMKLSparse(mkl_sparse_optimize, (spr));
223: }
224: MatGetType(A, &Atype);
225: MatGetType(C, &Ctype);
226: MatGetSize(A, &AM, &AN);
227: MatGetSize(C, &CM, &CN);
229: #if defined(PETSC_USE_LOG)
230: if (!maij || N[k] > 1) {
231: PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]);
232: PetscLogStageRegister(stage_s, &stage);
233: }
234: if (trans && N[k] > 1) {
235: PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]);
236: PetscLogStageRegister(stage_s, &tstage);
237: }
238: #endif
239: /* A*B */
240: if (N[k] > 1) {
241: if (!maij) {
242: MatProductCreateWithMat(A, C, NULL, D);
243: MatProductSetType(D, MATPRODUCT_AB);
244: MatProductSetFromOptions(D);
245: MatProductSymbolic(D);
246: }
248: if (!mkl) {
249: if (!maij) {
250: MatProductNumeric(D);
251: PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN);
252: PetscLogStagePush(stage);
253: for (t = 0; t < trial; ++t) MatProductNumeric(D);
254: PetscLogStagePop();
255: } else {
256: Mat E, Ct, Dt;
257: Vec cC, cD;
258: const PetscScalar *c_ptr;
259: PetscScalar *d_ptr;
260: MatCreateMAIJ(A, N[k], &E);
261: MatDenseGetLocalMatrix(C, &Ct);
262: MatDenseGetLocalMatrix(D, &Dt);
263: MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);
264: MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);
265: MatDenseGetArrayRead(Ct, &c_ptr);
266: MatDenseGetArrayWrite(Dt, &d_ptr);
267: VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC);
268: VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD);
269: MatMult(E, cC, cD);
270: PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1);
271: PetscLogStagePush(stage);
272: for (t = 0; t < trial; ++t) MatMult(E, cC, cD);
273: PetscLogStagePop();
274: VecDestroy(&cD);
275: VecDestroy(&cC);
276: MatDestroy(&E);
277: MatDenseRestoreArrayWrite(Dt, &d_ptr);
278: MatDenseRestoreArrayRead(Ct, &c_ptr);
279: MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);
280: MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);
281: }
282: } else {
283: const PetscScalar *c_ptr;
284: PetscScalar *d_ptr;
286: MatDenseGetArrayRead(C, &c_ptr);
287: MatDenseGetArrayWrite(D, &d_ptr);
288: PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
289: PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN);
290: PetscLogStagePush(stage);
291: for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
292: PetscLogStagePop();
293: MatDenseRestoreArrayWrite(D, &d_ptr);
294: MatDenseRestoreArrayRead(C, &c_ptr);
295: }
296: } else if (maij) {
297: MatDestroy(&C);
298: MatDestroy(&D);
299: continue;
300: } else if (!mkl) {
301: Vec cC, cD;
303: MatDenseGetColumnVecRead(C, 0, &cC);
304: MatDenseGetColumnVecWrite(D, 0, &cD);
305: MatMult(A, cC, cD);
306: PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN);
307: PetscLogStagePush(stage);
308: for (t = 0; t < trial; ++t) MatMult(A, cC, cD);
309: PetscLogStagePop();
310: MatDenseRestoreColumnVecRead(C, 0, &cC);
311: MatDenseRestoreColumnVecWrite(D, 0, &cD);
312: } else {
313: const PetscScalar *c_ptr;
314: PetscScalar *d_ptr;
316: MatDenseGetArrayRead(C, &c_ptr);
317: MatDenseGetArrayWrite(D, &d_ptr);
318: PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN);
319: PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
320: PetscLogStagePush(stage);
321: for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
322: PetscLogStagePop();
323: MatDenseRestoreArrayWrite(D, &d_ptr);
324: MatDenseRestoreArrayRead(C, &c_ptr);
325: }
327: if (check) {
328: MatMatMultEqual(A, C, D, 10, &flg);
329: if (!flg) {
330: MatType Dtype;
332: MatGetType(D, &Dtype);
333: PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);
334: }
335: }
337: /* MKL implementation seems buggy for ABt */
338: /* A*Bt */
339: if (!mkl && trans && N[k] > 1) {
340: PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");
341: if (flg) {
342: MatTranspose(C, MAT_INPLACE_MATRIX, &C);
343: MatGetType(C, &Ctype);
344: if (!mkl) {
345: MatProductCreateWithMat(A, C, NULL, D);
346: MatProductSetType(D, MATPRODUCT_ABt);
347: MatProductSetFromOptions(D);
348: MatProductSymbolic(D);
349: MatProductNumeric(D);
350: PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN);
351: PetscLogStagePush(tstage);
352: for (t = 0; t < trial; ++t) MatProductNumeric(D);
353: PetscLogStagePop();
354: } else {
355: const PetscScalar *c_ptr;
356: PetscScalar *d_ptr;
358: PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
359: PetscCallMKLSparse(mkl_sparse_optimize, (spr));
360: MatDenseGetArrayRead(C, &c_ptr);
361: MatDenseGetArrayWrite(D, &d_ptr);
362: PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN);
363: PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
364: PetscLogStagePush(stage);
365: for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
366: PetscLogStagePop();
367: MatDenseRestoreArrayWrite(D, &d_ptr);
368: MatDenseRestoreArrayRead(C, &c_ptr);
369: }
370: }
371: }
373: if (!mkl && trans && N[k] > 1 && flg && check) {
374: MatMatTransposeMultEqual(A, C, D, 10, &flg);
375: if (!flg) {
376: MatType Dtype;
377: MatGetType(D, &Dtype);
378: PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);
379: }
380: }
381: MatDestroy(&C);
382: MatDestroy(&D);
383: }
384: if (mkl) {
385: PetscCallMKLSparse(mkl_sparse_destroy, (spr));
386: PetscFree(ia_ptr);
387: PetscFree(ja_ptr);
388: }
389: if (cuda && i != ntype - 1) {
390: PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n");
391: break;
392: }
393: }
394: if (E != A) MatDestroy(&E);
395: MatDestroy(&A);
396: }
397: for (m = 0; m < ntype; ++m) PetscFree(type[m]);
398: PetscFinalize();
399: return 0;
400: }
402: /*TEST
403: build:
404: requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
406: testset:
407: nsize: 1
408: filter: sed "/Benchmarking/d"
409: args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output}
410: test:
411: suffix: basic
412: args: -type aij,sbaij,baij
413: output_file: output/ex237.out
414: test:
415: suffix: maij
416: args: -type aij,maij
417: output_file: output/ex237.out
418: test:
419: suffix: cuda
420: requires: cuda
421: args: -type aij,aijcusparse
422: output_file: output/ex237.out
423: test:
424: suffix: mkl
425: requires: mkl_sparse_optimize
426: args: -type aij,aijmkl,baijmkl,sbaijmkl
427: output_file: output/ex237.out
429: TEST*/