Actual source code: xxt.c
/*
Module Name: xxt
Module Info:

author:  Henry M. Tufo III
e-mail:  hmt@asci.uchicago.edu
contact:
+--------------------------------+--------------------------------+
|MCS Division - Building 221     |Department of Computer Science  |
|Argonne National Laboratory     |Ryerson 152                     |
|9700 S. Cass Avenue             |The University of Chicago       |
|Argonne, IL 60439               |Chicago, IL 60637               |
|(630) 252-5354/5986 ph/fx       |(773) 702-6019/8487 ph/fx       |
+--------------------------------+--------------------------------+

Last Modification: 3.20.01
*/
#include <../src/ksp/pc/impls/tfs/tfs.h>

#define LEFT  -1
#define RIGHT 1
#define BOTH  0

typedef struct xxt_solver_info {
  PetscInt      n, m, n_global, m_global;
  PetscInt      nnz, max_nnz, msg_buf_sz;
  PetscInt     *nsep, *lnsep, *fo, nfo, *stages;
  PetscInt     *col_sz, *col_indices;
  PetscScalar **col_vals, *x, *solve_uu, *solve_w;
  PetscInt      nsolves;
  PetscScalar   tot_solve_time;
} xxt_info;
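
/*
   Storage layout of the factor X (filled in by xxt_generate(), read by
   do_xxt_solve()): the nonzero run of column i lives in the dense array x,
   col_vals[i] points at its first entry, col_indices[2*i] is the row offset
   of that entry, and col_indices[2*i+1] (duplicated in col_sz[i]) is its
   length; the col_indices list is terminated by -1 so the solve loops can
   walk it without consulting m.
*/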

typedef struct matvec_info {
  PetscInt     n, m, n_global, m_global;
  PetscInt    *local2global;
  PCTFS_gs_ADT PCTFS_gs_handle;
  PetscErrorCode (*matvec)(struct matvec_info *, PetscScalar *, PetscScalar *);
  void *grid_data;
} mv_info;

struct xxt_CDT {
  PetscInt  id;
  PetscInt  ns;
  PetscInt  level;
  xxt_info *info;
  mv_info  *mvi;
};
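
/*
   An xxt handle couples the factorization data (info) with the distributed
   matrix-vector product it was built from (mvi).  xxt_generate() constructs
   the factor X column by column so that X^T.A.X = I; do_xxt_solve() then
   applies x = X.X^T.b.  id is only used by check_handle() as a cross-processor
   sanity value, ns is the assumed null-space size (set to 0 in XXT_factor(),
   i.e. full rank), and level is the number of hypercube dimensions used by
   the separator and communication routines (cf. the 2^k processor note in
   XXT_factor()).
*/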

static PetscInt n_xxt         = 0;
static PetscInt n_xxt_handles = 0;

/* prototypes */
static PetscErrorCode do_xxt_solve(xxt_ADT xxt_handle, PetscScalar *rhs);
static PetscErrorCode check_handle(xxt_ADT xxt_handle);
static PetscErrorCode det_separators(xxt_ADT xxt_handle);
static PetscErrorCode do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u);
static PetscErrorCode xxt_generate(xxt_ADT xxt_handle);
static PetscErrorCode do_xxt_factor(xxt_ADT xxt_handle);
static mv_info *set_mvi(PetscInt *local2global, PetscInt n, PetscInt m, PetscErrorCode (*matvec)(mv_info *, PetscScalar *, PetscScalar *), void *grid_data);

xxt_ADT XXT_new(void)
{
  xxt_ADT xxt_handle;

  /* rolling count on n_xxt ... potential problem here */
  n_xxt_handles++;
  xxt_handle       = (xxt_ADT)malloc(sizeof(struct xxt_CDT));
  xxt_handle->id   = ++n_xxt;
  xxt_handle->info = NULL;
  xxt_handle->mvi  = NULL;

  return (xxt_handle);
}
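
/*
   Typical call sequence (a sketch only; my_local_matvec and grid_data stand
   for application-supplied pieces, and local2global/n/m follow the argument
   descriptions in XXT_factor() below):

     xxt_ADT handle = XXT_new();
     XXT_factor(handle, local2global, n, m, my_local_matvec, grid_data);
     XXT_solve(handle, x, b);   (overwrites x with the coarse solution)
     XXT_free(handle);
*/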

PetscErrorCode XXT_factor(xxt_ADT   xxt_handle,   /* previously allocated xxt handle */
                          PetscInt *local2global, /* global column mapping */
                          PetscInt  n,            /* local number of rows */
                          PetscInt  m,            /* local number of columns */
                          PetscErrorCode (*matvec)(void *, PetscScalar *, PetscScalar *), /* b_loc = A_local . x_loc */
                          void *grid_data)        /* grid data for matvec */
{
  PCTFS_comm_init();
  check_handle(xxt_handle);

  /* only 2^k processor counts for now and all nodes participating */

  /* space for X info */
  xxt_handle->info = (xxt_info *)malloc(sizeof(xxt_info));

  /* set up matvec handles */
  xxt_handle->mvi = set_mvi(local2global, n, m, (PetscErrorCode(*)(mv_info *, PetscScalar *, PetscScalar *))matvec, grid_data);

  /* matrix is assumed to be of full rank */
  /* LATER we can reset to indicate rank deficiency */
  xxt_handle->ns = 0;

  /* determine separators and generate firing order - NB xxt info set here */
  det_separators(xxt_handle);

  return (do_xxt_factor(xxt_handle));
}

PetscErrorCode XXT_solve(xxt_ADT xxt_handle, PetscScalar *x, PetscScalar *b)
{
  PCTFS_comm_init();
  check_handle(xxt_handle);

  /* need to copy b into x? */
  if (b) PCTFS_rvec_copy(x, b, xxt_handle->mvi->n);
  return do_xxt_solve(xxt_handle, x);
}

PetscInt XXT_free(xxt_ADT xxt_handle)
{
  PCTFS_comm_init();
  check_handle(xxt_handle);
  n_xxt_handles--;

  free(xxt_handle->info->nsep);
  free(xxt_handle->info->lnsep);
  free(xxt_handle->info->fo);
  free(xxt_handle->info->stages);
  free(xxt_handle->info->solve_uu);
  free(xxt_handle->info->solve_w);
  free(xxt_handle->info->x);
  free(xxt_handle->info->col_vals);
  free(xxt_handle->info->col_sz);
  free(xxt_handle->info->col_indices);
  free(xxt_handle->info);
  free(xxt_handle->mvi->local2global);
  PCTFS_gs_free(xxt_handle->mvi->PCTFS_gs_handle);
  free(xxt_handle->mvi);
  free(xxt_handle);

  /* if the check fails we nuke */
  /* if NULL pointer passed to free we nuke */
  /* if the calls to free fail that's not my problem */
  return (0);
}

/* This function is currently unused */
PetscErrorCode XXT_stats(xxt_ADT xxt_handle)
{
  PetscInt    op[]  = {NON_UNIFORM, GL_MIN, GL_MAX, GL_ADD, GL_MIN, GL_MAX, GL_ADD, GL_MIN, GL_MAX, GL_ADD};
  PetscInt    fop[] = {NON_UNIFORM, GL_MIN, GL_MAX, GL_ADD};
  PetscInt    vals[9], work[9];
  PetscScalar fvals[3], fwork[3];

  PCTFS_comm_init();
  check_handle(xxt_handle);

  /* if factorization not done there are no stats */
  if (!xxt_handle->info || !xxt_handle->mvi) {
    if (!PCTFS_my_id) PetscPrintf(PETSC_COMM_WORLD, "XXT_stats() :: no stats available!\n");
    return 0;
  }

  vals[0] = vals[1] = vals[2] = xxt_handle->info->nnz;
  vals[3] = vals[4] = vals[5] = xxt_handle->mvi->n;
  vals[6] = vals[7] = vals[8] = xxt_handle->info->msg_buf_sz;
  PCTFS_giop(vals, work, PETSC_STATIC_ARRAY_LENGTH(op) - 1, op);

  fvals[0] = fvals[1] = fvals[2] = xxt_handle->info->tot_solve_time / xxt_handle->info->nsolves++;
  PCTFS_grop(fvals, fwork, PETSC_STATIC_ARRAY_LENGTH(fop) - 1, fop);

  if (!PCTFS_my_id) {
    PetscPrintf(PETSC_COMM_WORLD, "%d :: min xxt_nnz=%" PetscInt_FMT "\n", PCTFS_my_id, vals[0]);
    PetscPrintf(PETSC_COMM_WORLD, "%d :: max xxt_nnz=%" PetscInt_FMT "\n", PCTFS_my_id, vals[1]);
    PetscPrintf(PETSC_COMM_WORLD, "%d :: avg xxt_nnz=%g\n", PCTFS_my_id, (double)(1.0 * vals[2] / PCTFS_num_nodes));
    PetscPrintf(PETSC_COMM_WORLD, "%d :: tot xxt_nnz=%" PetscInt_FMT "\n", PCTFS_my_id, vals[2]);
    PetscPrintf(PETSC_COMM_WORLD, "%d :: xxt C(2d) =%g\n", PCTFS_my_id, (double)(vals[2] / (PetscPowReal(1.0 * vals[5], 1.5))));
    PetscPrintf(PETSC_COMM_WORLD, "%d :: xxt C(3d) =%g\n", PCTFS_my_id, (double)(vals[2] / (PetscPowReal(1.0 * vals[5], 1.6667))));
    PetscPrintf(PETSC_COMM_WORLD, "%d :: min xxt_n =%" PetscInt_FMT "\n", PCTFS_my_id, vals[3]);
    PetscPrintf(PETSC_COMM_WORLD, "%d :: max xxt_n =%" PetscInt_FMT "\n", PCTFS_my_id, vals[4]);
    PetscPrintf(PETSC_COMM_WORLD, "%d :: avg xxt_n =%g\n", PCTFS_my_id, (double)(1.0 * vals[5] / PCTFS_num_nodes));
    PetscPrintf(PETSC_COMM_WORLD, "%d :: tot xxt_n =%" PetscInt_FMT "\n", PCTFS_my_id, vals[5]);
    PetscPrintf(PETSC_COMM_WORLD, "%d :: min xxt_buf=%" PetscInt_FMT "\n", PCTFS_my_id, vals[6]);
    PetscPrintf(PETSC_COMM_WORLD, "%d :: max xxt_buf=%" PetscInt_FMT "\n", PCTFS_my_id, vals[7]);
    PetscPrintf(PETSC_COMM_WORLD, "%d :: avg xxt_buf=%g\n", PCTFS_my_id, (double)(1.0 * vals[8] / PCTFS_num_nodes));
    PetscPrintf(PETSC_COMM_WORLD, "%d :: min xxt_slv=%g\n", PCTFS_my_id, (double)PetscRealPart(fvals[0]));
    PetscPrintf(PETSC_COMM_WORLD, "%d :: max xxt_slv=%g\n", PCTFS_my_id, (double)PetscRealPart(fvals[1]));
    PetscPrintf(PETSC_COMM_WORLD, "%d :: avg xxt_slv=%g\n", PCTFS_my_id, (double)PetscRealPart(fvals[2] / PCTFS_num_nodes));
  }
  return 0;
}

/*
   Description: get A_local, the local portion of the global coarse matrix,
   which is a row-distributed n x m matrix with n < m.
   o my_ml holds the address of the ML struct associated with A_local and the coarse grid
   o local2global holds the global number of column i (i=0,...,m-1)
   o local2global also holds the global number of row i (i=0,...,n-1)
   o mylocmatvec performs A_local . vec_local (note that gs is performed using
     PCTFS_gs_init/gop).

   mylocmatvec = my_ml->Amat[grid_tag].matvec->external;
   mylocmatvec (void :: void *data, double *in, double *out)
*/
static PetscErrorCode do_xxt_factor(xxt_ADT xxt_handle)
{
  return xxt_generate(xxt_handle);
}
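
/*
   xxt_generate() builds X one column at a time.  For each separator vertex
   col (taken from the firing order fo[]) it starts from the unit vector
   v = e_col, subtracts the projection onto the columns computed so far,

      v <- v - X.(X^T.A.v),

   and normalizes by the A-norm of the result,

      v <- v / sqrt(v^T.A.v),

   so that the finished factor satisfies X^T.A.X = I.  Only the contiguous
   nonzero run of each column (offset + length) is kept in x; detection of a
   null space via a small normalization value is left as a LATER item below.
*/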

static PetscErrorCode xxt_generate(xxt_ADT xxt_handle)
{
  PetscInt      i, j, k, idex;
  PetscInt      dim, col;
  PetscScalar  *u, *uu, *v, *z, *w, alpha, alpha_w;
  PetscInt     *segs;
  PetscInt      op[] = {GL_ADD, 0};
  PetscInt      off, len;
  PetscScalar  *x_ptr;
  PetscInt     *iptr, flag;
  PetscInt      start = 0, end, work;
  PetscInt      op2[] = {GL_MIN, 0};
  PCTFS_gs_ADT  PCTFS_gs_handle;
  PetscInt     *nsep, *lnsep, *fo;
  PetscInt      a_n            = xxt_handle->mvi->n;
  PetscInt      a_m            = xxt_handle->mvi->m;
  PetscInt     *a_local2global = xxt_handle->mvi->local2global;
  PetscInt      level;
  PetscInt      xxt_nnz = 0, xxt_max_nnz = 0;
  PetscInt      n, m;
  PetscInt     *col_sz, *col_indices, *stages;
  PetscScalar **col_vals, *x;
  PetscInt      n_global;
  PetscBLASInt  i1 = 1, dlen;
  PetscScalar   dm1 = -1.0;

  n               = xxt_handle->mvi->n;
  nsep            = xxt_handle->info->nsep;
  lnsep           = xxt_handle->info->lnsep;
  fo              = xxt_handle->info->fo;
  end             = lnsep[0];
  level           = xxt_handle->level;
  PCTFS_gs_handle = xxt_handle->mvi->PCTFS_gs_handle;

  /* is there a null space? */
  /* LATER add in ability to detect null space by checking alpha */
  for (i = 0, j = 0; i <= level; i++) j += nsep[i];

  m = j - xxt_handle->ns;
  if (m != j) PetscPrintf(PETSC_COMM_WORLD, "xxt_generate() :: null space exists %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", m, j, xxt_handle->ns);

  /* get and initialize storage for x local         */
  /* note that x local is nxm and stored by columns */
  col_sz      = (PetscInt *)malloc(m * sizeof(PetscInt));
  col_indices = (PetscInt *)malloc((2 * m + 1) * sizeof(PetscInt));
  col_vals    = (PetscScalar **)malloc(m * sizeof(PetscScalar *));
  for (i = j = 0; i < m; i++, j += 2) {
    col_indices[j] = col_indices[j + 1] = col_sz[i] = -1;
    col_vals[i]                                     = NULL;
  }
  col_indices[j] = -1;

  /* size of separators for each sub-hc working from bottom of tree to top */
  /* this looks like nsep[]=segments                                       */
  stages = (PetscInt *)malloc((level + 1) * sizeof(PetscInt));
  segs   = (PetscInt *)malloc((level + 1) * sizeof(PetscInt));
  PCTFS_ivec_zero(stages, level + 1);
  PCTFS_ivec_copy(segs, nsep, level + 1);
  for (i = 0; i < level; i++) segs[i + 1] += segs[i];
  stages[0] = segs[0];

  /* temporary vectors */
  u  = (PetscScalar *)malloc(n * sizeof(PetscScalar));
  z  = (PetscScalar *)malloc(n * sizeof(PetscScalar));
  v  = (PetscScalar *)malloc(a_m * sizeof(PetscScalar));
  uu = (PetscScalar *)malloc(m * sizeof(PetscScalar));
  w  = (PetscScalar *)malloc(m * sizeof(PetscScalar));

  /* extra nnz due to replication of vertices across separators */
  for (i = 1, j = 0; i <= level; i++) j += nsep[i];

  /* storage for sparse x values */
  n_global    = xxt_handle->info->n_global;
  xxt_max_nnz = (PetscInt)(2.5 * PetscPowReal(1.0 * n_global, 1.6667) + j * n / 2) / PCTFS_num_nodes;
  x           = (PetscScalar *)malloc(xxt_max_nnz * sizeof(PetscScalar));
  xxt_nnz     = 0;

  /* LATER - can embed next sep to fire in gs    */
  /* time to make the donuts - generate X factor */
  for (dim = i = j = 0; i < m; i++) {
    /* time to move to the next level? */
    while (i == segs[dim]) {
      stages[dim++] = i;
      end += lnsep[dim];
    }
    stages[dim] = i;

    /* which column are we firing? */
    /* i.e. set v_l */
    /* use new seps and do global min across hc to determine which one to fire */
    col = (start < end) ? fo[start] : INT_MAX;
    PCTFS_giop_hc(&col, &work, 1, op2, dim);

    /* shouldn't need this */
    if (col == INT_MAX) {
      PetscInfo(0, "hey ... col==INT_MAX??\n");
      continue;
    }

    /* do I own it? I should */
    PCTFS_rvec_zero(v, a_m);
    if (col == fo[start]) {
      start++;
      idex = PCTFS_ivec_linear_search(col, a_local2global, a_n);
      if (idex != -1) {
        v[idex] = 1.0;
        j++;
      } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "NOT FOUND!");
    } else {
      idex = PCTFS_ivec_linear_search(col, a_local2global, a_m);
      if (idex != -1) v[idex] = 1.0;
    }

    /* perform u = A.v_l */
    PCTFS_rvec_zero(u, n);
    do_matvec(xxt_handle->mvi, v, u);

    /* uu = X^T.u_l (local portion) */
    /* technically only need to zero out first i entries */
    /* later turn this into an XXT_solve call ? */
    PCTFS_rvec_zero(uu, m);
    x_ptr = x;
    iptr  = col_indices;
    for (k = 0; k < i; k++) {
      off = *iptr++;
      len = *iptr++;
      PetscBLASIntCast(len, &dlen);
      PetscCallBLAS("BLASdot", uu[k] = BLASdot_(&dlen, u + off, &i1, x_ptr, &i1));
      x_ptr += len;
    }

    /* uu = X^T.u_l (comm portion) */
    PCTFS_ssgl_radd(uu, w, dim, stages);

    /* z = X.uu */
    PCTFS_rvec_zero(z, n);
    x_ptr = x;
    iptr  = col_indices;
    for (k = 0; k < i; k++) {
      off = *iptr++;
      len = *iptr++;
      PetscBLASIntCast(len, &dlen);
      PetscCallBLAS("BLASaxpy", BLASaxpy_(&dlen, &uu[k], x_ptr, &i1, z + off, &i1));
      x_ptr += len;
    }

    /* compute v_l = v_l - z */
    PCTFS_rvec_zero(v + a_n, a_m - a_n);
    PetscBLASIntCast(n, &dlen);
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&dlen, &dm1, z, &i1, v, &i1));

    /* compute u_l = A.v_l */
    if (a_n != a_m) PCTFS_gs_gop_hc(PCTFS_gs_handle, v, "+\0", dim);
    PCTFS_rvec_zero(u, n);
    do_matvec(xxt_handle->mvi, v, u);

    /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - local portion */
    PetscBLASIntCast(n, &dlen);
    PetscCallBLAS("BLASdot", alpha = BLASdot_(&dlen, u, &i1, v, &i1));
    /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - comm portion */
    PCTFS_grop_hc(&alpha, &alpha_w, 1, op, dim);

    alpha = (PetscScalar)PetscSqrtReal((PetscReal)alpha);

    /* check for small alpha                             */
    /* LATER use this to detect and determine null space */

    /* compute v_l = v_l/sqrt(alpha) */
    PCTFS_rvec_scale(v, 1.0 / alpha, n);

    /* add newly generated column, v_l, to X */
    flag = 1;
    off = len = 0;
    for (k = 0; k < n; k++) {
      if (v[k] != 0.0) {
        len = k;
        if (flag) {
          off  = k;
          flag = 0;
        }
      }
    }

    len -= (off - 1);

    if (len > 0) {
      if ((xxt_nnz + len) > xxt_max_nnz) {
        PetscInfo(0, "increasing space for X by 2x!\n");
        xxt_max_nnz *= 2;
        x_ptr = (PetscScalar *)malloc(xxt_max_nnz * sizeof(PetscScalar));
        PCTFS_rvec_copy(x_ptr, x, xxt_nnz);
        free(x);
        x = x_ptr;
        x_ptr += xxt_nnz;
      }
      xxt_nnz += len;
      PCTFS_rvec_copy(x_ptr, v + off, len);

      col_indices[2 * i] = off;
      col_sz[i] = col_indices[2 * i + 1] = len;
      col_vals[i]                        = x_ptr;
    } else {
      col_indices[2 * i] = 0;
      col_sz[i] = col_indices[2 * i + 1] = 0;
      col_vals[i]                        = x_ptr;
    }
  }

  /* close off stages for execution phase */
  while (dim != level) {
    stages[dim++] = i;
    PetscInfo(0, "disconnected!!! dim(%" PetscInt_FMT ")!=level(%" PetscInt_FMT ")\n", dim, level);
  }
  stages[dim] = i;

  xxt_handle->info->n              = xxt_handle->mvi->n;
  xxt_handle->info->m              = m;
  xxt_handle->info->nnz            = xxt_nnz;
  xxt_handle->info->max_nnz        = xxt_max_nnz;
  xxt_handle->info->msg_buf_sz     = stages[level] - stages[0];
  xxt_handle->info->solve_uu       = (PetscScalar *)malloc(m * sizeof(PetscScalar));
  xxt_handle->info->solve_w        = (PetscScalar *)malloc(m * sizeof(PetscScalar));
  xxt_handle->info->x              = x;
  xxt_handle->info->col_vals       = col_vals;
  xxt_handle->info->col_sz         = col_sz;
  xxt_handle->info->col_indices    = col_indices;
  xxt_handle->info->stages         = stages;
  xxt_handle->info->nsolves        = 0;
  xxt_handle->info->tot_solve_time = 0.0;

  free(segs);
  free(u);
  free(v);
  free(uu);
  free(z);
  free(w);

  return (0);
}
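
/*
   Apply the factorization: since X^T.A.X = I, the solution of A.u = b is
   u = X.X^T.b.  The first loop below accumulates beta = X^T.b column by
   column, PCTFS_ssgl_radd() sums the partial beta across the hypercube, and
   the second loop forms u = X.beta in place in uc.
*/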

static PetscErrorCode do_xxt_solve(xxt_ADT xxt_handle, PetscScalar *uc)
{
  PetscInt     off, len, *iptr;
  PetscInt     level       = xxt_handle->level;
  PetscInt     n           = xxt_handle->info->n;
  PetscInt     m           = xxt_handle->info->m;
  PetscInt    *stages      = xxt_handle->info->stages;
  PetscInt    *col_indices = xxt_handle->info->col_indices;
  PetscScalar *x_ptr, *uu_ptr;
  PetscScalar *solve_uu = xxt_handle->info->solve_uu;
  PetscScalar *solve_w  = xxt_handle->info->solve_w;
  PetscScalar *x        = xxt_handle->info->x;
  PetscBLASInt i1 = 1, dlen;

  uu_ptr = solve_uu;
  PCTFS_rvec_zero(uu_ptr, m);

  /* x = X.X^T.b */
  /* uu = X^T.b  */
  for (x_ptr = x, iptr = col_indices; *iptr != -1; x_ptr += len) {
    off = *iptr++;
    len = *iptr++;
    PetscBLASIntCast(len, &dlen);
    PetscCallBLAS("BLASdot", *uu_ptr++ = BLASdot_(&dlen, uc + off, &i1, x_ptr, &i1));
  }

  /* communication of beta */
  uu_ptr = solve_uu;
  if (level) PCTFS_ssgl_radd(uu_ptr, solve_w, level, stages);

  PCTFS_rvec_zero(uc, n);

  /* x = X.uu */
  for (x_ptr = x, iptr = col_indices; *iptr != -1; x_ptr += len) {
    off = *iptr++;
    len = *iptr++;
    PetscBLASIntCast(len, &dlen);
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&dlen, uu_ptr++, x_ptr, &i1, uc + off, &i1));
  }
  return 0;
}

static PetscErrorCode check_handle(xxt_ADT xxt_handle)
{
  PetscInt vals[2], work[2], op[] = {NON_UNIFORM, GL_MIN, GL_MAX};

  vals[0] = vals[1] = xxt_handle->id;
  PCTFS_giop(vals, work, PETSC_STATIC_ARRAY_LENGTH(op) - 1, op);

  return 0;
}
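
/*
   det_separators() works top down through the hypercube dimensions: at each
   edge the processors are split into a left and a right half, indicator
   vectors are fired through the gather/scatter handle to find the interface
   (separator) dofs between the halves, and those dofs are appended (from the
   back) to the firing order fo[]; whatever remains unmarked at level 0 is
   purely local and is marked last.  nsep[]/lnsep[] record the global/local
   separator counts per level and later drive the segs/stages bookkeeping in
   xxt_generate().
*/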

static PetscErrorCode det_separators(xxt_ADT xxt_handle)
{
  PetscInt     i, ct, id;
  PetscInt     mask, edge, *iptr;
  PetscInt    *dir, *used;
  PetscInt     sum[4], w[4];
  PetscScalar  rsum[4], rw[4];
  PetscInt     op[] = {GL_ADD, 0};
  PetscScalar *lhs, *rhs;
  PetscInt    *nsep, *lnsep, *fo, nfo = 0;
  PCTFS_gs_ADT PCTFS_gs_handle = xxt_handle->mvi->PCTFS_gs_handle;
  PetscInt    *local2global    = xxt_handle->mvi->local2global;
  PetscInt     n               = xxt_handle->mvi->n;
  PetscInt     m               = xxt_handle->mvi->m;
  PetscInt     level           = xxt_handle->level;
  PetscInt     shared          = 0;

  dir   = (PetscInt *)malloc(sizeof(PetscInt) * (level + 1));
  nsep  = (PetscInt *)malloc(sizeof(PetscInt) * (level + 1));
  lnsep = (PetscInt *)malloc(sizeof(PetscInt) * (level + 1));
  fo    = (PetscInt *)malloc(sizeof(PetscInt) * (n + 1));
  used  = (PetscInt *)malloc(sizeof(PetscInt) * n);

  PCTFS_ivec_zero(dir, level + 1);
  PCTFS_ivec_zero(nsep, level + 1);
  PCTFS_ivec_zero(lnsep, level + 1);
  PCTFS_ivec_set(fo, -1, n + 1);
  PCTFS_ivec_zero(used, n);

  lhs = (PetscScalar *)malloc(sizeof(PetscScalar) * m);
  rhs = (PetscScalar *)malloc(sizeof(PetscScalar) * m);

  /* determine the # of unique dof */
  PCTFS_rvec_zero(lhs, m);
  PCTFS_rvec_set(lhs, 1.0, n);
  PCTFS_gs_gop_hc(PCTFS_gs_handle, lhs, "+\0", level);
  PCTFS_rvec_zero(rsum, 2);
  for (i = 0; i < n; i++) {
    if (lhs[i] != 0.0) {
      rsum[0] += 1.0 / lhs[i];
      rsum[1] += lhs[i];
    }
  }
  PCTFS_grop_hc(rsum, rw, 2, op, level);
  rsum[0] += 0.1;
  rsum[1] += 0.1;

  if (PetscAbsScalar(rsum[0] - rsum[1]) > EPS) shared = 1;

  xxt_handle->info->n_global = xxt_handle->info->m_global = (PetscInt)rsum[0];
  xxt_handle->mvi->n_global = xxt_handle->mvi->m_global = (PetscInt)rsum[0];

  /* determine separator sets top down */
  if (shared) {
    for (iptr = fo + n, id = PCTFS_my_id, mask = PCTFS_num_nodes >> 1, edge = level; edge > 0; edge--, mask >>= 1) {
      /* set rsh of hc, fire, and collect lhs responses */
      (id < mask) ? PCTFS_rvec_zero(lhs, m) : PCTFS_rvec_set(lhs, 1.0, m);
      PCTFS_gs_gop_hc(PCTFS_gs_handle, lhs, "+\0", edge);

      /* set lsh of hc, fire, and collect rhs responses */
      (id < mask) ? PCTFS_rvec_set(rhs, 1.0, m) : PCTFS_rvec_zero(rhs, m);
      PCTFS_gs_gop_hc(PCTFS_gs_handle, rhs, "+\0", edge);

      for (i = 0; i < n; i++) {
        if (id < mask) {
          if (lhs[i] != 0.0) lhs[i] = 1.0;
        }
        if (id >= mask) {
          if (rhs[i] != 0.0) rhs[i] = 1.0;
        }
      }

      if (id < mask) PCTFS_gs_gop_hc(PCTFS_gs_handle, lhs, "+\0", edge - 1);
      else PCTFS_gs_gop_hc(PCTFS_gs_handle, rhs, "+\0", edge - 1);

      /* count number of dofs I own that have signal and not in sep set */
      PCTFS_rvec_zero(rsum, 4);
      for (PCTFS_ivec_zero(sum, 4), ct = i = 0; i < n; i++) {
        if (!used[i]) {
          /* number of unmarked dofs on node */
          ct++;
          /* number of dofs to be marked on lhs hc */
          if (id < mask) {
            if (lhs[i] != 0.0) {
              sum[0]++;
              rsum[0] += 1.0 / lhs[i];
            }
          }
          /* number of dofs to be marked on rhs hc */
          if (id >= mask) {
            if (rhs[i] != 0.0) {
              sum[1]++;
              rsum[1] += 1.0 / rhs[i];
            }
          }
        }
      }

      /* go for load balance - choose half with most unmarked dofs, bias LHS */
      (id < mask) ? (sum[2] = ct) : (sum[3] = ct);
      (id < mask) ? (rsum[2] = ct) : (rsum[3] = ct);
      PCTFS_giop_hc(sum, w, 4, op, edge);
      PCTFS_grop_hc(rsum, rw, 4, op, edge);
      rsum[0] += 0.1;
      rsum[1] += 0.1;
      rsum[2] += 0.1;
      rsum[3] += 0.1;

      if (id < mask) {
        /* mark dofs I own that have signal and not in sep set */
        for (ct = i = 0; i < n; i++) {
          if ((!used[i]) && (lhs[i] != 0.0)) {
            ct++;
            nfo++;
            *--iptr = local2global[i];
            used[i] = edge;
          }
        }
        if (ct > 1) PCTFS_ivec_sort(iptr, ct);

        lnsep[edge] = ct;
        nsep[edge]  = (PetscInt)rsum[0];
        dir[edge]   = LEFT;
      }

      if (id >= mask) {
        /* mark dofs I own that have signal and not in sep set */
        for (ct = i = 0; i < n; i++) {
          if ((!used[i]) && (rhs[i] != 0.0)) {
            ct++;
            nfo++;
            *--iptr = local2global[i];
            used[i] = edge;
          }
        }
        if (ct > 1) PCTFS_ivec_sort(iptr, ct);

        lnsep[edge] = ct;
        nsep[edge]  = (PetscInt)rsum[1];
        dir[edge]   = RIGHT;
      }

      /* LATER or we can recur on these to order seps at this level */
      /* do we need full set of separators for this? */

      /* fold rhs hc into lower */
      if (id >= mask) id -= mask;
    }
  } else {
    for (iptr = fo + n, id = PCTFS_my_id, mask = PCTFS_num_nodes >> 1, edge = level; edge > 0; edge--, mask >>= 1) {
      /* set rsh of hc, fire, and collect lhs responses */
      (id < mask) ? PCTFS_rvec_zero(lhs, m) : PCTFS_rvec_set(lhs, 1.0, m);
      PCTFS_gs_gop_hc(PCTFS_gs_handle, lhs, "+\0", edge);

      /* set lsh of hc, fire, and collect rhs responses */
      (id < mask) ? PCTFS_rvec_set(rhs, 1.0, m) : PCTFS_rvec_zero(rhs, m);
      PCTFS_gs_gop_hc(PCTFS_gs_handle, rhs, "+\0", edge);

      /* count number of dofs I own that have signal and not in sep set */
      for (PCTFS_ivec_zero(sum, 4), ct = i = 0; i < n; i++) {
        if (!used[i]) {
          /* number of unmarked dofs on node */
          ct++;
          /* number of dofs to be marked on lhs hc */
          if ((id < mask) && (lhs[i] != 0.0)) sum[0]++;
          /* number of dofs to be marked on rhs hc */
          if ((id >= mask) && (rhs[i] != 0.0)) sum[1]++;
        }
      }

      /* go for load balance - choose half with most unmarked dofs, bias LHS */
      (id < mask) ? (sum[2] = ct) : (sum[3] = ct);
      PCTFS_giop_hc(sum, w, 4, op, edge);

      /* lhs hc wins */
      if (sum[2] >= sum[3]) {
        if (id < mask) {
          /* mark dofs I own that have signal and not in sep set */
          for (ct = i = 0; i < n; i++) {
            if ((!used[i]) && (lhs[i] != 0.0)) {
              ct++;
              nfo++;
              *--iptr = local2global[i];
              used[i] = edge;
            }
          }
          if (ct > 1) PCTFS_ivec_sort(iptr, ct);
          lnsep[edge] = ct;
        }
        nsep[edge] = sum[0];
        dir[edge]  = LEFT;
      } else { /* rhs hc wins */
        if (id >= mask) {
          /* mark dofs I own that have signal and not in sep set */
          for (ct = i = 0; i < n; i++) {
            if ((!used[i]) && (rhs[i] != 0.0)) {
              ct++;
              nfo++;
              *--iptr = local2global[i];
              used[i] = edge;
            }
          }
          if (ct > 1) PCTFS_ivec_sort(iptr, ct);
          lnsep[edge] = ct;
        }
        nsep[edge] = sum[1];
        dir[edge]  = RIGHT;
      }
      /* LATER or we can recur on these to order seps at this level */
      /* do we need full set of separators for this? */

      /* fold rhs hc into lower */
      if (id >= mask) id -= mask;
    }
  }

  /* level 0 is the on-processor case - so mark the remainder */
  for (ct = i = 0; i < n; i++) {
    if (!used[i]) {
      ct++;
      nfo++;
      *--iptr = local2global[i];
      used[i] = edge;
    }
  }
  if (ct > 1) PCTFS_ivec_sort(iptr, ct);
  lnsep[edge] = ct;
  nsep[edge]  = ct;
  dir[edge]   = LEFT;

  xxt_handle->info->nsep  = nsep;
  xxt_handle->info->lnsep = lnsep;
  xxt_handle->info->fo    = fo;
  xxt_handle->info->nfo   = nfo;

  free(dir);
  free(lhs);
  free(rhs);
  free(used);
  return 0;
}

static mv_info *set_mvi(PetscInt *local2global, PetscInt n, PetscInt m, PetscErrorCode (*matvec)(mv_info *, PetscScalar *, PetscScalar *), void *grid_data)
{
  mv_info *mvi;

  mvi               = (mv_info *)malloc(sizeof(mv_info));
  mvi->n            = n;
  mvi->m            = m;
  mvi->n_global     = -1;
  mvi->m_global     = -1;
  mvi->local2global = (PetscInt *)malloc((m + 1) * sizeof(PetscInt));
  PCTFS_ivec_copy(mvi->local2global, local2global, m);
  mvi->local2global[m] = INT_MAX;
  mvi->matvec          = matvec;
  mvi->grid_data       = grid_data;

  /* set xxt communication handle to perform restricted matvec */
  mvi->PCTFS_gs_handle = PCTFS_gs_init(local2global, m, PCTFS_num_nodes);

  return (mvi);
}

static PetscErrorCode do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u)
{
  /* the registered callback actually receives the application's grid_data as
     its first argument; the mv_info * in its prototype is only a cast-through
     (see the cast applied when it is stored in XXT_factor()) */
  A->matvec((mv_info *)A->grid_data, v, u);
  return 0;
}