Actual source code: pdvec.c
2: /*
3: Code for some of the parallel vector primitives.
4: */
5: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
6: #include <petsc/private/viewerhdf5impl.h>
7: #include <petsc/private/glvisviewerimpl.h>
8: #include <petsc/private/glvisvecimpl.h>
9: #include <petscsf.h>
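/* Release the COO (coordinate-format) assembly data cached on the vector by
   VecSetPreallocationCOO_MPI() below: the local/remote maps and permutations,
   the send/receive buffers, and the star forest used for the remote exchange. */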
11: static PetscErrorCode VecResetPreallocationCOO_MPI(Vec v)
12: {
13: Vec_MPI *vmpi = (Vec_MPI *)v->data;
15: if (vmpi) {
16: PetscFree(vmpi->jmap1);
17: PetscFree(vmpi->perm1);
18: PetscFree(vmpi->Cperm);
19: PetscFree4(vmpi->imap2, vmpi->jmap2, vmpi->sendbuf, vmpi->recvbuf);
20: PetscFree(vmpi->perm2);
21: PetscSFDestroy(&vmpi->coo_sf);
22: }
23: return 0;
24: }
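/* Destroy an MPI vector: free the local array, any local (ghosted) representation,
   the stashes used for off-process VecSetValues(), the cached COO assembly data, and
   the composed MATLAB hooks, then free the Vec_MPI data structure itself. */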
26: PetscErrorCode VecDestroy_MPI(Vec v)
27: {
28: Vec_MPI *x = (Vec_MPI *)v->data;
30: #if defined(PETSC_USE_LOG)
31: PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->N);
32: #endif
33: if (!x) return 0;
34: PetscFree(x->array_allocated);
36: /* Destroy local representation of vector if it exists */
37: if (x->localrep) {
38: VecDestroy(&x->localrep);
39: VecScatterDestroy(&x->localupdate);
40: }
41: VecAssemblyReset_MPI(v);
43: /* Destroy the stashes: note the order - so that the tags are freed properly */
44: VecStashDestroy_Private(&v->bstash);
45: VecStashDestroy_Private(&v->stash);
47: VecResetPreallocationCOO_MPI(v);
48: PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEnginePut_C", NULL);
49: PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEngineGet_C", NULL);
50: PetscFree(v->data);
51: return 0;
52: }
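/* ASCII viewer for MPI vectors.  Rank 0 prints its own values and then receives and
   prints each other rank's values in turn; the exact layout depends on the viewer
   format (MATLAB, SYMMODU, VTK, PCICE, GLVis, load-balance summary, or plain). */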
54: PetscErrorCode VecView_MPI_ASCII(Vec xin, PetscViewer viewer)
55: {
56: PetscInt i, work = xin->map->n, cnt, len, nLen;
57: PetscMPIInt j, n = 0, size, rank, tag = ((PetscObject)viewer)->tag;
58: MPI_Status status;
59: PetscScalar *values;
60: const PetscScalar *xarray;
61: const char *name;
62: PetscViewerFormat format;
64: MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size);
65: PetscViewerGetFormat(viewer, &format);
66: if (format == PETSC_VIEWER_LOAD_BALANCE) {
67: PetscInt nmax = 0, nmin = xin->map->n, navg;
68: for (i = 0; i < (PetscInt)size; i++) {
69: nmax = PetscMax(nmax, xin->map->range[i + 1] - xin->map->range[i]);
70: nmin = PetscMin(nmin, xin->map->range[i + 1] - xin->map->range[i]);
71: }
72: navg = xin->map->N / size;
73: PetscViewerASCIIPrintf(viewer, " Load Balance - Local vector size Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax);
74: return 0;
75: }
77: VecGetArrayRead(xin, &xarray);
78: /* determine maximum message to arrive */
79: MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank);
80: MPI_Reduce(&work, &len, 1, MPIU_INT, MPI_MAX, 0, PetscObjectComm((PetscObject)xin));
81: if (format == PETSC_VIEWER_ASCII_GLVIS) rank = 0, len = 0; /* no parallel distributed write support from GLVis */
82: if (rank == 0) {
83: PetscMalloc1(len, &values);
84: /*
85: MATLAB format and ASCII format are very similar except
86: MATLAB uses %18.16e format while ASCII uses %g
87: */
88: if (format == PETSC_VIEWER_ASCII_MATLAB) {
89: PetscObjectGetName((PetscObject)xin, &name);
90: PetscViewerASCIIPrintf(viewer, "%s = [\n", name);
91: for (i = 0; i < xin->map->n; i++) {
92: #if defined(PETSC_USE_COMPLEX)
93: if (PetscImaginaryPart(xarray[i]) > 0.0) {
94: PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i]));
95: } else if (PetscImaginaryPart(xarray[i]) < 0.0) {
96: PetscViewerASCIIPrintf(viewer, "%18.16e - %18.16ei\n", (double)PetscRealPart(xarray[i]), -(double)PetscImaginaryPart(xarray[i]));
97: } else {
98: PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)PetscRealPart(xarray[i]));
99: }
100: #else
101: PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)xarray[i]);
102: #endif
103: }
104: /* receive and print messages */
105: for (j = 1; j < size; j++) {
106: MPI_Recv(values, (PetscMPIInt)len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status);
107: MPI_Get_count(&status, MPIU_SCALAR, &n);
108: for (i = 0; i < n; i++) {
109: #if defined(PETSC_USE_COMPLEX)
110: if (PetscImaginaryPart(values[i]) > 0.0) {
111: PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i]));
112: } else if (PetscImaginaryPart(values[i]) < 0.0) {
113: PetscViewerASCIIPrintf(viewer, "%18.16e - %18.16ei\n", (double)PetscRealPart(values[i]), -(double)PetscImaginaryPart(values[i]));
114: } else {
115: PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)PetscRealPart(values[i]));
116: }
117: #else
118: PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)values[i]);
119: #endif
120: }
121: }
122: PetscViewerASCIIPrintf(viewer, "];\n");
124: } else if (format == PETSC_VIEWER_ASCII_SYMMODU) {
125: for (i = 0; i < xin->map->n; i++) {
126: #if defined(PETSC_USE_COMPLEX)
127: PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i]));
128: #else
129: PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)xarray[i]);
130: #endif
131: }
132: /* receive and print messages */
133: for (j = 1; j < size; j++) {
134: MPI_Recv(values, (PetscMPIInt)len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status);
135: MPI_Get_count(&status, MPIU_SCALAR, &n);
136: for (i = 0; i < n; i++) {
137: #if defined(PETSC_USE_COMPLEX)
138: PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i]));
139: #else
140: PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)values[i]);
141: #endif
142: }
143: }
144: } else if (format == PETSC_VIEWER_ASCII_VTK_DEPRECATED || format == PETSC_VIEWER_ASCII_VTK_CELL_DEPRECATED) {
145: /*
146: state 0: No header has been output
147: state 1: Only POINT_DATA has been output
148: state 2: Only CELL_DATA has been output
149: state 3: Output both, POINT_DATA last
150: state 4: Output both, CELL_DATA last
151: */
152: static PetscInt stateId = -1;
153: int outputState = 0;
154: int doOutput = 0;
155: PetscBool hasState;
156: PetscInt bs, b;
158: if (stateId < 0) PetscObjectComposedDataRegister(&stateId);
159: PetscObjectComposedDataGetInt((PetscObject)viewer, stateId, outputState, hasState);
160: if (!hasState) outputState = 0;
162: PetscObjectGetName((PetscObject)xin, &name);
163: VecGetLocalSize(xin, &nLen);
164: PetscMPIIntCast(nLen, &n);
165: VecGetBlockSize(xin, &bs);
166: if (format == PETSC_VIEWER_ASCII_VTK_DEPRECATED) {
167: if (outputState == 0) {
168: outputState = 1;
169: doOutput = 1;
170: } else if (outputState == 1) doOutput = 0;
171: else if (outputState == 2) {
172: outputState = 3;
173: doOutput = 1;
174: } else if (outputState == 3) doOutput = 0;
177: if (doOutput) PetscViewerASCIIPrintf(viewer, "POINT_DATA %" PetscInt_FMT "\n", xin->map->N / bs);
178: } else {
179: if (outputState == 0) {
180: outputState = 2;
181: doOutput = 1;
182: } else if (outputState == 1) {
183: outputState = 4;
184: doOutput = 1;
185: } else if (outputState == 2) {
186: doOutput = 0;
187: } else {
189: if (outputState == 4) doOutput = 0;
190: }
192: if (doOutput) PetscViewerASCIIPrintf(viewer, "CELL_DATA %" PetscInt_FMT "\n", xin->map->N / bs);
193: }
194: PetscObjectComposedDataSetInt((PetscObject)viewer, stateId, outputState);
195: if (name) {
196: if (bs == 3) {
197: PetscViewerASCIIPrintf(viewer, "VECTORS %s double\n", name);
198: } else {
199: PetscViewerASCIIPrintf(viewer, "SCALARS %s double %" PetscInt_FMT "\n", name, bs);
200: }
201: } else {
202: PetscViewerASCIIPrintf(viewer, "SCALARS scalars double %" PetscInt_FMT "\n", bs);
203: }
204: if (bs != 3) PetscViewerASCIIPrintf(viewer, "LOOKUP_TABLE default\n");
205: for (i = 0; i < n / bs; i++) {
206: for (b = 0; b < bs; b++) {
207: if (b > 0) PetscViewerASCIIPrintf(viewer, " ");
208: PetscViewerASCIIPrintf(viewer, "%g", (double)PetscRealPart(xarray[i * bs + b]));
209: }
210: PetscViewerASCIIPrintf(viewer, "\n");
211: }
212: for (j = 1; j < size; j++) {
213: MPI_Recv(values, (PetscMPIInt)len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status);
214: MPI_Get_count(&status, MPIU_SCALAR, &n);
215: for (i = 0; i < n / bs; i++) {
216: for (b = 0; b < bs; b++) {
217: if (b > 0) PetscViewerASCIIPrintf(viewer, " ");
218: PetscViewerASCIIPrintf(viewer, "%g", (double)PetscRealPart(values[i * bs + b]));
219: }
220: PetscViewerASCIIPrintf(viewer, "\n");
221: }
222: }
223: } else if (format == PETSC_VIEWER_ASCII_VTK_COORDS_DEPRECATED) {
224: PetscInt bs, b;
226: VecGetLocalSize(xin, &nLen);
227: PetscMPIIntCast(nLen, &n);
228: VecGetBlockSize(xin, &bs);
231: for (i = 0; i < n / bs; i++) {
232: for (b = 0; b < bs; b++) {
233: if (b > 0) PetscViewerASCIIPrintf(viewer, " ");
234: PetscViewerASCIIPrintf(viewer, "%g", (double)PetscRealPart(xarray[i * bs + b]));
235: }
236: for (b = bs; b < 3; b++) PetscViewerASCIIPrintf(viewer, " 0.0");
237: PetscViewerASCIIPrintf(viewer, "\n");
238: }
239: for (j = 1; j < size; j++) {
240: MPI_Recv(values, (PetscMPIInt)len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status);
241: MPI_Get_count(&status, MPIU_SCALAR, &n);
242: for (i = 0; i < n / bs; i++) {
243: for (b = 0; b < bs; b++) {
244: if (b > 0) PetscViewerASCIIPrintf(viewer, " ");
245: PetscViewerASCIIPrintf(viewer, "%g", (double)PetscRealPart(values[i * bs + b]));
246: }
247: for (b = bs; b < 3; b++) PetscViewerASCIIPrintf(viewer, " 0.0");
248: PetscViewerASCIIPrintf(viewer, "\n");
249: }
250: }
251: } else if (format == PETSC_VIEWER_ASCII_PCICE) {
252: PetscInt bs, b, vertexCount = 1;
254: VecGetLocalSize(xin, &nLen);
255: PetscMPIIntCast(nLen, &n);
256: VecGetBlockSize(xin, &bs);
259: PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT "\n", xin->map->N / bs);
260: for (i = 0; i < n / bs; i++) {
261: PetscViewerASCIIPrintf(viewer, "%7" PetscInt_FMT " ", vertexCount++);
262: for (b = 0; b < bs; b++) {
263: if (b > 0) PetscViewerASCIIPrintf(viewer, " ");
264: #if !defined(PETSC_USE_COMPLEX)
265: PetscViewerASCIIPrintf(viewer, "% 12.5E", (double)xarray[i * bs + b]);
266: #endif
267: }
268: PetscViewerASCIIPrintf(viewer, "\n");
269: }
270: for (j = 1; j < size; j++) {
271: MPI_Recv(values, (PetscMPIInt)len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status);
272: MPI_Get_count(&status, MPIU_SCALAR, &n);
273: for (i = 0; i < n / bs; i++) {
274: PetscViewerASCIIPrintf(viewer, "%7" PetscInt_FMT " ", vertexCount++);
275: for (b = 0; b < bs; b++) {
276: if (b > 0) PetscViewerASCIIPrintf(viewer, " ");
277: #if !defined(PETSC_USE_COMPLEX)
278: PetscViewerASCIIPrintf(viewer, "% 12.5E", (double)values[i * bs + b]);
279: #endif
280: }
281: PetscViewerASCIIPrintf(viewer, "\n");
282: }
283: }
284: } else if (format == PETSC_VIEWER_ASCII_GLVIS) {
285: /* GLVis ASCII visualization/dump: this function mimics mfem::GridFunction::Save() */
286: const PetscScalar *array;
287: PetscInt i, n, vdim, ordering = 1; /* mfem::FiniteElementSpace::Ordering::byVDIM */
288: PetscContainer glvis_container;
289: PetscViewerGLVisVecInfo glvis_vec_info;
290: PetscViewerGLVisInfo glvis_info;
292: /* mfem::FiniteElementSpace::Save() */
293: VecGetBlockSize(xin, &vdim);
294: PetscViewerASCIIPrintf(viewer, "FiniteElementSpace\n");
295: PetscObjectQuery((PetscObject)xin, "_glvis_info_container", (PetscObject *)&glvis_container);
297: PetscContainerGetPointer(glvis_container, (void **)&glvis_vec_info);
298: PetscViewerASCIIPrintf(viewer, "%s\n", glvis_vec_info->fec_type);
299: PetscViewerASCIIPrintf(viewer, "VDim: %" PetscInt_FMT "\n", vdim);
300: PetscViewerASCIIPrintf(viewer, "Ordering: %" PetscInt_FMT "\n", ordering);
301: PetscViewerASCIIPrintf(viewer, "\n");
302: /* mfem::Vector::Print() */
303: PetscObjectQuery((PetscObject)viewer, "_glvis_info_container", (PetscObject *)&glvis_container);
305: PetscContainerGetPointer(glvis_container, (void **)&glvis_info);
306: if (glvis_info->enabled) {
307: VecGetLocalSize(xin, &n);
308: VecGetArrayRead(xin, &array);
309: for (i = 0; i < n; i++) {
310: PetscViewerASCIIPrintf(viewer, glvis_info->fmt, (double)PetscRealPart(array[i]));
311: PetscViewerASCIIPrintf(viewer, "\n");
312: }
313: VecRestoreArrayRead(xin, &array);
314: }
315: } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
316: /* No info */
317: } else {
318: if (format != PETSC_VIEWER_ASCII_COMMON) PetscViewerASCIIPrintf(viewer, "Process [%d]\n", rank);
319: cnt = 0;
320: for (i = 0; i < xin->map->n; i++) {
321: if (format == PETSC_VIEWER_ASCII_INDEX) PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT ": ", cnt++);
322: #if defined(PETSC_USE_COMPLEX)
323: if (PetscImaginaryPart(xarray[i]) > 0.0) {
324: PetscViewerASCIIPrintf(viewer, "%g + %g i\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i]));
325: } else if (PetscImaginaryPart(xarray[i]) < 0.0) {
326: PetscViewerASCIIPrintf(viewer, "%g - %g i\n", (double)PetscRealPart(xarray[i]), -(double)PetscImaginaryPart(xarray[i]));
327: } else {
328: PetscViewerASCIIPrintf(viewer, "%g\n", (double)PetscRealPart(xarray[i]));
329: }
330: #else
331: PetscViewerASCIIPrintf(viewer, "%g\n", (double)xarray[i]);
332: #endif
333: }
334: /* receive and print messages */
335: for (j = 1; j < size; j++) {
336: MPI_Recv(values, (PetscMPIInt)len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status);
337: MPI_Get_count(&status, MPIU_SCALAR, &n);
338: if (format != PETSC_VIEWER_ASCII_COMMON) PetscViewerASCIIPrintf(viewer, "Process [%d]\n", j);
339: for (i = 0; i < n; i++) {
340: if (format == PETSC_VIEWER_ASCII_INDEX) PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT ": ", cnt++);
341: #if defined(PETSC_USE_COMPLEX)
342: if (PetscImaginaryPart(values[i]) > 0.0) {
343: PetscViewerASCIIPrintf(viewer, "%g + %g i\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i]));
344: } else if (PetscImaginaryPart(values[i]) < 0.0) {
345: PetscViewerASCIIPrintf(viewer, "%g - %g i\n", (double)PetscRealPart(values[i]), -(double)PetscImaginaryPart(values[i]));
346: } else {
347: PetscViewerASCIIPrintf(viewer, "%g\n", (double)PetscRealPart(values[i]));
348: }
349: #else
350: PetscViewerASCIIPrintf(viewer, "%g\n", (double)values[i]);
351: #endif
352: }
353: }
354: }
355: PetscFree(values);
356: } else {
357: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
358: /* Rank 0 is not trying to receive anything, so don't send anything */
359: } else {
360: if (format == PETSC_VIEWER_ASCII_MATLAB || format == PETSC_VIEWER_ASCII_VTK_DEPRECATED || format == PETSC_VIEWER_ASCII_VTK_CELL_DEPRECATED) {
361: /* this may be a collective operation so make sure everyone calls it */
362: PetscObjectGetName((PetscObject)xin, &name);
363: }
364: MPI_Send((void *)xarray, xin->map->n, MPIU_SCALAR, 0, tag, PetscObjectComm((PetscObject)xin));
365: }
366: }
367: PetscViewerFlush(viewer);
368: VecRestoreArrayRead(xin, &xarray);
369: return 0;
370: }
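/* Binary viewer: the generic VecView_Binary() handles both sequential and MPI vectors,
   so this routine simply delegates to it. */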
372: PetscErrorCode VecView_MPI_Binary(Vec xin, PetscViewer viewer)
373: {
374: return VecView_Binary(xin, viewer);
375: }
377: #include <petscdraw.h>
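/* Line-graph draw viewer: gather the (real parts of the) vector entries onto rank 0
   with MPI_Gatherv() and plot them as a single curve with PetscDrawLG. */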
378: PetscErrorCode VecView_MPI_Draw_LG(Vec xin, PetscViewer viewer)
379: {
380: PetscDraw draw;
381: PetscBool isnull;
382: PetscDrawLG lg;
383: PetscMPIInt i, size, rank, n, N, *lens = NULL, *disp = NULL;
384: PetscReal *values, *xx = NULL, *yy = NULL;
385: const PetscScalar *xarray;
386: int colors[] = {PETSC_DRAW_RED};
388: PetscViewerDrawGetDraw(viewer, 0, &draw);
389: PetscDrawIsNull(draw, &isnull);
390: if (isnull) return 0;
391: MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank);
392: MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size);
393: PetscMPIIntCast(xin->map->n, &n);
394: PetscMPIIntCast(xin->map->N, &N);
396: VecGetArrayRead(xin, &xarray);
397: #if defined(PETSC_USE_COMPLEX)
398: PetscMalloc1(n + 1, &values);
399: for (i = 0; i < n; i++) values[i] = PetscRealPart(xarray[i]);
400: #else
401: values = (PetscReal *)xarray;
402: #endif
403: if (rank == 0) {
404: PetscMalloc2(N, &xx, N, &yy);
405: for (i = 0; i < N; i++) xx[i] = (PetscReal)i;
406: PetscMalloc2(size, &lens, size, &disp);
407: for (i = 0; i < size; i++) lens[i] = (PetscMPIInt)xin->map->range[i + 1] - (PetscMPIInt)xin->map->range[i];
408: for (i = 0; i < size; i++) disp[i] = (PetscMPIInt)xin->map->range[i];
409: }
410: MPI_Gatherv(values, n, MPIU_REAL, yy, lens, disp, MPIU_REAL, 0, PetscObjectComm((PetscObject)xin));
411: PetscFree2(lens, disp);
412: #if defined(PETSC_USE_COMPLEX)
413: PetscFree(values);
414: #endif
415: VecRestoreArrayRead(xin, &xarray);
417: PetscViewerDrawGetDrawLG(viewer, 0, &lg);
418: PetscDrawLGReset(lg);
419: PetscDrawLGSetDimension(lg, 1);
420: PetscDrawLGSetColors(lg, colors);
421: if (rank == 0) {
422: PetscDrawLGAddPoints(lg, N, &xx, &yy);
423: PetscFree2(xx, yy);
424: }
425: PetscDrawLGDraw(lg);
426: PetscDrawLGSave(lg);
427: return 0;
428: }
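/* Default draw viewer: each rank draws the line segments for its own portion of the
   vector; the last value of each rank is passed to its right neighbor so the curve is
   continuous across ownership boundaries. */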
430: PetscErrorCode VecView_MPI_Draw(Vec xin, PetscViewer viewer)
431: {
432: PetscMPIInt rank, size, tag = ((PetscObject)viewer)->tag;
433: PetscInt i, start, end;
434: MPI_Status status;
435: PetscReal min, max, tmp = 0.0;
436: PetscDraw draw;
437: PetscBool isnull;
438: PetscDrawAxis axis;
439: const PetscScalar *xarray;
441: PetscViewerDrawGetDraw(viewer, 0, &draw);
442: PetscDrawIsNull(draw, &isnull);
443: if (isnull) return 0;
444: MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size);
445: MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank);
447: VecMin(xin, NULL, &min);
448: VecMax(xin, NULL, &max);
449: if (min == max) {
450: min -= 1.e-5;
451: max += 1.e-5;
452: }
454: PetscDrawCheckResizedWindow(draw);
455: PetscDrawClear(draw);
457: PetscDrawAxisCreate(draw, &axis);
458: PetscDrawAxisSetLimits(axis, 0.0, (PetscReal)xin->map->N, min, max);
459: PetscDrawAxisDraw(axis);
460: PetscDrawAxisDestroy(&axis);
462: /* draw local part of vector */
463: VecGetArrayRead(xin, &xarray);
464: VecGetOwnershipRange(xin, &start, &end);
465: if (rank < size - 1) { /* send value to right */
466: MPI_Send((void *)&xarray[xin->map->n - 1], 1, MPIU_REAL, rank + 1, tag, PetscObjectComm((PetscObject)xin));
467: }
468: if (rank) { /* receive the last value of the left neighbor */
469: MPI_Recv(&tmp, 1, MPIU_REAL, rank - 1, tag, PetscObjectComm((PetscObject)xin), &status);
470: }
471: PetscDrawCollectiveBegin(draw);
472: if (rank) PetscDrawLine(draw, (PetscReal)start - 1, tmp, (PetscReal)start, PetscRealPart(xarray[0]), PETSC_DRAW_RED);
473: for (i = 1; i < xin->map->n; i++) PetscDrawLine(draw, (PetscReal)(i - 1 + start), PetscRealPart(xarray[i - 1]), (PetscReal)(i + start), PetscRealPart(xarray[i]), PETSC_DRAW_RED);
474: PetscDrawCollectiveEnd(draw);
475: VecRestoreArrayRead(xin, &xarray);
477: PetscDrawFlush(draw);
478: PetscDrawPause(draw);
479: PetscDrawSave(draw);
480: return 0;
481: }
483: #if defined(PETSC_HAVE_MATLAB)
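/* MATLAB viewer: gather the whole vector onto rank 0 and hand it to the MATLAB
   engine under the vector's name. */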
484: PetscErrorCode VecView_MPI_Matlab(Vec xin, PetscViewer viewer)
485: {
486: PetscMPIInt rank, size, *lens;
487: PetscInt i, N = xin->map->N;
488: const PetscScalar *xarray;
489: PetscScalar *xx;
491: VecGetArrayRead(xin, &xarray);
492: MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank);
493: MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size);
494: if (rank == 0) {
495: PetscMalloc1(N, &xx);
496: PetscMalloc1(size, &lens);
497: for (i = 0; i < size; i++) lens[i] = xin->map->range[i + 1] - xin->map->range[i];
499: MPI_Gatherv((void *)xarray, xin->map->n, MPIU_SCALAR, xx, lens, xin->map->range, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)xin));
500: PetscFree(lens);
502: PetscObjectName((PetscObject)xin);
503: PetscViewerMatlabPutArray(viewer, N, 1, xx, ((PetscObject)xin)->name);
505: PetscFree(xx);
506: } else {
507: MPI_Gatherv((void *)xarray, xin->map->n, MPIU_SCALAR, 0, 0, 0, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)xin));
508: }
509: VecRestoreArrayRead(xin, &xarray);
510: return 0;
511: }
512: #endif
514: #if defined(PETSC_HAVE_ADIOS)
515: #include <adios.h>
516: #include <adios_read.h>
517: #include <petsc/private/vieweradiosimpl.h>
518: #include <petsc/private/viewerimpl.h>
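/* ADIOS viewer: each rank writes its contiguous block of the vector into a global
   ADIOS variable described by its local size, the global size, and its offset. */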
520: PetscErrorCode VecView_MPI_ADIOS(Vec xin, PetscViewer viewer)
521: {
522: PetscViewer_ADIOS *adios = (PetscViewer_ADIOS *)viewer->data;
523: const char *vecname;
524: int64_t id;
525: PetscInt n, N, rstart;
526: const PetscScalar *array;
527: char nglobalname[16], nlocalname[16], coffset[16];
529: PetscObjectGetName((PetscObject)xin, &vecname);
531: VecGetLocalSize(xin, &n);
532: VecGetSize(xin, &N);
533: VecGetOwnershipRange(xin, &rstart, NULL);
535: sprintf(nlocalname, "%d", (int)n);
536: sprintf(nglobalname, "%d", (int)N);
537: sprintf(coffset, "%d", (int)rstart);
538: id = adios_define_var(Petsc_adios_group, vecname, "", adios_double, nlocalname, nglobalname, coffset);
539: VecGetArrayRead(xin, &array);
540: adios_write_byid(adios->adios_handle, id, array);
541: VecRestoreArrayRead(xin, &array);
542: return 0;
543: }
544: #endif
546: #if defined(PETSC_HAVE_HDF5)
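/* HDF5 viewer: create (or extend, when timestepping) a chunked dataset whose layout is
   [timestep][N/bs][bs][2 for complex], then collectively write each rank's hyperslab. */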
547: PetscErrorCode VecView_MPI_HDF5(Vec xin, PetscViewer viewer)
548: {
549: PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
550: /* TODO: It looks like we can remove the H5Sclose(filespace) and H5Dget_space(dset_id). Why do we do this? */
551: hid_t filespace; /* file dataspace identifier */
552: hid_t chunkspace; /* chunk dataset property identifier */
553: hid_t dset_id; /* dataset identifier */
554: hid_t memspace; /* memory dataspace identifier */
555: hid_t file_id;
556: hid_t group;
557: hid_t memscalartype; /* scalar type for mem (H5T_NATIVE_FLOAT or H5T_NATIVE_DOUBLE) */
558: hid_t filescalartype; /* scalar type for file (H5T_NATIVE_FLOAT or H5T_NATIVE_DOUBLE) */
559: PetscInt bs = PetscAbs(xin->map->bs);
560: hsize_t dim;
561: hsize_t maxDims[4], dims[4], chunkDims[4], count[4], offset[4];
562: PetscBool timestepping, dim2, spoutput;
563: PetscInt timestep = PETSC_MIN_INT, low;
564: hsize_t chunksize;
565: const PetscScalar *x;
566: const char *vecname;
568: PetscViewerHDF5OpenGroup(viewer, NULL, &file_id, &group);
569: PetscViewerHDF5IsTimestepping(viewer, &timestepping);
570: if (timestepping) PetscViewerHDF5GetTimestep(viewer, &timestep);
571: PetscViewerHDF5GetBaseDimension2(viewer, &dim2);
572: PetscViewerHDF5GetSPOutput(viewer, &spoutput);
574: /* Create the dataspace for the dataset.
575: *
576: * dims - holds the current dimensions of the dataset
577: *
578: * maxDims - holds the maximum dimensions of the dataset (unlimited
579: * for the number of time steps with the current dimensions for the
580: * other dimensions; so only additional time steps can be added).
581: *
582: * chunkDims - holds the size of a single time step (required to
583: * permit extending dataset).
584: */
585: dim = 0;
586: chunksize = 1;
587: if (timestep >= 0) {
588: dims[dim] = timestep + 1;
589: maxDims[dim] = H5S_UNLIMITED;
590: chunkDims[dim] = 1;
591: ++dim;
592: }
593: PetscHDF5IntCast(xin->map->N / bs, dims + dim);
595: maxDims[dim] = dims[dim];
596: chunkDims[dim] = PetscMax(1, dims[dim]);
597: chunksize *= chunkDims[dim];
598: ++dim;
599: if (bs > 1 || dim2) {
600: dims[dim] = bs;
601: maxDims[dim] = dims[dim];
602: chunkDims[dim] = PetscMax(1, dims[dim]);
603: chunksize *= chunkDims[dim];
604: ++dim;
605: }
606: #if defined(PETSC_USE_COMPLEX)
607: dims[dim] = 2;
608: maxDims[dim] = dims[dim];
609: chunkDims[dim] = PetscMax(1, dims[dim]);
610: chunksize *= chunkDims[dim];
611: /* hdf5 chunks must be less than 4GB */
612: if (chunksize > PETSC_HDF5_MAX_CHUNKSIZE / 64) {
613: if (bs > 1 || dim2) {
614: if (chunkDims[dim - 2] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128))) chunkDims[dim - 2] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128));
615: if (chunkDims[dim - 1] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128))) chunkDims[dim - 1] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128));
616: } else {
617: chunkDims[dim - 1] = PETSC_HDF5_MAX_CHUNKSIZE / 128;
618: }
619: }
620: ++dim;
621: #else
622: /* hdf5 chunks must be less than 4GB */
623: if (chunksize > PETSC_HDF5_MAX_CHUNKSIZE / 64) {
624: if (bs > 1 || dim2) {
625: if (chunkDims[dim - 2] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64))) chunkDims[dim - 2] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64));
626: if (chunkDims[dim - 1] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64))) chunkDims[dim - 1] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64));
627: } else {
628: chunkDims[dim - 1] = PETSC_HDF5_MAX_CHUNKSIZE / 64;
629: }
630: }
631: #endif
633: PetscCallHDF5Return(filespace, H5Screate_simple, (dim, dims, maxDims));
635: #if defined(PETSC_USE_REAL_SINGLE)
636: memscalartype = H5T_NATIVE_FLOAT;
637: filescalartype = H5T_NATIVE_FLOAT;
638: #elif defined(PETSC_USE_REAL___FLOAT128)
639: #error "HDF5 output with 128 bit floats not supported."
640: #elif defined(PETSC_USE_REAL___FP16)
641: #error "HDF5 output with 16 bit floats not supported."
642: #else
643: memscalartype = H5T_NATIVE_DOUBLE;
644: if (spoutput == PETSC_TRUE) filescalartype = H5T_NATIVE_FLOAT;
645: else filescalartype = H5T_NATIVE_DOUBLE;
646: #endif
648: /* Create the dataset with default properties and close filespace */
649: PetscObjectGetName((PetscObject)xin, &vecname);
650: if (H5Lexists(group, vecname, H5P_DEFAULT) < 1) {
651: /* Create chunk */
652: PetscCallHDF5Return(chunkspace, H5Pcreate, (H5P_DATASET_CREATE));
653: PetscCallHDF5(H5Pset_chunk, (chunkspace, dim, chunkDims));
655: PetscCallHDF5Return(dset_id, H5Dcreate2, (group, vecname, filescalartype, filespace, H5P_DEFAULT, chunkspace, H5P_DEFAULT));
656: PetscCallHDF5(H5Pclose, (chunkspace));
657: } else {
658: PetscCallHDF5Return(dset_id, H5Dopen2, (group, vecname, H5P_DEFAULT));
659: PetscCallHDF5(H5Dset_extent, (dset_id, dims));
660: }
661: PetscCallHDF5(H5Sclose, (filespace));
663: /* Each process defines a dataset and writes it to the hyperslab in the file */
664: dim = 0;
665: if (timestep >= 0) {
666: count[dim] = 1;
667: ++dim;
668: }
669: PetscHDF5IntCast(xin->map->n / bs, count + dim);
670: ++dim;
671: if (bs > 1 || dim2) {
672: count[dim] = bs;
673: ++dim;
674: }
675: #if defined(PETSC_USE_COMPLEX)
676: count[dim] = 2;
677: ++dim;
678: #endif
679: if (xin->map->n > 0 || H5_VERSION_GE(1, 10, 0)) {
680: PetscCallHDF5Return(memspace, H5Screate_simple, (dim, count, NULL));
681: } else {
682: /* Can't create dataspace with zero for any dimension, so create null dataspace. */
683: PetscCallHDF5Return(memspace, H5Screate, (H5S_NULL));
684: }
686: /* Select hyperslab in the file */
687: VecGetOwnershipRange(xin, &low, NULL);
688: dim = 0;
689: if (timestep >= 0) {
690: offset[dim] = timestep;
691: ++dim;
692: }
693: PetscHDF5IntCast(low / bs, offset + dim);
694: ++dim;
695: if (bs > 1 || dim2) {
696: offset[dim] = 0;
697: ++dim;
698: }
699: #if defined(PETSC_USE_COMPLEX)
700: offset[dim] = 0;
701: ++dim;
702: #endif
703: if (xin->map->n > 0 || H5_VERSION_GE(1, 10, 0)) {
704: PetscCallHDF5Return(filespace, H5Dget_space, (dset_id));
705: PetscCallHDF5(H5Sselect_hyperslab, (filespace, H5S_SELECT_SET, offset, NULL, count, NULL));
706: } else {
707: /* Create null filespace to match null memspace. */
708: PetscCallHDF5Return(filespace, H5Screate, (H5S_NULL));
709: }
711: VecGetArrayRead(xin, &x);
712: PetscCallHDF5(H5Dwrite, (dset_id, memscalartype, memspace, filespace, hdf5->dxpl_id, x));
713: PetscCallHDF5(H5Fflush, (file_id, H5F_SCOPE_GLOBAL));
714: VecRestoreArrayRead(xin, &x);
716: /* Close/release resources */
717: PetscCallHDF5(H5Gclose, (group));
718: PetscCallHDF5(H5Sclose, (filespace));
719: PetscCallHDF5(H5Sclose, (memspace));
720: PetscCallHDF5(H5Dclose, (dset_id));
722: #if defined(PETSC_USE_COMPLEX)
723: {
724: PetscBool tru = PETSC_TRUE;
725: PetscViewerHDF5WriteObjectAttribute(viewer, (PetscObject)xin, "complex", PETSC_BOOL, &tru);
726: }
727: #endif
728: if (timestepping) PetscViewerHDF5WriteObjectAttribute(viewer, (PetscObject)xin, "timestepping", PETSC_BOOL, &timestepping);
729: PetscInfo(xin, "Wrote Vec object with name %s\n", vecname);
730: return 0;
731: }
732: #endif
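/* Viewer dispatch for MPI vectors: select the implementation above that matches the
   viewer type (ASCII, binary, draw, Mathematica, HDF5, ADIOS, MATLAB, or GLVis). */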
734: PETSC_EXTERN PetscErrorCode VecView_MPI(Vec xin, PetscViewer viewer)
735: {
736: PetscBool iascii, isbinary, isdraw;
737: #if defined(PETSC_HAVE_MATHEMATICA)
738: PetscBool ismathematica;
739: #endif
740: #if defined(PETSC_HAVE_HDF5)
741: PetscBool ishdf5;
742: #endif
743: #if defined(PETSC_HAVE_MATLAB)
744: PetscBool ismatlab;
745: #endif
746: #if defined(PETSC_HAVE_ADIOS)
747: PetscBool isadios;
748: #endif
749: PetscBool isglvis;
751: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
752: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
753: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
754: #if defined(PETSC_HAVE_MATHEMATICA)
755: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERMATHEMATICA, &ismathematica);
756: #endif
757: #if defined(PETSC_HAVE_HDF5)
758: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5);
759: #endif
760: #if defined(PETSC_HAVE_MATLAB)
761: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERMATLAB, &ismatlab);
762: #endif
763: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERGLVIS, &isglvis);
764: #if defined(PETSC_HAVE_ADIOS)
765: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERADIOS, &isadios);
766: #endif
767: if (iascii) {
768: VecView_MPI_ASCII(xin, viewer);
769: } else if (isbinary) {
770: VecView_MPI_Binary(xin, viewer);
771: } else if (isdraw) {
772: PetscViewerFormat format;
773: PetscViewerGetFormat(viewer, &format);
774: if (format == PETSC_VIEWER_DRAW_LG) {
775: VecView_MPI_Draw_LG(xin, viewer);
776: } else {
777: VecView_MPI_Draw(xin, viewer);
778: }
779: #if defined(PETSC_HAVE_MATHEMATICA)
780: } else if (ismathematica) {
781: PetscViewerMathematicaPutVector(viewer, xin);
782: #endif
783: #if defined(PETSC_HAVE_HDF5)
784: } else if (ishdf5) {
785: VecView_MPI_HDF5(xin, viewer);
786: #endif
787: #if defined(PETSC_HAVE_ADIOS)
788: } else if (isadios) {
789: VecView_MPI_ADIOS(xin, viewer);
790: #endif
791: #if defined(PETSC_HAVE_MATLAB)
792: } else if (ismatlab) {
793: VecView_MPI_Matlab(xin, viewer);
794: #endif
795: } else if (isglvis) VecView_GLVis(xin, viewer);
796: return 0;
797: }
799: PetscErrorCode VecGetSize_MPI(Vec xin, PetscInt *N)
800: {
801: *N = xin->map->N;
802: return 0;
803: }
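/* Fetch values at the given global indices; only indices owned by this rank are legal,
   since off-process entries cannot be retrieved here. */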
805: PetscErrorCode VecGetValues_MPI(Vec xin, PetscInt ni, const PetscInt ix[], PetscScalar y[])
806: {
807: const PetscScalar *xx;
808: PetscInt i, tmp, start = xin->map->range[xin->stash.rank];
810: VecGetArrayRead(xin, &xx);
811: for (i = 0; i < ni; i++) {
812: if (xin->stash.ignorenegidx && ix[i] < 0) continue;
813: tmp = ix[i] - start;
815: y[i] = xx[tmp];
816: }
817: VecRestoreArrayRead(xin, &xx);
818: return 0;
819: }
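/* Insert or add values at global indices.  Locally owned entries are written directly
   into the array; off-process entries are recorded in the stash and communicated later
   by VecAssemblyBegin/End_MPI().  Editor's usage sketch (illustration only, not part of
   pdvec.c), assuming a parallel vector x, an index array idx, and a value array vals:

     PetscCall(VecSetValues(x, n, idx, vals, ADD_VALUES));
     PetscCall(VecAssemblyBegin(x));
     PetscCall(VecAssemblyEnd(x));
*/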
821: PetscErrorCode VecSetValues_MPI(Vec xin, PetscInt ni, const PetscInt ix[], const PetscScalar y[], InsertMode addv)
822: {
823: PetscMPIInt rank = xin->stash.rank;
824: PetscInt *owners = xin->map->range, start = owners[rank];
825: PetscInt end = owners[rank + 1], i, row;
826: PetscScalar *xx;
828: if (PetscDefined(USE_DEBUG)) {
831: }
832: VecGetArray(xin, &xx);
833: xin->stash.insertmode = addv;
835: if (addv == INSERT_VALUES) {
836: for (i = 0; i < ni; i++) {
837: if (xin->stash.ignorenegidx && ix[i] < 0) continue;
839: if ((row = ix[i]) >= start && row < end) {
840: xx[row - start] = y[i];
841: } else if (!xin->stash.donotstash) {
843: VecStashValue_Private(&xin->stash, row, y[i]);
844: }
845: }
846: } else {
847: for (i = 0; i < ni; i++) {
848: if (xin->stash.ignorenegidx && ix[i] < 0) continue;
850: if ((row = ix[i]) >= start && row < end) {
851: xx[row - start] += y[i];
852: } else if (!xin->stash.donotstash) {
854: VecStashValue_Private(&xin->stash, row, y[i]);
855: }
856: }
857: }
858: VecRestoreArray(xin, &xx);
859: return 0;
860: }
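/* Blocked variant of VecSetValues_MPI(): each index addresses a block of bs consecutive
   entries; off-process blocks are recorded in the block stash (bstash). */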
862: PetscErrorCode VecSetValuesBlocked_MPI(Vec xin, PetscInt ni, const PetscInt ix[], const PetscScalar yin[], InsertMode addv)
863: {
864: PetscMPIInt rank = xin->stash.rank;
865: PetscInt *owners = xin->map->range, start = owners[rank];
866: PetscInt end = owners[rank + 1], i, row, bs = PetscAbs(xin->map->bs), j;
867: PetscScalar *xx, *y = (PetscScalar *)yin;
869: VecGetArray(xin, &xx);
870: if (PetscDefined(USE_DEBUG)) {
873: }
874: xin->stash.insertmode = addv;
876: if (addv == INSERT_VALUES) {
877: for (i = 0; i < ni; i++) {
878: if ((row = bs * ix[i]) >= start && row < end) {
879: for (j = 0; j < bs; j++) xx[row - start + j] = y[j];
880: } else if (!xin->stash.donotstash) {
881: if (ix[i] < 0) {
882: y += bs;
883: continue;
884: }
886: VecStashValuesBlocked_Private(&xin->bstash, ix[i], y);
887: }
888: y += bs;
889: }
890: } else {
891: for (i = 0; i < ni; i++) {
892: if ((row = bs * ix[i]) >= start && row < end) {
893: for (j = 0; j < bs; j++) xx[row - start + j] += y[j];
894: } else if (!xin->stash.donotstash) {
895: if (ix[i] < 0) {
896: y += bs;
897: continue;
898: }
900: VecStashValuesBlocked_Private(&xin->bstash, ix[i], y);
901: }
902: y += bs;
903: }
904: }
905: VecRestoreArray(xin, &xx);
906: return 0;
907: }
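/* Begin communicating stashed off-process values: agree on the InsertMode across all
   ranks and start scattering both the scalar stash and the block stash to the owning
   ranks. */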
909: /*
910: Since nsends or nreceives may be zero we add 1 in certain mallocs
911: to make sure we never malloc an empty one.
912: */
913: PetscErrorCode VecAssemblyBegin_MPI(Vec xin)
914: {
915: PetscInt *owners = xin->map->range, *bowners, i, bs, nstash, reallocs;
916: PetscMPIInt size;
917: InsertMode addv;
918: MPI_Comm comm;
920: PetscObjectGetComm((PetscObject)xin, &comm);
921: if (xin->stash.donotstash) return 0;
923: MPIU_Allreduce((PetscEnum *)&xin->stash.insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, comm);
925: xin->stash.insertmode = addv; /* in case this processor had no cache */
926: xin->bstash.insertmode = addv; /* Block stash implicitly tracks InsertMode of scalar stash */
928: VecGetBlockSize(xin, &bs);
929: MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size);
930: if (!xin->bstash.bowners && xin->map->bs != -1) {
931: PetscMalloc1(size + 1, &bowners);
932: for (i = 0; i < size + 1; i++) bowners[i] = owners[i] / bs;
933: xin->bstash.bowners = bowners;
934: } else bowners = xin->bstash.bowners;
936: VecStashScatterBegin_Private(&xin->stash, owners);
937: VecStashScatterBegin_Private(&xin->bstash, bowners);
938: VecStashGetInfo_Private(&xin->stash, &nstash, &reallocs);
939: PetscInfo(xin, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
940: VecStashGetInfo_Private(&xin->bstash, &nstash, &reallocs);
941: PetscInfo(xin, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
942: return 0;
943: }
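/* Finish the assembly: receive the stashed messages and either add them to or insert
   them into the locally owned entries, first from the scalar stash and then from the
   block stash. */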
945: PetscErrorCode VecAssemblyEnd_MPI(Vec vec)
946: {
947: PetscInt base, i, j, *row, flg, bs;
948: PetscMPIInt n;
949: PetscScalar *val, *vv, *array, *xarray;
951: if (!vec->stash.donotstash) {
952: VecGetArray(vec, &xarray);
953: VecGetBlockSize(vec, &bs);
954: base = vec->map->range[vec->stash.rank];
956: /* Process the stash */
957: while (1) {
958: VecStashScatterGetMesg_Private(&vec->stash, &n, &row, &val, &flg);
959: if (!flg) break;
960: if (vec->stash.insertmode == ADD_VALUES) {
961: for (i = 0; i < n; i++) xarray[row[i] - base] += val[i];
962: } else if (vec->stash.insertmode == INSERT_VALUES) {
963: for (i = 0; i < n; i++) xarray[row[i] - base] = val[i];
964: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Insert mode is not set correctly; corrupted vector");
965: }
966: VecStashScatterEnd_Private(&vec->stash);
968: /* now process the block-stash */
969: while (1) {
970: VecStashScatterGetMesg_Private(&vec->bstash, &n, &row, &val, &flg);
971: if (!flg) break;
972: for (i = 0; i < n; i++) {
973: array = xarray + row[i] * bs - base;
974: vv = val + i * bs;
975: if (vec->stash.insertmode == ADD_VALUES) {
976: for (j = 0; j < bs; j++) array[j] += vv[j];
977: } else if (vec->stash.insertmode == INSERT_VALUES) {
978: for (j = 0; j < bs; j++) array[j] = vv[j];
979: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Insert mode is not set correctly; corrupted vector");
980: }
981: }
982: VecStashScatterEnd_Private(&vec->bstash);
983: VecRestoreArray(vec, &xarray);
984: }
985: vec->stash.insertmode = NOT_SET_VALUES;
986: return 0;
987: }
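/* Set up COO (coordinate-format) assembly for an MPI vector.  The input indices are
   sorted into ignored / locally owned / remote groups, the remote ones are shipped to
   their owning ranks through a PetscSF, and compressed maps (jmap1/perm1 for local
   entries, imap2/jmap2/perm2 for received entries) are cached so that
   VecSetValuesCOO_MPI() can later sum repeated contributions cheaply.  Editor's usage
   sketch (illustration only, not part of pdvec.c), assuming a parallel vector x:

     PetscInt    coo_i[] = {0, 7, 7};        // global indices, repeats allowed
     PetscScalar coo_v[] = {1.0, 2.0, 3.0};
     PetscCall(VecSetPreallocationCOO(x, 3, coo_i));
     PetscCall(VecSetValuesCOO(x, coo_v, ADD_VALUES));
*/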
989: PetscErrorCode VecSetPreallocationCOO_MPI(Vec x, PetscCount coo_n, const PetscInt coo_i[])
990: {
991: PetscInt m, M, rstart, rend;
992: Vec_MPI *vmpi = (Vec_MPI *)x->data;
993: PetscCount k, p, q, rem; /* Loop variables over coo arrays */
994: PetscMPIInt size;
995: MPI_Comm comm;
997: PetscObjectGetComm((PetscObject)x, &comm);
998: MPI_Comm_size(comm, &size);
999: VecResetPreallocationCOO_MPI(x);
1001: PetscLayoutSetUp(x->map);
1002: VecGetOwnershipRange(x, &rstart, &rend);
1003: VecGetLocalSize(x, &m);
1004: VecGetSize(x, &M);
1006: /* ---------------------------------------------------------------------------*/
1007: /* Sort COOs along with a permutation array, so that negative indices come */
1008: /* first, then local ones, then remote ones. */
1009: /* ---------------------------------------------------------------------------*/
1010: PetscCount n1 = coo_n, nneg, *perm;
1011: PetscInt *i1; /* Copy of input COOs along with a permutation array */
1012: PetscMalloc1(n1, &i1);
1013: PetscMalloc1(n1, &perm);
1014: PetscArraycpy(i1, coo_i, n1); /* Make a copy since we'll modify it */
1015: for (k = 0; k < n1; k++) perm[k] = k;
1017: /* Manipulate i1[] so that entries with negative indices will have the smallest
1018: index, local entries will have greater but negative indices, and remote entries
1019: will have positive indices.
1020: */
1021: for (k = 0; k < n1; k++) {
1022: if (i1[k] < 0) {
1023: if (x->stash.ignorenegidx) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
1024: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Found a negative index in VecSetPreallocateCOO() but VEC_IGNORE_NEGATIVE_INDICES was not set");
1025: } else if (i1[k] >= rstart && i1[k] < rend) {
1026: i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
1027: } else {
1029: if (x->stash.donotstash) i1[k] = PETSC_MIN_INT; /* Ignore off-proc indices as if they were negative */
1030: }
1031: }
1033: /* Sort the indices, after that, [0,nneg) have ignored entries, [nneg,rem) have local entries and [rem,n1) have remote entries */
1034: PetscSortIntWithCountArray(n1, i1, perm);
1035: for (k = 0; k < n1; k++) {
1036: if (i1[k] > PETSC_MIN_INT) break;
1037: } /* Advance k to the first entry we need to take care of */
1038: nneg = k;
1039: PetscSortedIntUpperBound(i1, nneg, n1, rend - 1 - PETSC_MAX_INT, &rem); /* rem is upper bound of the last local row */
1040: for (k = nneg; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert indices of local entries */
1042: /* ---------------------------------------------------------------------------*/
1043: /* Build stuff for local entries */
1044: /* ---------------------------------------------------------------------------*/
1045: PetscCount tot1, *jmap1, *perm1;
1046: PetscCalloc1(m + 1, &jmap1);
1047: for (k = nneg; k < rem; k++) jmap1[i1[k] - rstart + 1]++; /* Count repeats of each local entry */
1048: for (k = 0; k < m; k++) jmap1[k + 1] += jmap1[k]; /* Transform jmap1[] to CSR-like data structure */
1049: tot1 = jmap1[m];
1050: PetscAssert(tot1 == rem - nneg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected errors in VecSetPreallocationCOO_MPI");
1051: PetscMalloc1(tot1, &perm1);
1052: PetscArraycpy(perm1, perm + nneg, tot1);
1054: /* ---------------------------------------------------------------------------*/
1055: /* Record the permutation array for filling the send buffer */
1056: /* ---------------------------------------------------------------------------*/
1057: PetscCount *Cperm;
1058: PetscMalloc1(n1 - rem, &Cperm);
1059: PetscArraycpy(Cperm, perm + rem, n1 - rem);
1060: PetscFree(perm);
1062: /* ---------------------------------------------------------------------------*/
1063: /* Send remote entries to their owner */
1064: /* ---------------------------------------------------------------------------*/
1065: /* Find which entries should be sent to which remote ranks*/
1066: PetscInt nsend = 0; /* Number of MPI ranks to send data to */
1067: PetscMPIInt *sendto; /* [nsend], storing remote ranks */
1068: PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
1069: const PetscInt *ranges;
1070: PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
1072: PetscLayoutGetRanges(x->map, &ranges);
1073: PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries);
1074: for (k = rem; k < n1;) {
1075: PetscMPIInt owner;
1076: PetscInt firstRow, lastRow;
1078: /* Locate a row range */
1079: firstRow = i1[k]; /* first row of this owner */
1080: PetscLayoutFindOwner(x->map, firstRow, &owner);
1081: lastRow = ranges[owner + 1] - 1; /* last row of this owner */
1083: /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
1084: PetscSortedIntUpperBound(i1, k, n1, lastRow, &p);
1086: /* All entries in [k,p) belong to this remote owner */
1087: if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
1088: PetscMPIInt *sendto2;
1089: PetscInt *nentries2;
1090: PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
1092: PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2);
1093: PetscArraycpy(sendto2, sendto, maxNsend);
1094: PetscArraycpy(nentries2, nentries, maxNsend);
1095: PetscFree2(sendto, nentries);
1096: sendto = sendto2;
1097: nentries = nentries2;
1098: maxNsend = maxNsend2;
1099: }
1100: sendto[nsend] = owner;
1101: nentries[nsend] = p - k;
1102: PetscCountCast(p - k, &nentries[nsend]);
1103: nsend++;
1104: k = p;
1105: }
1107: /* Build 1st SF to know offsets on remote to send data */
1108: PetscSF sf1;
1109: PetscInt nroots = 1, nroots2 = 0;
1110: PetscInt nleaves = nsend, nleaves2 = 0;
1111: PetscInt *offsets;
1112: PetscSFNode *iremote;
1114: PetscSFCreate(comm, &sf1);
1115: PetscMalloc1(nsend, &iremote);
1116: PetscMalloc1(nsend, &offsets);
1117: for (k = 0; k < nsend; k++) {
1118: iremote[k].rank = sendto[k];
1119: iremote[k].index = 0;
1120: nleaves2 += nentries[k];
1122: }
1123: PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);
1124: PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM);
1125: PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM); /* Would nroots2 overflow, we check offsets[] below */
1126: PetscSFDestroy(&sf1);
1127: PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
1129: /* Build 2nd SF to send remote COOs to their owner */
1130: PetscSF sf2;
1131: nroots = nroots2;
1132: nleaves = nleaves2;
1133: PetscSFCreate(comm, &sf2);
1134: PetscSFSetFromOptions(sf2);
1135: PetscMalloc1(nleaves, &iremote);
1136: p = 0;
1137: for (k = 0; k < nsend; k++) {
1139: for (q = 0; q < nentries[k]; q++, p++) {
1140: iremote[p].rank = sendto[k];
1141: iremote[p].index = offsets[k] + q;
1142: }
1143: }
1144: PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);
1146: /* Send the remote COOs to their owner */
1147: PetscInt n2 = nroots, *i2; /* Buffers for received COOs from other ranks, along with a permutation array */
1148: PetscCount *perm2;
1149: PetscMalloc1(n2, &i2);
1150: PetscMalloc1(n2, &perm2);
1151: PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE);
1152: PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE);
1154: PetscFree(i1);
1155: PetscFree(offsets);
1156: PetscFree2(sendto, nentries);
1158: /* ---------------------------------------------------------------*/
1159: /* Sort received COOs along with a permutation array */
1160: /* ---------------------------------------------------------------*/
1161: PetscCount *imap2;
1162: PetscCount *jmap2, nnz2;
1163: PetscScalar *sendbuf, *recvbuf;
1164: PetscInt old;
1165: PetscCount sendlen = n1 - rem, recvlen = n2;
1167: for (k = 0; k < n2; k++) perm2[k] = k;
1168: PetscSortIntWithCountArray(n2, i2, perm2);
1170: /* nnz2 will be # of unique entries in the recvbuf */
1171: nnz2 = n2;
1172: for (k = 1; k < n2; k++) {
1173: if (i2[k] == i2[k - 1]) nnz2--;
1174: }
1176: /* Build imap2[] and jmap2[] for each unique entry */
1177: PetscMalloc4(nnz2, &imap2, nnz2 + 1, &jmap2, sendlen, &sendbuf, recvlen, &recvbuf);
1178: p = -1;
1179: old = -1;
1180: jmap2[0] = 0;
1181: jmap2++;
1182: for (k = 0; k < n2; k++) {
1183: if (i2[k] != old) { /* Meet a new entry */
1184: p++;
1185: imap2[p] = i2[k] - rstart;
1186: jmap2[p] = 1;
1187: old = i2[k];
1188: } else {
1189: jmap2[p]++;
1190: }
1191: }
1192: jmap2--;
1193: for (k = 0; k < nnz2; k++) jmap2[k + 1] += jmap2[k];
1195: PetscFree(i2);
1197: vmpi->coo_n = coo_n;
1198: vmpi->tot1 = tot1;
1199: vmpi->jmap1 = jmap1;
1200: vmpi->perm1 = perm1;
1201: vmpi->nnz2 = nnz2;
1202: vmpi->imap2 = imap2;
1203: vmpi->jmap2 = jmap2;
1204: vmpi->perm2 = perm2;
1206: vmpi->Cperm = Cperm;
1207: vmpi->sendbuf = sendbuf;
1208: vmpi->recvbuf = recvbuf;
1209: vmpi->sendlen = sendlen;
1210: vmpi->recvlen = recvlen;
1211: vmpi->coo_sf = sf2;
1212: return 0;
1213: }
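/* Add (or insert) the COO values prepared by VecSetPreallocationCOO_MPI(): pack and send
   the remote contributions through the cached PetscSF while summing the local ones, then
   fold the received contributions into the owned entries. */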
1215: PetscErrorCode VecSetValuesCOO_MPI(Vec x, const PetscScalar v[], InsertMode imode)
1216: {
1217: Vec_MPI *vmpi = (Vec_MPI *)x->data;
1218: PetscInt m;
1219: PetscScalar *a, *sendbuf = vmpi->sendbuf, *recvbuf = vmpi->recvbuf;
1220: const PetscCount *jmap1 = vmpi->jmap1;
1221: const PetscCount *perm1 = vmpi->perm1;
1222: const PetscCount *imap2 = vmpi->imap2;
1223: const PetscCount *jmap2 = vmpi->jmap2;
1224: const PetscCount *perm2 = vmpi->perm2;
1225: const PetscCount *Cperm = vmpi->Cperm;
1226: const PetscCount nnz2 = vmpi->nnz2;
1228: VecGetLocalSize(x, &m);
1229: VecGetArray(x, &a);
1231: /* Pack entries to be sent to remote */
1232: for (PetscInt i = 0; i < vmpi->sendlen; i++) sendbuf[i] = v[Cperm[i]];
1234: /* Send remote entries to their owner and overlap the communication with local computation */
1235: PetscSFReduceWithMemTypeBegin(vmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE);
1237: /* Insert or add the local entries into the owned part of the vector */
1237: for (PetscInt i = 0; i < m; i++) { /* All entries in a[] are either zero'ed or added with a value (i.e., initialized) */
1238: PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
1239: for (PetscCount k = jmap1[i]; k < jmap1[i + 1]; k++) sum += v[perm1[k]];
1240: a[i] = (imode == INSERT_VALUES ? 0.0 : a[i]) + sum;
1241: }
1242: PetscSFReduceEnd(vmpi->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE);
1244: /* Add the received remote entries to the owned part of the vector */
1245: for (PetscInt i = 0; i < nnz2; i++) {
1246: for (PetscCount k = jmap2[i]; k < jmap2[i + 1]; k++) a[imap2[i]] += recvbuf[perm2[k]];
1247: }
1249: VecRestoreArray(x, &a);
1250: return 0;
1251: }