Actual source code: math2opusutils.cu
1: #include <petsc/private/matimpl.h>
2: #include <petsc/private/vecimpl.h>
3: #include <petscsf.h>
4: #if defined(PETSC_HAVE_CUDA)
5: #include <thrust/for_each.h>
6: #include <thrust/device_vector.h>
7: #include <thrust/execution_policy.h>
8: #endif
10: PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
11: {
12: PetscSF rankssf;
13: const PetscSFNode *iremote;
14: PetscSFNode *viremote, *rremotes;
15: const PetscInt *ilocal;
16: PetscInt *vilocal = NULL, *ldrs;
17: const PetscMPIInt *ranks;
18: PetscMPIInt *sranks;
19: PetscInt nranks, nr, nl, vnr, vnl, i, v, j, maxl;
20: MPI_Comm comm;
25: if (nv == 1) {
26: PetscObjectReference((PetscObject)sf);
27: *vsf = sf;
28: return 0;
29: }
30: PetscObjectGetComm((PetscObject)sf, &comm);
31: PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote);
32: PetscSFGetLeafRange(sf, NULL, &maxl);
33: maxl += 1;
34: if (ldl == PETSC_DECIDE) ldl = maxl;
35: if (ldr == PETSC_DECIDE) ldr = nr;
38: vnr = nr * nv;
39: vnl = nl * nv;
40: PetscMalloc1(vnl, &viremote);
41: if (ilocal) PetscMalloc1(vnl, &vilocal);
43: /* TODO: Should this special SF be available, e.g.
44: PetscSFGetRanksSF or similar? */
45: PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL);
46: PetscMalloc1(nranks, &sranks);
47: PetscArraycpy(sranks, ranks, nranks);
48: PetscSortMPIInt(nranks, sranks);
49: PetscMalloc1(nranks, &rremotes);
50: for (i = 0; i < nranks; i++) {
51: rremotes[i].rank = sranks[i];
52: rremotes[i].index = 0;
53: }
54: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &rankssf);
55: PetscSFSetGraph(rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER);
56: PetscMalloc1(nranks, &ldrs);
57: PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE);
58: PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE);
59: PetscSFDestroy(&rankssf);
61: j = -1;
62: for (i = 0; i < nl; i++) {
63: const PetscInt r = iremote[i].rank;
64: const PetscInt ii = iremote[i].index;
66: if (j < 0 || sranks[j] != r) PetscFindMPIInt(r, nranks, sranks, &j);
68: for (v = 0; v < nv; v++) {
69: viremote[v * nl + i].rank = r;
70: viremote[v * nl + i].index = v * ldrs[j] + ii;
71: if (ilocal) vilocal[v * nl + i] = v * ldl + ilocal[i];
72: }
73: }
74: PetscFree(sranks);
75: PetscFree(ldrs);
76: PetscSFCreate(comm, vsf);
77: PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER);
78: return 0;
79: }
81: PETSC_INTERN PetscErrorCode MatDenseGetH2OpusVectorSF(Mat A, PetscSF h2sf, PetscSF *osf)
82: {
83: PetscSF asf;
88: PetscObjectQuery((PetscObject)A, "_math2opus_vectorsf", (PetscObject *)&asf);
89: if (!asf) {
90: PetscInt lda;
92: MatDenseGetLDA(A, &lda);
93: PetscSFGetVectorSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf);
94: PetscObjectCompose((PetscObject)A, "_math2opus_vectorsf", (PetscObject)asf);
95: PetscObjectDereference((PetscObject)asf);
96: }
97: *osf = asf;
98: return 0;
99: }
101: #if defined(PETSC_HAVE_CUDA)
102: struct SignVector_Functor {
103: const PetscScalar *v;
104: PetscScalar *s;
105: SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { }
107: __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); }
108: };
109: #endif
111: PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
112: {
113: const PetscScalar *av;
114: PetscScalar *as;
115: PetscInt i, n;
116: #if defined(PETSC_HAVE_CUDA)
117: PetscBool viscuda, siscuda;
118: #endif
122: VecGetLocalSize(s, &n);
123: VecGetLocalSize(v, &i);
125: #if defined(PETSC_HAVE_CUDA)
126: PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, "");
127: PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, "");
128: viscuda = (PetscBool)(viscuda && !v->boundtocpu);
129: siscuda = (PetscBool)(siscuda && !s->boundtocpu);
130: if (viscuda && siscuda) {
131: VecCUDAGetArrayRead(v, &av);
132: VecCUDAGetArrayWrite(s, &as);
133: SignVector_Functor sign_vector(av, as);
134: thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector);
135: VecCUDARestoreArrayWrite(s, &as);
136: VecCUDARestoreArrayRead(v, &av);
137: } else
138: #endif
139: {
140: VecGetArrayRead(v, &av);
141: VecGetArrayWrite(s, &as);
142: for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
143: VecRestoreArrayWrite(s, &as);
144: VecRestoreArrayRead(v, &av);
145: }
146: return 0;
147: }
149: #if defined(PETSC_HAVE_CUDA)
150: struct StandardBasis_Functor {
151: PetscScalar *v;
152: PetscInt j;
154: StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
155: __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
156: };
157: #endif
159: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
160: {
161: #if defined(PETSC_HAVE_CUDA)
162: PetscBool iscuda;
163: #endif
164: PetscInt st, en;
166: VecGetOwnershipRange(x, &st, &en);
167: #if defined(PETSC_HAVE_CUDA)
168: PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, "");
169: iscuda = (PetscBool)(iscuda && !x->boundtocpu);
170: if (iscuda) {
171: PetscScalar *ax;
172: VecCUDAGetArrayWrite(x, &ax);
173: StandardBasis_Functor delta(ax, i - st);
174: thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
175: VecCUDARestoreArrayWrite(x, &ax);
176: } else
177: #endif
178: {
179: VecSet(x, 0.);
180: if (st <= i && i < en) VecSetValue(x, i, 1.0, INSERT_VALUES);
181: VecAssemblyBegin(x);
182: VecAssemblyEnd(x);
183: }
184: return 0;
185: }
187: /* these are approximate norms */
188: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
189: NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
190: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
191: {
192: Vec x, y, w, z;
193: PetscReal normz, adot;
194: PetscScalar dot;
195: PetscInt i, j, N, jold = -1;
196: PetscBool boundtocpu = PETSC_TRUE;
198: #if defined(PETSC_HAVE_DEVICE)
199: boundtocpu = A->boundtocpu;
200: #endif
201: switch (normtype) {
202: case NORM_INFINITY:
203: case NORM_1:
204: if (normsamples < 0) normsamples = 10; /* pure guess */
205: if (normtype == NORM_INFINITY) {
206: Mat B;
207: MatCreateTranspose(A, &B);
208: A = B;
209: } else {
210: PetscObjectReference((PetscObject)A);
211: }
212: MatCreateVecs(A, &x, &y);
213: MatCreateVecs(A, &z, &w);
214: VecBindToCPU(x, boundtocpu);
215: VecBindToCPU(y, boundtocpu);
216: VecBindToCPU(z, boundtocpu);
217: VecBindToCPU(w, boundtocpu);
218: VecGetSize(x, &N);
219: VecSet(x, 1. / N);
220: *n = 0.0;
221: for (i = 0; i < normsamples; i++) {
222: MatMult(A, x, y);
223: VecSign(y, w);
224: MatMultTranspose(A, w, z);
225: VecNorm(z, NORM_INFINITY, &normz);
226: VecDot(x, z, &dot);
227: adot = PetscAbsScalar(dot);
228: PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot);
229: if (normz <= adot && i > 0) {
230: VecNorm(y, NORM_1, n);
231: break;
232: }
233: VecMax(z, &j, &normz);
234: if (j == jold) {
235: VecNorm(y, NORM_1, n);
236: PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i);
237: break;
238: }
239: jold = j;
240: VecSetDelta(x, j);
241: }
242: MatDestroy(&A);
243: VecDestroy(&x);
244: VecDestroy(&w);
245: VecDestroy(&y);
246: VecDestroy(&z);
247: break;
248: case NORM_2:
249: if (normsamples < 0) normsamples = 20; /* pure guess */
250: MatCreateVecs(A, &x, &y);
251: MatCreateVecs(A, &z, NULL);
252: VecBindToCPU(x, boundtocpu);
253: VecBindToCPU(y, boundtocpu);
254: VecBindToCPU(z, boundtocpu);
255: VecSetRandom(x, NULL);
256: VecNormalize(x, NULL);
257: *n = 0.0;
258: for (i = 0; i < normsamples; i++) {
259: MatMult(A, x, y);
260: VecNormalize(y, n);
261: MatMultTranspose(A, y, z);
262: VecNorm(z, NORM_2, &normz);
263: VecDot(x, z, &dot);
264: adot = PetscAbsScalar(dot);
265: PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot);
266: if (normz <= adot) break;
267: if (i < normsamples - 1) {
268: Vec t;
270: VecNormalize(z, NULL);
271: t = x;
272: x = z;
273: z = t;
274: }
275: }
276: VecDestroy(&x);
277: VecDestroy(&y);
278: VecDestroy(&z);
279: break;
280: default:
281: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
282: }
283: PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i);
284: return 0;
285: }