Actual source code: math2opusutils.cu

  1: #include <petsc/private/matimpl.h>
  2: #include <petsc/private/vecimpl.h>
  3: #include <petscsf.h>
  4: #if defined(PETSC_HAVE_CUDA)
  5:   #include <thrust/for_each.h>
  6:   #include <thrust/device_vector.h>
  7:   #include <thrust/execution_policy.h>
  8: #endif

 10: PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
 11: {
 12:   PetscSF            rankssf;
 13:   const PetscSFNode *iremote;
 14:   PetscSFNode       *viremote, *rremotes;
 15:   const PetscInt    *ilocal;
 16:   PetscInt          *vilocal = NULL, *ldrs;
 17:   const PetscMPIInt *ranks;
 18:   PetscMPIInt       *sranks;
 19:   PetscInt           nranks, nr, nl, vnr, vnl, i, v, j, maxl;
 20:   MPI_Comm           comm;

 25:   if (nv == 1) {
 26:     PetscObjectReference((PetscObject)sf);
 27:     *vsf = sf;
 28:     return 0;
 29:   }
 30:   PetscObjectGetComm((PetscObject)sf, &comm);
 31:   PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote);
 32:   PetscSFGetLeafRange(sf, NULL, &maxl);
 33:   maxl += 1;
 34:   if (ldl == PETSC_DECIDE) ldl = maxl;
 35:   if (ldr == PETSC_DECIDE) ldr = nr;
 38:   vnr = nr * nv;
 39:   vnl = nl * nv;
 40:   PetscMalloc1(vnl, &viremote);
 41:   if (ilocal) PetscMalloc1(vnl, &vilocal);

 43:   /* TODO: Should this special SF be available, e.g.
 44:      PetscSFGetRanksSF or similar? */
 45:   PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL);
 46:   PetscMalloc1(nranks, &sranks);
 47:   PetscArraycpy(sranks, ranks, nranks);
 48:   PetscSortMPIInt(nranks, sranks);
 49:   PetscMalloc1(nranks, &rremotes);
 50:   for (i = 0; i < nranks; i++) {
 51:     rremotes[i].rank  = sranks[i];
 52:     rremotes[i].index = 0;
 53:   }
 54:   PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &rankssf);
 55:   PetscSFSetGraph(rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER);
 56:   PetscMalloc1(nranks, &ldrs);
 57:   PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE);
 58:   PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE);
 59:   PetscSFDestroy(&rankssf);

 61:   j = -1;
 62:   for (i = 0; i < nl; i++) {
 63:     const PetscInt r  = iremote[i].rank;
 64:     const PetscInt ii = iremote[i].index;

 66:     if (j < 0 || sranks[j] != r) PetscFindMPIInt(r, nranks, sranks, &j);
 68:     for (v = 0; v < nv; v++) {
 69:       viremote[v * nl + i].rank  = r;
 70:       viremote[v * nl + i].index = v * ldrs[j] + ii;
 71:       if (ilocal) vilocal[v * nl + i] = v * ldl + ilocal[i];
 72:     }
 73:   }
 74:   PetscFree(sranks);
 75:   PetscFree(ldrs);
 76:   PetscSFCreate(comm, vsf);
 77:   PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER);
 78:   return 0;
 79: }

 81: PETSC_INTERN PetscErrorCode MatDenseGetH2OpusVectorSF(Mat A, PetscSF h2sf, PetscSF *osf)
 82: {
 83:   PetscSF asf;

 88:   PetscObjectQuery((PetscObject)A, "_math2opus_vectorsf", (PetscObject *)&asf);
 89:   if (!asf) {
 90:     PetscInt lda;

 92:     MatDenseGetLDA(A, &lda);
 93:     PetscSFGetVectorSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf);
 94:     PetscObjectCompose((PetscObject)A, "_math2opus_vectorsf", (PetscObject)asf);
 95:     PetscObjectDereference((PetscObject)asf);
 96:   }
 97:   *osf = asf;
 98:   return 0;
 99: }

101: #if defined(PETSC_HAVE_CUDA)
102: struct SignVector_Functor {
103:   const PetscScalar *v;
104:   PetscScalar       *s;
105:   SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { }

107:   __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); }
108: };
109: #endif

111: PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
112: {
113:   const PetscScalar *av;
114:   PetscScalar       *as;
115:   PetscInt           i, n;
116: #if defined(PETSC_HAVE_CUDA)
117:   PetscBool viscuda, siscuda;
118: #endif

122:   VecGetLocalSize(s, &n);
123:   VecGetLocalSize(v, &i);
125: #if defined(PETSC_HAVE_CUDA)
126:   PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, "");
127:   PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, "");
128:   viscuda = (PetscBool)(viscuda && !v->boundtocpu);
129:   siscuda = (PetscBool)(siscuda && !s->boundtocpu);
130:   if (viscuda && siscuda) {
131:     VecCUDAGetArrayRead(v, &av);
132:     VecCUDAGetArrayWrite(s, &as);
133:     SignVector_Functor sign_vector(av, as);
134:     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector);
135:     VecCUDARestoreArrayWrite(s, &as);
136:     VecCUDARestoreArrayRead(v, &av);
137:   } else
138: #endif
139:   {
140:     VecGetArrayRead(v, &av);
141:     VecGetArrayWrite(s, &as);
142:     for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
143:     VecRestoreArrayWrite(s, &as);
144:     VecRestoreArrayRead(v, &av);
145:   }
146:   return 0;
147: }

149: #if defined(PETSC_HAVE_CUDA)
150: struct StandardBasis_Functor {
151:   PetscScalar *v;
152:   PetscInt     j;

154:   StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
155:   __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
156: };
157: #endif

159: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
160: {
161: #if defined(PETSC_HAVE_CUDA)
162:   PetscBool iscuda;
163: #endif
164:   PetscInt st, en;

166:   VecGetOwnershipRange(x, &st, &en);
167: #if defined(PETSC_HAVE_CUDA)
168:   PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, "");
169:   iscuda = (PetscBool)(iscuda && !x->boundtocpu);
170:   if (iscuda) {
171:     PetscScalar *ax;
172:     VecCUDAGetArrayWrite(x, &ax);
173:     StandardBasis_Functor delta(ax, i - st);
174:     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
175:     VecCUDARestoreArrayWrite(x, &ax);
176:   } else
177: #endif
178:   {
179:     VecSet(x, 0.);
180:     if (st <= i && i < en) VecSetValue(x, i, 1.0, INSERT_VALUES);
181:     VecAssemblyBegin(x);
182:     VecAssemblyEnd(x);
183:   }
184:   return 0;
185: }

187: /* these are approximate norms */
188: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
189:    NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
190: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
191: {
192:   Vec         x, y, w, z;
193:   PetscReal   normz, adot;
194:   PetscScalar dot;
195:   PetscInt    i, j, N, jold = -1;
196:   PetscBool   boundtocpu = PETSC_TRUE;

198: #if defined(PETSC_HAVE_DEVICE)
199:   boundtocpu = A->boundtocpu;
200: #endif
201:   switch (normtype) {
202:   case NORM_INFINITY:
203:   case NORM_1:
204:     if (normsamples < 0) normsamples = 10; /* pure guess */
205:     if (normtype == NORM_INFINITY) {
206:       Mat B;
207:       MatCreateTranspose(A, &B);
208:       A = B;
209:     } else {
210:       PetscObjectReference((PetscObject)A);
211:     }
212:     MatCreateVecs(A, &x, &y);
213:     MatCreateVecs(A, &z, &w);
214:     VecBindToCPU(x, boundtocpu);
215:     VecBindToCPU(y, boundtocpu);
216:     VecBindToCPU(z, boundtocpu);
217:     VecBindToCPU(w, boundtocpu);
218:     VecGetSize(x, &N);
219:     VecSet(x, 1. / N);
220:     *n = 0.0;
221:     for (i = 0; i < normsamples; i++) {
222:       MatMult(A, x, y);
223:       VecSign(y, w);
224:       MatMultTranspose(A, w, z);
225:       VecNorm(z, NORM_INFINITY, &normz);
226:       VecDot(x, z, &dot);
227:       adot = PetscAbsScalar(dot);
228:       PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot);
229:       if (normz <= adot && i > 0) {
230:         VecNorm(y, NORM_1, n);
231:         break;
232:       }
233:       VecMax(z, &j, &normz);
234:       if (j == jold) {
235:         VecNorm(y, NORM_1, n);
236:         PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i);
237:         break;
238:       }
239:       jold = j;
240:       VecSetDelta(x, j);
241:     }
242:     MatDestroy(&A);
243:     VecDestroy(&x);
244:     VecDestroy(&w);
245:     VecDestroy(&y);
246:     VecDestroy(&z);
247:     break;
248:   case NORM_2:
249:     if (normsamples < 0) normsamples = 20; /* pure guess */
250:     MatCreateVecs(A, &x, &y);
251:     MatCreateVecs(A, &z, NULL);
252:     VecBindToCPU(x, boundtocpu);
253:     VecBindToCPU(y, boundtocpu);
254:     VecBindToCPU(z, boundtocpu);
255:     VecSetRandom(x, NULL);
256:     VecNormalize(x, NULL);
257:     *n = 0.0;
258:     for (i = 0; i < normsamples; i++) {
259:       MatMult(A, x, y);
260:       VecNormalize(y, n);
261:       MatMultTranspose(A, y, z);
262:       VecNorm(z, NORM_2, &normz);
263:       VecDot(x, z, &dot);
264:       adot = PetscAbsScalar(dot);
265:       PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot);
266:       if (normz <= adot) break;
267:       if (i < normsamples - 1) {
268:         Vec t;

270:         VecNormalize(z, NULL);
271:         t = x;
272:         x = z;
273:         z = t;
274:       }
275:     }
276:     VecDestroy(&x);
277:     VecDestroy(&y);
278:     VecDestroy(&z);
279:     break;
280:   default:
281:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
282:   }
283:   PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i);
284:   return 0;
285: }