Actual source code: dvec2.c


  2: /*
  3:    Defines some vector operation functions that are shared by
  4:    sequential and parallel vectors.
  5: */
  6: #include <../src/vec/vec/impls/dvecimpl.h>
  7: #include <petsc/private/kernels/petscaxpy.h>

  9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 11: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
 12: {
 13:   PetscInt           i, nv_rem, n = xin->map->n;
 14:   PetscScalar        sum0, sum1, sum2, sum3;
 15:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x;
 16:   Vec               *yy;

 18:   sum0 = 0.0;
 19:   sum1 = 0.0;
 20:   sum2 = 0.0;

 22:   i      = nv;
 23:   nv_rem = nv & 0x3;
 24:   yy     = (Vec *)yin;
 25:   VecGetArrayRead(xin, &x);

 27:   switch (nv_rem) {
 28:   case 3:
 29:     VecGetArrayRead(yy[0], &yy0);
 30:     VecGetArrayRead(yy[1], &yy1);
 31:     VecGetArrayRead(yy[2], &yy2);
 32:     fortranmdot3_(x, yy0, yy1, yy2, &n, &sum0, &sum1, &sum2);
 33:     VecRestoreArrayRead(yy[0], &yy0);
 34:     VecRestoreArrayRead(yy[1], &yy1);
 35:     VecRestoreArrayRead(yy[2], &yy2);
 36:     z[0] = sum0;
 37:     z[1] = sum1;
 38:     z[2] = sum2;
 39:     break;
 40:   case 2:
 41:     VecGetArrayRead(yy[0], &yy0);
 42:     VecGetArrayRead(yy[1], &yy1);
 43:     fortranmdot2_(x, yy0, yy1, &n, &sum0, &sum1);
 44:     VecRestoreArrayRead(yy[0], &yy0);
 45:     VecRestoreArrayRead(yy[1], &yy1);
 46:     z[0] = sum0;
 47:     z[1] = sum1;
 48:     break;
 49:   case 1:
 50:     VecGetArrayRead(yy[0], &yy0);
 51:     fortranmdot1_(x, yy0, &n, &sum0);
 52:     VecRestoreArrayRead(yy[0], &yy0);
 53:     z[0] = sum0;
 54:     break;
 55:   case 0:
 56:     break;
 57:   }
 58:   z += nv_rem;
 59:   i -= nv_rem;
 60:   yy += nv_rem;

 62:   while (i > 0) {
 63:     sum0 = 0.;
 64:     sum1 = 0.;
 65:     sum2 = 0.;
 66:     sum3 = 0.;
 67:     VecGetArrayRead(yy[0], &yy0);
 68:     VecGetArrayRead(yy[1], &yy1);
 69:     VecGetArrayRead(yy[2], &yy2);
 70:     VecGetArrayRead(yy[3], &yy3);
 71:     fortranmdot4_(x, yy0, yy1, yy2, yy3, &n, &sum0, &sum1, &sum2, &sum3);
 72:     VecRestoreArrayRead(yy[0], &yy0);
 73:     VecRestoreArrayRead(yy[1], &yy1);
 74:     VecRestoreArrayRead(yy[2], &yy2);
 75:     VecRestoreArrayRead(yy[3], &yy3);
 76:     yy += 4;
 77:     z[0] = sum0;
 78:     z[1] = sum1;
 79:     z[2] = sum2;
 80:     z[3] = sum3;
 81:     z += 4;
 82:     i -= 4;
 83:   }
 84:   VecRestoreArrayRead(xin, &x);
 85:   PetscLogFlops(PetscMax(nv * (2.0 * xin->map->n - 1), 0.0));
 86:   return 0;
 87: }

 89: #else
 90: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
 91: {
 92:   PetscInt           n = xin->map->n, i, j, nv_rem, j_rem;
 93:   PetscScalar        sum0, sum1, sum2, sum3, x0, x1, x2, x3;
 94:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
 95:   Vec               *yy;

 97:   sum0 = 0.;
 98:   sum1 = 0.;
 99:   sum2 = 0.;

101:   i      = nv;
102:   nv_rem = nv & 0x3;
103:   yy     = (Vec *)yin;
104:   j      = n;
105:   VecGetArrayRead(xin, &xbase);
106:   x = xbase;

108:   switch (nv_rem) {
109:   case 3:
110:     VecGetArrayRead(yy[0], &yy0);
111:     VecGetArrayRead(yy[1], &yy1);
112:     VecGetArrayRead(yy[2], &yy2);
113:     switch (j_rem = j & 0x3) {
114:     case 3:
115:       x2 = x[2];
116:       sum0 += x2 * PetscConj(yy0[2]);
117:       sum1 += x2 * PetscConj(yy1[2]);
118:       sum2 += x2 * PetscConj(yy2[2]); /* fall through */
119:     case 2:
120:       x1 = x[1];
121:       sum0 += x1 * PetscConj(yy0[1]);
122:       sum1 += x1 * PetscConj(yy1[1]);
123:       sum2 += x1 * PetscConj(yy2[1]); /* fall through */
124:     case 1:
125:       x0 = x[0];
126:       sum0 += x0 * PetscConj(yy0[0]);
127:       sum1 += x0 * PetscConj(yy1[0]);
128:       sum2 += x0 * PetscConj(yy2[0]); /* fall through */
129:     case 0:
130:       x += j_rem;
131:       yy0 += j_rem;
132:       yy1 += j_rem;
133:       yy2 += j_rem;
134:       j -= j_rem;
135:       break;
136:     }
137:     while (j > 0) {
138:       x0 = x[0];
139:       x1 = x[1];
140:       x2 = x[2];
141:       x3 = x[3];
142:       x += 4;

144:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
145:       yy0 += 4;
146:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
147:       yy1 += 4;
148:       sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
149:       yy2 += 4;
150:       j -= 4;
151:     }
152:     z[0] = sum0;
153:     z[1] = sum1;
154:     z[2] = sum2;
155:     VecRestoreArrayRead(yy[0], &yy0);
156:     VecRestoreArrayRead(yy[1], &yy1);
157:     VecRestoreArrayRead(yy[2], &yy2);
158:     break;
159:   case 2:
160:     VecGetArrayRead(yy[0], &yy0);
161:     VecGetArrayRead(yy[1], &yy1);
162:     switch (j_rem = j & 0x3) {
163:     case 3:
164:       x2 = x[2];
165:       sum0 += x2 * PetscConj(yy0[2]);
166:       sum1 += x2 * PetscConj(yy1[2]); /* fall through */
167:     case 2:
168:       x1 = x[1];
169:       sum0 += x1 * PetscConj(yy0[1]);
170:       sum1 += x1 * PetscConj(yy1[1]); /* fall through */
171:     case 1:
172:       x0 = x[0];
173:       sum0 += x0 * PetscConj(yy0[0]);
174:       sum1 += x0 * PetscConj(yy1[0]); /* fall through */
175:     case 0:
176:       x += j_rem;
177:       yy0 += j_rem;
178:       yy1 += j_rem;
179:       j -= j_rem;
180:       break;
181:     }
182:     while (j > 0) {
183:       x0 = x[0];
184:       x1 = x[1];
185:       x2 = x[2];
186:       x3 = x[3];
187:       x += 4;

189:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
190:       yy0 += 4;
191:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
192:       yy1 += 4;
193:       j -= 4;
194:     }
195:     z[0] = sum0;
196:     z[1] = sum1;

198:     VecRestoreArrayRead(yy[0], &yy0);
199:     VecRestoreArrayRead(yy[1], &yy1);
200:     break;
201:   case 1:
202:     VecGetArrayRead(yy[0], &yy0);
203:     switch (j_rem = j & 0x3) {
204:     case 3:
205:       x2 = x[2];
206:       sum0 += x2 * PetscConj(yy0[2]); /* fall through */
207:     case 2:
208:       x1 = x[1];
209:       sum0 += x1 * PetscConj(yy0[1]); /* fall through */
210:     case 1:
211:       x0 = x[0];
212:       sum0 += x0 * PetscConj(yy0[0]); /* fall through */
213:     case 0:
214:       x += j_rem;
215:       yy0 += j_rem;
216:       j -= j_rem;
217:       break;
218:     }
219:     while (j > 0) {
220:       sum0 += x[0] * PetscConj(yy0[0]) + x[1] * PetscConj(yy0[1]) + x[2] * PetscConj(yy0[2]) + x[3] * PetscConj(yy0[3]);
221:       yy0 += 4;
222:       j -= 4;
223:       x += 4;
224:     }
225:     z[0] = sum0;

227:     VecRestoreArrayRead(yy[0], &yy0);
228:     break;
229:   case 0:
230:     break;
231:   }
232:   z += nv_rem;
233:   i -= nv_rem;
234:   yy += nv_rem;

236:   while (i > 0) {
237:     sum0 = 0.;
238:     sum1 = 0.;
239:     sum2 = 0.;
240:     sum3 = 0.;
241:     VecGetArrayRead(yy[0], &yy0);
242:     VecGetArrayRead(yy[1], &yy1);
243:     VecGetArrayRead(yy[2], &yy2);
244:     VecGetArrayRead(yy[3], &yy3);

246:     j = n;
247:     x = xbase;
248:     switch (j_rem = j & 0x3) {
249:     case 3:
250:       x2 = x[2];
251:       sum0 += x2 * PetscConj(yy0[2]);
252:       sum1 += x2 * PetscConj(yy1[2]);
253:       sum2 += x2 * PetscConj(yy2[2]);
254:       sum3 += x2 * PetscConj(yy3[2]); /* fall through */
255:     case 2:
256:       x1 = x[1];
257:       sum0 += x1 * PetscConj(yy0[1]);
258:       sum1 += x1 * PetscConj(yy1[1]);
259:       sum2 += x1 * PetscConj(yy2[1]);
260:       sum3 += x1 * PetscConj(yy3[1]); /* fall through */
261:     case 1:
262:       x0 = x[0];
263:       sum0 += x0 * PetscConj(yy0[0]);
264:       sum1 += x0 * PetscConj(yy1[0]);
265:       sum2 += x0 * PetscConj(yy2[0]);
266:       sum3 += x0 * PetscConj(yy3[0]); /* fall through */
267:     case 0:
268:       x += j_rem;
269:       yy0 += j_rem;
270:       yy1 += j_rem;
271:       yy2 += j_rem;
272:       yy3 += j_rem;
273:       j -= j_rem;
274:       break;
275:     }
276:     while (j > 0) {
277:       x0 = x[0];
278:       x1 = x[1];
279:       x2 = x[2];
280:       x3 = x[3];
281:       x += 4;

283:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
284:       yy0 += 4;
285:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
286:       yy1 += 4;
287:       sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
288:       yy2 += 4;
289:       sum3 += x0 * PetscConj(yy3[0]) + x1 * PetscConj(yy3[1]) + x2 * PetscConj(yy3[2]) + x3 * PetscConj(yy3[3]);
290:       yy3 += 4;
291:       j -= 4;
292:     }
293:     z[0] = sum0;
294:     z[1] = sum1;
295:     z[2] = sum2;
296:     z[3] = sum3;
297:     z += 4;
298:     i -= 4;
299:     VecRestoreArrayRead(yy[0], &yy0);
300:     VecRestoreArrayRead(yy[1], &yy1);
301:     VecRestoreArrayRead(yy[2], &yy2);
302:     VecRestoreArrayRead(yy[3], &yy3);
303:     yy += 4;
304:   }
305:   VecRestoreArrayRead(xin, &xbase);
306:   PetscLogFlops(PetscMax(nv * (2.0 * xin->map->n - 1), 0.0));
307:   return 0;
308: }
309: #endif

311: /* ----------------------------------------------------------------------------*/
312: PetscErrorCode VecMTDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
313: {
314:   PetscInt           n = xin->map->n, i, j, nv_rem, j_rem;
315:   PetscScalar        sum0, sum1, sum2, sum3, x0, x1, x2, x3;
316:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
317:   Vec               *yy;

319:   sum0 = 0.;
320:   sum1 = 0.;
321:   sum2 = 0.;

323:   i      = nv;
324:   nv_rem = nv & 0x3;
325:   yy     = (Vec *)yin;
326:   j      = n;
327:   VecGetArrayRead(xin, &xbase);
328:   x = xbase;

330:   switch (nv_rem) {
331:   case 3:
332:     VecGetArrayRead(yy[0], &yy0);
333:     VecGetArrayRead(yy[1], &yy1);
334:     VecGetArrayRead(yy[2], &yy2);
335:     switch (j_rem = j & 0x3) {
336:     case 3:
337:       x2 = x[2];
338:       sum0 += x2 * yy0[2];
339:       sum1 += x2 * yy1[2];
340:       sum2 += x2 * yy2[2]; /* fall through */
341:     case 2:
342:       x1 = x[1];
343:       sum0 += x1 * yy0[1];
344:       sum1 += x1 * yy1[1];
345:       sum2 += x1 * yy2[1]; /* fall through */
346:     case 1:
347:       x0 = x[0];
348:       sum0 += x0 * yy0[0];
349:       sum1 += x0 * yy1[0];
350:       sum2 += x0 * yy2[0]; /* fall through */
351:     case 0:
352:       x += j_rem;
353:       yy0 += j_rem;
354:       yy1 += j_rem;
355:       yy2 += j_rem;
356:       j -= j_rem;
357:       break;
358:     }
359:     while (j > 0) {
360:       x0 = x[0];
361:       x1 = x[1];
362:       x2 = x[2];
363:       x3 = x[3];
364:       x += 4;

366:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
367:       yy0 += 4;
368:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
369:       yy1 += 4;
370:       sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
371:       yy2 += 4;
372:       j -= 4;
373:     }
374:     z[0] = sum0;
375:     z[1] = sum1;
376:     z[2] = sum2;
377:     VecRestoreArrayRead(yy[0], &yy0);
378:     VecRestoreArrayRead(yy[1], &yy1);
379:     VecRestoreArrayRead(yy[2], &yy2);
380:     break;
381:   case 2:
382:     VecGetArrayRead(yy[0], &yy0);
383:     VecGetArrayRead(yy[1], &yy1);
384:     switch (j_rem = j & 0x3) {
385:     case 3:
386:       x2 = x[2];
387:       sum0 += x2 * yy0[2];
388:       sum1 += x2 * yy1[2]; /* fall through */
389:     case 2:
390:       x1 = x[1];
391:       sum0 += x1 * yy0[1];
392:       sum1 += x1 * yy1[1]; /* fall through */
393:     case 1:
394:       x0 = x[0];
395:       sum0 += x0 * yy0[0];
396:       sum1 += x0 * yy1[0]; /* fall through */
397:     case 0:
398:       x += j_rem;
399:       yy0 += j_rem;
400:       yy1 += j_rem;
401:       j -= j_rem;
402:       break;
403:     }
404:     while (j > 0) {
405:       x0 = x[0];
406:       x1 = x[1];
407:       x2 = x[2];
408:       x3 = x[3];
409:       x += 4;

411:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
412:       yy0 += 4;
413:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
414:       yy1 += 4;
415:       j -= 4;
416:     }
417:     z[0] = sum0;
418:     z[1] = sum1;

420:     VecRestoreArrayRead(yy[0], &yy0);
421:     VecRestoreArrayRead(yy[1], &yy1);
422:     break;
423:   case 1:
424:     VecGetArrayRead(yy[0], &yy0);
425:     switch (j_rem = j & 0x3) {
426:     case 3:
427:       x2 = x[2];
428:       sum0 += x2 * yy0[2]; /* fall through */
429:     case 2:
430:       x1 = x[1];
431:       sum0 += x1 * yy0[1]; /* fall through */
432:     case 1:
433:       x0 = x[0];
434:       sum0 += x0 * yy0[0]; /* fall through */
435:     case 0:
436:       x += j_rem;
437:       yy0 += j_rem;
438:       j -= j_rem;
439:       break;
440:     }
441:     while (j > 0) {
442:       sum0 += x[0] * yy0[0] + x[1] * yy0[1] + x[2] * yy0[2] + x[3] * yy0[3];
443:       yy0 += 4;
444:       j -= 4;
445:       x += 4;
446:     }
447:     z[0] = sum0;

449:     VecRestoreArrayRead(yy[0], &yy0);
450:     break;
451:   case 0:
452:     break;
453:   }
454:   z += nv_rem;
455:   i -= nv_rem;
456:   yy += nv_rem;

458:   while (i > 0) {
459:     sum0 = 0.;
460:     sum1 = 0.;
461:     sum2 = 0.;
462:     sum3 = 0.;
463:     VecGetArrayRead(yy[0], &yy0);
464:     VecGetArrayRead(yy[1], &yy1);
465:     VecGetArrayRead(yy[2], &yy2);
466:     VecGetArrayRead(yy[3], &yy3);
467:     x = xbase;

469:     j = n;
470:     switch (j_rem = j & 0x3) {
471:     case 3:
472:       x2 = x[2];
473:       sum0 += x2 * yy0[2];
474:       sum1 += x2 * yy1[2];
475:       sum2 += x2 * yy2[2];
476:       sum3 += x2 * yy3[2]; /* fall through */
477:     case 2:
478:       x1 = x[1];
479:       sum0 += x1 * yy0[1];
480:       sum1 += x1 * yy1[1];
481:       sum2 += x1 * yy2[1];
482:       sum3 += x1 * yy3[1]; /* fall through */
483:     case 1:
484:       x0 = x[0];
485:       sum0 += x0 * yy0[0];
486:       sum1 += x0 * yy1[0];
487:       sum2 += x0 * yy2[0];
488:       sum3 += x0 * yy3[0]; /* fall through */
489:     case 0:
490:       x += j_rem;
491:       yy0 += j_rem;
492:       yy1 += j_rem;
493:       yy2 += j_rem;
494:       yy3 += j_rem;
495:       j -= j_rem;
496:       break;
497:     }
498:     while (j > 0) {
499:       x0 = x[0];
500:       x1 = x[1];
501:       x2 = x[2];
502:       x3 = x[3];
503:       x += 4;

505:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
506:       yy0 += 4;
507:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
508:       yy1 += 4;
509:       sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
510:       yy2 += 4;
511:       sum3 += x0 * yy3[0] + x1 * yy3[1] + x2 * yy3[2] + x3 * yy3[3];
512:       yy3 += 4;
513:       j -= 4;
514:     }
515:     z[0] = sum0;
516:     z[1] = sum1;
517:     z[2] = sum2;
518:     z[3] = sum3;
519:     z += 4;
520:     i -= 4;
521:     VecRestoreArrayRead(yy[0], &yy0);
522:     VecRestoreArrayRead(yy[1], &yy1);
523:     VecRestoreArrayRead(yy[2], &yy2);
524:     VecRestoreArrayRead(yy[3], &yy3);
525:     yy += 4;
526:   }
527:   VecRestoreArrayRead(xin, &xbase);
528:   PetscLogFlops(PetscMax(nv * (2.0 * xin->map->n - 1), 0.0));
529:   return 0;
530: }

532: PetscErrorCode VecMax_Seq(Vec xin, PetscInt *idx, PetscReal *z)
533: {
534:   PetscInt           i, j = 0, n = xin->map->n;
535:   PetscReal          max, tmp;
536:   const PetscScalar *xx;

538:   VecGetArrayRead(xin, &xx);
539:   if (!n) {
540:     max = PETSC_MIN_REAL;
541:     j   = -1;
542:   } else {
543:     max = PetscRealPart(*xx++);
544:     j   = 0;
545:     for (i = 1; i < n; i++) {
546:       if ((tmp = PetscRealPart(*xx++)) > max) {
547:         j   = i;
548:         max = tmp;
549:       }
550:     }
551:   }
552:   *z = max;
553:   if (idx) *idx = j;
554:   VecRestoreArrayRead(xin, &xx);
555:   return 0;
556: }

558: PetscErrorCode VecMin_Seq(Vec xin, PetscInt *idx, PetscReal *z)
559: {
560:   PetscInt           i, j = 0, n = xin->map->n;
561:   PetscReal          min, tmp;
562:   const PetscScalar *xx;

564:   VecGetArrayRead(xin, &xx);
565:   if (!n) {
566:     min = PETSC_MAX_REAL;
567:     j   = -1;
568:   } else {
569:     min = PetscRealPart(*xx++);
570:     j   = 0;
571:     for (i = 1; i < n; i++) {
572:       if ((tmp = PetscRealPart(*xx++)) < min) {
573:         j   = i;
574:         min = tmp;
575:       }
576:     }
577:   }
578:   *z = min;
579:   if (idx) *idx = j;
580:   VecRestoreArrayRead(xin, &xx);
581:   return 0;
582: }

584: PetscErrorCode VecSet_Seq(Vec xin, PetscScalar alpha)
585: {
586:   PetscInt     i, n = xin->map->n;
587:   PetscScalar *xx;

589:   VecGetArrayWrite(xin, &xx);
590:   if (alpha == (PetscScalar)0.0) {
591:     PetscArrayzero(xx, n);
592:   } else {
593:     for (i = 0; i < n; i++) xx[i] = alpha;
594:   }
595:   VecRestoreArrayWrite(xin, &xx);
596:   return 0;
597: }

599: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
600: {
601:   PetscInt           n = xin->map->n, j, j_rem;
602:   const PetscScalar *yy0, *yy1, *yy2, *yy3;
603:   PetscScalar       *xx, alpha0, alpha1, alpha2, alpha3;

605: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
606:   #pragma disjoint(*xx, *yy0, *yy1, *yy2, *yy3, *alpha)
607: #endif

609:   PetscLogFlops(nv * 2.0 * n);
610:   VecGetArray(xin, &xx);
611:   switch (j_rem = nv & 0x3) {
612:   case 3:
613:     VecGetArrayRead(y[0], &yy0);
614:     VecGetArrayRead(y[1], &yy1);
615:     VecGetArrayRead(y[2], &yy2);
616:     alpha0 = alpha[0];
617:     alpha1 = alpha[1];
618:     alpha2 = alpha[2];
619:     alpha += 3;
620:     PetscKernelAXPY3(xx, alpha0, alpha1, alpha2, yy0, yy1, yy2, n);
621:     VecRestoreArrayRead(y[0], &yy0);
622:     VecRestoreArrayRead(y[1], &yy1);
623:     VecRestoreArrayRead(y[2], &yy2);
624:     y += 3;
625:     break;
626:   case 2:
627:     VecGetArrayRead(y[0], &yy0);
628:     VecGetArrayRead(y[1], &yy1);
629:     alpha0 = alpha[0];
630:     alpha1 = alpha[1];
631:     alpha += 2;
632:     PetscKernelAXPY2(xx, alpha0, alpha1, yy0, yy1, n);
633:     VecRestoreArrayRead(y[0], &yy0);
634:     VecRestoreArrayRead(y[1], &yy1);
635:     y += 2;
636:     break;
637:   case 1:
638:     VecGetArrayRead(y[0], &yy0);
639:     alpha0 = *alpha++;
640:     PetscKernelAXPY(xx, alpha0, yy0, n);
641:     VecRestoreArrayRead(y[0], &yy0);
642:     y += 1;
643:     break;
644:   }
645:   for (j = j_rem; j < nv; j += 4) {
646:     VecGetArrayRead(y[0], &yy0);
647:     VecGetArrayRead(y[1], &yy1);
648:     VecGetArrayRead(y[2], &yy2);
649:     VecGetArrayRead(y[3], &yy3);
650:     alpha0 = alpha[0];
651:     alpha1 = alpha[1];
652:     alpha2 = alpha[2];
653:     alpha3 = alpha[3];
654:     alpha += 4;

656:     PetscKernelAXPY4(xx, alpha0, alpha1, alpha2, alpha3, yy0, yy1, yy2, yy3, n);
657:     VecRestoreArrayRead(y[0], &yy0);
658:     VecRestoreArrayRead(y[1], &yy1);
659:     VecRestoreArrayRead(y[2], &yy2);
660:     VecRestoreArrayRead(y[3], &yy3);
661:     y += 4;
662:   }
663:   VecRestoreArray(xin, &xx);
664:   return 0;
665: }

667: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>

669: PetscErrorCode VecAYPX_Seq(Vec yin, PetscScalar alpha, Vec xin)
670: {
671:   PetscInt           n = yin->map->n;
672:   PetscScalar       *yy;
673:   const PetscScalar *xx;

675:   if (alpha == (PetscScalar)0.0) {
676:     VecCopy(xin, yin);
677:   } else if (alpha == (PetscScalar)1.0) {
678:     VecAXPY_Seq(yin, alpha, xin);
679:   } else if (alpha == (PetscScalar)-1.0) {
680:     PetscInt i;
681:     VecGetArrayRead(xin, &xx);
682:     VecGetArray(yin, &yy);

684:     for (i = 0; i < n; i++) yy[i] = xx[i] - yy[i];

686:     VecRestoreArrayRead(xin, &xx);
687:     VecRestoreArray(yin, &yy);
688:     PetscLogFlops(1.0 * n);
689:   } else {
690:     VecGetArrayRead(xin, &xx);
691:     VecGetArray(yin, &yy);
692: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
693:     {
694:       PetscScalar oalpha = alpha;
695:       fortranaypx_(&n, &oalpha, xx, yy);
696:     }
697: #else
698:     {
699:       PetscInt i;

701:       for (i = 0; i < n; i++) yy[i] = xx[i] + alpha * yy[i];
702:     }
703: #endif
704:     VecRestoreArrayRead(xin, &xx);
705:     VecRestoreArray(yin, &yy);
706:     PetscLogFlops(2.0 * n);
707:   }
708:   return 0;
709: }

711: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
712: /*
713:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
714:    to be slower than a regular C loop.  Hence,we do not include it.
715:    void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
716: */

718: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha, Vec xin, Vec yin)
719: {
720:   PetscInt           i, n = win->map->n;
721:   PetscScalar       *ww;
722:   const PetscScalar *yy, *xx;

724:   VecGetArrayRead(xin, &xx);
725:   VecGetArrayRead(yin, &yy);
726:   VecGetArray(win, &ww);
727:   if (alpha == (PetscScalar)1.0) {
728:     PetscLogFlops(n);
729:     /* could call BLAS axpy after call to memcopy, but may be slower */
730:     for (i = 0; i < n; i++) ww[i] = yy[i] + xx[i];
731:   } else if (alpha == (PetscScalar)-1.0) {
732:     PetscLogFlops(n);
733:     for (i = 0; i < n; i++) ww[i] = yy[i] - xx[i];
734:   } else if (alpha == (PetscScalar)0.0) {
735:     PetscArraycpy(ww, yy, n);
736:   } else {
737:     PetscScalar oalpha = alpha;
738: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
739:     fortranwaxpy_(&n, &oalpha, xx, yy, ww);
740: #else
741:     for (i = 0; i < n; i++) ww[i] = yy[i] + oalpha * xx[i];
742: #endif
743:     PetscLogFlops(2.0 * n);
744:   }
745:   VecRestoreArrayRead(xin, &xx);
746:   VecRestoreArrayRead(yin, &yy);
747:   VecRestoreArray(win, &ww);
748:   return 0;
749: }

751: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin, Vec yin, PetscReal *max)
752: {
753:   PetscInt           n = xin->map->n, i;
754:   const PetscScalar *xx, *yy;
755:   PetscReal          m = 0.0;

757:   VecGetArrayRead(xin, &xx);
758:   VecGetArrayRead(yin, &yy);
759:   for (i = 0; i < n; i++) {
760:     if (yy[i] != (PetscScalar)0.0) {
761:       m = PetscMax(PetscAbsScalar(xx[i] / yy[i]), m);
762:     } else {
763:       m = PetscMax(PetscAbsScalar(xx[i]), m);
764:     }
765:   }
766:   VecRestoreArrayRead(xin, &xx);
767:   VecRestoreArrayRead(yin, &yy);
768:   MPIU_Allreduce(&m, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)xin));
769:   PetscLogFlops(n);
770:   return 0;
771: }

773: PetscErrorCode VecPlaceArray_Seq(Vec vin, const PetscScalar *a)
774: {
775:   Vec_Seq *v = (Vec_Seq *)vin->data;

778:   v->unplacedarray = v->array; /* save previous array so reset can bring it back */
779:   v->array         = (PetscScalar *)a;
780:   return 0;
781: }

783: PetscErrorCode VecReplaceArray_Seq(Vec vin, const PetscScalar *a)
784: {
785:   Vec_Seq *v = (Vec_Seq *)vin->data;

787:   PetscFree(v->array_allocated);
788:   v->array_allocated = v->array = (PetscScalar *)a;
789:   return 0;
790: }