Actual source code: dvec2.c
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petsc/private/kernels/petscaxpy.h>
9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
11: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
12: {
13: PetscInt i, nv_rem, n = xin->map->n;
14: PetscScalar sum0, sum1, sum2, sum3;
15: const PetscScalar *yy0, *yy1, *yy2, *yy3, *x;
16: Vec *yy;
18: sum0 = 0.0;
19: sum1 = 0.0;
20: sum2 = 0.0;
22: i = nv;
23: nv_rem = nv & 0x3;
24: yy = (Vec *)yin;
25: VecGetArrayRead(xin, &x);
27: switch (nv_rem) {
28: case 3:
29: VecGetArrayRead(yy[0], &yy0);
30: VecGetArrayRead(yy[1], &yy1);
31: VecGetArrayRead(yy[2], &yy2);
32: fortranmdot3_(x, yy0, yy1, yy2, &n, &sum0, &sum1, &sum2);
33: VecRestoreArrayRead(yy[0], &yy0);
34: VecRestoreArrayRead(yy[1], &yy1);
35: VecRestoreArrayRead(yy[2], &yy2);
36: z[0] = sum0;
37: z[1] = sum1;
38: z[2] = sum2;
39: break;
40: case 2:
41: VecGetArrayRead(yy[0], &yy0);
42: VecGetArrayRead(yy[1], &yy1);
43: fortranmdot2_(x, yy0, yy1, &n, &sum0, &sum1);
44: VecRestoreArrayRead(yy[0], &yy0);
45: VecRestoreArrayRead(yy[1], &yy1);
46: z[0] = sum0;
47: z[1] = sum1;
48: break;
49: case 1:
50: VecGetArrayRead(yy[0], &yy0);
51: fortranmdot1_(x, yy0, &n, &sum0);
52: VecRestoreArrayRead(yy[0], &yy0);
53: z[0] = sum0;
54: break;
55: case 0:
56: break;
57: }
58: z += nv_rem;
59: i -= nv_rem;
60: yy += nv_rem;
62: while (i > 0) {
63: sum0 = 0.;
64: sum1 = 0.;
65: sum2 = 0.;
66: sum3 = 0.;
67: VecGetArrayRead(yy[0], &yy0);
68: VecGetArrayRead(yy[1], &yy1);
69: VecGetArrayRead(yy[2], &yy2);
70: VecGetArrayRead(yy[3], &yy3);
71: fortranmdot4_(x, yy0, yy1, yy2, yy3, &n, &sum0, &sum1, &sum2, &sum3);
72: VecRestoreArrayRead(yy[0], &yy0);
73: VecRestoreArrayRead(yy[1], &yy1);
74: VecRestoreArrayRead(yy[2], &yy2);
75: VecRestoreArrayRead(yy[3], &yy3);
76: yy += 4;
77: z[0] = sum0;
78: z[1] = sum1;
79: z[2] = sum2;
80: z[3] = sum3;
81: z += 4;
82: i -= 4;
83: }
84: VecRestoreArrayRead(xin, &x);
85: PetscLogFlops(PetscMax(nv * (2.0 * xin->map->n - 1), 0.0));
86: return 0;
87: }
89: #else
90: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
91: {
92: PetscInt n = xin->map->n, i, j, nv_rem, j_rem;
93: PetscScalar sum0, sum1, sum2, sum3, x0, x1, x2, x3;
94: const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
95: Vec *yy;
97: sum0 = 0.;
98: sum1 = 0.;
99: sum2 = 0.;
101: i = nv;
102: nv_rem = nv & 0x3;
103: yy = (Vec *)yin;
104: j = n;
105: VecGetArrayRead(xin, &xbase);
106: x = xbase;
108: switch (nv_rem) {
109: case 3:
110: VecGetArrayRead(yy[0], &yy0);
111: VecGetArrayRead(yy[1], &yy1);
112: VecGetArrayRead(yy[2], &yy2);
113: switch (j_rem = j & 0x3) {
114: case 3:
115: x2 = x[2];
116: sum0 += x2 * PetscConj(yy0[2]);
117: sum1 += x2 * PetscConj(yy1[2]);
118: sum2 += x2 * PetscConj(yy2[2]); /* fall through */
119: case 2:
120: x1 = x[1];
121: sum0 += x1 * PetscConj(yy0[1]);
122: sum1 += x1 * PetscConj(yy1[1]);
123: sum2 += x1 * PetscConj(yy2[1]); /* fall through */
124: case 1:
125: x0 = x[0];
126: sum0 += x0 * PetscConj(yy0[0]);
127: sum1 += x0 * PetscConj(yy1[0]);
128: sum2 += x0 * PetscConj(yy2[0]); /* fall through */
129: case 0:
130: x += j_rem;
131: yy0 += j_rem;
132: yy1 += j_rem;
133: yy2 += j_rem;
134: j -= j_rem;
135: break;
136: }
137: while (j > 0) {
138: x0 = x[0];
139: x1 = x[1];
140: x2 = x[2];
141: x3 = x[3];
142: x += 4;
144: sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
145: yy0 += 4;
146: sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
147: yy1 += 4;
148: sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
149: yy2 += 4;
150: j -= 4;
151: }
152: z[0] = sum0;
153: z[1] = sum1;
154: z[2] = sum2;
155: VecRestoreArrayRead(yy[0], &yy0);
156: VecRestoreArrayRead(yy[1], &yy1);
157: VecRestoreArrayRead(yy[2], &yy2);
158: break;
159: case 2:
160: VecGetArrayRead(yy[0], &yy0);
161: VecGetArrayRead(yy[1], &yy1);
162: switch (j_rem = j & 0x3) {
163: case 3:
164: x2 = x[2];
165: sum0 += x2 * PetscConj(yy0[2]);
166: sum1 += x2 * PetscConj(yy1[2]); /* fall through */
167: case 2:
168: x1 = x[1];
169: sum0 += x1 * PetscConj(yy0[1]);
170: sum1 += x1 * PetscConj(yy1[1]); /* fall through */
171: case 1:
172: x0 = x[0];
173: sum0 += x0 * PetscConj(yy0[0]);
174: sum1 += x0 * PetscConj(yy1[0]); /* fall through */
175: case 0:
176: x += j_rem;
177: yy0 += j_rem;
178: yy1 += j_rem;
179: j -= j_rem;
180: break;
181: }
182: while (j > 0) {
183: x0 = x[0];
184: x1 = x[1];
185: x2 = x[2];
186: x3 = x[3];
187: x += 4;
189: sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
190: yy0 += 4;
191: sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
192: yy1 += 4;
193: j -= 4;
194: }
195: z[0] = sum0;
196: z[1] = sum1;
198: VecRestoreArrayRead(yy[0], &yy0);
199: VecRestoreArrayRead(yy[1], &yy1);
200: break;
201: case 1:
202: VecGetArrayRead(yy[0], &yy0);
203: switch (j_rem = j & 0x3) {
204: case 3:
205: x2 = x[2];
206: sum0 += x2 * PetscConj(yy0[2]); /* fall through */
207: case 2:
208: x1 = x[1];
209: sum0 += x1 * PetscConj(yy0[1]); /* fall through */
210: case 1:
211: x0 = x[0];
212: sum0 += x0 * PetscConj(yy0[0]); /* fall through */
213: case 0:
214: x += j_rem;
215: yy0 += j_rem;
216: j -= j_rem;
217: break;
218: }
219: while (j > 0) {
220: sum0 += x[0] * PetscConj(yy0[0]) + x[1] * PetscConj(yy0[1]) + x[2] * PetscConj(yy0[2]) + x[3] * PetscConj(yy0[3]);
221: yy0 += 4;
222: j -= 4;
223: x += 4;
224: }
225: z[0] = sum0;
227: VecRestoreArrayRead(yy[0], &yy0);
228: break;
229: case 0:
230: break;
231: }
232: z += nv_rem;
233: i -= nv_rem;
234: yy += nv_rem;
236: while (i > 0) {
237: sum0 = 0.;
238: sum1 = 0.;
239: sum2 = 0.;
240: sum3 = 0.;
241: VecGetArrayRead(yy[0], &yy0);
242: VecGetArrayRead(yy[1], &yy1);
243: VecGetArrayRead(yy[2], &yy2);
244: VecGetArrayRead(yy[3], &yy3);
246: j = n;
247: x = xbase;
248: switch (j_rem = j & 0x3) {
249: case 3:
250: x2 = x[2];
251: sum0 += x2 * PetscConj(yy0[2]);
252: sum1 += x2 * PetscConj(yy1[2]);
253: sum2 += x2 * PetscConj(yy2[2]);
254: sum3 += x2 * PetscConj(yy3[2]); /* fall through */
255: case 2:
256: x1 = x[1];
257: sum0 += x1 * PetscConj(yy0[1]);
258: sum1 += x1 * PetscConj(yy1[1]);
259: sum2 += x1 * PetscConj(yy2[1]);
260: sum3 += x1 * PetscConj(yy3[1]); /* fall through */
261: case 1:
262: x0 = x[0];
263: sum0 += x0 * PetscConj(yy0[0]);
264: sum1 += x0 * PetscConj(yy1[0]);
265: sum2 += x0 * PetscConj(yy2[0]);
266: sum3 += x0 * PetscConj(yy3[0]); /* fall through */
267: case 0:
268: x += j_rem;
269: yy0 += j_rem;
270: yy1 += j_rem;
271: yy2 += j_rem;
272: yy3 += j_rem;
273: j -= j_rem;
274: break;
275: }
276: while (j > 0) {
277: x0 = x[0];
278: x1 = x[1];
279: x2 = x[2];
280: x3 = x[3];
281: x += 4;
283: sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
284: yy0 += 4;
285: sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
286: yy1 += 4;
287: sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
288: yy2 += 4;
289: sum3 += x0 * PetscConj(yy3[0]) + x1 * PetscConj(yy3[1]) + x2 * PetscConj(yy3[2]) + x3 * PetscConj(yy3[3]);
290: yy3 += 4;
291: j -= 4;
292: }
293: z[0] = sum0;
294: z[1] = sum1;
295: z[2] = sum2;
296: z[3] = sum3;
297: z += 4;
298: i -= 4;
299: VecRestoreArrayRead(yy[0], &yy0);
300: VecRestoreArrayRead(yy[1], &yy1);
301: VecRestoreArrayRead(yy[2], &yy2);
302: VecRestoreArrayRead(yy[3], &yy3);
303: yy += 4;
304: }
305: VecRestoreArrayRead(xin, &xbase);
306: PetscLogFlops(PetscMax(nv * (2.0 * xin->map->n - 1), 0.0));
307: return 0;
308: }
309: #endif
311: /* ----------------------------------------------------------------------------*/
312: PetscErrorCode VecMTDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
313: {
314: PetscInt n = xin->map->n, i, j, nv_rem, j_rem;
315: PetscScalar sum0, sum1, sum2, sum3, x0, x1, x2, x3;
316: const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
317: Vec *yy;
319: sum0 = 0.;
320: sum1 = 0.;
321: sum2 = 0.;
323: i = nv;
324: nv_rem = nv & 0x3;
325: yy = (Vec *)yin;
326: j = n;
327: VecGetArrayRead(xin, &xbase);
328: x = xbase;
330: switch (nv_rem) {
331: case 3:
332: VecGetArrayRead(yy[0], &yy0);
333: VecGetArrayRead(yy[1], &yy1);
334: VecGetArrayRead(yy[2], &yy2);
335: switch (j_rem = j & 0x3) {
336: case 3:
337: x2 = x[2];
338: sum0 += x2 * yy0[2];
339: sum1 += x2 * yy1[2];
340: sum2 += x2 * yy2[2]; /* fall through */
341: case 2:
342: x1 = x[1];
343: sum0 += x1 * yy0[1];
344: sum1 += x1 * yy1[1];
345: sum2 += x1 * yy2[1]; /* fall through */
346: case 1:
347: x0 = x[0];
348: sum0 += x0 * yy0[0];
349: sum1 += x0 * yy1[0];
350: sum2 += x0 * yy2[0]; /* fall through */
351: case 0:
352: x += j_rem;
353: yy0 += j_rem;
354: yy1 += j_rem;
355: yy2 += j_rem;
356: j -= j_rem;
357: break;
358: }
359: while (j > 0) {
360: x0 = x[0];
361: x1 = x[1];
362: x2 = x[2];
363: x3 = x[3];
364: x += 4;
366: sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
367: yy0 += 4;
368: sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
369: yy1 += 4;
370: sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
371: yy2 += 4;
372: j -= 4;
373: }
374: z[0] = sum0;
375: z[1] = sum1;
376: z[2] = sum2;
377: VecRestoreArrayRead(yy[0], &yy0);
378: VecRestoreArrayRead(yy[1], &yy1);
379: VecRestoreArrayRead(yy[2], &yy2);
380: break;
381: case 2:
382: VecGetArrayRead(yy[0], &yy0);
383: VecGetArrayRead(yy[1], &yy1);
384: switch (j_rem = j & 0x3) {
385: case 3:
386: x2 = x[2];
387: sum0 += x2 * yy0[2];
388: sum1 += x2 * yy1[2]; /* fall through */
389: case 2:
390: x1 = x[1];
391: sum0 += x1 * yy0[1];
392: sum1 += x1 * yy1[1]; /* fall through */
393: case 1:
394: x0 = x[0];
395: sum0 += x0 * yy0[0];
396: sum1 += x0 * yy1[0]; /* fall through */
397: case 0:
398: x += j_rem;
399: yy0 += j_rem;
400: yy1 += j_rem;
401: j -= j_rem;
402: break;
403: }
404: while (j > 0) {
405: x0 = x[0];
406: x1 = x[1];
407: x2 = x[2];
408: x3 = x[3];
409: x += 4;
411: sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
412: yy0 += 4;
413: sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
414: yy1 += 4;
415: j -= 4;
416: }
417: z[0] = sum0;
418: z[1] = sum1;
420: VecRestoreArrayRead(yy[0], &yy0);
421: VecRestoreArrayRead(yy[1], &yy1);
422: break;
423: case 1:
424: VecGetArrayRead(yy[0], &yy0);
425: switch (j_rem = j & 0x3) {
426: case 3:
427: x2 = x[2];
428: sum0 += x2 * yy0[2]; /* fall through */
429: case 2:
430: x1 = x[1];
431: sum0 += x1 * yy0[1]; /* fall through */
432: case 1:
433: x0 = x[0];
434: sum0 += x0 * yy0[0]; /* fall through */
435: case 0:
436: x += j_rem;
437: yy0 += j_rem;
438: j -= j_rem;
439: break;
440: }
441: while (j > 0) {
442: sum0 += x[0] * yy0[0] + x[1] * yy0[1] + x[2] * yy0[2] + x[3] * yy0[3];
443: yy0 += 4;
444: j -= 4;
445: x += 4;
446: }
447: z[0] = sum0;
449: VecRestoreArrayRead(yy[0], &yy0);
450: break;
451: case 0:
452: break;
453: }
454: z += nv_rem;
455: i -= nv_rem;
456: yy += nv_rem;
458: while (i > 0) {
459: sum0 = 0.;
460: sum1 = 0.;
461: sum2 = 0.;
462: sum3 = 0.;
463: VecGetArrayRead(yy[0], &yy0);
464: VecGetArrayRead(yy[1], &yy1);
465: VecGetArrayRead(yy[2], &yy2);
466: VecGetArrayRead(yy[3], &yy3);
467: x = xbase;
469: j = n;
470: switch (j_rem = j & 0x3) {
471: case 3:
472: x2 = x[2];
473: sum0 += x2 * yy0[2];
474: sum1 += x2 * yy1[2];
475: sum2 += x2 * yy2[2];
476: sum3 += x2 * yy3[2]; /* fall through */
477: case 2:
478: x1 = x[1];
479: sum0 += x1 * yy0[1];
480: sum1 += x1 * yy1[1];
481: sum2 += x1 * yy2[1];
482: sum3 += x1 * yy3[1]; /* fall through */
483: case 1:
484: x0 = x[0];
485: sum0 += x0 * yy0[0];
486: sum1 += x0 * yy1[0];
487: sum2 += x0 * yy2[0];
488: sum3 += x0 * yy3[0]; /* fall through */
489: case 0:
490: x += j_rem;
491: yy0 += j_rem;
492: yy1 += j_rem;
493: yy2 += j_rem;
494: yy3 += j_rem;
495: j -= j_rem;
496: break;
497: }
498: while (j > 0) {
499: x0 = x[0];
500: x1 = x[1];
501: x2 = x[2];
502: x3 = x[3];
503: x += 4;
505: sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
506: yy0 += 4;
507: sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
508: yy1 += 4;
509: sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
510: yy2 += 4;
511: sum3 += x0 * yy3[0] + x1 * yy3[1] + x2 * yy3[2] + x3 * yy3[3];
512: yy3 += 4;
513: j -= 4;
514: }
515: z[0] = sum0;
516: z[1] = sum1;
517: z[2] = sum2;
518: z[3] = sum3;
519: z += 4;
520: i -= 4;
521: VecRestoreArrayRead(yy[0], &yy0);
522: VecRestoreArrayRead(yy[1], &yy1);
523: VecRestoreArrayRead(yy[2], &yy2);
524: VecRestoreArrayRead(yy[3], &yy3);
525: yy += 4;
526: }
527: VecRestoreArrayRead(xin, &xbase);
528: PetscLogFlops(PetscMax(nv * (2.0 * xin->map->n - 1), 0.0));
529: return 0;
530: }
532: PetscErrorCode VecMax_Seq(Vec xin, PetscInt *idx, PetscReal *z)
533: {
534: PetscInt i, j = 0, n = xin->map->n;
535: PetscReal max, tmp;
536: const PetscScalar *xx;
538: VecGetArrayRead(xin, &xx);
539: if (!n) {
540: max = PETSC_MIN_REAL;
541: j = -1;
542: } else {
543: max = PetscRealPart(*xx++);
544: j = 0;
545: for (i = 1; i < n; i++) {
546: if ((tmp = PetscRealPart(*xx++)) > max) {
547: j = i;
548: max = tmp;
549: }
550: }
551: }
552: *z = max;
553: if (idx) *idx = j;
554: VecRestoreArrayRead(xin, &xx);
555: return 0;
556: }
558: PetscErrorCode VecMin_Seq(Vec xin, PetscInt *idx, PetscReal *z)
559: {
560: PetscInt i, j = 0, n = xin->map->n;
561: PetscReal min, tmp;
562: const PetscScalar *xx;
564: VecGetArrayRead(xin, &xx);
565: if (!n) {
566: min = PETSC_MAX_REAL;
567: j = -1;
568: } else {
569: min = PetscRealPart(*xx++);
570: j = 0;
571: for (i = 1; i < n; i++) {
572: if ((tmp = PetscRealPart(*xx++)) < min) {
573: j = i;
574: min = tmp;
575: }
576: }
577: }
578: *z = min;
579: if (idx) *idx = j;
580: VecRestoreArrayRead(xin, &xx);
581: return 0;
582: }
584: PetscErrorCode VecSet_Seq(Vec xin, PetscScalar alpha)
585: {
586: PetscInt i, n = xin->map->n;
587: PetscScalar *xx;
589: VecGetArrayWrite(xin, &xx);
590: if (alpha == (PetscScalar)0.0) {
591: PetscArrayzero(xx, n);
592: } else {
593: for (i = 0; i < n; i++) xx[i] = alpha;
594: }
595: VecRestoreArrayWrite(xin, &xx);
596: return 0;
597: }
599: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
600: {
601: PetscInt n = xin->map->n, j, j_rem;
602: const PetscScalar *yy0, *yy1, *yy2, *yy3;
603: PetscScalar *xx, alpha0, alpha1, alpha2, alpha3;
605: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
606: #pragma disjoint(*xx, *yy0, *yy1, *yy2, *yy3, *alpha)
607: #endif
609: PetscLogFlops(nv * 2.0 * n);
610: VecGetArray(xin, &xx);
611: switch (j_rem = nv & 0x3) {
612: case 3:
613: VecGetArrayRead(y[0], &yy0);
614: VecGetArrayRead(y[1], &yy1);
615: VecGetArrayRead(y[2], &yy2);
616: alpha0 = alpha[0];
617: alpha1 = alpha[1];
618: alpha2 = alpha[2];
619: alpha += 3;
620: PetscKernelAXPY3(xx, alpha0, alpha1, alpha2, yy0, yy1, yy2, n);
621: VecRestoreArrayRead(y[0], &yy0);
622: VecRestoreArrayRead(y[1], &yy1);
623: VecRestoreArrayRead(y[2], &yy2);
624: y += 3;
625: break;
626: case 2:
627: VecGetArrayRead(y[0], &yy0);
628: VecGetArrayRead(y[1], &yy1);
629: alpha0 = alpha[0];
630: alpha1 = alpha[1];
631: alpha += 2;
632: PetscKernelAXPY2(xx, alpha0, alpha1, yy0, yy1, n);
633: VecRestoreArrayRead(y[0], &yy0);
634: VecRestoreArrayRead(y[1], &yy1);
635: y += 2;
636: break;
637: case 1:
638: VecGetArrayRead(y[0], &yy0);
639: alpha0 = *alpha++;
640: PetscKernelAXPY(xx, alpha0, yy0, n);
641: VecRestoreArrayRead(y[0], &yy0);
642: y += 1;
643: break;
644: }
645: for (j = j_rem; j < nv; j += 4) {
646: VecGetArrayRead(y[0], &yy0);
647: VecGetArrayRead(y[1], &yy1);
648: VecGetArrayRead(y[2], &yy2);
649: VecGetArrayRead(y[3], &yy3);
650: alpha0 = alpha[0];
651: alpha1 = alpha[1];
652: alpha2 = alpha[2];
653: alpha3 = alpha[3];
654: alpha += 4;
656: PetscKernelAXPY4(xx, alpha0, alpha1, alpha2, alpha3, yy0, yy1, yy2, yy3, n);
657: VecRestoreArrayRead(y[0], &yy0);
658: VecRestoreArrayRead(y[1], &yy1);
659: VecRestoreArrayRead(y[2], &yy2);
660: VecRestoreArrayRead(y[3], &yy3);
661: y += 4;
662: }
663: VecRestoreArray(xin, &xx);
664: return 0;
665: }
667: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
669: PetscErrorCode VecAYPX_Seq(Vec yin, PetscScalar alpha, Vec xin)
670: {
671: PetscInt n = yin->map->n;
672: PetscScalar *yy;
673: const PetscScalar *xx;
675: if (alpha == (PetscScalar)0.0) {
676: VecCopy(xin, yin);
677: } else if (alpha == (PetscScalar)1.0) {
678: VecAXPY_Seq(yin, alpha, xin);
679: } else if (alpha == (PetscScalar)-1.0) {
680: PetscInt i;
681: VecGetArrayRead(xin, &xx);
682: VecGetArray(yin, &yy);
684: for (i = 0; i < n; i++) yy[i] = xx[i] - yy[i];
686: VecRestoreArrayRead(xin, &xx);
687: VecRestoreArray(yin, &yy);
688: PetscLogFlops(1.0 * n);
689: } else {
690: VecGetArrayRead(xin, &xx);
691: VecGetArray(yin, &yy);
692: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
693: {
694: PetscScalar oalpha = alpha;
695: fortranaypx_(&n, &oalpha, xx, yy);
696: }
697: #else
698: {
699: PetscInt i;
701: for (i = 0; i < n; i++) yy[i] = xx[i] + alpha * yy[i];
702: }
703: #endif
704: VecRestoreArrayRead(xin, &xx);
705: VecRestoreArray(yin, &yy);
706: PetscLogFlops(2.0 * n);
707: }
708: return 0;
709: }
711: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
712: /*
713: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
714: to be slower than a regular C loop. Hence,we do not include it.
715: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
716: */
718: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha, Vec xin, Vec yin)
719: {
720: PetscInt i, n = win->map->n;
721: PetscScalar *ww;
722: const PetscScalar *yy, *xx;
724: VecGetArrayRead(xin, &xx);
725: VecGetArrayRead(yin, &yy);
726: VecGetArray(win, &ww);
727: if (alpha == (PetscScalar)1.0) {
728: PetscLogFlops(n);
729: /* could call BLAS axpy after call to memcopy, but may be slower */
730: for (i = 0; i < n; i++) ww[i] = yy[i] + xx[i];
731: } else if (alpha == (PetscScalar)-1.0) {
732: PetscLogFlops(n);
733: for (i = 0; i < n; i++) ww[i] = yy[i] - xx[i];
734: } else if (alpha == (PetscScalar)0.0) {
735: PetscArraycpy(ww, yy, n);
736: } else {
737: PetscScalar oalpha = alpha;
738: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
739: fortranwaxpy_(&n, &oalpha, xx, yy, ww);
740: #else
741: for (i = 0; i < n; i++) ww[i] = yy[i] + oalpha * xx[i];
742: #endif
743: PetscLogFlops(2.0 * n);
744: }
745: VecRestoreArrayRead(xin, &xx);
746: VecRestoreArrayRead(yin, &yy);
747: VecRestoreArray(win, &ww);
748: return 0;
749: }
751: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin, Vec yin, PetscReal *max)
752: {
753: PetscInt n = xin->map->n, i;
754: const PetscScalar *xx, *yy;
755: PetscReal m = 0.0;
757: VecGetArrayRead(xin, &xx);
758: VecGetArrayRead(yin, &yy);
759: for (i = 0; i < n; i++) {
760: if (yy[i] != (PetscScalar)0.0) {
761: m = PetscMax(PetscAbsScalar(xx[i] / yy[i]), m);
762: } else {
763: m = PetscMax(PetscAbsScalar(xx[i]), m);
764: }
765: }
766: VecRestoreArrayRead(xin, &xx);
767: VecRestoreArrayRead(yin, &yy);
768: MPIU_Allreduce(&m, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)xin));
769: PetscLogFlops(n);
770: return 0;
771: }
773: PetscErrorCode VecPlaceArray_Seq(Vec vin, const PetscScalar *a)
774: {
775: Vec_Seq *v = (Vec_Seq *)vin->data;
778: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
779: v->array = (PetscScalar *)a;
780: return 0;
781: }
783: PetscErrorCode VecReplaceArray_Seq(Vec vin, const PetscScalar *a)
784: {
785: Vec_Seq *v = (Vec_Seq *)vin->data;
787: PetscFree(v->array_allocated);
788: v->array_allocated = v->array = (PetscScalar *)a;
789: return 0;
790: }