Actual source code: bvec1.c
2: /*
3: Defines the BLAS based vector operations. Code shared by parallel
4: and sequential vectors.
5: */
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <petscblaslapack.h>
10: PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
11: {
12: const PetscScalar *ya, *xa;
13: PetscBLASInt one = 1, bn = 0;
15: PetscBLASIntCast(xin->map->n, &bn);
16: VecGetArrayRead(xin, &xa);
17: VecGetArrayRead(yin, &ya);
18: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
19: PetscCallBLAS("BLASdot", *z = BLASdot_(&bn, ya, &one, xa, &one));
20: VecRestoreArrayRead(xin, &xa);
21: VecRestoreArrayRead(yin, &ya);
22: if (xin->map->n > 0) PetscLogFlops(2.0 * xin->map->n - 1);
23: return 0;
24: }
26: PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
27: {
28: const PetscScalar *ya, *xa;
29: PetscBLASInt one = 1, bn = 0;
31: PetscBLASIntCast(xin->map->n, &bn);
32: VecGetArrayRead(xin, &xa);
33: VecGetArrayRead(yin, &ya);
34: PetscCallBLAS("BLASdot", *z = BLASdotu_(&bn, xa, &one, ya, &one));
35: VecRestoreArrayRead(xin, &xa);
36: VecRestoreArrayRead(yin, &ya);
37: if (xin->map->n > 0) PetscLogFlops(2.0 * xin->map->n - 1);
38: return 0;
39: }
41: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
42: {
43: PetscBLASInt one = 1, bn;
45: PetscBLASIntCast(xin->map->n, &bn);
46: if (alpha == (PetscScalar)0.0) {
47: VecSet_Seq(xin, alpha);
48: } else if (alpha != (PetscScalar)1.0) {
49: PetscScalar a = alpha, *xarray;
50: VecGetArray(xin, &xarray);
51: PetscCallBLAS("BLASscal", BLASscal_(&bn, &a, xarray, &one));
52: VecRestoreArray(xin, &xarray);
53: }
54: PetscLogFlops(xin->map->n);
55: return 0;
56: }
58: PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
59: {
60: const PetscScalar *xarray;
61: PetscScalar *yarray;
62: PetscBLASInt one = 1, bn;
64: PetscBLASIntCast(yin->map->n, &bn);
65: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
66: if (alpha != (PetscScalar)0.0) {
67: VecGetArrayRead(xin, &xarray);
68: VecGetArray(yin, &yarray);
69: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
70: VecRestoreArrayRead(xin, &xarray);
71: VecRestoreArray(yin, &yarray);
72: PetscLogFlops(2.0 * yin->map->n);
73: }
74: return 0;
75: }
77: PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
78: {
79: PetscInt n = yin->map->n, i;
80: const PetscScalar *xx;
81: PetscScalar *yy;
83: if (a == (PetscScalar)0.0) {
84: VecScale_Seq(yin, b);
85: } else if (b == (PetscScalar)1.0) {
86: VecAXPY_Seq(yin, a, xin);
87: } else if (a == (PetscScalar)1.0) {
88: VecAYPX_Seq(yin, b, xin);
89: } else if (b == (PetscScalar)0.0) {
90: VecGetArrayRead(xin, &xx);
91: VecGetArray(yin, (PetscScalar **)&yy);
92: for (i = 0; i < n; i++) yy[i] = a * xx[i];
93: VecRestoreArrayRead(xin, &xx);
94: VecRestoreArray(yin, (PetscScalar **)&yy);
95: PetscLogFlops(xin->map->n);
96: } else {
97: VecGetArrayRead(xin, &xx);
98: VecGetArray(yin, (PetscScalar **)&yy);
99: for (i = 0; i < n; i++) yy[i] = a * xx[i] + b * yy[i];
100: VecRestoreArrayRead(xin, &xx);
101: VecRestoreArray(yin, (PetscScalar **)&yy);
102: PetscLogFlops(3.0 * xin->map->n);
103: }
104: return 0;
105: }
107: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
108: {
109: PetscInt n = zin->map->n, i;
110: const PetscScalar *yy, *xx;
111: PetscScalar *zz;
113: VecGetArrayRead(xin, &xx);
114: VecGetArrayRead(yin, &yy);
115: VecGetArray(zin, &zz);
116: if (alpha == (PetscScalar)1.0) {
117: for (i = 0; i < n; i++) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
118: PetscLogFlops(4.0 * n);
119: } else if (gamma == (PetscScalar)1.0) {
120: for (i = 0; i < n; i++) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
121: PetscLogFlops(4.0 * n);
122: } else if (gamma == (PetscScalar)0.0) {
123: for (i = 0; i < n; i++) zz[i] = alpha * xx[i] + beta * yy[i];
124: PetscLogFlops(3.0 * n);
125: } else {
126: for (i = 0; i < n; i++) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
127: PetscLogFlops(5.0 * n);
128: }
129: VecRestoreArrayRead(xin, &xx);
130: VecRestoreArrayRead(yin, &yy);
131: VecRestoreArray(zin, &zz);
132: return 0;
133: }