Actual source code: cufft.cu
2: /*
3: Provides an interface to the CUFFT package.
4: Testing examples can be found in ~src/mat/tests
5: */
7: #include <petscdevice_cuda.h>
8: #include <petsc/private/matimpl.h>
10: typedef struct {
11: PetscInt ndim;
12: PetscInt *dim;
13: cufftHandle p_forward, p_backward;
14: cufftComplex *devArray;
15: } Mat_CUFFT;
17: PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
18: {
19: Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
20: cufftComplex *devArray = cufft->devArray;
21: PetscInt ndim = cufft->ndim, *dim = cufft->dim;
22: PetscScalar *x_array, *y_array;
24: VecGetArray(x, &x_array);
25: VecGetArray(y, &y_array);
26: if (!cufft->p_forward) {
27: /* create a plan, then execute it */
28: switch (ndim) {
29: case 1:
30: cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);
31: break;
32: case 2:
33: cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);
34: break;
35: case 3:
36: cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);
37: break;
38: default:
39: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
40: }
41: }
42: /* transfer to GPU memory */
43: cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice);
44: /* execute transform */
45: cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);
46: /* transfer from GPU memory */
47: cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost);
48: VecRestoreArray(y, &y_array);
49: VecRestoreArray(x, &x_array);
50: return 0;
51: }
53: PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
54: {
55: Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
56: cufftComplex *devArray = cufft->devArray;
57: PetscInt ndim = cufft->ndim, *dim = cufft->dim;
58: PetscScalar *x_array, *y_array;
60: VecGetArray(x, &x_array);
61: VecGetArray(y, &y_array);
62: if (!cufft->p_backward) {
63: /* create a plan, then execute it */
64: switch (ndim) {
65: case 1:
66: cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);
67: break;
68: case 2:
69: cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);
70: break;
71: case 3:
72: cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);
73: break;
74: default:
75: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
76: }
77: }
78: /* transfer to GPU memory */
79: cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice);
80: /* execute transform */
81: cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);
82: /* transfer from GPU memory */
83: cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost);
84: VecRestoreArray(y, &y_array);
85: VecRestoreArray(x, &x_array);
86: return 0;
87: }
89: PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
90: {
91: Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
93: PetscFree(cufft->dim);
94: if (cufft->p_forward) cufftDestroy(cufft->p_forward);
95: if (cufft->p_backward) cufftDestroy(cufft->p_backward);
96: cudaFree(cufft->devArray);
97: PetscFree(A->data);
98: PetscObjectChangeTypeName((PetscObject)A, 0);
99: return 0;
100: }
102: /*@
103: MatCreateSeqCUFFT - Creates a matrix object that provides `MATSEQCUFFT` via the NVIDIA package CuFFT
105: Collective
107: Input Parameters:
108: + comm - MPI communicator, set to `PETSC_COMM_SELF`
109: . ndim - the ndim-dimensional transform
110: - dim - array of size ndim, dim[i] contains the vector length in the i-dimension
112: Output Parameter:
113: . A - the matrix
115: Options Database Keys:
116: . -mat_cufft_plannerflags - set CUFFT planner flags
118: Level: intermediate
120: .seealso: `MATSEQCUFFT`
121: @*/
122: PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
123: {
124: Mat_CUFFT *cufft;
125: PetscInt m = 1;
130: MatCreate(comm, A);
131: for (PetscInt d = 0; d < ndim; ++d) {
133: m *= dim[d];
134: }
135: MatSetSizes(*A, m, m, m, m);
136: PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);
138: PetscNew(&cufft);
139: (*A)->data = (void *)cufft;
140: PetscMalloc1(ndim + 1, &cufft->dim);
141: PetscArraycpy(cufft->dim, dim, ndim);
143: cufft->ndim = ndim;
144: cufft->p_forward = 0;
145: cufft->p_backward = 0;
146: cufft->dim[ndim] = m;
148: /* GPU memory allocation */
149: cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m);
151: (*A)->ops->mult = MatMult_SeqCUFFT;
152: (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
153: (*A)->assembled = PETSC_TRUE;
154: (*A)->ops->destroy = MatDestroy_SeqCUFFT;
156: /* get runtime options ...what options????? */
157: PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
158: PetscOptionsEnd();
159: return 0;
160: }