Actual source code: cufft.cu


  2: /*
  3:     Provides an interface to the CUFFT package.
  4:     Testing examples can be found in ~src/mat/tests
  5: */

  7: #include <petscdevice_cuda.h>
  8: #include <petsc/private/matimpl.h>

 10: typedef struct {
 11:   PetscInt      ndim;
 12:   PetscInt     *dim;
 13:   cufftHandle   p_forward, p_backward;
 14:   cufftComplex *devArray;
 15: } Mat_CUFFT;

 17: PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
 18: {
 19:   Mat_CUFFT    *cufft    = (Mat_CUFFT *)A->data;
 20:   cufftComplex *devArray = cufft->devArray;
 21:   PetscInt      ndim = cufft->ndim, *dim = cufft->dim;
 22:   PetscScalar  *x_array, *y_array;

 24:   VecGetArray(x, &x_array);
 25:   VecGetArray(y, &y_array);
 26:   if (!cufft->p_forward) {
 27:     /* create a plan, then execute it */
 28:     switch (ndim) {
 29:     case 1:
 30:       cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);
 31:       break;
 32:     case 2:
 33:       cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);
 34:       break;
 35:     case 3:
 36:       cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);
 37:       break;
 38:     default:
 39:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
 40:     }
 41:   }
 42:   /* transfer to GPU memory */
 43:   cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice);
 44:   /* execute transform */
 45:   cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);
 46:   /* transfer from GPU memory */
 47:   cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost);
 48:   VecRestoreArray(y, &y_array);
 49:   VecRestoreArray(x, &x_array);
 50:   return 0;
 51: }

 53: PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
 54: {
 55:   Mat_CUFFT    *cufft    = (Mat_CUFFT *)A->data;
 56:   cufftComplex *devArray = cufft->devArray;
 57:   PetscInt      ndim = cufft->ndim, *dim = cufft->dim;
 58:   PetscScalar  *x_array, *y_array;

 60:   VecGetArray(x, &x_array);
 61:   VecGetArray(y, &y_array);
 62:   if (!cufft->p_backward) {
 63:     /* create a plan, then execute it */
 64:     switch (ndim) {
 65:     case 1:
 66:       cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);
 67:       break;
 68:     case 2:
 69:       cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);
 70:       break;
 71:     case 3:
 72:       cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);
 73:       break;
 74:     default:
 75:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
 76:     }
 77:   }
 78:   /* transfer to GPU memory */
 79:   cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice);
 80:   /* execute transform */
 81:   cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);
 82:   /* transfer from GPU memory */
 83:   cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost);
 84:   VecRestoreArray(y, &y_array);
 85:   VecRestoreArray(x, &x_array);
 86:   return 0;
 87: }

 89: PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
 90: {
 91:   Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;

 93:   PetscFree(cufft->dim);
 94:   if (cufft->p_forward) cufftDestroy(cufft->p_forward);
 95:   if (cufft->p_backward) cufftDestroy(cufft->p_backward);
 96:   cudaFree(cufft->devArray);
 97:   PetscFree(A->data);
 98:   PetscObjectChangeTypeName((PetscObject)A, 0);
 99:   return 0;
100: }

102: /*@
103:   MatCreateSeqCUFFT - Creates a matrix object that provides `MATSEQCUFFT` via the NVIDIA package CuFFT

105:   Collective

107:   Input Parameters:
108: + comm - MPI communicator, set to `PETSC_COMM_SELF`
109: . ndim - the ndim-dimensional transform
110: - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension

112:   Output Parameter:
113: . A - the matrix

115:   Options Database Keys:
116: . -mat_cufft_plannerflags - set CUFFT planner flags

118:   Level: intermediate

120: .seealso: `MATSEQCUFFT`
121: @*/
122: PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
123: {
124:   Mat_CUFFT *cufft;
125:   PetscInt   m = 1;

130:   MatCreate(comm, A);
131:   for (PetscInt d = 0; d < ndim; ++d) {
133:     m *= dim[d];
134:   }
135:   MatSetSizes(*A, m, m, m, m);
136:   PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);

138:   PetscNew(&cufft);
139:   (*A)->data = (void *)cufft;
140:   PetscMalloc1(ndim + 1, &cufft->dim);
141:   PetscArraycpy(cufft->dim, dim, ndim);

143:   cufft->ndim       = ndim;
144:   cufft->p_forward  = 0;
145:   cufft->p_backward = 0;
146:   cufft->dim[ndim]  = m;

148:   /* GPU memory allocation */
149:   cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m);

151:   (*A)->ops->mult          = MatMult_SeqCUFFT;
152:   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
153:   (*A)->assembled          = PETSC_TRUE;
154:   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;

156:   /* get runtime options ...what options????? */
157:   PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
158:   PetscOptionsEnd();
159:   return 0;
160: }