Difficulty with CUFFT lib. HELP! cufft Inverse is not giving the supposed results
I am trying to use the cufft library. I am creating a 2D signal, applying a 2D fft on the 2D signal and then applying fft inverse to get back the original signal. The problem is that the output is nowhere near the original signal. Can someone guide me and tell me if I am doing something wrong please.

Here is the code:

int
main( int argc, char** argv)
{
int NX = 17, NY = 17;
cufftHandle plan;
cufftComplex *idata, *odata, *tdata;
cudaMalloc((void**)&idata, sizeof(cufftComplex)*NX*NY);
cudaMalloc((void**)&odata, sizeof(cufftComplex)*NX*NY);
cudaMalloc((void**)&tdata, sizeof(cufftComplex)*NX*NY);

cufftComplex* Input = (cufftComplex*)malloc(NY * NX * sizeof(cufftComplex));

for(int y = 0; y < NY; y++)
for (int x = 0; x < NX; x++)
{
Input[(y * NX) + x].x = 255;
Input[(y * NX) + x].y = 0;
}

cudaMemcpy(idata, Input, (NY * NX * sizeof(cufftComplex)), cudaMemcpyHostToDevice);
/* Create a 2D FFT plan. */
cufftPlan2d(&plan, NX, NY, CUFFT_C2C);
/* Use the CUFFT plan to transform the signal out of place. */
cufftExecC2C(plan, idata, tdata, CUFFT_FORWARD);

cufftExecC2C(plan, tdata, odata, CUFFT_INVERSE);

cufftComplex* InputImage = (cufftComplex*)malloc(NY * NX * sizeof(cufftComplex));
cufftComplex* OutputImageFFT = (cufftComplex*)malloc(NY * NX * sizeof(cufftComplex));
cudaMemcpy(InputImage, idata, (NY * NX * sizeof(cufftComplex)), cudaMemcpyDeviceToHost);
cudaMemcpy(OutputImageFFT, odata, (NY * NX * sizeof(cufftComplex)), cudaMemcpyDeviceToHost);
for (int y = 0; y < NY; y++)
for (int x = 0; x < NX; x++)
printf("X:%d, Y:%d, A:%f = B:%f %fi\n", x, y, InputImage[((y * NX) + x)].x, OutputImageFFT[((y * NX) + x)].x, OutputImageFFT[((y * NX) + x)].y);

getchar();
}
-----------------------------------------------------------------------------------------------------
Output:
X:0, Y:0, A:255.000000 0.000000i = B:73694.984375 0.085018i
X:1, Y:0, A:255.000000 0.000000i = B:73695.039063 0.059780i
X:2, Y:0, A:255.000000 0.000000i = B:73695.156250 0.002077i
X:3, Y:0, A:255.000000 0.000000i = B:73695.054688 -0.106803i
X:4, Y:0, A:255.000000 0.000000i = B:73694.976563 -0.119947i
X:5, Y:0, A:255.000000 0.000000i = B:73694.882813 -0.017992i
...

The output should be:
X:0, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i
X:1, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i
X:2, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i
X:3, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i
X:4, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i
X:5, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i
...
I am trying to use the cufft library. I am creating a 2D signal, applying a 2D fft on the 2D signal and then applying fft inverse to get back the original signal. The problem is that the output is nowhere near the original signal. Can someone guide me and tell me if I am doing something wrong please.



Here is the code:



int

main( int argc, char** argv)

{

int NX = 17, NY = 17;

cufftHandle plan;

cufftComplex *idata, *odata, *tdata;

cudaMalloc((void**)&idata, sizeof(cufftComplex)*NX*NY);

cudaMalloc((void**)&odata, sizeof(cufftComplex)*NX*NY);

cudaMalloc((void**)&tdata, sizeof(cufftComplex)*NX*NY);



cufftComplex* Input = (cufftComplex*)malloc(NY * NX * sizeof(cufftComplex));



for(int y = 0; y < NY; y++)

for (int x = 0; x < NX; x++)

{

Input[(y * NX) + x].x = 255;

Input[(y * NX) + x].y = 0;

}



cudaMemcpy(idata, Input, (NY * NX * sizeof(cufftComplex)), cudaMemcpyHostToDevice);

/* Create a 2D FFT plan. */

cufftPlan2d(&plan, NX, NY, CUFFT_C2C);

/* Use the CUFFT plan to transform the signal out of place. */

cufftExecC2C(plan, idata, tdata, CUFFT_FORWARD);



cufftExecC2C(plan, tdata, odata, CUFFT_INVERSE);



cufftComplex* InputImage = (cufftComplex*)malloc(NY * NX * sizeof(cufftComplex));

cufftComplex* OutputImageFFT = (cufftComplex*)malloc(NY * NX * sizeof(cufftComplex));

cudaMemcpy(InputImage, idata, (NY * NX * sizeof(cufftComplex)), cudaMemcpyDeviceToHost);

cudaMemcpy(OutputImageFFT, odata, (NY * NX * sizeof(cufftComplex)), cudaMemcpyDeviceToHost);

for (int y = 0; y < NY; y++)

for (int x = 0; x < NX; x++)

printf("X:%d, Y:%d, A:%f = B:%f %fi\n", x, y, InputImage[((y * NX) + x)].x, OutputImageFFT[((y * NX) + x)].x, OutputImageFFT[((y * NX) + x)].y);



getchar();

}

-----------------------------------------------------------------------------------------------------

Output:

X:0, Y:0, A:255.000000 0.000000i = B:73694.984375 0.085018i

X:1, Y:0, A:255.000000 0.000000i = B:73695.039063 0.059780i

X:2, Y:0, A:255.000000 0.000000i = B:73695.156250 0.002077i

X:3, Y:0, A:255.000000 0.000000i = B:73695.054688 -0.106803i

X:4, Y:0, A:255.000000 0.000000i = B:73694.976563 -0.119947i

X:5, Y:0, A:255.000000 0.000000i = B:73694.882813 -0.017992i

...



The output should be:

X:0, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i

X:1, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i

X:2, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i

X:3, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i

X:4, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i

X:5, Y:0, A:255.000000 0.000000i = B:255.000000 0.000000i

...

#1
Posted 03/18/2010 01:44 PM   
Scroll To Top