您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

add.cu 1.1KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. #include <iostream>
  2. #include <math.h>
  // Kernel: element-wise vector addition, y[i] = x[i] + y[i] for every i in [0, n).
  //
  // Launch layout: any 1-D grid of 1-D blocks. Each thread starts at its global
  // index (blockIdx.x * blockDim.x + threadIdx.x) and advances by the total
  // number of launched threads (gridDim.x * blockDim.x), so every element is
  // processed exactly once regardless of how many blocks are launched. With a
  // single block this reduces to starting at threadIdx.x and stepping by
  // blockDim.x.
  //
  // Parameters:
  //   n - number of elements to process; the loop body never runs when n <= 0.
  //   x - input array of at least n floats, dereferenced on the device.
  //   y - input/output array of at least n floats, updated in place.
  //
  // Uses no shared memory and performs no synchronization.
  __global__
  void add(int n, float *x, float *y)
  {
    // 64-bit index arithmetic so that `i += stride` cannot overflow when n is
    // close to INT_MAX and the grid is large.
    const long long stride = (long long)gridDim.x * blockDim.x;
    const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    for (long long i = first; i < n; i += stride)
      y[i] = x[i] + y[i];
  }
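  /*
  // Disabled earlier version of the kernel, kept for reference. It uses no
  // thread indexing, so every launched thread walks all n elements; it only
  // gives the expected result with a single-thread launch such as add<<<1, 1>>>.
  __global__
  void add(int n, float *x, float *y)
  {
    for (int i = 0; i < n; i++)
      y[i] = x[i] + y[i];
  }
  */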
  // Reports a failed CUDA runtime call on stderr.
  // Returns true when `status` is cudaSuccess, false otherwise.
  static bool cudaCallSucceeded(cudaError_t status, const char *what)
  {
    if (status == cudaSuccess)
      return true;
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
  }

  // Adds two arrays of 1<<20 floats on the GPU with the `add` kernel and prints
  // the largest deviation of the result from the expected value 3.0f.
  //
  // Returns 0 when every CUDA call succeeded, 1 otherwise. The "Max error" line
  // is only printed when the kernel actually ran to completion.
  int main(void)
  {
    const int N = 1<<20;
    const size_t bytes = N * sizeof(float);
    float *x = 0, *y = 0;
    int exitCode = 1;

    // Allocate Unified Memory - accessible from CPU or GPU
    if (cudaCallSucceeded(cudaMallocManaged(&x, bytes), "cudaMallocManaged(x)") &&
        cudaCallSucceeded(cudaMallocManaged(&y, bytes), "cudaMallocManaged(y)")) {
      // initialize x and y arrays on the host
      for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
      }

      // Run kernel on 1M elements on the GPU (one block of 256 threads)
      add<<<1, 256>>>(N, x, y);

      // A launch does not return a status: cudaGetLastError() reports bad
      // launch configurations, and the synchronize call both waits for the GPU
      // to finish and reports faults raised while the kernel was executing.
      if (cudaCallSucceeded(cudaGetLastError(), "add kernel launch") &&
          cudaCallSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
        // Check for errors (all values should be 3.0f)
        float maxError = 0.0f;
        for (int i = 0; i < N; i++)
          maxError = fmaxf(maxError, fabsf(y[i] - 3.0f));
        std::cout << "Max error: " << maxError << std::endl;
        exitCode = 0;
      }
    }

    // Free memory on every path; cudaFree on a null pointer is a no-op.
    if (!cudaCallSucceeded(cudaFree(x), "cudaFree(x)"))
      exitCode = 1;
    if (!cudaCallSucceeded(cudaFree(y), "cudaFree(y)"))
      exitCode = 1;
    return exitCode;
  }