Spaces:
Build error
Build error
| // React to failure on return code != cudaSuccess | |
| do { \ | |
| {_cuda_err = fn;} \ | |
| if (_cuda_err != cudaSuccess) goto _cuda_fail; \ | |
| } while(false) | |
| // React to failure on return code == 0 | |
| do { \ | |
| if (!(fn)) { _cuda_err = cudaUnspecified; goto _cuda_fail; } \ | |
| else _cuda_err = cudaSuccess; \ | |
| } while(false) | |
| // Clone CPU <-> CUDA | |
// Copy `num` elements of type T from the host buffer `ptr` into a newly
// cudaMalloc'ed device buffer.
//
// Returns the device pointer on success. Returns NULL when `num` is negative,
// when the device allocation fails, or when the host-to-device copy fails; in
// the last case the freshly allocated device buffer is released first so the
// error path does not leak device memory.
template <typename T>
T* cuda_clone(const void* ptr, int num)
{
    // A negative count would wrap to an enormous size_t in the byte-size
    // computation below, so reject it up front.
    if (num < 0) return NULL;

    const size_t bytes = static_cast<size_t>(num) * sizeof(T);

    T* cuda_ptr = NULL;
    if (cudaMalloc(&cuda_ptr, bytes) != cudaSuccess) return NULL;

    if (cudaMemcpy(cuda_ptr, ptr, bytes, cudaMemcpyHostToDevice) != cudaSuccess)
    {
        // Do not leak the allocation when the copy fails.
        cudaFree(cuda_ptr);
        return NULL;
    }

    // Wait for the device before handing the pointer back; the result of the
    // synchronize call is not inspected, as before.
    cudaDeviceSynchronize();
    return cuda_ptr;
}
// Copy `num` elements of type T from the device buffer `ptr` into a newly
// malloc'ed host buffer.
//
// Returns the host pointer on success. Returns NULL when `num` is negative,
// when malloc fails, or when the device-to-host copy fails; in the last case
// the freshly allocated host buffer is freed first so the error path does not
// leak host memory.
template <typename T>
T* cpu_clone(const void* ptr, int num)
{
    // A negative count would wrap to an enormous size_t in the byte-size
    // computation below, so reject it up front.
    if (num < 0) return NULL;

    const size_t bytes = static_cast<size_t>(num) * sizeof(T);

    T* cpu_ptr = (T*) malloc(bytes);
    if (cpu_ptr == NULL) return NULL;

    if (cudaMemcpy(cpu_ptr, ptr, bytes, cudaMemcpyDeviceToHost) != cudaSuccess)
    {
        // Do not leak the host allocation when the copy fails.
        free(cpu_ptr);
        return NULL;
    }

    // Wait for the device before handing the pointer back; the result of the
    // synchronize call is not inspected, as before.
    cudaDeviceSynchronize();
    return cpu_ptr;
}
// Host-side packing of two __half values into one __half2.
//
// The raw 16-bit pattern of h1 is stored in the x component of a ushort2 and
// the raw pattern of h2 in its y component; that 32-bit pair is then
// reinterpreted as a __half2 and returned. No numeric conversion is performed,
// only the bit patterns are moved.
__host__ inline __half2 pack_half2(__half h1, __half h2)
{
    ushort2 bits;
    bits.x = *reinterpret_cast<unsigned short*>(&h1);
    bits.y = *reinterpret_cast<unsigned short*>(&h2);

    __half2* packed = reinterpret_cast<__half2*>(&bits);
    return *packed;
}