/******************************************************************************
 * Copyright (c) 2011, Duane Merrill.  All rights reserved.
 * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/**
 * \file
 * cub::GridBarrier implements a software global barrier among thread blocks within a CUDA grid
 */

#pragma once

#include "../util_debug.cuh"
#include "../config.cuh"
#include "../thread/thread_load.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {


/**
 * \addtogroup GridModule
 * @{
 */


/**
 * \brief GridBarrier implements a software global barrier among thread blocks within a CUDA grid
 */
class GridBarrier
{
protected:

    typedef unsigned int SyncFlag;

    // Counters in global device memory
    SyncFlag* d_sync;

public:

    /**
     * Constructor
     */
    GridBarrier() : d_sync(NULL) {}


    /**
     * Synchronize
     */
    __device__ __forceinline__ void Sync() const
    {
        volatile SyncFlag *d_vol_sync = d_sync;

        // Threadfence and syncthreads to make sure global writes are visible before
        // thread-0 reports in with its sync counter
        __threadfence();
        CTA_SYNC();

        if (blockIdx.x == 0)
        {
            // Report in ourselves
            if (threadIdx.x == 0)
            {
                d_vol_sync[blockIdx.x] = 1;
            }

            CTA_SYNC();

            // Wait for everyone else to report in
            for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x)
            {
                while (ThreadLoad<LOAD_CG>(d_sync + peer_block) == 0)
                {
                    __threadfence_block();
                }
            }

            CTA_SYNC();

            // Let everyone know it's safe to proceed
            for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x)
            {
                d_vol_sync[peer_block] = 0;
            }
        }
        else
        {
            if (threadIdx.x == 0)
            {
                // Report in
                d_vol_sync[blockIdx.x] = 1;

                // Wait for acknowledgment
                while (ThreadLoad<LOAD_CG>(d_sync + blockIdx.x) == 1)
                {
                    __threadfence_block();
                }
            }

            CTA_SYNC();
        }
    }
};
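
/**
 * Usage sketch (illustrative; not part of the original file): a kernel that
 * separates two grid-wide phases with GridBarrier::Sync().  Note that this
 * spin-wait protocol only terminates if every thread block in the grid is
 * co-resident on the device at once, so the grid must be sized accordingly.
 * The names ExampleKernel, d_in, and d_out below are hypothetical.
 *
 * \code
 * __global__ void ExampleKernel(int *d_in, int *d_out, cub::GridBarrier barrier)
 * {
 *     int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
 *
 *     d_in[tid] = tid;                                  // Phase 1: every thread writes its slot
 *
 *     barrier.Sync();                                   // All phase-1 writes are now globally visible
 *
 *     int peer = (tid + 1) % (gridDim.x * blockDim.x);
 *     d_out[tid] = d_in[peer];                          // Phase 2: safely read another block's write
 * }
 * \endcode
 */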

/**
 * \brief GridBarrierLifetime extends GridBarrier to provide lifetime management of the temporary device storage needed for cooperation.
 *
 * Uses RAII for lifetime, i.e., device resources are reclaimed when
 * the destructor is called.
 */
class GridBarrierLifetime : public GridBarrier
{
protected:

    // Number of bytes backed by d_sync
    size_t sync_bytes;

public:

    /**
     * Constructor
     */
    GridBarrierLifetime() : GridBarrier(), sync_bytes(0) {}

    /**
     * Frees the device-side progress counters and resets internal state
     */
    cudaError_t HostReset()
    {
        cudaError_t retval = cudaSuccess;
        if (d_sync)
        {
            CubDebug(retval = cudaFree(d_sync));
            d_sync = NULL;
        }
        sync_bytes = 0;
        return retval;
    }


    /**
     * Destructor
     */
    virtual ~GridBarrierLifetime()
    {
        HostReset();
    }

    /**
     * Sets up the progress counters for the next kernel launch (lazily
     * allocating and initializing them if necessary)
     */
    cudaError_t Setup(int sweep_grid_size)
    {
        cudaError_t retval = cudaSuccess;
        do {
            size_t new_sync_bytes = sweep_grid_size * sizeof(SyncFlag);
            if (new_sync_bytes > sync_bytes)
            {
                if (d_sync)
                {
                    if (CubDebug(retval = cudaFree(d_sync))) break;
                }

                sync_bytes = new_sync_bytes;

                // Allocate and initialize to zero
                if (CubDebug(retval = cudaMalloc((void**) &d_sync, sync_bytes))) break;
                if (CubDebug(retval = cudaMemset(d_sync, 0, new_sync_bytes))) break;
            }
        } while (0);

        return retval;
    }
};
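
/**
 * Usage sketch (illustrative; not part of the original file): host-side setup
 * with GridBarrierLifetime.  Setup() lazily (re)allocates one zeroed SyncFlag
 * per thread block, and the destructor frees the allocation via HostReset().
 * Because Sync() spin-waits, grid_size must not exceed the number of thread
 * blocks that can be co-resident on the device (e.g., as bounded by
 * cudaOccupancyMaxActiveBlocksPerMultiprocessor).  ExampleKernel, d_in, and
 * d_out refer to the hypothetical kernel sketched above.
 *
 * \code
 * int block_size = 256;
 * int grid_size  = 64;                          // must not exceed co-resident block capacity
 *
 * cub::GridBarrierLifetime barrier;
 * cudaError_t error = barrier.Setup(grid_size); // lazy cudaMalloc + cudaMemset(0)
 *
 * // Passing by value slices to the GridBarrier base, copying only d_sync
 * ExampleKernel<<<grid_size, block_size>>>(d_in, d_out, barrier);
 * \endcode
 */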

/** @} */       // end group GridModule

}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)