Spaces:

Arrcttacsrks
/

Chat_X

Runtime error

App Files Files Community

Chat_X / llama.cpp /ggml /src /ggml-cuda /sum.cu

Arrcttacsrks

Upload llama.cpp/ggml/src/ggml-cuda/sum.cu with huggingface_hub

b628400 verified 9 months ago

raw

history blame contribute delete

1.68 kB

	#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
	#define USE_CUB
	#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700

	#ifdef USE_CUB
	// On Windows CUB uses libraries with variables called CC_PASCAL which conflict with the define in common.cuh.
	// For this reason CUB must be included BEFORE anything else.
	#include <cub/cub.cuh>
	using namespace cub;
	#endif // USE_CUB

	#include "sumrows.cuh"
	#include "sum.cuh"

	#include <cstdint>

	// Sums `ne` floats starting at `x` and stores the single result in `dst[0]`.
	//
	//   pool   - allocator used for CUB's temporary workspace (unused in the fallback path)
	//   x      - input values (NOTE(review): presumably a device pointer - confirm against callers)
	//   dst    - receives the scalar sum
	//   ne     - number of elements to reduce
	//   stream - CUDA stream handed to the reduction
	//
	// With USE_CUB the reduction is done by cub::DeviceReduce::Sum; any error status it reports
	// aborts via GGML_ASSERT instead of being dropped, since a failed reduction would otherwise
	// leave `dst` with an undefined value and no diagnostic.
	void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream) {
	#ifdef USE_CUB
	    // Phase 1: a null workspace pointer makes CUB only report the required scratch size.
	    size_t tmp_size = 0;
	    cudaError_t err = DeviceReduce::Sum(nullptr, tmp_size, x, dst, ne, stream);
	    GGML_ASSERT(err == cudaSuccess);

	    // Phase 2: run the actual reduction with a pool-backed workspace of that size.
	    // The allocation is owned by tmp_alloc and handed back to the pool when it goes out of scope.
	    ggml_cuda_pool_alloc<uint8_t> tmp_alloc(pool, tmp_size);
	    err = DeviceReduce::Sum(tmp_alloc.ptr, tmp_size, x, dst, ne, stream);
	    GGML_ASSERT(err == cudaSuccess);
	#else
	    // Use (inefficient) sum_rows implementation as a fallback.
	    // For AMD there is rocPRIM which could be used as a drop-in replacement via hipcub but this would require C++11 -> C++14.
	    // The input is passed as `ne` followed by 1 (presumably ncols = ne, nrows = 1 - confirm in sumrows.cuh).
	    sum_rows_f32_cuda(x, dst, ne, 1, stream);
	    GGML_UNUSED(pool);
	#endif // USE_CUB
	}

	// Runs the sum operation for `dst`: all elements of dst->src[0] are reduced by sum_f32_cuda
	// and the result is written to dst->data.
	//
	//   ctx - backend context supplying the memory pool and the stream for this op
	//   dst - output tensor; its first source tensor is the input
	//
	// Only an F32, contiguous source and an F32 destination are accepted; anything else trips an assert.
	void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
	    const ggml_tensor * src0 = dst->src[0];

	    // Validate types and layout before the raw data pointers are reinterpreted as float.
	    GGML_ASSERT(src0->type == GGML_TYPE_F32);
	    GGML_ASSERT(dst->type == GGML_TYPE_F32);
	    GGML_ASSERT(ggml_is_contiguous(src0));

	    const float * input  = static_cast<const float *>(src0->data);
	    float *       output = static_cast<float *>(dst->data);

	    // Contiguity was asserted above, so the source is handed over as one flat run of elements.
	    const int64_t n_elements = ggml_nelements(src0);

	    ggml_cuda_pool & scratch_pool = ctx.pool();
	    cudaStream_t     op_stream    = ctx.stream();

	    sum_f32_cuda(scratch_pool, input, output, n_elements, op_stream);
	}