/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** @file common_host.h
* @author Thomas Müller and Nikolaus Binder, NVIDIA
* @brief Common utilities that are needed by pretty much every component of this framework.
*/
#pragma once
#include <tiny-cuda-nn/common.h>
#include <fmt/format.h>
#include <algorithm>
#include <array>
#include <functional>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
namespace tcnn {
using namespace fmt::literals;
enum class LogSeverity {
Info,
Debug,
Warning,
Error,
Success,
};
const std::function<void(LogSeverity, const std::string&)>& log_callback();
void set_log_callback(const std::function<void(LogSeverity, const std::string&)>& callback);
template <typename... Ts>
void log(LogSeverity severity, const std::string& msg, Ts&&... args) {
log_callback()(severity, fmt::format(msg, std::forward<Ts>(args)...));
}
template <typename... Ts> void log_info(const std::string& msg, Ts&&... args) { log(LogSeverity::Info, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_debug(const std::string& msg, Ts&&... args) { log(LogSeverity::Debug, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_warning(const std::string& msg, Ts&&... args) { log(LogSeverity::Warning, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_error(const std::string& msg, Ts&&... args) { log(LogSeverity::Error, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_success(const std::string& msg, Ts&&... args) { log(LogSeverity::Success, msg, std::forward<Ts>(args)...); }
bool verbose();
void set_verbose(bool verbose);
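// Illustrative usage sketch (not part of the upstream header): installing a custom log
// callback and emitting fmt-style messages. The callback body assumes <iostream> is
// available in the including translation unit; `n_params` is a hypothetical variable.
//
//   tcnn::set_log_callback([](tcnn::LogSeverity severity, const std::string& msg) {
//       std::cerr << msg << std::endl;
//   });
//   tcnn::log_info("Initialized network with {} parameters.", n_params);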
#define CHECK_THROW(x) \
do { if (!(x)) throw std::runtime_error{FILE_LINE " check failed: " #x}; } while(0)
/// Checks the result of a cuXXXXXX call and throws an error on failure
#define CU_CHECK_THROW(x) \
do { \
CUresult _result = x; \
if (_result != CUDA_SUCCESS) { \
const char *msg; \
cuGetErrorName(_result, &msg); \
throw std::runtime_error{fmt::format(FILE_LINE " " #x " failed: {}", msg)}; \
} \
} while(0)
/// Checks the result of a cuXXXXXX call and prints an error on failure
#define CU_CHECK_PRINT(x) \
do { \
CUresult _result = x; \
if (_result != CUDA_SUCCESS) { \
const char *msg; \
cuGetErrorName(_result, &msg); \
log_error(FILE_LINE " " #x " failed: {}", msg); \
} \
} while(0)
/// Checks the result of a cudaXXXXXX call and throws an error on failure
#define CUDA_CHECK_THROW(x) \
do { \
cudaError_t _result = x; \
if (_result != cudaSuccess) \
throw std::runtime_error{fmt::format(FILE_LINE " " #x " failed: {}", cudaGetErrorString(_result))}; \
} while(0)
/// Checks the result of a cudaXXXXXX call and prints an error on failure
#define CUDA_CHECK_PRINT(x) \
do { \
cudaError_t _result = x; \
if (_result != cudaSuccess) \
log_error(FILE_LINE " " #x " failed: {}", cudaGetErrorString(_result)); \
} while(0)
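// Illustrative usage sketch: wrapping CUDA runtime calls so failures either throw (with
// file/line and the offending expression in the message) or are merely logged. The
// _PRINT variant is useful in destructors, where throwing is not an option.
//
//   float* ptr;
//   CUDA_CHECK_THROW(cudaMalloc(&ptr, 1024 * sizeof(float)));
//   CUDA_CHECK_THROW(cudaMemset(ptr, 0, 1024 * sizeof(float)));
//   CUDA_CHECK_PRINT(cudaFree(ptr));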
/// Checks the result of an optixXXXXXX call and throws an error on failure
#define OPTIX_CHECK_THROW(x) \
do { \
OptixResult _result = x; \
if (_result != OPTIX_SUCCESS) { \
throw std::runtime_error(std::string("Optix call '" #x "' failed.")); \
} \
} while(0)
/// Checks the result of an optixXXXXXX call and throws an error with a log message on failure
#define OPTIX_CHECK_THROW_LOG(x) \
do { \
OptixResult _result = x; \
const size_t sizeof_log_returned = sizeof_log; \
sizeof_log = sizeof( log ); /* reset sizeof_log for future calls */ \
if (_result != OPTIX_SUCCESS) { \
throw std::runtime_error(std::string("Optix call '" #x "' failed. Log:\n") + log + (sizeof_log_returned == sizeof_log ? "" : "<truncated>")); \
} \
} while(0)
//////////////////////////////
// Enum<->string conversion //
//////////////////////////////
Activation string_to_activation(const std::string& activation_name);
std::string to_string(Activation activation);
GridType string_to_grid_type(const std::string& grid_type);
std::string to_string(GridType grid_type);
HashType string_to_hash_type(const std::string& hash_type);
std::string to_string(HashType hash_type);
InterpolationType string_to_interpolation_type(const std::string& interpolation_type);
std::string to_string(InterpolationType interpolation_type);
ReductionType string_to_reduction_type(const std::string& reduction_type);
std::string to_string(ReductionType reduction_type);
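// Illustrative usage sketch: the string_to_* helpers parse user-facing configuration
// strings into enums, and to_string() converts them back for log output. The spelling
// "ReLU" below is an assumption about an accepted activation name; unrecognized names
// are rejected.
//
//   Activation act = string_to_activation("ReLU");
//   log_info("Using activation: {}", to_string(act));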
//////////////////
// Misc helpers //
//////////////////
int cuda_runtime_version();
inline std::string cuda_runtime_version_string() {
int v = cuda_runtime_version();
return fmt::format("{}.{}", v / 1000, (v % 100) / 10);
}
int cuda_device();
void set_cuda_device(int device);
int cuda_device_count();
bool cuda_supports_virtual_memory(int device);
inline bool cuda_supports_virtual_memory() {
return cuda_supports_virtual_memory(cuda_device());
}
std::string cuda_device_name(int device);
inline std::string cuda_device_name() {
return cuda_device_name(cuda_device());
}
uint32_t cuda_compute_capability(int device);
inline uint32_t cuda_compute_capability() {
return cuda_compute_capability(cuda_device());
}
uint32_t cuda_max_supported_compute_capability();
uint32_t cuda_supported_compute_capability(int device);
inline uint32_t cuda_supported_compute_capability() {
return cuda_supported_compute_capability(cuda_device());
}
size_t cuda_max_shmem(int device);
inline size_t cuda_max_shmem() {
return cuda_max_shmem(cuda_device());
}
uint32_t cuda_max_registers(int device);
inline uint32_t cuda_max_registers() {
return cuda_max_registers(cuda_device());
}
size_t cuda_memory_granularity(int device);
inline size_t cuda_memory_granularity() {
return cuda_memory_granularity(cuda_device());
}
struct MemoryInfo {
size_t total;
size_t free;
size_t used;
};
MemoryInfo cuda_memory_info();
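// Illustrative usage sketch: querying the active device and its memory budget.
// bytes_to_string() is declared further below in this header and is used here purely
// for readable output.
//
//   log_info("Device {}: {} (compute capability {})", cuda_device(), cuda_device_name(), cuda_compute_capability());
//   MemoryInfo info = cuda_memory_info();
//   log_info("GPU memory: {} used of {} total", bytes_to_string(info.used), bytes_to_string(info.total));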
// Hash helpers taken from https://stackoverflow.com/a/50978188
template <typename T>
T xorshift(T n, int i) {
return n ^ (n >> i);
}
inline uint32_t distribute(uint32_t n) {
uint32_t p = 0x55555555ul; // pattern of alternating 0 and 1
uint32_t c = 3423571495ul; // random odd integer constant
return c * xorshift(p * xorshift(n, 16), 16);
}
inline uint64_t distribute(uint64_t n) {
uint64_t p = 0x5555555555555555ull; // pattern of alternating 0 and 1
uint64_t c = 17316035218449499591ull; // random odd integer constant
return c * xorshift(p * xorshift(n, 32), 32);
}
template <typename T, typename S>
constexpr typename std::enable_if<std::is_unsigned<T>::value, T>::type rotl(const T n, const S i) {
const T m = (std::numeric_limits<T>::digits - 1);
const T c = i & m;
return (n << c) | (n >> (((T)0 - c) & m)); // this is usually recognized by the compiler to mean rotation
}
template <typename T>
size_t hash_combine(std::size_t seed, const T& v) {
return rotl(seed, std::numeric_limits<size_t>::digits / 3) ^ distribute(std::hash<T>{}(v));
}
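// Illustrative usage sketch: chaining hash_combine() to hash a composite key, e.g. as
// the hasher of an unordered_map. The KernelKey struct is hypothetical.
//
//   struct KernelKey { std::string name; uint32_t n_dims; };
//   size_t hash_kernel_key(const KernelKey& key) {
//       size_t seed = 0;
//       seed = hash_combine(seed, key.name);
//       seed = hash_combine(seed, key.n_dims);
//       return seed;
//   }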
std::string generate_device_code_preamble();
std::string to_snake_case(const std::string& str);
std::vector<std::string> split(const std::string& text, const std::string& delim);
template <typename T>
std::string join(const T& components, const std::string& delim) {
std::ostringstream s;
for (const auto& component : components) {
if (&components[0] != &component) {
s << delim;
}
s << component;
}
return s.str();
}
template <typename... Ts>
std::string dfmt(uint32_t indent, const std::string& format, Ts&&... args) {
// Trim empty lines at the beginning and end of format string.
// Also re-indent the format string `indent` deep.
uint32_t input_indent = std::numeric_limits<uint32_t>::max();
uint32_t n_empty_leading = 0, n_empty_trailing = 0;
bool leading = true;
std::vector<std::string> lines = split(format, "\n");
for (const auto& line : lines) {
bool empty = true;
uint32_t line_indent = 0;
for (uint32_t i = 0; i < line.length(); ++i) {
if (empty && line[i] == '\t') {
line_indent = i+1;
} else {
empty = false;
break;
}
}
if (empty) {
if (leading) {
++n_empty_leading;
}
++n_empty_trailing;
continue;
}
n_empty_trailing = 0;
leading = false;
input_indent = std::min(input_indent, line_indent);
}
if (input_indent == std::numeric_limits<uint32_t>::max()) {
return "";
}
lines.erase(lines.end() - n_empty_trailing, lines.end());
lines.erase(lines.begin(), lines.begin() + n_empty_leading);
for (auto& line : lines) {
if (line.length() >= input_indent) {
line = line.substr(input_indent);
line = line.insert(0, indent, '\t');
}
}
return fmt::format(join(lines, "\n"), std::forward<Ts>(args)...);
}
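// Illustrative usage sketch: dfmt() trims leading/trailing blank lines, removes the
// common tab indentation of the raw string (only '\t' characters count as indentation),
// re-indents the body `indent` tabs deep, and finally forwards to fmt::format(). This is
// convenient when assembling generated device code. `scale` is a hypothetical argument.
//
//   std::string snippet = dfmt(2, R"(
//   	float y = x * {};
//   )", scale);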
std::string to_lower(std::string str);
std::string to_upper(std::string str);
inline bool equals_case_insensitive(const std::string& str1, const std::string& str2) {
return to_lower(str1) == to_lower(str2);
}
struct CaseInsensitiveHash { size_t operator()(const std::string& v) const { return std::hash<std::string>{}(to_lower(v)); }};
struct CaseInsensitiveEqual { bool operator()(const std::string& l, const std::string& r) const { return equals_case_insensitive(l, r); }};
template <typename T>
using ci_hashmap = std::unordered_map<std::string, T, CaseInsensitiveHash, CaseInsensitiveEqual>;
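// Illustrative usage sketch: ci_hashmap looks up string keys case-insensitively, which
// is convenient for user-provided configuration fields.
//
//   ci_hashmap<int> options;
//   options["Otype"] = 1;
//   int v = options.at("otype"); // finds the entry despite the different capitalization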
template <typename T>
std::string type_to_string();
template <typename T, uint32_t N, size_t A>
std::string to_string(const tvec<T, N, A>& v) {
return fmt::format("tvec<{}, {}, {}>({})", type_to_string<T>(), N, A, join(v, ", "));
}
inline std::string bytes_to_string(size_t bytes) {
std::array<std::string, 7> suffixes = {{ "B", "KB", "MB", "GB", "TB", "PB", "EB" }};
double count = (double)bytes;
uint32_t i = 0;
for (; i < suffixes.size() && count >= 1024; ++i) {
count /= 1024;
}
std::ostringstream oss;
oss.precision(3);
oss << count << " " << suffixes[i];
return oss.str();
}
inline bool is_pot(uint32_t num, uint32_t* log2 = nullptr) {
if (log2) *log2 = 0;
if (num > 0) {
while (num % 2 == 0) {
num /= 2;
if (log2) ++*log2;
}
if (num == 1) {
return true;
}
}
return false;
}
inline uint32_t powi(uint32_t base, uint32_t exponent) {
uint32_t result = 1;
for (uint32_t i = 0; i < exponent; ++i) {
result *= base;
}
return result;
}
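// Illustrative usage sketch: is_pot() reports whether a number is a power of two and
// optionally its log2; powi() computes small integer powers.
//
//   uint32_t log2_n = 0;
//   bool pot = is_pot(256u, &log2_n); // pot == true, log2_n == 8
//   uint32_t n = powi(3u, 4u);        // n == 81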
class ScopeGuard {
public:
ScopeGuard() = default;
ScopeGuard(const std::function<void()>& callback) : m_callback{callback} {}
ScopeGuard(std::function<void()>&& callback) : m_callback{std::move(callback)} {}
ScopeGuard& operator=(const ScopeGuard& other) = delete;
ScopeGuard(const ScopeGuard& other) = delete;
ScopeGuard& operator=(ScopeGuard&& other) { std::swap(m_callback, other.m_callback); return *this; }
ScopeGuard(ScopeGuard&& other) { *this = std::move(other); }
~ScopeGuard() { if (m_callback) { m_callback(); } }
void disarm() {
m_callback = {};
}
private:
std::function<void()> m_callback;
};
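// Illustrative usage sketch: ScopeGuard runs its callback when it goes out of scope
// unless disarm() was called, giving exception-safe cleanup of C-style resources.
//
//   cudaStream_t stream;
//   CUDA_CHECK_THROW(cudaStreamCreate(&stream));
//   ScopeGuard stream_guard{[&]() { cudaStreamDestroy(stream); }};
//   // ... work that may throw; the stream is destroyed on both the normal and the
//   // exceptional path ...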
template <typename T>
class Lazy {
public:
template <typename F>
T& get(F&& generator) {
if (!m_val) {
m_val = generator();
}
return m_val;
}
private:
T m_val;
};
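// Illustrative usage sketch: Lazy<T> defers construction until first use. As written,
// T must be testable in a boolean context (e.g. a smart pointer), since get() checks
// `!m_val` to decide whether the generator still needs to run. `Expensive` is a
// hypothetical type.
//
//   Lazy<std::shared_ptr<Expensive>> cached;
//   auto& handle = cached.get([]() { return std::make_shared<Expensive>(); });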
#if defined(__CUDACC__) || (defined(__clang__) && defined(__CUDA__))
template <typename K, typename T, typename ... Types>
inline void linear_kernel(K kernel, uint32_t shmem_size, cudaStream_t stream, T n_elements, Types ... args) {
if (n_elements <= 0) {
return;
}
kernel<<<n_blocks_linear(n_elements), N_THREADS_LINEAR, shmem_size, stream>>>(n_elements, args...);
}
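// Illustrative usage sketch: linear_kernel() launches a __global__ kernel with one thread
// per element, deriving the grid size from N_THREADS_LINEAR. The kernel below is
// hypothetical; its first parameter must be the element count.
//
//   __global__ void scale_kernel(uint32_t n_elements, float* data, float factor) {
//       const uint32_t i = threadIdx.x + blockIdx.x * blockDim.x;
//       if (i >= n_elements) return;
//       data[i] *= factor;
//   }
//
//   linear_kernel(scale_kernel, 0, stream, n_elements, data, 2.0f);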
template <typename F>
__global__ void parallel_for_kernel(const size_t n_elements, F fun) {
const size_t i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= n_elements) return;
fun(i);
}
template <typename F>
inline void parallel_for_gpu(uint32_t shmem_size, cudaStream_t stream, size_t n_elements, F&& fun) {
if (n_elements <= 0) {
return;
}
parallel_for_kernel<F><<<n_blocks_linear(n_elements), N_THREADS_LINEAR, shmem_size, stream>>>(n_elements, fun);
}
template <typename F>
inline void parallel_for_gpu(cudaStream_t stream, size_t n_elements, F&& fun) {
parallel_for_gpu(0, stream, n_elements, std::forward<F>(fun));
}
template <typename F>
inline void parallel_for_gpu(size_t n_elements, F&& fun) {
parallel_for_gpu(nullptr, n_elements, std::forward<F>(fun));
}
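// Illustrative usage sketch: parallel_for_gpu() invokes the functor once per element
// index. Device lambdas such as the one below require compiling with nvcc's
// --extended-lambda flag; `data` is a hypothetical device pointer.
//
//   parallel_for_gpu(stream, n_elements, [data] __device__ (size_t i) {
//       data[i] *= 2.0f;
//   });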
template <typename F>
__global__ void parallel_for_aos_kernel(const size_t n_elements, const uint32_t n_dims, F fun) {
const size_t dim = threadIdx.x;
const size_t elem = threadIdx.y + blockIdx.x * blockDim.y;
if (dim >= n_dims) return;
if (elem >= n_elements) return;
fun(elem, dim);
}
template <typename F>
inline void parallel_for_gpu_aos(uint32_t shmem_size, cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
if (n_elements <= 0 || n_dims <= 0) {
return;
}
const dim3 threads = { n_dims, div_round_up(N_THREADS_LINEAR, n_dims), 1 };
const size_t n_threads = threads.x * threads.y;
const dim3 blocks = { (uint32_t)div_round_up(n_elements * n_dims, n_threads), 1, 1 };
parallel_for_aos_kernel<<<blocks, threads, shmem_size, stream>>>(
n_elements, n_dims, fun
);
}
template <typename F>
inline void parallel_for_gpu_aos(cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
parallel_for_gpu_aos(0, stream, n_elements, n_dims, std::forward<F>(fun));
}
template <typename F>
inline void parallel_for_gpu_aos(size_t n_elements, uint32_t n_dims, F&& fun) {
parallel_for_gpu_aos(nullptr, n_elements, n_dims, std::forward<F>(fun));
}
template <typename F>
__global__ void parallel_for_soa_kernel(const size_t n_elements, const uint32_t n_dims, F fun) {
const size_t elem = threadIdx.x + blockIdx.x * blockDim.x;
const size_t dim = blockIdx.y;
if (elem >= n_elements) return;
if (dim >= n_dims) return;
fun(elem, dim);
}
template <typename F>
inline void parallel_for_gpu_soa(uint32_t shmem_size, cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
if (n_elements <= 0 || n_dims <= 0) {
return;
}
const dim3 blocks = { n_blocks_linear(n_elements), n_dims, 1 };
parallel_for_soa_kernel<<<blocks, N_THREADS_LINEAR, shmem_size, stream>>>(
n_elements, n_dims, fun
);
}
template <typename F>
inline void parallel_for_gpu_soa(cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
parallel_for_gpu_soa(0, stream, n_elements, n_dims, std::forward<F>(fun));
}
template <typename F>
inline void parallel_for_gpu_soa(size_t n_elements, uint32_t n_dims, F&& fun) {
parallel_for_gpu_soa(nullptr, n_elements, n_dims, std::forward<F>(fun));
}
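// Illustrative usage sketch: the *_aos and *_soa variants iterate over (element, dim)
// pairs; they differ only in how threads map onto memory. The AoS mapping assigns the
// dims of one element to consecutive threads, while the SoA mapping assigns consecutive
// elements of one dim to consecutive threads. `in` and `out` are hypothetical device
// pointers laid out as structure-of-arrays.
//
//   parallel_for_gpu_soa(stream, n_elements, n_dims, [=] __device__ (size_t elem, size_t dim) {
//       out[dim * n_elements + elem] = 0.5f * in[dim * n_elements + elem];
//   });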
#endif
}