/*
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** @file   common_host.h
 *  @author Thomas Müller and Nikolaus Binder, NVIDIA
 *  @brief  Common utilities that are needed by pretty much every component of this framework.
 */

#pragma once

#include <tiny-cuda-nn/common.h>

#include <fmt/format.h>

#include <array>
#include <functional>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <vector>

namespace tcnn {

using namespace fmt::literals;

enum class LogSeverity {
	Info,
	Debug,
	Warning,
	Error,
	Success,
};

const std::function<void(LogSeverity, const std::string&)>& log_callback();
void set_log_callback(const std::function<void(LogSeverity, const std::string&)>& callback);

template <typename... Ts>
void log(LogSeverity severity, const std::string& msg, Ts&&... args) {
	log_callback()(severity, fmt::format(msg, std::forward<Ts>(args)...));
}

template <typename... Ts> void log_info(const std::string& msg, Ts&&... args) { log(LogSeverity::Info, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_debug(const std::string& msg, Ts&&... args) { log(LogSeverity::Debug, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_warning(const std::string& msg, Ts&&... args) { log(LogSeverity::Warning, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_error(const std::string& msg, Ts&&... args) { log(LogSeverity::Error, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_success(const std::string& msg, Ts&&... args) { log(LogSeverity::Success, msg, std::forward<Ts>(args)...); }
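// Illustrative usage of the logging helpers above (a sketch, not part of the API;
// the callback body and the logged values are hypothetical):
//
//   tcnn::set_log_callback([](tcnn::LogSeverity severity, const std::string& msg) {
//       std::cerr << msg << std::endl;
//   });
//   tcnn::log_info("Initialized network with {} parameters", 12345);
//   tcnn::log_warning("Batch size {} is not a multiple of {}", 1000, 128);
//
// Messages are formatted with fmt-style "{}" placeholders and forwarded to the
// installed callback together with their severity.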
bool verbose();
void set_verbose(bool verbose);

#define CHECK_THROW(x) \
	do { if (!(x)) throw std::runtime_error{FILE_LINE " check failed: " #x}; } while(0)

/// Checks the result of a cuXXXXXX call and throws an error on failure
#define CU_CHECK_THROW(x) \
	do { \
		CUresult _result = x; \
		if (_result != CUDA_SUCCESS) { \
			const char *msg; \
			cuGetErrorName(_result, &msg); \
			throw std::runtime_error{fmt::format(FILE_LINE " " #x " failed: {}", msg)}; \
		} \
	} while(0)

/// Checks the result of a cuXXXXXX call and prints an error on failure
#define CU_CHECK_PRINT(x) \
	do { \
		CUresult _result = x; \
		if (_result != CUDA_SUCCESS) { \
			const char *msg; \
			cuGetErrorName(_result, &msg); \
			log_error(FILE_LINE " " #x " failed: {}", msg); \
		} \
	} while(0)

/// Checks the result of a cudaXXXXXX call and throws an error on failure
#define CUDA_CHECK_THROW(x) \
	do { \
		cudaError_t _result = x; \
		if (_result != cudaSuccess) \
			throw std::runtime_error{fmt::format(FILE_LINE " " #x " failed: {}", cudaGetErrorString(_result))}; \
	} while(0)

/// Checks the result of a cudaXXXXXX call and prints an error on failure
#define CUDA_CHECK_PRINT(x) \
	do { \
		cudaError_t _result = x; \
		if (_result != cudaSuccess) \
			log_error(FILE_LINE " " #x " failed: {}", cudaGetErrorString(_result)); \
	} while(0)

/// Checks the result of an optixXXXXXX call and throws an error on failure
#define OPTIX_CHECK_THROW(x) \
	do { \
		OptixResult _result = x; \
		if (_result != OPTIX_SUCCESS) { \
			throw std::runtime_error(std::string("Optix call '" #x "' failed.")); \
		} \
	} while(0)

/// Checks the result of an optixXXXXXX call and throws an error with a log message on failure
#define OPTIX_CHECK_THROW_LOG(x) \
	do { \
		OptixResult _result = x; \
		const size_t sizeof_log_returned = sizeof_log; \
		sizeof_log = sizeof(log); /* reset sizeof_log for future calls */ \
		if (_result != OPTIX_SUCCESS) { \
			throw std::runtime_error(std::string("Optix call '" #x "' failed. Log:\n") + log + (sizeof_log_returned == sizeof_log ? "" : "<TRUNCATED>")); \
		} \
	} while(0)
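// Illustrative usage of the error-checking macros above (a sketch; `data` and
// `n_bytes` are hypothetical and not part of this framework):
//
//   float* data;
//   CUDA_CHECK_THROW(cudaMalloc(&data, n_bytes));        // throws std::runtime_error on failure
//   CUDA_CHECK_PRINT(cudaMemset(data, 0, n_bytes));      // only logs an error on failure
//   CUDA_CHECK_PRINT(cudaFree(data));
//
// The *_THROW variants are meant for code paths where the caller can handle an
// exception; the *_PRINT variants are for cleanup paths (e.g. destructors) where
// throwing would be unsafe.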
"" : "")); \ } \ } while(0) ////////////////////////////// // Enum<->string conversion // ////////////////////////////// Activation string_to_activation(const std::string& activation_name); std::string to_string(Activation activation); GridType string_to_grid_type(const std::string& grid_type); std::string to_string(GridType grid_type); HashType string_to_hash_type(const std::string& hash_type); std::string to_string(HashType hash_type); InterpolationType string_to_interpolation_type(const std::string& interpolation_type); std::string to_string(InterpolationType interpolation_type); ReductionType string_to_reduction_type(const std::string& reduction_type); std::string to_string(ReductionType reduction_type); ////////////////// // Misc helpers // ////////////////// int cuda_runtime_version(); inline std::string cuda_runtime_version_string() { int v = cuda_runtime_version(); return fmt::format("{}.{}", v / 1000, (v % 100) / 10); } int cuda_device(); void set_cuda_device(int device); int cuda_device_count(); bool cuda_supports_virtual_memory(int device); inline bool cuda_supports_virtual_memory() { return cuda_supports_virtual_memory(cuda_device()); } std::string cuda_device_name(int device); inline std::string cuda_device_name() { return cuda_device_name(cuda_device()); } uint32_t cuda_compute_capability(int device); inline uint32_t cuda_compute_capability() { return cuda_compute_capability(cuda_device()); } uint32_t cuda_max_supported_compute_capability(); uint32_t cuda_supported_compute_capability(int device); inline uint32_t cuda_supported_compute_capability() { return cuda_supported_compute_capability(cuda_device()); } size_t cuda_max_shmem(int device); inline size_t cuda_max_shmem() { return cuda_max_shmem(cuda_device()); } uint32_t cuda_max_registers(int device); inline uint32_t cuda_max_registers() { return cuda_max_registers(cuda_device()); } size_t cuda_memory_granularity(int device); inline size_t cuda_memory_granularity() { return cuda_memory_granularity(cuda_device()); } struct MemoryInfo { size_t total; size_t free; size_t used; }; MemoryInfo cuda_memory_info(); // Hash helpers taken from https://stackoverflow.com/a/50978188 template T xorshift(T n, int i) { return n ^ (n >> i); } inline uint32_t distribute(uint32_t n) { uint32_t p = 0x55555555ul; // pattern of alternating 0 and 1 uint32_t c = 3423571495ul; // random uneven integer constant; return c * xorshift(p * xorshift(n, 16), 16); } inline uint64_t distribute(uint64_t n) { uint64_t p = 0x5555555555555555ull; // pattern of alternating 0 and 1 uint64_t c = 17316035218449499591ull;// random uneven integer constant; return c * xorshift(p * xorshift(n, 32), 32); } template constexpr typename std::enable_if::value, T>::type rotl(const T n, const S i) { const T m = (std::numeric_limits::digits - 1); const T c = i & m; return (n << c) | (n >> (((T)0 - c) & m)); // this is usually recognized by the compiler to mean rotation } template size_t hash_combine(std::size_t seed, const T& v) { return rotl(seed, std::numeric_limits::digits / 3) ^ distribute(std::hash{}(v)); } std::string generate_device_code_preamble(); std::string to_snake_case(const std::string& str); std::vector split(const std::string& text, const std::string& delim); template std::string join(const T& components, const std::string& delim) { std::ostringstream s; for (const auto& component : components) { if (&components[0] != &component) { s << delim; } s << component; } return s.str(); } template std::string dfmt(uint32_t indent, const std::string& format, Ts&&... 
template <typename... Ts>
std::string dfmt(uint32_t indent, const std::string& format, Ts&&... args) {
	// Trim empty lines at the beginning and end of the format string.
	// Also re-indent the format string `indent` tabs deep.
	uint32_t input_indent = std::numeric_limits<uint32_t>::max();
	uint32_t n_empty_leading = 0, n_empty_trailing = 0;
	bool leading = true;

	std::vector<std::string> lines = split(format, "\n");
	for (const auto& line : lines) {
		bool empty = true;
		uint32_t line_indent = 0;
		for (uint32_t i = 0; i < line.length(); ++i) {
			if (empty && line[i] == '\t') {
				line_indent = i+1;
			} else {
				empty = false;
				break;
			}
		}

		if (empty) {
			if (leading) {
				++n_empty_leading;
			}

			++n_empty_trailing;
			continue;
		}

		n_empty_trailing = 0;
		leading = false;
		input_indent = std::min(input_indent, line_indent);
	}

	if (input_indent == std::numeric_limits<uint32_t>::max()) {
		return "";
	}

	lines.erase(lines.end() - n_empty_trailing, lines.end());
	lines.erase(lines.begin(), lines.begin() + n_empty_leading);

	for (auto& line : lines) {
		if (line.length() >= input_indent) {
			line = line.substr(input_indent);
			line = line.insert(0, indent, '\t');
		}
	}

	return fmt::format(join(lines, "\n"), std::forward<Ts>(args)...);
}

std::string to_lower(std::string str);
std::string to_upper(std::string str);
inline bool equals_case_insensitive(const std::string& str1, const std::string& str2) {
	return to_lower(str1) == to_lower(str2);
}

struct CaseInsensitiveHash { size_t operator()(const std::string& v) const { return std::hash<std::string>{}(to_lower(v)); } };
struct CaseInsensitiveEqual { bool operator()(const std::string& l, const std::string& r) const { return equals_case_insensitive(l, r); } };

template <typename T>
using ci_hashmap = std::unordered_map<std::string, T, CaseInsensitiveHash, CaseInsensitiveEqual>;

template <typename T>
std::string type_to_string();

template <typename T, uint32_t N, size_t A>
std::string to_string(const tvec<T, N, A>& v) {
	return fmt::format("tvec<{}, {}, {}>({})", type_to_string<T>(), N, A, join(v, ", "));
}

inline std::string bytes_to_string(size_t bytes) {
	std::array<std::string, 7> suffixes = {{ "B", "KB", "MB", "GB", "TB", "PB", "EB" }};

	double count = (double)bytes;
	uint32_t i = 0;
	for (; i < suffixes.size() && count >= 1024; ++i) {
		count /= 1024;
	}

	std::ostringstream oss;
	oss.precision(3);
	oss << count << " " << suffixes[i];
	return oss.str();
}

inline bool is_pot(uint32_t num, uint32_t* log2 = nullptr) {
	if (log2) *log2 = 0;
	if (num > 0) {
		while (num % 2 == 0) {
			num /= 2;
			if (log2) ++*log2;
		}
		if (num == 1) {
			return true;
		}
	}

	return false;
}

inline uint32_t powi(uint32_t base, uint32_t exponent) {
	uint32_t result = 1;
	for (uint32_t i = 0; i < exponent; ++i) {
		result *= base;
	}

	return result;
}

class ScopeGuard {
public:
	ScopeGuard() = default;
	ScopeGuard(const std::function<void()>& callback) : m_callback{callback} {}
	ScopeGuard(std::function<void()>&& callback) : m_callback{std::move(callback)} {}
	ScopeGuard& operator=(const ScopeGuard& other) = delete;
	ScopeGuard(const ScopeGuard& other) = delete;
	ScopeGuard& operator=(ScopeGuard&& other) { std::swap(m_callback, other.m_callback); return *this; }
	ScopeGuard(ScopeGuard&& other) { *this = std::move(other); }
	~ScopeGuard() { if (m_callback) { m_callback(); } }

	void disarm() {
		m_callback = {};
	}

private:
	std::function<void()> m_callback;
};

template <typename T>
class Lazy {
public:
	template <typename F>
	T& get(F&& generator) {
		if (!m_val) {
			m_val = generator();
		}

		return m_val;
	}

private:
	T m_val;
};
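// Illustrative usage of ScopeGuard (a sketch; the stream variable and the work
// done with it are hypothetical):
//
//   cudaStream_t stream;
//   CUDA_CHECK_THROW(cudaStreamCreate(&stream));
//   tcnn::ScopeGuard stream_guard{[&]() { cudaStreamDestroy(stream); }};
//   // ... work that may throw; the stream is destroyed on scope exit either way.
//   // Call stream_guard.disarm() to keep the stream alive beyond this scope.
//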
#if defined(__CUDACC__) || (defined(__clang__) && defined(__CUDA__))

template <typename K, typename T, typename ... Types>
inline void linear_kernel(K kernel, uint32_t shmem_size, cudaStream_t stream, T n_elements, Types ... args) {
	if (n_elements <= 0) {
		return;
	}

	kernel<<<n_blocks_linear(n_elements), N_THREADS_LINEAR, shmem_size, stream>>>(n_elements, args...);
}

template <typename F>
__global__ void parallel_for_kernel(const size_t n_elements, F fun) {
	const size_t i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n_elements) return;

	fun(i);
}

template <typename F>
inline void parallel_for_gpu(uint32_t shmem_size, cudaStream_t stream, size_t n_elements, F&& fun) {
	if (n_elements <= 0) {
		return;
	}

	parallel_for_kernel<<<n_blocks_linear(n_elements), N_THREADS_LINEAR, shmem_size, stream>>>(n_elements, fun);
}

template <typename F>
inline void parallel_for_gpu(cudaStream_t stream, size_t n_elements, F&& fun) {
	parallel_for_gpu(0, stream, n_elements, std::forward<F>(fun));
}

template <typename F>
inline void parallel_for_gpu(size_t n_elements, F&& fun) {
	parallel_for_gpu(nullptr, n_elements, std::forward<F>(fun));
}

template <typename F>
__global__ void parallel_for_aos_kernel(const size_t n_elements, const uint32_t n_dims, F fun) {
	const size_t dim = threadIdx.x;
	const size_t elem = threadIdx.y + blockIdx.x * blockDim.y;
	if (dim >= n_dims) return;
	if (elem >= n_elements) return;

	fun(elem, dim);
}

template <typename F>
inline void parallel_for_gpu_aos(uint32_t shmem_size, cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
	if (n_elements <= 0 || n_dims <= 0) {
		return;
	}

	const dim3 threads = { n_dims, div_round_up(N_THREADS_LINEAR, n_dims), 1 };
	const size_t n_threads = threads.x * threads.y;
	const dim3 blocks = { (uint32_t)div_round_up(n_elements * n_dims, n_threads), 1, 1 };

	parallel_for_aos_kernel<<<blocks, threads, shmem_size, stream>>>(
		n_elements, n_dims, fun
	);
}

template <typename F>
inline void parallel_for_gpu_aos(cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
	parallel_for_gpu_aos(0, stream, n_elements, n_dims, std::forward<F>(fun));
}

template <typename F>
inline void parallel_for_gpu_aos(size_t n_elements, uint32_t n_dims, F&& fun) {
	parallel_for_gpu_aos(nullptr, n_elements, n_dims, std::forward<F>(fun));
}

template <typename F>
__global__ void parallel_for_soa_kernel(const size_t n_elements, const uint32_t n_dims, F fun) {
	const size_t elem = threadIdx.x + blockIdx.x * blockDim.x;
	const size_t dim = blockIdx.y;
	if (elem >= n_elements) return;
	if (dim >= n_dims) return;

	fun(elem, dim);
}

template <typename F>
inline void parallel_for_gpu_soa(uint32_t shmem_size, cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
	if (n_elements <= 0 || n_dims <= 0) {
		return;
	}

	const dim3 blocks = { n_blocks_linear(n_elements), n_dims, 1 };

	parallel_for_soa_kernel<<<blocks, N_THREADS_LINEAR, shmem_size, stream>>>(
		n_elements, n_dims, fun
	);
}

template <typename F>
inline void parallel_for_gpu_soa(cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
	parallel_for_gpu_soa(0, stream, n_elements, n_dims, std::forward<F>(fun));
}

template <typename F>
inline void parallel_for_gpu_soa(size_t n_elements, uint32_t n_dims, F&& fun) {
	parallel_for_gpu_soa(nullptr, n_elements, n_dims, std::forward<F>(fun));
}

#endif

}
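// Illustrative usage of parallel_for_gpu (a sketch; `data` and `n` are hypothetical).
// It only compiles in CUDA translation units with extended device lambdas enabled
// (e.g. nvcc's --extended-lambda):
//
//   float* data;
//   CUDA_CHECK_THROW(cudaMalloc(&data, n * sizeof(float)));
//   tcnn::parallel_for_gpu(n, [data] __device__ (size_t i) {
//       data[i] = 2.0f * (float)i;
//   });
//   CUDA_CHECK_THROW(cudaDeviceSynchronize());
//
// The lambda is invoked once per index i in [0, n); the stream-less overload
// launches on the default stream.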