/*
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/** @file   common_host.h
 *  @author Thomas Müller and Nikolaus Binder, NVIDIA
 *  @brief  Common utilities that are needed by pretty much every component of this framework.
 */
namespace tcnn {

using namespace fmt::literals;

/// Severity attached to each log message; the installed callback decides how to render it.
enum class LogSeverity {
	Info,
	Debug,
	Warning,
	Error,
	Success,
};

/// Returns the currently installed log callback (defined elsewhere).
const std::function<void(LogSeverity, const std::string&)>& log_callback();

/// Installs a process-wide callback that receives every message emitted via log() below.
void set_log_callback(const std::function<void(LogSeverity, const std::string&)>& callback);
/// Formats `msg` with fmt-style `args` and forwards the result to the installed log callback.
template <typename... Ts>
void log(LogSeverity severity, const std::string& msg, Ts&&... args) {
	log_callback()(severity, fmt::format(msg, std::forward<Ts>(args)...));
}

// Convenience wrappers: same as log(), with the severity fixed.
template <typename... Ts> void log_info(const std::string& msg, Ts&&... args) { log(LogSeverity::Info, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_debug(const std::string& msg, Ts&&... args) { log(LogSeverity::Debug, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_warning(const std::string& msg, Ts&&... args) { log(LogSeverity::Warning, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_error(const std::string& msg, Ts&&... args) { log(LogSeverity::Error, msg, std::forward<Ts>(args)...); }
template <typename... Ts> void log_success(const std::string& msg, Ts&&... args) { log(LogSeverity::Success, msg, std::forward<Ts>(args)...); }
/// Returns whether verbose logging is enabled (defined elsewhere).
bool verbose();
/// Enables or disables verbose logging (defined elsewhere).
void set_verbose(bool verbose);

// NOTE(review): the six doc comments below describe CUDA-driver/runtime/OptiX
// result-checking macros whose #define bodies are not present in this excerpt —
// they appear to have been lost in transcription; confirm against the original header.

/// Checks the result of a cuXXXXXX call and throws an error on failure
/// Checks the result of a cuXXXXXX call and prints an error on failure
/// Checks the result of a cudaXXXXXX call and throws an error on failure
/// Checks the result of a cudaXXXXXX call and prints an error on failure
/// Checks the result of optixXXXXXX call and throws an error on failure
/// Checks the result of a optixXXXXXX call and throws an error with a log message on failure

//////////////////////////////
// Enum<->string conversion //
//////////////////////////////

// Each pair converts between an enum value and its string name; implementations elsewhere.
Activation string_to_activation(const std::string& activation_name);
std::string to_string(Activation activation);

GridType string_to_grid_type(const std::string& grid_type);
std::string to_string(GridType grid_type);

HashType string_to_hash_type(const std::string& hash_type);
std::string to_string(HashType hash_type);

InterpolationType string_to_interpolation_type(const std::string& interpolation_type);
std::string to_string(InterpolationType interpolation_type);

ReductionType string_to_reduction_type(const std::string& reduction_type);
std::string to_string(ReductionType reduction_type);
//////////////////
// Misc helpers //
//////////////////

/// CUDA runtime version as an integer (defined elsewhere).
int cuda_runtime_version();

/// Renders the CUDA runtime version as a "major.minor" string.
inline std::string cuda_runtime_version_string() {
	const int version = cuda_runtime_version();
	// Runtime version encoding: 1000 * major + 10 * minor.
	const int major = version / 1000;
	const int minor = (version % 100) / 10;
	return fmt::format("{}.{}", major, minor);
}
// Active CUDA device management (implementations elsewhere).
int cuda_device();
void set_cuda_device(int device);
int cuda_device_count();

// Per-device capability queries. Each query taking a `device` index has a
// zero-argument convenience overload that targets the currently active device
// via cuda_device().

bool cuda_supports_virtual_memory(int device);
inline bool cuda_supports_virtual_memory() {
	return cuda_supports_virtual_memory(cuda_device());
}

std::string cuda_device_name(int device);
inline std::string cuda_device_name() {
	return cuda_device_name(cuda_device());
}

uint32_t cuda_compute_capability(int device);
inline uint32_t cuda_compute_capability() {
	return cuda_compute_capability(cuda_device());
}

// Presumably the highest compute capability this build was compiled for — confirm in the .cu implementation.
uint32_t cuda_max_supported_compute_capability();

// Presumably the device's compute capability limited to what this build supports — confirm in the .cu implementation.
uint32_t cuda_supported_compute_capability(int device);
inline uint32_t cuda_supported_compute_capability() {
	return cuda_supported_compute_capability(cuda_device());
}

size_t cuda_max_shmem(int device);
inline size_t cuda_max_shmem() {
	return cuda_max_shmem(cuda_device());
}

uint32_t cuda_max_registers(int device);
inline uint32_t cuda_max_registers() {
	return cuda_max_registers(cuda_device());
}

size_t cuda_memory_granularity(int device);
inline size_t cuda_memory_granularity() {
	return cuda_memory_granularity(cuda_device());
}

/// Device memory usage snapshot, in bytes.
struct MemoryInfo {
	size_t total;
	size_t free;
	size_t used;
};

MemoryInfo cuda_memory_info();
// Hash helpers taken from https://stackoverflow.com/a/50978188

/// XORs `n` with itself shifted right by `i` bits (one xorshift mixing round).
template <typename T>
T xorshift(T n, int i) {
	const T shifted = n >> i;
	return n ^ shifted;
}
/// Mixes the bits of a 32-bit value via two multiply/xorshift rounds.
inline uint32_t distribute(uint32_t n) {
	const uint32_t pattern = 0x55555555ul;      // alternating 0 and 1 bits
	const uint32_t odd_constant = 3423571495ul; // random uneven integer constant
	return odd_constant * xorshift(pattern * xorshift(n, 16), 16);
}
/// Mixes the bits of a 64-bit value via two multiply/xorshift rounds.
inline uint64_t distribute(uint64_t n) {
	const uint64_t pattern = 0x5555555555555555ull;        // alternating 0 and 1 bits
	const uint64_t odd_constant = 17316035218449499591ull; // random uneven integer constant
	return odd_constant * xorshift(pattern * xorshift(n, 32), 32);
}
/// Rotates `n` left by `i` bits. Only enabled for unsigned types; the shift amounts
/// are masked so i == 0 or i >= bit-width never produces an undefined full-width shift.
template <typename T, typename S>
constexpr typename std::enable_if<std::is_unsigned<T>::value, T>::type rotl(const T n, const S i) {
	const T mask = (std::numeric_limits<T>::digits - 1);
	const T amount = i & mask;
	// This two-shift form is usually recognized by the compiler as a rotate instruction.
	return (n << amount) | (n >> (((T)0 - amount) & mask));
}
/// Folds the hash of `v` into `seed`: rotate the seed, xor in the distributed hash.
template <typename T>
size_t hash_combine(std::size_t seed, const T& v) {
	const size_t rotated_seed = rotl(seed, std::numeric_limits<size_t>::digits / 3);
	const size_t value_hash = distribute(std::hash<T>{}(v));
	return rotated_seed ^ value_hash;
}
/// Preamble prepended to JIT-generated device code (defined elsewhere).
std::string generate_device_code_preamble();
// Presumably converts an identifier to snake_case naming — defined elsewhere; confirm behavior there.
std::string to_snake_case(const std::string& str);
/// Splits `text` on `delim` (defined elsewhere).
std::vector<std::string> split(const std::string& text, const std::string& delim);
/// Concatenates the stringified elements of `components`, separated by `delim`.
///
/// Works for any iterable container (the previous first-element test compared
/// addresses against `&components[0]`, which required operator[] and contiguous
/// storage; a first-iteration flag imposes no such requirement). Returns "" for
/// an empty container.
template <typename T>
std::string join(const T& components, const std::string& delim) {
	std::ostringstream s;
	bool first = true;
	for (const auto& component : components) {
		if (!first) {
			s << delim;
		}
		first = false;
		s << component;
	}
	return s.str();
}
/// Dedents, re-indents, and formats a multi-line fmt-style format string.
///
/// Leading and trailing "empty" lines (lines consisting of nothing or only tabs)
/// are removed; the common leading-tab indentation of the remaining lines is
/// stripped and replaced with `indent` tabs; the result is passed to fmt::format
/// with `args`. Returns "" when every line is empty. Note: only '\t' counts as
/// indentation — space-indented input is treated as unindented content.
template <typename... Ts>
std::string dfmt(uint32_t indent, const std::string& format, Ts&&... args) {
	// Trim empty lines at the beginning and end of format string.
	// Also re-indent the format string `indent` deep.
	uint32_t input_indent = std::numeric_limits<uint32_t>::max();
	uint32_t n_empty_leading = 0, n_empty_trailing = 0;
	bool leading = true;
	std::vector<std::string> lines = split(format, "\n");
	for (const auto& line : lines) {
		bool empty = true;
		uint32_t line_indent = 0;
		// Count leading tabs; the first non-tab character makes the line non-empty
		// and ends the scan.
		for (uint32_t i = 0; i < line.length(); ++i) {
			if (empty && line[i] == '\t') {
				line_indent = i+1;
			} else {
				empty = false;
				break;
			}
		}
		if (empty) {
			if (leading) {
				++n_empty_leading;
			}
			// Incremented for every empty line but reset whenever a non-empty line
			// follows, so after the loop this holds only the trailing-empty count.
			++n_empty_trailing;
			continue;
		}
		n_empty_trailing = 0;
		leading = false;
		input_indent = std::min(input_indent, line_indent);
	}
	// No non-empty line was seen: nothing to format.
	if (input_indent == std::numeric_limits<uint32_t>::max()) {
		return "";
	}
	// Erase trailing lines first; erasing the leading ones first would shift the
	// end-anchored range.
	lines.erase(lines.end() - n_empty_trailing, lines.end());
	lines.erase(lines.begin(), lines.begin() + n_empty_leading);
	for (auto& line : lines) {
		// Interior empty lines shorter than the common indent are left as-is.
		if (line.length() >= input_indent) {
			line = line.substr(input_indent);
			line = line.insert(0, indent, '\t');
		}
	}
	return fmt::format(join(lines, "\n"), std::forward<Ts>(args)...);
}
// Case-conversion helpers; defined elsewhere.
std::string to_lower(std::string str);
std::string to_upper(std::string str);

/// Case-insensitive string equality (compares to_lower of both operands).
inline bool equals_case_insensitive(const std::string& str1, const std::string& str2) {
	return to_lower(str1) == to_lower(str2);
}

// Hash/equality functors that make unordered containers case-insensitive:
// hashing the lower-cased key keeps the functors consistent with each other.
struct CaseInsensitiveHash { size_t operator()(const std::string& v) const { return std::hash<std::string>{}(to_lower(v)); }};
struct CaseInsensitiveEqual { bool operator()(const std::string& l, const std::string& r) const { return equals_case_insensitive(l, r); }};

/// Hash map with case-insensitive std::string keys.
template <typename T>
using ci_hashmap = std::unordered_map<std::string, T, CaseInsensitiveHash, CaseInsensitiveEqual>;
/// Human-readable name of type T; specializations defined elsewhere.
template <typename T>
std::string type_to_string();

/// Stringifies a tvec as "tvec<T, N, A>(c0, c1, ...)".
template <typename T, uint32_t N, size_t A>
std::string to_string(const tvec<T, N, A>& v) {
	return fmt::format("tvec<{}, {}, {}>({})", type_to_string<T>(), N, A, join(v, ", "));
}
/// Formats a byte count as a human-readable string, e.g. 1536 -> "1.5 KB".
/// The count is divided by 1024 until it drops below 1024 (or the largest suffix
/// is reached); at most three significant digits are printed.
inline std::string bytes_to_string(size_t bytes) {
	std::array<std::string, 7> suffixes = {{ "B", "KB", "MB", "GB", "TB", "PB", "EB" }};
	double count = (double)bytes;
	uint32_t i = 0;
	// `i + 1 < suffixes.size()` keeps `i` a valid index by construction: even if
	// `count` were still >= 1024 at the last suffix, the loop stops rather than
	// stepping past the array. (Output is unchanged for all 64-bit byte counts.)
	for (; i + 1 < suffixes.size() && count >= 1024; ++i) {
		count /= 1024;
	}
	std::ostringstream oss;
	oss.precision(3);
	oss << count << " " << suffixes[i];
	return oss.str();
}
/// Returns whether `num` is a power of two. If `log2` is non-null, it receives the
/// number of factors of two stripped from `num` (the exponent when the result is true).
inline bool is_pot(uint32_t num, uint32_t* log2 = nullptr) {
	uint32_t exponent = 0;
	if (num != 0) {
		// Strip factors of two, counting them.
		while ((num & 1u) == 0u) {
			num >>= 1;
			++exponent;
		}
	}
	if (log2) {
		*log2 = exponent;
	}
	// A power of two reduces to exactly 1; zero stays zero and fails here.
	return num == 1;
}
/// Integer exponentiation by repeated multiplication (wraps modulo 2^32).
/// powi(x, 0) == 1 for any x, including powi(0, 0).
inline uint32_t powi(uint32_t base, uint32_t exponent) {
	uint32_t product = 1;
	while (exponent-- > 0) {
		product *= base;
	}
	return product;
}
/// RAII helper: invokes a stored callback when the guard goes out of scope,
/// unless disarm() was called first. Move-only.
class ScopeGuard {
public:
	ScopeGuard() = default;
	ScopeGuard(const std::function<void()>& callback) : m_callback{callback} {}
	ScopeGuard(std::function<void()>&& callback) : m_callback{std::move(callback)} {}

	// Non-copyable: exactly one guard owns the cleanup responsibility.
	ScopeGuard& operator=(const ScopeGuard& other) = delete;
	ScopeGuard(const ScopeGuard& other) = delete;

	// Moves are noexcept (idiomatic for move operations; lets containers and
	// generic code take the non-throwing path). Swap-based transfer means any
	// callback previously held by the destination fires when `other` is destroyed.
	ScopeGuard& operator=(ScopeGuard&& other) noexcept { std::swap(m_callback, other.m_callback); return *this; }
	ScopeGuard(ScopeGuard&& other) noexcept { *this = std::move(other); }

	~ScopeGuard() { if (m_callback) { m_callback(); } }

	/// Cancels the pending callback; the destructor becomes a no-op.
	void disarm() {
		m_callback = {};
	}
private:
	std::function<void()> m_callback;
};
/// Caches a lazily-generated value.
///
/// NOTE(review): "not yet generated" is detected via `!m_val`, so T must be
/// contextually convertible to bool (e.g. a smart pointer), and the generator
/// re-runs whenever the current value evaluates to false — confirm this is the
/// intended semantics for non-pointer T.
template <typename T>
class Lazy {
public:
	/// Returns the stored value, invoking `generator` to create it when the
	/// current value evaluates to false.
	template <typename F>
	T& get(F&& generator) {
		if (!m_val) {
			m_val = generator();
		}
		return m_val;
	}
private:
	// Default-constructed until first successful get().
	T m_val;
};
/// Launches `kernel` with one thread per element: grid of n_blocks_linear(n_elements)
/// blocks of N_THREADS_LINEAR threads, `shmem_size` bytes of dynamic shared memory,
/// on `stream`. The element count is forwarded as the kernel's first argument.
/// No-op when n_elements is zero.
template <typename K, typename T, typename ... Types>
inline void linear_kernel(K kernel, uint32_t shmem_size, cudaStream_t stream, T n_elements, Types ... args) {
	if (n_elements <= 0) {
		return;
	}
	kernel<<<n_blocks_linear(n_elements), N_THREADS_LINEAR, shmem_size, stream>>>(n_elements, args...);
}
/// Kernel backing parallel_for_gpu: invokes fun(i) once per i in [0, n_elements).
/// Expects a 1D launch with n_blocks_linear(n_elements) blocks of N_THREADS_LINEAR threads.
template <typename F>
__global__ void parallel_for_kernel(const size_t n_elements, F fun) {
	// Promote to size_t before multiplying: blockIdx.x * blockDim.x is otherwise
	// evaluated in 32-bit unsigned arithmetic and silently wraps once the launch
	// spans more than 2^32 threads, even though n_elements is a size_t.
	const size_t i = threadIdx.x + (size_t)blockIdx.x * blockDim.x;
	if (i >= n_elements) return;
	fun(i);
}
/// Runs fun(i) on the GPU for every i in [0, n_elements), with `shmem_size` bytes
/// of dynamic shared memory, on `stream`. No-op for an empty range.
template <typename F>
inline void parallel_for_gpu(uint32_t shmem_size, cudaStream_t stream, size_t n_elements, F&& fun) {
	if (n_elements == 0) {
		return;
	}
	parallel_for_kernel<F><<<n_blocks_linear(n_elements), N_THREADS_LINEAR, shmem_size, stream>>>(n_elements, fun);
}
/// Overload with no dynamic shared memory.
template <typename F>
inline void parallel_for_gpu(cudaStream_t stream, size_t n_elements, F&& fun) {
	parallel_for_gpu(0, stream, n_elements, std::forward<F>(fun));
}

/// Overload on the default (nullptr) stream with no dynamic shared memory.
template <typename F>
inline void parallel_for_gpu(size_t n_elements, F&& fun) {
	parallel_for_gpu(nullptr, n_elements, std::forward<F>(fun));
}
/// Kernel backing parallel_for_gpu_aos: invokes fun(elem, dim) with dims varying
/// fastest within a block (threadIdx.x = dim, threadIdx.y tiles elements).
template <typename F>
__global__ void parallel_for_aos_kernel(const size_t n_elements, const uint32_t n_dims, F fun) {
	const size_t dim = threadIdx.x;
	// Promote to size_t before multiplying: blockIdx.x * blockDim.y is otherwise
	// evaluated in 32-bit unsigned arithmetic and silently wraps for launches
	// spanning more than 2^32 threads.
	const size_t elem = threadIdx.y + (size_t)blockIdx.x * blockDim.y;
	if (dim >= n_dims) return;
	if (elem >= n_elements) return;
	fun(elem, dim);
}
/// Runs fun(elem, dim) for every (element, dimension) pair in array-of-structures
/// order: threads within a block cover the dims of consecutive elements.
/// NOTE(review): threads.x is set to n_dims, so this assumes n_dims does not exceed
/// the device's per-block thread limits — confirm callers never exceed them.
template <typename F>
inline void parallel_for_gpu_aos(uint32_t shmem_size, cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
	if (n_elements <= 0 || n_dims <= 0) {
		return;
	}
	// Block layout: x = dim, y = elements, padded toward N_THREADS_LINEAR threads total.
	const dim3 threads = { n_dims, div_round_up(N_THREADS_LINEAR, n_dims), 1 };
	const size_t n_threads = threads.x * threads.y;
	const dim3 blocks = { (uint32_t)div_round_up(n_elements * n_dims, n_threads), 1, 1 };
	parallel_for_aos_kernel<<<blocks, threads, shmem_size, stream>>>(
		n_elements, n_dims, fun
	);
}
/// Overload with no dynamic shared memory.
template <typename F>
inline void parallel_for_gpu_aos(cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
	parallel_for_gpu_aos(0, stream, n_elements, n_dims, std::forward<F>(fun));
}

/// Overload on the default (nullptr) stream with no dynamic shared memory.
template <typename F>
inline void parallel_for_gpu_aos(size_t n_elements, uint32_t n_dims, F&& fun) {
	parallel_for_gpu_aos(nullptr, n_elements, n_dims, std::forward<F>(fun));
}
/// Kernel backing parallel_for_gpu_soa: invokes fun(elem, dim) with blockIdx.y
/// selecting the dimension and (blockIdx.x, threadIdx.x) tiling the elements.
template <typename F>
__global__ void parallel_for_soa_kernel(const size_t n_elements, const uint32_t n_dims, F fun) {
	// Promote to size_t before multiplying: blockIdx.x * blockDim.x is otherwise
	// evaluated in 32-bit unsigned arithmetic and silently wraps for launches
	// spanning more than 2^32 threads.
	const size_t elem = threadIdx.x + (size_t)blockIdx.x * blockDim.x;
	const size_t dim = blockIdx.y;
	if (elem >= n_elements) return;
	if (dim >= n_dims) return;
	fun(elem, dim);
}
/// Runs fun(elem, dim) for every (element, dimension) pair in structure-of-arrays
/// order: one grid row (blockIdx.y) per dimension, blockIdx.x tiling the elements.
template <typename F>
inline void parallel_for_gpu_soa(uint32_t shmem_size, cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
	if (n_elements <= 0 || n_dims <= 0) {
		return;
	}
	const dim3 blocks = { n_blocks_linear(n_elements), n_dims, 1 };
	// Launch with the 2D `blocks` grid. Previously the launch passed the scalar
	// n_blocks_linear(n_elements) as the grid (leaving `blocks` unused), so
	// blockIdx.y was always 0 and only dimension 0 was ever processed.
	parallel_for_soa_kernel<<<blocks, N_THREADS_LINEAR, shmem_size, stream>>>(
		n_elements, n_dims, fun
	);
}
/// Overload with no dynamic shared memory.
template <typename F>
inline void parallel_for_gpu_soa(cudaStream_t stream, size_t n_elements, uint32_t n_dims, F&& fun) {
	parallel_for_gpu_soa(0, stream, n_elements, n_dims, std::forward<F>(fun));
}

/// Overload on the default (nullptr) stream with no dynamic shared memory.
template <typename F>
inline void parallel_for_gpu_soa(size_t n_elements, uint32_t n_dims, F&& fun) {
	parallel_for_gpu_soa(nullptr, n_elements, n_dims, std::forward<F>(fun));
}

} // namespace tcnn