/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** @file testbed.h
* @author Thomas Müller & Alex Evans, NVIDIA
*/
#pragma once
#include <neural-graphics-primitives/adam_optimizer.h>
#include <neural-graphics-primitives/bounding_box.cuh>
#include <neural-graphics-primitives/camera_path.h>
#include <neural-graphics-primitives/common_host.h>
#include <neural-graphics-primitives/discrete_distribution.h>
#include <neural-graphics-primitives/render_buffer.h>
#include <neural-graphics-primitives/shared_queue.h>
#include <neural-graphics-primitives/thread_pool.h>
#ifdef NGP_GUI
# include <neural-graphics-primitives/openxr_hmd.h>
#endif
#include <tiny-cuda-nn/multi_stream.h>
#include <tiny-cuda-nn/random.h>
#include <json/json.hpp>
#ifdef NGP_PYTHON
# include <pybind11/numpy.h>
# include <pybind11/pybind11.h>
#endif
#include <deque>
#include <thread>
struct GLFWwindow;
namespace ngp {
struct Triangle;
class GLTexture;
struct ViewIdx {
i16vec2 px;
uint32_t view;
};
class Testbed {
public:
Testbed(ETestbedMode mode = ETestbedMode::None);
~Testbed();
bool clear_tmp_dir();
void update_imgui_paths();
void set_mode(ETestbedMode mode);
using distance_fun_t = std::function<void(uint32_t, const vec3*, float*, cudaStream_t)>;
using normals_fun_t = std::function<void(uint32_t, const vec3*, vec3*, cudaStream_t)>;
struct LevelStats {
float mean() { return count ? (x / (float)count) : 0.f; }
float variance() { return count ? (xsquared - (x * x) / (float)count) / (float)count : 0.f; }
float sigma() { return sqrtf(variance()); }
float fraczero() { return (float)numzero / float(count + numzero); }
float fracquant() { return (float)numquant / float(count); }
float x;
float xsquared;
float min;
float max;
int numzero;
int numquant;
int count;
};
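// Usage sketch for LevelStats (hypothetical, not part of the API): callers accumulate the
// raw sums in the struct above and then query the derived statistics. `values` is an
// illustrative placeholder.
//
//   Testbed::LevelStats stats = {};
//   for (float v : values) {
//       stats.x += v;
//       stats.xsquared += v * v;
//       ++stats.count;
//   }
//   float mu = stats.mean();   // x / count
//   float sd = stats.sigma();  // sqrt(E[v^2] - E[v]^2)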
class CudaDevice;
struct View {
std::shared_ptr<CudaRenderBuffer> render_buffer = nullptr;
ivec2 full_resolution = {1, 1};
int visualized_dimension = 0;
mat4x3 camera0 = mat4x3::identity();
mat4x3 camera1 = mat4x3::identity();
mat4x3 prev_camera = mat4x3::identity();
Foveation foveation;
Foveation prev_foveation;
vec2 relative_focal_length;
vec2 screen_center;
Lens lens;
CudaDevice* device = nullptr;
GPUImage<ViewIdx> index_field;
GPUImage<uint8_t> hole_mask;
GPUImage<float> depth_buffer;
vec2 fov() const { return relative_focal_length_to_fov(relative_focal_length); }
uint32_t uid = 0;
};
void render_by_reprojection(cudaStream_t stream, std::vector<View>& views);
void render_frame(
cudaStream_t stream,
const mat4x3& camera_matrix0,
const mat4x3& camera_matrix1,
const mat4x3& prev_camera_matrix,
const vec2& screen_center,
const vec2& relative_focal_length,
const Foveation& foveation,
const Foveation& prev_foveation,
const Lens& lens,
int visualized_dimension,
CudaRenderBuffer& render_buffer,
bool to_srgb = true,
CudaDevice* device = nullptr
);
void render_frame_main(
CudaDevice& device,
const mat4x3& camera_matrix0,
const mat4x3& camera_matrix1,
const vec2& screen_center,
const vec2& relative_focal_length,
const Foveation& foveation,
const Lens& lens,
int visualized_dimension
);
void render_frame_epilogue(
cudaStream_t stream,
const mat4x3& camera_matrix0,
const mat4x3& prev_camera_matrix,
const vec2& screen_center,
const vec2& relative_focal_length,
const Foveation& foveation,
const Foveation& prev_foveation,
const Lens& lens,
CudaRenderBuffer& render_buffer,
bool to_srgb = true
);
void init_camera_path_from_reproject_src_cameras();
void visualize_reproject_src_cameras(ImDrawList* list, const mat4& world2proj);
void clear_src_views();
void reset_accumulation(bool due_to_camera_movement = false, bool immediate_redraw = true, bool reset_pip = false);
void redraw_next_frame() { m_render_skip_due_to_lack_of_camera_movement_counter = 0; }
bool reprojection_available() { return m_dlss; }
void load_mesh(const fs::path& data_path);
void set_exposure(float exposure) { m_exposure = exposure; }
void translate_camera(const vec3& rel, const mat3& rot, bool allow_up_down = true);
mat3 rotation_from_angles(const vec2& angles) const;
void mouse_drag();
void mouse_wheel();
void load_file(const fs::path& path);
vec3 look_at() const;
void set_look_at(const vec3& pos);
float scale() const { return m_scale; }
void set_scale(float scale);
vec3 view_pos() const { return m_camera[3]; }
vec3 view_dir() const { return m_camera[2]; }
vec3 view_up() const { return m_camera[1]; }
vec3 view_side() const { return m_camera[0]; }
void set_view_dir(const vec3& dir);
void reset_camera();
bool keyboard_event();
void update_density_grid_mean_and_bitfield(cudaStream_t stream);
void mark_density_grid_in_sphere_empty(const vec3& pos, float radius, cudaStream_t stream);
void prepare_next_camera_path_frame();
void overlay_fps();
void imgui();
vec2 calc_focal_length(const ivec2& resolution, const vec2& relative_focal_length, int fov_axis, float zoom) const;
vec2 render_screen_center(const vec2& screen_center) const;
void optimise_mesh_step(uint32_t N_STEPS);
void compute_mesh_vertex_colors();
float get_depth_from_renderbuffer(const CudaRenderBuffer& render_buffer, const vec2& uv);
vec3 get_3d_pos_from_pixel(const CudaRenderBuffer& render_buffer, const vec2& focus_pixel);
void autofocus();
#ifdef NGP_PYTHON
std::pair<pybind11::array_t<float>, pybind11::array_t<float>>
render_to_cpu(int width, int height, int spp, bool linear, float start_t, float end_t, float fps, float shutter_fraction);
pybind11::array_t<float>
render_to_cpu_rgba(int width, int height, int spp, bool linear, float start_t, float end_t, float fps, float shutter_fraction);
pybind11::array_t<float> view(bool linear, size_t view) const;
std::pair<pybind11::array_t<float>, pybind11::array_t<uint32_t>>
reproject(const mat4x3& src, const pybind11::array_t<float>& src_img, const pybind11::array_t<float>& src_depth, const mat4x3& dst);
uint32_t add_src_view(
mat4x3 camera_to_world,
float fx,
float fy,
float cx,
float cy,
Lens lens,
pybind11::array_t<float> img,
pybind11::array_t<float> depth,
float timestamp,
bool is_srgb = false
);
pybind11::array_t<uint32_t> src_view_ids() const;
# ifdef NGP_GUI
pybind11::array_t<float> screenshot(bool linear, bool front_buffer) const;
# endif
#endif
mat4x3 view_camera(size_t view) const;
void draw_visualizations(ImDrawList* list, const mat4x3& camera_matrix);
void reproject_views(const std::vector<const View*> src, View& dst);
void render(bool skip_rendering);
void init_window(int resw, int resh, bool hidden = false, bool second_window = false);
void destroy_window();
void init_vr();
void update_vr_performance_settings();
void apply_camera_smoothing(float elapsed_ms);
bool begin_frame();
void handle_user_input();
vec3 vr_to_world(const vec3& pos) const;
void begin_vr_frame_and_handle_vr_input();
void draw_gui();
bool frame();
bool want_repl();
void load_image(const fs::path& data_path);
void load_exr_image(const fs::path& data_path);
void load_stbi_image(const fs::path& data_path);
void load_binary_image(const fs::path& data_path);
float fov() const;
void set_fov(float val);
vec2 fov_xy() const;
void set_fov_xy(const vec2& val);
CameraKeyframe copy_camera_to_keyframe() const;
void set_camera_from_keyframe(const CameraKeyframe& k);
void set_camera_from_time(float t);
void load_camera_path(const fs::path& path);
bool loop_animation();
void set_loop_animation(bool value);
fs::path root_dir();
void set_root_dir(const fs::path& dir);
bool m_want_repl = false;
bool m_render_window = false;
bool m_gather_histograms = false;
bool m_render_ground_truth = false;
EGroundTruthRenderMode m_ground_truth_render_mode = EGroundTruthRenderMode::Shade;
float m_ground_truth_alpha = 1.0f;
bool m_render = true;
int m_max_spp = 0;
ETestbedMode m_testbed_mode = ETestbedMode::None;
// Rendering stuff
ivec2 m_window_res = ivec2(0);
bool m_dynamic_res = false;
float m_dynamic_res_target_fps = 20.0f;
int m_fixed_res_factor = 8;
float m_scale = 1.0;
float m_aperture_size = 0.0f;
vec2 m_relative_focal_length = vec2(1.0f);
uint32_t m_fov_axis = 1;
float m_zoom = 1.f; // 2d zoom factor (for insets?)
vec2 m_screen_center = vec2(0.5f); // center of 2d zoom
float m_ndc_znear = 1.0f / 32.0f;
float m_ndc_zfar = 128.0f;
mat4x3 m_camera = mat4x3::identity();
mat4x3 m_default_camera = transpose(mat3x4{1.0f, 0.0f, 0.0f, 0.5f, 0.0f, -1.0f, 0.0f, 0.5f, 0.0f, 0.0f, -1.0f, 0.5f});
mat4x3 m_smoothed_camera = mat4x3::identity();
size_t m_render_skip_due_to_lack_of_camera_movement_counter = 0;
bool m_fps_camera = false;
bool m_camera_smoothing = false;
bool m_autofocus = false;
vec3 m_autofocus_target = vec3(0.5f);
bool m_render_with_lens_distortion = false;
Lens m_render_lens = {};
CameraPath m_camera_path = {};
bool m_record_camera_path = false;
vec3 m_up_dir = {0.0f, 1.0f, 0.0f};
vec3 m_sun_dir = normalize(vec3(1.0f));
float m_bounding_radius = 1;
float m_exposure = 0.f;
ERenderMode m_render_mode = ERenderMode::Shade;
uint32_t m_seed = 1337;
#ifdef NGP_GUI
GLFWwindow* m_glfw_window = nullptr;
struct SecondWindow {
GLFWwindow* window = nullptr;
GLuint program = 0;
GLuint vao = 0, vbo = 0;
void draw(GLuint texture);
} m_second_window;
float m_drag_depth = 1.0f;
// The VAO will be empty, but we need a valid one for attribute-less rendering
GLuint m_blit_vao = 0;
GLuint m_blit_program = 0;
void init_opengl_shaders();
void blit_texture(
const Foveation& foveation,
GLint rgba_texture,
GLint rgba_filter_mode,
GLint depth_texture,
GLint framebuffer,
const ivec2& offset,
const ivec2& resolution
);
void create_second_window();
std::unique_ptr<OpenXRHMD> m_hmd;
OpenXRHMD::FrameInfoPtr m_vr_frame_info;
bool m_vr_use_depth_reproject = false;
bool m_vr_use_hidden_area_mask = false;
std::deque<View> m_reproject_src_views;
View m_reproject_pending_view;
int m_reproject_min_src_view_index = 0;
int m_reproject_max_src_view_index = 1;
int m_reproject_max_src_view_count = -1; // -1 indicates unlimited
uint32_t m_reproject_selected_src_view = 0;
bool m_reproject_freeze_src_views = false;
int m_reproject_n_views_to_cache = 1;
bool m_reproject_visualize_src_views = false;
float m_reproject_min_t = 0.1f;
float m_reproject_step_factor = 1.05f;
vec3 m_reproject_parallax = vec3(0.0f, 0.0f, 0.0f);
bool m_reproject_enable = false;
bool m_reproject_reuse_last_frame = true;
float m_reproject_lazy_render_ms = 100.0f;
float m_reproject_lazy_render_res_factor = 1.25f;
bool m_pm_enable = false;
EPmVizMode m_pm_viz_mode = EPmVizMode::Shade;
void set_n_views(size_t n_views);
// Callback invoked when a keyboard event is detected.
// If the callback returns `true`, the event is considered handled and the default behavior will not occur.
std::function<bool()> m_keyboard_event_callback;
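// Example wiring from application code (hypothetical; `my_app_handles_current_key` is an
// illustrative placeholder):
//
//   testbed.m_keyboard_event_callback = [&]() {
//       if (my_app_handles_current_key()) {
//           return true;   // handled: Testbed's default key handling is skipped
//       }
//       return false;      // not handled: fall through to the default behavior
//   };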
// Callback invoked when a file is dropped onto the window.
// If the callback returns `true`, the files are considered handled and the default behavior will not occur.
std::function<bool(const std::vector<std::string>&)> m_file_drop_callback;
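// Example wiring (hypothetical; `my_app_ingest` is an illustrative placeholder):
//
//   testbed.m_file_drop_callback = [&](const std::vector<std::string>& paths) {
//       for (const auto& p : paths) {
//           my_app_ingest(p);
//       }
//       return true;  // handled: Testbed's default drop handling is skipped
//   };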
std::shared_ptr<GLTexture> m_pip_render_texture;
std::vector<std::shared_ptr<GLTexture>> m_rgba_render_textures;
std::vector<std::shared_ptr<GLTexture>> m_depth_render_textures;
#endif
std::shared_ptr<CudaRenderBuffer> m_pip_render_buffer;
SharedQueue<std::unique_ptr<ICallable>> m_task_queue;
void redraw_gui_next_frame() { m_gui_redraw = true; }
bool m_gui_redraw = true;
enum EDataType {
Float,
Half,
};
struct VolPayload {
vec3 dir;
vec4 col;
uint32_t pixidx;
};
float m_camera_velocity = 1.0f;
EColorSpace m_color_space = EColorSpace::Linear;
ETonemapCurve m_tonemap_curve = ETonemapCurve::Identity;
bool m_dlss = false;
std::shared_ptr<IDlssProvider> m_dlss_provider;
float m_dlss_sharpening = 0.0f;
// 3D stuff
float m_render_near_distance = 0.0f;
float m_slice_plane_z = 0.0f;
bool m_floor_enable = false;
inline float get_floor_y() const { return m_floor_enable ? m_aabb.min.y + 0.001f : -10000.f; }
BoundingBox m_raw_aabb;
BoundingBox m_aabb = {vec3(0.0f), vec3(1.0f)};
BoundingBox m_render_aabb = {vec3(0.0f), vec3(1.0f)};
mat3 m_render_aabb_to_local = mat3::identity();
// Rendering/UI bookkeeping
Ema<float> m_render_ms = {EEmaType::Time, 100};
// The frame contains everything, i.e. rendering + GUI and buffer swapping
Ema<float> m_frame_ms = {EEmaType::Time, 100};
std::chrono::time_point<std::chrono::steady_clock> m_last_frame_time_point;
std::chrono::time_point<std::chrono::steady_clock> m_last_gui_draw_time_point;
vec4 m_background_color = {0.0f, 0.0f, 0.0f, 1.0f};
bool m_vsync = true;
bool m_render_transparency_as_checkerboard = false;
// Visualization of neuron activations
int m_visualized_dimension = -1;
int m_visualized_layer = 0;
std::vector<View> m_views;
ivec2 m_n_views = {1, 1};
float m_picture_in_picture_res = 0.f; // if non zero, requests a small second picture :)
enum class ImGuiMode : uint32_t {
Enabled,
FpsOverlay,
Disabled,
// Don't set the below
NumModes,
};
struct ImGuiVars {
static const uint32_t MAX_PATH_LEN = 1024;
ImGuiMode mode = ImGuiMode::Enabled; // tab to cycle
char cam_path_path[MAX_PATH_LEN] = "cam.json";
char video_path[MAX_PATH_LEN] = "video.mp4";
char cam_export_path[MAX_PATH_LEN] = "cam_export.json";
void* overlay_font = nullptr;
} m_imgui;
fs::path m_root_dir = "";
bool m_visualize_unit_cube = false;
bool m_edit_render_aabb = false;
bool m_edit_world_transform = true;
bool m_snap_to_pixel_centers = false;
vec3 m_parallax_shift = {0.0f, 0.0f, 0.0f}; // to shift the viewer's origin by some amount in camera space
StreamAndEvent m_stream;
class CudaDevice {
public:
struct Data {
std::shared_ptr<Buffer2D<uint8_t>> hidden_area_mask;
};
CudaDevice(int id, bool is_primary);
CudaDevice(const CudaDevice&) = delete;
CudaDevice& operator=(const CudaDevice&) = delete;
CudaDevice(CudaDevice&&) = default;
CudaDevice& operator=(CudaDevice&&) = default;
ScopeGuard device_guard();
int id() const { return m_id; }
bool is_primary() const { return m_is_primary; }
std::string name() const { return cuda_device_name(m_id); }
int compute_capability() const { return cuda_compute_capability(m_id); }
cudaStream_t stream() const { return m_stream->get(); }
void wait_for(cudaStream_t stream) const {
CUDA_CHECK_THROW(cudaEventRecord(m_primary_device_event.event, stream));
m_stream->wait_for(m_primary_device_event.event);
}
void signal(cudaStream_t stream) const { m_stream->signal(stream); }
const CudaRenderBufferView& render_buffer_view() const { return m_render_buffer_view; }
void set_render_buffer_view(const CudaRenderBufferView& view) { m_render_buffer_view = view; }
Data& data() const { return *m_data; }
bool dirty() const { return m_dirty; }
void set_dirty(bool value) { m_dirty = value; }
void clear() {
m_data = std::make_unique<Data>();
m_render_buffer_view = {};
set_dirty(true);
}
template <class F> auto enqueue_task(F&& f) -> std::future<std::result_of_t<F()>> {
if (is_primary()) {
return std::async(std::launch::deferred, std::forward<F>(f));
} else {
return m_render_worker->enqueue_task(std::forward<F>(f));
}
}
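// Usage sketch (hypothetical): enqueue device-local work and wait on the returned future.
// On the primary device the task runs deferred (when the future is waited on); on
// auxiliary devices it runs on the device's render worker thread.
//
//   auto fut = device.enqueue_task([&device]() {
//       auto guard = device.device_guard();  // make this CUDA device current
//       do_render_work(device);              // illustrative placeholder
//   });
//   fut.get();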
private:
int m_id;
bool m_is_primary;
std::unique_ptr<StreamAndEvent> m_stream;
struct Event {
Event() { CUDA_CHECK_THROW(cudaEventCreate(&event)); }
~Event() { cudaEventDestroy(event); }
Event(const Event&) = delete;
Event& operator=(const Event&) = delete;
Event(Event&& other) { *this = std::move(other); }
Event& operator=(Event&& other) {
std::swap(event, other.event);
return *this;
}
cudaEvent_t event = {};
};
Event m_primary_device_event;
std::unique_ptr<Data> m_data;
CudaRenderBufferView m_render_buffer_view = {};
bool m_dirty = true;
std::unique_ptr<ThreadPool> m_render_worker;
};
void sync_device(CudaRenderBuffer& render_buffer, CudaDevice& device);
ScopeGuard use_device(cudaStream_t stream, CudaRenderBuffer& render_buffer, CudaDevice& device);
void set_all_devices_dirty();
std::vector<CudaDevice> m_devices;
CudaDevice& primary_device() { return m_devices.front(); }
ThreadPool m_thread_pool;
std::vector<std::future<void>> m_render_futures;
bool m_use_aux_devices = false;
bool m_foveated_rendering = false;
bool m_dynamic_foveated_rendering = true;
float m_foveated_rendering_full_res_diameter = 0.55f;
float m_foveated_rendering_scaling = 1.0f;
float m_foveated_rendering_max_scaling = 2.0f;
bool m_foveated_rendering_visualize = false;
default_rng_t m_rng;
CudaRenderBuffer m_windowless_render_surface{std::make_shared<CudaSurface2D>()};
// ---------- Gen3C stuff
/**
* Common signature for Gen3C-related UI callback functions, to be implemented
* in Python.
*
* Inputs:
* name: name of the UI event (e.g. name of the button pressed).
*
* Returns: bool, whether the operation was successful.
*/
using gen3c_cb_t = std::function<bool(const std::string&)>;
gen3c_cb_t m_gen3c_cb;
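// Hypothetical example (normally assigned from Python via the bindings; the event name
// below is illustrative only):
//
//   testbed.m_gen3c_cb = [](const std::string& name) {
//       return name == "my_button";  // report success only for this UI event
//   };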
// Info string to be displayed in the Gen3C UI window.
std::string m_gen3c_info;
// Path to an image or directory to use to seed the generative model.
// The specific format is guessed based on what the path points to.
std::string m_gen3c_seed_path;
// Whether to automatically launch new inference requests.
bool m_gen3c_auto_inference = false;
EGen3cCameraSource m_gen3c_camera_source = EGen3cCameraSource::Authored;
// Fake translation speed in scene unit / frame.
vec3 m_gen3c_translation_speed = {0.05f, 0.f, 0.f};
// Fake rotation speed around (x, y, z) in radians / frame.
vec3 m_gen3c_rotation_speed = {0.f, 0.05f, 0.f};
// Info string describing the current inference request, displayed in the Gen3C UI.
std::string m_gen3c_inference_info = "";
// Progress of seeding-related things (scale 0..1). Set to a negative value to hide the progress bar.
float m_gen3c_seeding_progress = -1.0f;
// Progress of inference-related things (scale 0..1). Set to a negative value to hide the progress bar.
float m_gen3c_inference_progress = -1.0f;
// Saving Gen3C inference outputs
bool m_gen3c_save_frames = false;
// Whether or not to display generated frames in the UI.
// Skipping display saves time: the result video from the server does not need to be
// decompressed, and depth prediction can be skipped for most frames.
bool m_gen3c_display_frames = false;
std::string m_gen3c_output_dir = "";
// When rendering with Gen3C, whether to include the rendered cache in the generated video (for debugging / visualization)
bool m_gen3c_show_cache_renderings = false;
bool m_gen3c_inference_is_connected = false;
// Whether to render the camera path via the Gen3C inference server (photoreal video) rather than from the local point cloud.
bool m_gen3c_render_with_gen3c = true;
};
} // namespace ngp