/* * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** @file testbed.h * @author Thomas Müller & Alex Evans, NVIDIA */ #pragma once #include #include #include #include #include #include #include #include #ifdef NGP_GUI # include #endif #include #include #include #ifdef NGP_PYTHON # include # include #endif #include #include struct GLFWwindow; namespace ngp { struct Triangle; class GLTexture; struct ViewIdx { i16vec2 px; uint32_t view; }; class Testbed { public: Testbed(ETestbedMode mode = ETestbedMode::None); ~Testbed(); bool clear_tmp_dir(); void update_imgui_paths(); void set_mode(ETestbedMode mode); using distance_fun_t = std::function; using normals_fun_t = std::function; struct LevelStats { float mean() { return count ? (x / (float)count) : 0.f; } float variance() { return count ? (xsquared - (x * x) / (float)count) / (float)count : 0.f; } float sigma() { return sqrtf(variance()); } float fraczero() { return (float)numzero / float(count + numzero); } float fracquant() { return (float)numquant / float(count); } float x; float xsquared; float min; float max; int numzero; int numquant; int count; }; class CudaDevice; struct View { std::shared_ptr render_buffer = nullptr; ivec2 full_resolution = {1, 1}; int visualized_dimension = 0; mat4x3 camera0 = mat4x3::identity(); mat4x3 camera1 = mat4x3::identity(); mat4x3 prev_camera = mat4x3::identity(); Foveation foveation; Foveation prev_foveation; vec2 relative_focal_length; vec2 screen_center; Lens lens; CudaDevice* device = nullptr; GPUImage index_field; GPUImage hole_mask; GPUImage depth_buffer; vec2 fov() const { return relative_focal_length_to_fov(relative_focal_length); } uint32_t uid = 0; }; void render_by_reprojection(cudaStream_t stream, std::vector& views); void render_frame( cudaStream_t stream, const mat4x3& camera_matrix0, const mat4x3& camera_matrix1, const mat4x3& prev_camera_matrix, const vec2& screen_center, const vec2& relative_focal_length, const Foveation& foveation, const Foveation& prev_foveation, const Lens& lens, int visualized_dimension, CudaRenderBuffer& render_buffer, bool to_srgb = true, CudaDevice* device = nullptr ); void render_frame_main( CudaDevice& device, const mat4x3& camera_matrix0, const mat4x3& camera_matrix1, const vec2& screen_center, const vec2& relative_focal_length, const Foveation& foveation, const Lens& lens, int visualized_dimension ); void render_frame_epilogue( cudaStream_t stream, const mat4x3& camera_matrix0, const mat4x3& prev_camera_matrix, const vec2& screen_center, const vec2& relative_focal_length, const Foveation& foveation, const Foveation& prev_foveation, const Lens& lens, CudaRenderBuffer& render_buffer, bool to_srgb = true ); void init_camera_path_from_reproject_src_cameras(); void visualize_reproject_src_cameras(ImDrawList* list, const mat4& world2proj); void clear_src_views(); void reset_accumulation(bool due_to_camera_movement = false, bool immediate_redraw = true, bool reset_pip = false); void redraw_next_frame() { m_render_skip_due_to_lack_of_camera_movement_counter = 0; } bool reprojection_available() { return m_dlss; } void load_mesh(const fs::path& data_path); void set_exposure(float exposure) { m_exposure = exposure; } void translate_camera(const vec3& rel, const mat3& rot, bool allow_up_down = true); mat3 rotation_from_angles(const vec2& angles) const; void mouse_drag(); void mouse_wheel(); void load_file(const fs::path& path); vec3 look_at() const; void set_look_at(const vec3& pos); float scale() const { return m_scale; } void set_scale(float scale); vec3 view_pos() const { return m_camera[3]; } vec3 view_dir() const { return m_camera[2]; } vec3 view_up() const { return m_camera[1]; } vec3 view_side() const { return m_camera[0]; } void set_view_dir(const vec3& dir); void reset_camera(); bool keyboard_event(); void update_density_grid_mean_and_bitfield(cudaStream_t stream); void mark_density_grid_in_sphere_empty(const vec3& pos, float radius, cudaStream_t stream); void prepare_next_camera_path_frame(); void overlay_fps(); void imgui(); vec2 calc_focal_length(const ivec2& resolution, const vec2& relative_focal_length, int fov_axis, float zoom) const; vec2 render_screen_center(const vec2& screen_center) const; void optimise_mesh_step(uint32_t N_STEPS); void compute_mesh_vertex_colors(); float get_depth_from_renderbuffer(const CudaRenderBuffer& render_buffer, const vec2& uv); vec3 get_3d_pos_from_pixel(const CudaRenderBuffer& render_buffer, const vec2& focus_pixel); void autofocus(); #ifdef NGP_PYTHON std::pair, pybind11::array_t> render_to_cpu(int width, int height, int spp, bool linear, float start_t, float end_t, float fps, float shutter_fraction); pybind11::array_t render_to_cpu_rgba(int width, int height, int spp, bool linear, float start_t, float end_t, float fps, float shutter_fraction); pybind11::array_t view(bool linear, size_t view) const; std::pair, pybind11::array_t> reproject(const mat4x3& src, const pybind11::array_t& src_img, const pybind11::array_t& src_depth, const mat4x3& dst); uint32_t add_src_view( mat4x3 camera_to_world, float fx, float fy, float cx, float cy, Lens lens, pybind11::array_t img, pybind11::array_t depth, float timestamp, bool is_srgb = false ); pybind11::array_t src_view_ids() const; # ifdef NGP_GUI pybind11::array_t screenshot(bool linear, bool front_buffer) const; # endif #endif mat4x3 view_camera(size_t view) const; void draw_visualizations(ImDrawList* list, const mat4x3& camera_matrix); void reproject_views(const std::vector src, View& dst); void render(bool skip_rendering); void init_window(int resw, int resh, bool hidden = false, bool second_window = false); void destroy_window(); void init_vr(); void update_vr_performance_settings(); void apply_camera_smoothing(float elapsed_ms); bool begin_frame(); void handle_user_input(); vec3 vr_to_world(const vec3& pos) const; void begin_vr_frame_and_handle_vr_input(); void draw_gui(); bool frame(); bool want_repl(); void load_image(const fs::path& data_path); void load_exr_image(const fs::path& data_path); void load_stbi_image(const fs::path& data_path); void load_binary_image(const fs::path& data_path); float fov() const; void set_fov(float val); vec2 fov_xy() const; void set_fov_xy(const vec2& val); CameraKeyframe copy_camera_to_keyframe() const; void set_camera_from_keyframe(const CameraKeyframe& k); void set_camera_from_time(float t); void load_camera_path(const fs::path& path); bool loop_animation(); void set_loop_animation(bool value); fs::path root_dir(); void set_root_dir(const fs::path& dir); bool m_want_repl = false; bool m_render_window = false; bool m_gather_histograms = false; bool m_render_ground_truth = false; EGroundTruthRenderMode m_ground_truth_render_mode = EGroundTruthRenderMode::Shade; float m_ground_truth_alpha = 1.0f; bool m_render = true; int m_max_spp = 0; ETestbedMode m_testbed_mode = ETestbedMode::None; // Rendering stuff ivec2 m_window_res = ivec2(0); bool m_dynamic_res = false; float m_dynamic_res_target_fps = 20.0f; int m_fixed_res_factor = 8; float m_scale = 1.0; float m_aperture_size = 0.0f; vec2 m_relative_focal_length = vec2(1.0f); uint32_t m_fov_axis = 1; float m_zoom = 1.f; // 2d zoom factor (for insets?) vec2 m_screen_center = vec2(0.5f); // center of 2d zoom float m_ndc_znear = 1.0f / 32.0f; float m_ndc_zfar = 128.0f; mat4x3 m_camera = mat4x3::identity(); mat4x3 m_default_camera = transpose(mat3x4{1.0f, 0.0f, 0.0f, 0.5f, 0.0f, -1.0f, 0.0f, 0.5f, 0.0f, 0.0f, -1.0f, 0.5f}); mat4x3 m_smoothed_camera = mat4x3::identity(); size_t m_render_skip_due_to_lack_of_camera_movement_counter = 0; bool m_fps_camera = false; bool m_camera_smoothing = false; bool m_autofocus = false; vec3 m_autofocus_target = vec3(0.5f); bool m_render_with_lens_distortion = false; Lens m_render_lens = {}; CameraPath m_camera_path = {}; bool m_record_camera_path = false; vec3 m_up_dir = {0.0f, 1.0f, 0.0f}; vec3 m_sun_dir = normalize(vec3(1.0f)); float m_bounding_radius = 1; float m_exposure = 0.f; ERenderMode m_render_mode = ERenderMode::Shade; uint32_t m_seed = 1337; #ifdef NGP_GUI GLFWwindow* m_glfw_window = nullptr; struct SecondWindow { GLFWwindow* window = nullptr; GLuint program = 0; GLuint vao = 0, vbo = 0; void draw(GLuint texture); } m_second_window; float m_drag_depth = 1.0f; // The VAO will be empty, but we need a valid one for attribute-less rendering GLuint m_blit_vao = 0; GLuint m_blit_program = 0; void init_opengl_shaders(); void blit_texture( const Foveation& foveation, GLint rgba_texture, GLint rgba_filter_mode, GLint depth_texture, GLint framebuffer, const ivec2& offset, const ivec2& resolution ); void create_second_window(); std::unique_ptr m_hmd; OpenXRHMD::FrameInfoPtr m_vr_frame_info; bool m_vr_use_depth_reproject = false; bool m_vr_use_hidden_area_mask = false; std::deque m_reproject_src_views; View m_reproject_pending_view; int m_reproject_min_src_view_index = 0; int m_reproject_max_src_view_index = 1; int m_reproject_max_src_view_count = -1; // -1 indicates unlimited uint32_t m_reproject_selected_src_view = 0; bool m_reproject_freeze_src_views = false; int m_reproject_n_views_to_cache = 1; bool m_reproject_visualize_src_views = false; float m_reproject_min_t = 0.1f; float m_reproject_step_factor = 1.05f; vec3 m_reproject_parallax = vec3(0.0f, 0.0f, 0.0f); bool m_reproject_enable = false; bool m_reproject_reuse_last_frame = true; float m_reproject_lazy_render_ms = 100.0f; float m_reproject_lazy_render_res_factor = 1.25f; bool m_pm_enable = false; EPmVizMode m_pm_viz_mode = EPmVizMode::Shade; void set_n_views(size_t n_views); // Callback invoked when a keyboard event is detected. // If the callback returns `true`, the event is considered handled and the default behavior will not occur. std::function m_keyboard_event_callback; // Callback invoked when a file is dropped onto the window. // If the callback returns `true`, the files are considered handled and the default behavior will not occur. std::function&)> m_file_drop_callback; std::shared_ptr m_pip_render_texture; std::vector> m_rgba_render_textures; std::vector> m_depth_render_textures; #endif std::shared_ptr m_pip_render_buffer; SharedQueue> m_task_queue; void redraw_gui_next_frame() { m_gui_redraw = true; } bool m_gui_redraw = true; enum EDataType { Float, Half, }; struct VolPayload { vec3 dir; vec4 col; uint32_t pixidx; }; float m_camera_velocity = 1.0f; EColorSpace m_color_space = EColorSpace::Linear; ETonemapCurve m_tonemap_curve = ETonemapCurve::Identity; bool m_dlss = false; std::shared_ptr m_dlss_provider; float m_dlss_sharpening = 0.0f; // 3D stuff float m_render_near_distance = 0.0f; float m_slice_plane_z = 0.0f; bool m_floor_enable = false; inline float get_floor_y() const { return m_floor_enable ? m_aabb.min.y + 0.001f : -10000.f; } BoundingBox m_raw_aabb; BoundingBox m_aabb = {vec3(0.0f), vec3(1.0f)}; BoundingBox m_render_aabb = {vec3(0.0f), vec3(1.0f)}; mat3 m_render_aabb_to_local = mat3::identity(); // Rendering/UI bookkeeping Ema m_render_ms = {EEmaType::Time, 100}; // The frame contains everything, i.e. rendering + GUI and buffer swapping Ema m_frame_ms = {EEmaType::Time, 100}; std::chrono::time_point m_last_frame_time_point; std::chrono::time_point m_last_gui_draw_time_point; vec4 m_background_color = {0.0f, 0.0f, 0.0f, 1.0f}; bool m_vsync = true; bool m_render_transparency_as_checkerboard = false; // Visualization of neuron activations int m_visualized_dimension = -1; int m_visualized_layer = 0; std::vector m_views; ivec2 m_n_views = {1, 1}; float m_picture_in_picture_res = 0.f; // if non zero, requests a small second picture :) enum class ImGuiMode : uint32_t { Enabled, FpsOverlay, Disabled, // Don't set the below NumModes, }; struct ImGuiVars { static const uint32_t MAX_PATH_LEN = 1024; ImGuiMode mode = ImGuiMode::Enabled; // tab to cycle char cam_path_path[MAX_PATH_LEN] = "cam.json"; char video_path[MAX_PATH_LEN] = "video.mp4"; char cam_export_path[MAX_PATH_LEN] = "cam_export.json"; void* overlay_font = nullptr; } m_imgui; fs::path m_root_dir = ""; bool m_visualize_unit_cube = false; bool m_edit_render_aabb = false; bool m_edit_world_transform = true; bool m_snap_to_pixel_centers = false; vec3 m_parallax_shift = {0.0f, 0.0f, 0.0f}; // to shift the viewer's origin by some amount in camera space StreamAndEvent m_stream; class CudaDevice { public: struct Data { std::shared_ptr> hidden_area_mask; }; CudaDevice(int id, bool is_primary); CudaDevice(const CudaDevice&) = delete; CudaDevice& operator=(const CudaDevice&) = delete; CudaDevice(CudaDevice&&) = default; CudaDevice& operator=(CudaDevice&&) = default; ScopeGuard device_guard(); int id() const { return m_id; } bool is_primary() const { return m_is_primary; } std::string name() const { return cuda_device_name(m_id); } int compute_capability() const { return cuda_compute_capability(m_id); } cudaStream_t stream() const { return m_stream->get(); } void wait_for(cudaStream_t stream) const { CUDA_CHECK_THROW(cudaEventRecord(m_primary_device_event.event, stream)); m_stream->wait_for(m_primary_device_event.event); } void signal(cudaStream_t stream) const { m_stream->signal(stream); } const CudaRenderBufferView& render_buffer_view() const { return m_render_buffer_view; } void set_render_buffer_view(const CudaRenderBufferView& view) { m_render_buffer_view = view; } Data& data() const { return *m_data; } bool dirty() const { return m_dirty; } void set_dirty(bool value) { m_dirty = value; } void clear() { m_data = std::make_unique(); m_render_buffer_view = {}; set_dirty(true); } template auto enqueue_task(F&& f) -> std::future> { if (is_primary()) { return std::async(std::launch::deferred, std::forward(f)); } else { return m_render_worker->enqueue_task(std::forward(f)); } } private: int m_id; bool m_is_primary; std::unique_ptr m_stream; struct Event { Event() { CUDA_CHECK_THROW(cudaEventCreate(&event)); } ~Event() { cudaEventDestroy(event); } Event(const Event&) = delete; Event& operator=(const Event&) = delete; Event(Event&& other) { *this = std::move(other); } Event& operator=(Event&& other) { std::swap(event, other.event); return *this; } cudaEvent_t event = {}; }; Event m_primary_device_event; std::unique_ptr m_data; CudaRenderBufferView m_render_buffer_view = {}; bool m_dirty = true; std::unique_ptr m_render_worker; }; void sync_device(CudaRenderBuffer& render_buffer, CudaDevice& device); ScopeGuard use_device(cudaStream_t stream, CudaRenderBuffer& render_buffer, CudaDevice& device); void set_all_devices_dirty(); std::vector m_devices; CudaDevice& primary_device() { return m_devices.front(); } ThreadPool m_thread_pool; std::vector> m_render_futures; bool m_use_aux_devices = false; bool m_foveated_rendering = false; bool m_dynamic_foveated_rendering = true; float m_foveated_rendering_full_res_diameter = 0.55f; float m_foveated_rendering_scaling = 1.0f; float m_foveated_rendering_max_scaling = 2.0f; bool m_foveated_rendering_visualize = false; default_rng_t m_rng; CudaRenderBuffer m_windowless_render_surface{std::make_shared()}; // ---------- Gen3C stuff /** * Common signature for Gen3C-related UI callback functions, to be implemented * in Python. * * Inputs: * name: name of the UI event (e.g. name of the button pressed). * * Returns: bool, whether the operation was successful. */ using gen3c_cb_t = std::function; gen3c_cb_t m_gen3c_cb; // Info string to be displayed in the Gen3C UI window. std::string m_gen3c_info; // Path to an image or directory to use to seed the generative model. // The specific format is guessed based on what the path points to. std::string m_gen3c_seed_path; // Whether to automatically launch new inference requests. bool m_gen3c_auto_inference = false; EGen3cCameraSource m_gen3c_camera_source = EGen3cCameraSource::Authored; // Fake translation speed in scene unit / frame. vec3 m_gen3c_translation_speed = {0.05f, 0.f, 0.f}; // Fake rotation speed around (x, y, z) in radians / frame. vec3 m_gen3c_rotation_speed = {0.f, 0.05f, 0.f}; // Number of frames to request for each inference request. std::string m_gen3c_inference_info = ""; // Progress of seeding-related things (scale 0..1). Set to a negative value to hide the progress bar. float m_gen3c_seeding_progress = -1.0f; // Progress of inference-related things (scale 0..1). Set to a negative value to hide the progress bar. float m_gen3c_inference_progress = -1.0f; // Saving Gen3C inference outputs bool m_gen3c_save_frames = false; // Whether or not to display generated frames in the UI. // No display means that we can save some time by not de-compressing // the result video from the server, and even skip depth prediction for most frames. bool m_gen3c_display_frames = false; std::string m_gen3c_output_dir = ""; // When rendering with Gen3C, whether to include the rendered cache in the generated video (for debugging / visualization) bool m_gen3c_show_cache_renderings = false; bool m_gen3c_inference_is_connected = false; // Either we render the camera path from the local pointcloud or we use the inference server to get a photoreal video bool m_gen3c_render_with_gen3c = true; }; } // namespace ngp