/*
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** @file   testbed.h
 *  @author Thomas Müller & Alex Evans, NVIDIA
 */

#pragma once

#include <neural-graphics-primitives/adam_optimizer.h>
#include <neural-graphics-primitives/bounding_box.cuh>
#include <neural-graphics-primitives/camera_path.h>
#include <neural-graphics-primitives/common_host.h>
#include <neural-graphics-primitives/discrete_distribution.h>
#include <neural-graphics-primitives/render_buffer.h>
#include <neural-graphics-primitives/shared_queue.h>
#include <neural-graphics-primitives/thread_pool.h>

#ifdef NGP_GUI
#	include <neural-graphics-primitives/openxr_hmd.h>
#endif

#include <tiny-cuda-nn/multi_stream.h>
#include <tiny-cuda-nn/random.h>

#include <json/json.hpp>

#ifdef NGP_PYTHON
#	include <pybind11/numpy.h>
#	include <pybind11/pybind11.h>
#endif

#include <deque>
#include <thread>

struct GLFWwindow;

namespace ngp {

struct Triangle;
class GLTexture;

struct ViewIdx {
	i16vec2 px;
	uint32_t view;
};

class Testbed {
public:
	Testbed(ETestbedMode mode = ETestbedMode::None);
	~Testbed();

	bool clear_tmp_dir();
	void update_imgui_paths();

	void set_mode(ETestbedMode mode);

	using distance_fun_t = std::function<void(uint32_t, const vec3*, float*, cudaStream_t)>;
	using normals_fun_t = std::function<void(uint32_t, const vec3*, vec3*, cudaStream_t)>;

	struct LevelStats {
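		// Running sums over the gathered values: `x` accumulates Σv and `xsquared` accumulates Σv²,
		// so mean() is Σv / count and variance() is E[v²] - E[v]² (the population variance).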
		float mean() { return count ? (x / (float)count) : 0.f; }
		float variance() { return count ? (xsquared - (x * x) / (float)count) / (float)count : 0.f; }
		float sigma() { return sqrtf(variance()); }
		float fraczero() { return (count + numzero) ? (float)numzero / (float)(count + numzero) : 0.f; }
		float fracquant() { return count ? (float)numquant / (float)count : 0.f; }

		float x;
		float xsquared;
		float min;
		float max;
		int numzero;
		int numquant;
		int count;
	};

	class CudaDevice;

	struct View {
		std::shared_ptr<CudaRenderBuffer> render_buffer = nullptr;
		ivec2 full_resolution = {1, 1};
		int visualized_dimension = 0;

		mat4x3 camera0 = mat4x3::identity();
		mat4x3 camera1 = mat4x3::identity();
		mat4x3 prev_camera = mat4x3::identity();

		Foveation foveation;
		Foveation prev_foveation;

		vec2 relative_focal_length;
		vec2 screen_center;

		Lens lens;

		CudaDevice* device = nullptr;

		// Per-pixel reprojection data (assumed from the types/names): source pixel/view indices,
		// a mask of disoccluded (hole) pixels, and the view's depth.
		GPUImage<ViewIdx> index_field;
		GPUImage<uint8_t> hole_mask;
		GPUImage<float> depth_buffer;

		vec2 fov() const { return relative_focal_length_to_fov(relative_focal_length); }

		uint32_t uid = 0;
	};

	void render_by_reprojection(cudaStream_t stream, std::vector<View>& views);

	// Renders one frame into `render_buffer`; render_frame_main() and render_frame_epilogue()
	// below expose the two stages of this process.
	void render_frame(
		cudaStream_t stream,
		const mat4x3& camera_matrix0,
		const mat4x3& camera_matrix1,
		const mat4x3& prev_camera_matrix,
		const vec2& screen_center,
		const vec2& relative_focal_length,
		const Foveation& foveation,
		const Foveation& prev_foveation,
		const Lens& lens,
		int visualized_dimension,
		CudaRenderBuffer& render_buffer,
		bool to_srgb = true,
		CudaDevice* device = nullptr
	);
	void render_frame_main(
		CudaDevice& device,
		const mat4x3& camera_matrix0,
		const mat4x3& camera_matrix1,
		const vec2& screen_center,
		const vec2& relative_focal_length,
		const Foveation& foveation,
		const Lens& lens,
		int visualized_dimension
	);
	void render_frame_epilogue(
		cudaStream_t stream,
		const mat4x3& camera_matrix0,
		const mat4x3& prev_camera_matrix,
		const vec2& screen_center,
		const vec2& relative_focal_length,
		const Foveation& foveation,
		const Foveation& prev_foveation,
		const Lens& lens,
		CudaRenderBuffer& render_buffer,
		bool to_srgb = true
	);

	void init_camera_path_from_reproject_src_cameras();
	void visualize_reproject_src_cameras(ImDrawList* list, const mat4& world2proj);
	void clear_src_views();

	void reset_accumulation(bool due_to_camera_movement = false, bool immediate_redraw = true, bool reset_pip = false);
	void redraw_next_frame() { m_render_skip_due_to_lack_of_camera_movement_counter = 0; }
	bool reprojection_available() { return m_dlss; }
	void load_mesh(const fs::path& data_path);
	void set_exposure(float exposure) { m_exposure = exposure; }
	void translate_camera(const vec3& rel, const mat3& rot, bool allow_up_down = true);
	mat3 rotation_from_angles(const vec2& angles) const;
	void mouse_drag();
	void mouse_wheel();
	void load_file(const fs::path& path);
	vec3 look_at() const;
	void set_look_at(const vec3& pos);
	float scale() const { return m_scale; }
	void set_scale(float scale);
	vec3 view_pos() const { return m_camera[3]; }
	vec3 view_dir() const { return m_camera[2]; }
	vec3 view_up() const { return m_camera[1]; }
	vec3 view_side() const { return m_camera[0]; }
	void set_view_dir(const vec3& dir);
	void reset_camera();
	bool keyboard_event();
	void update_density_grid_mean_and_bitfield(cudaStream_t stream);
	void mark_density_grid_in_sphere_empty(const vec3& pos, float radius, cudaStream_t stream);

	void prepare_next_camera_path_frame();
	void overlay_fps();
	void imgui();
	vec2 calc_focal_length(const ivec2& resolution, const vec2& relative_focal_length, int fov_axis, float zoom) const;
	vec2 render_screen_center(const vec2& screen_center) const;
	void optimise_mesh_step(uint32_t N_STEPS);
	void compute_mesh_vertex_colors();

	float get_depth_from_renderbuffer(const CudaRenderBuffer& render_buffer, const vec2& uv);
	vec3 get_3d_pos_from_pixel(const CudaRenderBuffer& render_buffer, const vec2& focus_pixel);
	void autofocus();

#ifdef NGP_PYTHON
	std::pair<pybind11::array_t<float>, pybind11::array_t<float>>
		render_to_cpu(int width, int height, int spp, bool linear, float start_t, float end_t, float fps, float shutter_fraction);
	pybind11::array_t<float>
		render_to_cpu_rgba(int width, int height, int spp, bool linear, float start_t, float end_t, float fps, float shutter_fraction);
	pybind11::array_t<float> view(bool linear, size_t view) const;
	std::pair<pybind11::array_t<float>, pybind11::array_t<uint32_t>>
		reproject(const mat4x3& src, const pybind11::array_t<float>& src_img, const pybind11::array_t<float>& src_depth, const mat4x3& dst);
	uint32_t add_src_view(
		mat4x3 camera_to_world,
		float fx,
		float fy,
		float cx,
		float cy,
		Lens lens,
		pybind11::array_t<float> img,
		pybind11::array_t<float> depth,
		float timestamp,
		bool is_srgb = false
	);
	pybind11::array_t<uint32_t> src_view_ids() const;
#	ifdef NGP_GUI
	pybind11::array_t<float> screenshot(bool linear, bool front_buffer) const;
#	endif
#endif

	mat4x3 view_camera(size_t view) const;

	void draw_visualizations(ImDrawList* list, const mat4x3& camera_matrix);
	void reproject_views(const std::vector<const View*> src, View& dst);
	void render(bool skip_rendering);
	void init_window(int resw, int resh, bool hidden = false, bool second_window = false);
	void destroy_window();
	void init_vr();
	void update_vr_performance_settings();
	void apply_camera_smoothing(float elapsed_ms);
	bool begin_frame();
	void handle_user_input();
	vec3 vr_to_world(const vec3& pos) const;
	void begin_vr_frame_and_handle_vr_input();
	void draw_gui();
	bool frame();
	bool want_repl();
	void load_image(const fs::path& data_path);
	void load_exr_image(const fs::path& data_path);
	void load_stbi_image(const fs::path& data_path);
	void load_binary_image(const fs::path& data_path);
	float fov() const;
	void set_fov(float val);
	vec2 fov_xy() const;
	void set_fov_xy(const vec2& val);
	CameraKeyframe copy_camera_to_keyframe() const;
	void set_camera_from_keyframe(const CameraKeyframe& k);
	void set_camera_from_time(float t);
	void load_camera_path(const fs::path& path);
	bool loop_animation();
	void set_loop_animation(bool value);

	fs::path root_dir();
	void set_root_dir(const fs::path& dir);

	bool m_want_repl = false;

	bool m_render_window = false;
	bool m_gather_histograms = false;

	bool m_render_ground_truth = false;
	EGroundTruthRenderMode m_ground_truth_render_mode = EGroundTruthRenderMode::Shade;
	float m_ground_truth_alpha = 1.0f;

	bool m_render = true;
	int m_max_spp = 0;
	ETestbedMode m_testbed_mode = ETestbedMode::None;

	// Rendering stuff
	ivec2 m_window_res = ivec2(0);
	bool m_dynamic_res = false;
	float m_dynamic_res_target_fps = 20.0f;
	int m_fixed_res_factor = 8;
	float m_scale = 1.0;
	float m_aperture_size = 0.0f;
	vec2 m_relative_focal_length = vec2(1.0f);
	uint32_t m_fov_axis = 1;
	float m_zoom = 1.f;                // 2D zoom factor applied around m_screen_center (e.g. for insets)
	vec2 m_screen_center = vec2(0.5f); // center of 2d zoom

	float m_ndc_znear = 1.0f / 32.0f;
	float m_ndc_zfar = 128.0f;

	mat4x3 m_camera = mat4x3::identity();
	mat4x3 m_default_camera = transpose(mat3x4{1.0f, 0.0f, 0.0f, 0.5f, 0.0f, -1.0f, 0.0f, 0.5f, 0.0f, 0.0f, -1.0f, 0.5f});
	mat4x3 m_smoothed_camera = mat4x3::identity();
	size_t m_render_skip_due_to_lack_of_camera_movement_counter = 0;

	bool m_fps_camera = false;
	bool m_camera_smoothing = false;
	bool m_autofocus = false;
	vec3 m_autofocus_target = vec3(0.5f);

	bool m_render_with_lens_distortion = false;
	Lens m_render_lens = {};

	CameraPath m_camera_path = {};
	bool m_record_camera_path = false;

	vec3 m_up_dir = {0.0f, 1.0f, 0.0f};
	vec3 m_sun_dir = normalize(vec3(1.0f));
	float m_bounding_radius = 1;
	float m_exposure = 0.f;

	ERenderMode m_render_mode = ERenderMode::Shade;

	uint32_t m_seed = 1337;

#ifdef NGP_GUI
	GLFWwindow* m_glfw_window = nullptr;
	struct SecondWindow {
		GLFWwindow* window = nullptr;
		GLuint program = 0;
		GLuint vao = 0, vbo = 0;
		void draw(GLuint texture);
	} m_second_window;

	float m_drag_depth = 1.0f;

	// The VAO will be empty, but we need a valid one for attribute-less rendering
	GLuint m_blit_vao = 0;
	GLuint m_blit_program = 0;

	void init_opengl_shaders();
	void blit_texture(
		const Foveation& foveation,
		GLint rgba_texture,
		GLint rgba_filter_mode,
		GLint depth_texture,
		GLint framebuffer,
		const ivec2& offset,
		const ivec2& resolution
	);

	void create_second_window();

	std::unique_ptr<OpenXRHMD> m_hmd;
	OpenXRHMD::FrameInfoPtr m_vr_frame_info;

	bool m_vr_use_depth_reproject = false;
	bool m_vr_use_hidden_area_mask = false;

	std::deque<View> m_reproject_src_views;
	View m_reproject_pending_view;

	int m_reproject_min_src_view_index = 0;
	int m_reproject_max_src_view_index = 1;
	int m_reproject_max_src_view_count = -1;  // -1 indicates unlimited
	uint32_t m_reproject_selected_src_view = 0;
	bool m_reproject_freeze_src_views = false;
	int m_reproject_n_views_to_cache = 1;
	bool m_reproject_visualize_src_views = false;

	float m_reproject_min_t = 0.1f;
	float m_reproject_step_factor = 1.05f;
	vec3 m_reproject_parallax = vec3(0.0f, 0.0f, 0.0f);
	bool m_reproject_enable = false;
	bool m_reproject_reuse_last_frame = true;

	float m_reproject_lazy_render_ms = 100.0f;
	float m_reproject_lazy_render_res_factor = 1.25f;

	bool m_pm_enable = false;
	EPmVizMode m_pm_viz_mode = EPmVizMode::Shade;

	void set_n_views(size_t n_views);

	// Callback invoked when a keyboard event is detected.
	// If the callback returns `true`, the event is considered handled and the default behavior will not occur.
	std::function<bool()> m_keyboard_event_callback;

	// Callback invoked when a file is dropped onto the window.
	// If the callback returns `true`, the files are considered handled and the default behavior will not occur.
	std::function<bool(const std::vector<std::string>&)> m_file_drop_callback;
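
	// Illustrative sketch (not part of the original header): both callbacks are typically
	// bound with lambdas, and returning true suppresses the default handling, e.g.
	//
	//   testbed.m_keyboard_event_callback = []() { return false; /* never consume keys */ };
	//   testbed.m_file_drop_callback = [](const std::vector<std::string>& paths) {
	//       return !paths.empty(); // pretend any dropped file was handled
	//   };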

	std::shared_ptr<GLTexture> m_pip_render_texture;
	std::vector<std::shared_ptr<GLTexture>> m_rgba_render_textures;
	std::vector<std::shared_ptr<GLTexture>> m_depth_render_textures;
#endif

	std::shared_ptr<CudaRenderBuffer> m_pip_render_buffer;

	SharedQueue<std::unique_ptr<ICallable>> m_task_queue;

	void redraw_gui_next_frame() { m_gui_redraw = true; }

	bool m_gui_redraw = true;

	enum EDataType {
		Float,
		Half,
	};

	struct VolPayload {
		vec3 dir;
		vec4 col;
		uint32_t pixidx;
	};

	float m_camera_velocity = 1.0f;
	EColorSpace m_color_space = EColorSpace::Linear;
	ETonemapCurve m_tonemap_curve = ETonemapCurve::Identity;
	bool m_dlss = false;
	std::shared_ptr<IDlssProvider> m_dlss_provider;
	float m_dlss_sharpening = 0.0f;

	// 3D stuff
	float m_render_near_distance = 0.0f;
	float m_slice_plane_z = 0.0f;
	bool m_floor_enable = false;
	inline float get_floor_y() const { return m_floor_enable ? m_aabb.min.y + 0.001f : -10000.f; }
	BoundingBox m_raw_aabb;
	BoundingBox m_aabb = {vec3(0.0f), vec3(1.0f)};
	BoundingBox m_render_aabb = {vec3(0.0f), vec3(1.0f)};
	mat3 m_render_aabb_to_local = mat3::identity();

	// Rendering/UI bookkeeping
	Ema<float> m_render_ms = {EEmaType::Time, 100};
	// The frame contains everything, i.e. rendering + GUI and buffer swapping
	Ema<float> m_frame_ms = {EEmaType::Time, 100};
	std::chrono::time_point<std::chrono::steady_clock> m_last_frame_time_point;
	std::chrono::time_point<std::chrono::steady_clock> m_last_gui_draw_time_point;
	vec4 m_background_color = {0.0f, 0.0f, 0.0f, 1.0f};

	bool m_vsync = true;
	bool m_render_transparency_as_checkerboard = false;

	// Visualization of neuron activations
	int m_visualized_dimension = -1;
	int m_visualized_layer = 0;

	std::vector<View> m_views;
	ivec2 m_n_views = {1, 1};

	float m_picture_in_picture_res = 0.f; // if non-zero, requests a small picture-in-picture render at this resolution

	enum class ImGuiMode : uint32_t {
		Enabled,
		FpsOverlay,
		Disabled,
		// Sentinel for cycling through modes; not a valid mode to set.
		NumModes,
	};

	struct ImGuiVars {
		static const uint32_t MAX_PATH_LEN = 1024;

		ImGuiMode mode = ImGuiMode::Enabled; // tab to cycle
		char cam_path_path[MAX_PATH_LEN] = "cam.json";
		char video_path[MAX_PATH_LEN] = "video.mp4";
		char cam_export_path[MAX_PATH_LEN] = "cam_export.json";

		void* overlay_font = nullptr;
	} m_imgui;

	fs::path m_root_dir = "";

	bool m_visualize_unit_cube = false;
	bool m_edit_render_aabb = false;
	bool m_edit_world_transform = true;

	bool m_snap_to_pixel_centers = false;

	vec3 m_parallax_shift = {0.0f, 0.0f, 0.0f}; // to shift the viewer's origin by some amount in camera space

	StreamAndEvent m_stream;

	class CudaDevice {
	public:
		struct Data {
			std::shared_ptr<Buffer2D<uint8_t>> hidden_area_mask;
		};

		CudaDevice(int id, bool is_primary);

		CudaDevice(const CudaDevice&) = delete;
		CudaDevice& operator=(const CudaDevice&) = delete;

		CudaDevice(CudaDevice&&) = default;
		CudaDevice& operator=(CudaDevice&&) = default;

		ScopeGuard device_guard();

		int id() const { return m_id; }

		bool is_primary() const { return m_is_primary; }

		std::string name() const { return cuda_device_name(m_id); }

		int compute_capability() const { return cuda_compute_capability(m_id); }

		cudaStream_t stream() const { return m_stream->get(); }

		// Records an event on `stream` and makes this device's own stream wait on it, so work
		// enqueued on this device does not start before prior work on `stream` has completed.
		void wait_for(cudaStream_t stream) const {
			CUDA_CHECK_THROW(cudaEventRecord(m_primary_device_event.event, stream));
			m_stream->wait_for(m_primary_device_event.event);
		}

		void signal(cudaStream_t stream) const { m_stream->signal(stream); }

		const CudaRenderBufferView& render_buffer_view() const { return m_render_buffer_view; }

		void set_render_buffer_view(const CudaRenderBufferView& view) { m_render_buffer_view = view; }

		Data& data() const { return *m_data; }

		bool dirty() const { return m_dirty; }

		void set_dirty(bool value) { m_dirty = value; }

		void clear() {
			m_data = std::make_unique<Data>();
			m_render_buffer_view = {};
			set_dirty(true);
		}

		// Tasks for the primary device are returned as deferred futures (executed when waited on);
		// tasks for auxiliary devices run on this device's dedicated render worker thread.
		template <class F> auto enqueue_task(F&& f) -> std::future<std::result_of_t<F()>> {
			if (is_primary()) {
				return std::async(std::launch::deferred, std::forward<F>(f));
			} else {
				return m_render_worker->enqueue_task(std::forward<F>(f));
			}
		}

	private:
		int m_id;
		bool m_is_primary;
		std::unique_ptr<StreamAndEvent> m_stream;
		struct Event {
			Event() { CUDA_CHECK_THROW(cudaEventCreate(&event)); }

			~Event() { cudaEventDestroy(event); }

			Event(const Event&) = delete;
			Event& operator=(const Event&) = delete;
			Event(Event&& other) { *this = std::move(other); }
			Event& operator=(Event&& other) {
				std::swap(event, other.event);
				return *this;
			}

			cudaEvent_t event = {};
		};
		Event m_primary_device_event;
		std::unique_ptr<Data> m_data;
		CudaRenderBufferView m_render_buffer_view = {};

		bool m_dirty = true;

		std::unique_ptr<ThreadPool> m_render_worker;
	};
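
	// Illustrative sketch (assumed usage, not from the original sources): per-device work is
	// typically scoped with device_guard() and dispatched through enqueue_task(), e.g.
	//
	//   auto future = device.enqueue_task([&device]() {
	//       auto guard = device.device_guard(); // make this device current for the scope
	//       // ... enqueue CUDA work on device.stream() ...
	//   });
	//   future.wait();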

	void sync_device(CudaRenderBuffer& render_buffer, CudaDevice& device);
	ScopeGuard use_device(cudaStream_t stream, CudaRenderBuffer& render_buffer, CudaDevice& device);
	void set_all_devices_dirty();

	std::vector<CudaDevice> m_devices;
	CudaDevice& primary_device() { return m_devices.front(); }

	ThreadPool m_thread_pool;
	std::vector<std::future<void>> m_render_futures;

	bool m_use_aux_devices = false;
	bool m_foveated_rendering = false;
	bool m_dynamic_foveated_rendering = true;
	float m_foveated_rendering_full_res_diameter = 0.55f;
	float m_foveated_rendering_scaling = 1.0f;
	float m_foveated_rendering_max_scaling = 2.0f;
	bool m_foveated_rendering_visualize = false;

	default_rng_t m_rng;

	CudaRenderBuffer m_windowless_render_surface{std::make_shared<CudaSurface2D>()};

	// ---------- Gen3C stuff
	/**
	 * Common signature for Gen3C-related UI callback functions, to be implemented
	 * in Python.
	 *
	 * Inputs:
	 *   name: name of the UI event (e.g. name of the button pressed).
	 *
	 * Returns: bool, whether the operation was successful.
	 */
	using gen3c_cb_t = std::function<bool(const std::string&)>;
	gen3c_cb_t m_gen3c_cb;
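
	// Illustrative sketch (assumed usage, not from the original sources): a handler could be
	// bound from C++ as below, where "connect" is a hypothetical event name.
	//
	//   testbed.m_gen3c_cb = [](const std::string& name) {
	//       return name == "connect"; // report success only for events we recognize
	//   };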

	// Info string to be displayed in the Gen3C UI window.
	std::string m_gen3c_info;
	// Path to an image or directory to use to seed the generative model.
	// The specific format is guessed based on what the path points to.
	std::string m_gen3c_seed_path;
	// Whether to automatically launch new inference requests.
	bool m_gen3c_auto_inference = false;

	EGen3cCameraSource m_gen3c_camera_source = EGen3cCameraSource::Authored;
	// Fake translation speed, in scene units per frame.
	vec3 m_gen3c_translation_speed = {0.05f, 0.f, 0.f};
	// Fake rotation speed around the x, y, and z axes, in radians per frame.
	vec3 m_gen3c_rotation_speed = {0.f, 0.05f, 0.f};

	// Info string describing the inference request (e.g. the number of frames requested), displayed in the Gen3C UI.
	std::string m_gen3c_inference_info = "";

	// Progress of the seeding stage, in [0, 1]. Negative values hide the progress bar.
	float m_gen3c_seeding_progress = -1.0f;
	// Progress of the inference stage, in [0, 1]. Negative values hide the progress bar.
	float m_gen3c_inference_progress = -1.0f;

	// Saving Gen3C inference outputs
	bool m_gen3c_save_frames = false;
	// Whether to display generated frames in the UI. Disabling the display saves time:
	// the result video from the server does not need to be decompressed, and depth
	// prediction can be skipped for most frames.
	bool m_gen3c_display_frames = false;
	std::string m_gen3c_output_dir = "";

	// When rendering with Gen3C, whether to include the rendered cache in the generated video (for debugging / visualization)
	bool m_gen3c_show_cache_renderings = false;

	bool m_gen3c_inference_is_connected = false;
	// Whether to render the camera path with the Gen3C inference server (photoreal video)
	// or locally from the point cloud.
	bool m_gen3c_render_with_gen3c = true;
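
	// Illustrative sketch (assumed usage, not from the original sources): a caller might
	// configure the Gen3C integration roughly like this before triggering inference.
	//
	//   testbed.m_gen3c_seed_path = "seed.png";                        // hypothetical seed image
	//   testbed.m_gen3c_camera_source = EGen3cCameraSource::Authored;  // use the authored camera path
	//   testbed.m_gen3c_render_with_gen3c = true;                      // request photoreal video from the server
	//   testbed.m_gen3c_save_frames = true;
	//   testbed.m_gen3c_output_dir = "gen3c_out";                      // hypothetical output directory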
};

} // namespace ngp