anvilinteractiv committed
Commit 2c0de48 · verified · 1 Parent(s): fcfeede

Update app.py

Files changed (1): app.py (+494 −397)
app.py CHANGED
@@ -12,14 +12,23 @@ from huggingface_hub import hf_hub_download, snapshot_download
  import subprocess
  import shutil
  import base64
+ import logging
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)

  # Install additional dependencies
- subprocess.run("pip install spandrel==0.4.1 --no-deps", shell=True, check=True)
+ try:
+     subprocess.run("pip install spandrel==0.4.1 --no-deps", shell=True, check=True)
+ except Exception as e:
+     logger.error(f"Failed to install spandrel: {str(e)}")
+     raise

  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  DTYPE = torch.float16

- print("DEVICE: ", DEVICE)
+ logger.info(f"Using device: {DEVICE}")

  DEFAULT_FACE_NUMBER = 100000
  MAX_SEED = np.iinfo(np.int32).max
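Note: the try/except added above relies on subprocess.run(..., check=True) raising CalledProcessError on a non-zero exit status. A minimal, runnable sketch of that pattern (the failing command is deliberate and purely illustrative):

import logging
import subprocess

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

try:
    # check=True converts a non-zero exit status into an exception
    subprocess.run("exit 1", shell=True, check=True)
except subprocess.CalledProcessError as e:
    logger.error(f"Command failed with exit code {e.returncode}")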
@@ -34,10 +43,12 @@ os.makedirs(TMP_DIR, exist_ok=True)

  TRIPOSG_CODE_DIR = "./triposg"
  if not os.path.exists(TRIPOSG_CODE_DIR):
+     logger.info(f"Cloning TripoSG repository to {TRIPOSG_CODE_DIR}")
      os.system(f"git clone {TRIPOSG_REPO_URL} {TRIPOSG_CODE_DIR}")

  MV_ADAPTER_CODE_DIR = "./mv_adapter"
  if not os.path.exists(MV_ADAPTER_CODE_DIR):
+     logger.info(f"Cloning MV-Adapter repository to {MV_ADAPTER_CODE_DIR}")
      os.system(f"git clone {MV_ADAPTER_REPO_URL} {MV_ADAPTER_CODE_DIR} && cd {MV_ADAPTER_CODE_DIR} && git checkout 7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d")

  import sys
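Note: both clones still go through os.system, which silently discards git failures. A hedged sketch of the same clone-and-pin step via subprocess.run; the helper name clone_pinned is hypothetical, and the commit hash in the usage line simply mirrors the diff:

import os
import subprocess
from typing import Optional

def clone_pinned(repo_url: str, dest: str, commit: Optional[str] = None) -> None:
    # Clone once; optionally pin to a specific commit, raising on any git failure.
    if not os.path.exists(dest):
        subprocess.run(["git", "clone", repo_url, dest], check=True)
        if commit:
            subprocess.run(["git", "checkout", commit], cwd=dest, check=True)

# e.g. clone_pinned(MV_ADAPTER_REPO_URL, "./mv_adapter", "7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d")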
@@ -46,48 +57,60 @@ sys.path.append(os.path.join(TRIPOSG_CODE_DIR, "scripts"))
  sys.path.append(MV_ADAPTER_CODE_DIR)
  sys.path.append(os.path.join(MV_ADAPTER_CODE_DIR, "scripts"))

- # triposg
- from image_process import prepare_image
- from briarmbg import BriaRMBG
- snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL)
- rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE)
- rmbg_net.eval()
- from triposg.pipelines.pipeline_triposg import TripoSGPipeline
- snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_PRETRAINED_MODEL)
- triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_PRETRAINED_MODEL).to(DEVICE, DTYPE)
-
- # mv adapter
- NUM_VIEWS = 6
- from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
- from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid
- from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render
- mv_adapter_pipe = prepare_pipeline(
-     base_model="stabilityai/stable-diffusion-xl-base-1.0",
-     vae_model="madebyollin/sdxl-vae-fp16-fix",
-     unet_model=None,
-     lora_model=None,
-     adapter_path="huanngzh/mv-adapter",
-     scheduler=None,
-     num_views=NUM_VIEWS,
-     device=DEVICE,
-     dtype=torch.float16,
- )
- birefnet = AutoModelForImageSegmentation.from_pretrained(
-     "ZhengPeng7/BiRefNet", trust_remote_code=True
- ).to(DEVICE)
- transform_image = transforms.Compose(
-     [
-         transforms.Resize((1024, 1024)),
-         transforms.ToTensor(),
-         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
-     ]
- )
- remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, DEVICE)
-
- if not os.path.exists("checkpoints/RealESRGAN_x2plus.pth"):
-     hf_hub_download("dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir="checkpoints")
- if not os.path.exists("checkpoints/big-lama.pt"):
-     subprocess.run("wget -P checkpoints/ https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt", shell=True, check=True)
+ try:
+     # triposg
+     from image_process import prepare_image
+     from briarmbg import BriaRMBG
+     snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL)
+     rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE)
+     rmbg_net.eval()
+     from triposg.pipelines.pipeline_triposg import TripoSGPipeline
+     snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_PRETRAINED_MODEL)
+     triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_PRETRAINED_MODEL).to(DEVICE, DTYPE)
+ except Exception as e:
+     logger.error(f"Failed to load TripoSG models: {str(e)}")
+     raise
+
+ try:
+     # mv adapter
+     NUM_VIEWS = 6
+     from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
+     from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid
+     from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render
+     mv_adapter_pipe = prepare_pipeline(
+         base_model="stabilityai/stable-diffusion-xl-base-1.0",
+         vae_model="madebyollin/sdxl-vae-fp16-fix",
+         unet_model=None,
+         lora_model=None,
+         adapter_path="huanngzh/mv-adapter",
+         scheduler=None,
+         num_views=NUM_VIEWS,
+         device=DEVICE,
+         dtype=torch.float16,
+     )
+     birefnet = AutoModelForImageSegmentation.from_pretrained(
+         "ZhengPeng7/BiRefNet", trust_remote_code=True
+     ).to(DEVICE)
+     transform_image = transforms.Compose(
+         [
+             transforms.Resize((1024, 1024)),
+             transforms.ToTensor(),
+             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+         ]
+     )
+     remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, DEVICE)
+ except Exception as e:
+     logger.error(f"Failed to load MV-Adapter models: {str(e)}")
+     raise
+
+ try:
+     if not os.path.exists("checkpoints/RealESRGAN_x2plus.pth"):
+         hf_hub_download("dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir="checkpoints")
+     if not os.path.exists("checkpoints/big-lama.pt"):
+         subprocess.run("wget -P checkpoints/ https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt", shell=True, check=True)
+ except Exception as e:
+     logger.error(f"Failed to download checkpoints: {str(e)}")
+     raise

  def get_random_hex():
      random_bytes = os.urandom(8)
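Note: the checkpoint step above mixes hf_hub_download with a shelled-out wget. A sketch of the same download-if-missing guard kept entirely on huggingface_hub; ensure_checkpoint is a hypothetical helper, and the repo id/filename in the usage line simply mirror the diff:

import os
from huggingface_hub import hf_hub_download

def ensure_checkpoint(repo_id: str, filename: str, local_dir: str = "checkpoints") -> str:
    # Download only when the file is not already present locally.
    path = os.path.join(local_dir, filename)
    if not os.path.exists(path):
        hf_hub_download(repo_id, filename=filename, local_dir=local_dir)
    return path

# e.g. ensure_checkpoint("dtarnow/UPscaler", "RealESRGAN_x2plus.pth")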
@@ -96,163 +119,194 @@ def get_random_hex():

  @spaces.GPU(duration=180)
  def run_full(image: str, seed: int = 0, num_inference_steps: int = 50, guidance_scale: float = 7.5, simplify: bool = True, target_face_num: int = DEFAULT_FACE_NUMBER, req=None):
-     image_seg = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
-
-     outputs = triposg_pipe(
-         image=image_seg,
-         generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
-         num_inference_steps=num_inference_steps,
-         guidance_scale=guidance_scale
-     ).samples[0]
-     print("mesh extraction done")
-     mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
-
-     if simplify:
-         print("start simplify")
-         from utils import simplify_mesh
-         mesh = simplify_mesh(mesh, target_face_num)
-
-     save_dir = os.path.join(TMP_DIR, "examples")
-     os.makedirs(save_dir, exist_ok=True)
-     mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
-     mesh.export(mesh_path)
-     print("save to ", mesh_path)
-
-     torch.cuda.empty_cache()
-
-     height, width = 768, 768
-     cameras = get_orthogonal_camera(
-         elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
-         distance=[1.8] * NUM_VIEWS,
-         left=-0.55,
-         right=0.55,
-         bottom=-0.55,
-         top=0.55,
-         azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
-         device=DEVICE,
-     )
-     ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
-
-     mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
-     render_out = render(
-         ctx,
-         mesh,
-         cameras,
-         height=height,
-         width=width,
-         render_attr=False,
-         normal_background=0.0,
-     )
-     control_images = (
-         torch.cat(
-             [
-                 (render_out.pos + 0.5).clamp(0, 1),
-                 (render_out.normal / 2 + 0.5).clamp(0, 1),
-             ],
-             dim=-1,
+     try:
+         image_seg = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
+
+         outputs = triposg_pipe(
+             image=image_seg,
+             generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
+             num_inference_steps=num_inference_steps,
+             guidance_scale=guidance_scale
+         ).samples[0]
+         logger.info("Mesh extraction done")
+         mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
+
+         if simplify:
+             logger.info("Starting mesh simplification")
+             from utils import simplify_mesh
+             mesh = simplify_mesh(mesh, target_face_num)
+
+         save_dir = os.path.join(TMP_DIR, "examples")
+         os.makedirs(save_dir, exist_ok=True)
+         mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
+         mesh.export(mesh_path)
+         logger.info(f"Saved mesh to {mesh_path}")
+
+         torch.cuda.empty_cache()
+
+         height, width = 768, 768
+         cameras = get_orthogonal_camera(
+             elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
+             distance=[1.8] * NUM_VIEWS,
+             left=-0.55,
+             right=0.55,
+             bottom=-0.55,
+             top=0.55,
+             azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
+             device=DEVICE,
+         )
+         ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
+
+         mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
+         render_out = render(
+             ctx,
+             mesh,
+             cameras,
+             height=height,
+             width=width,
+             render_attr=False,
+             normal_background=0.0,
+         )
+         control_images = (
+             torch.cat(
+                 [
+                     (render_out.pos + 0.5).clamp(0, 1),
+                     (render_out.normal / 2 + 0.5).clamp(0, 1),
+                 ],
+                 dim=-1,
+             )
+             .permute(0, 3, 1, 2)
+             .to(DEVICE)
          )
-         .permute(0, 3, 1, 2)
-         .to(DEVICE)
-     )

-     image = Image.open(image)
-     image = remove_bg_fn(image)
-     image = preprocess_image(image, height, width)
-
-     pipe_kwargs = {}
-     if seed != -1 and isinstance(seed, int):
-         pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
-
-     images = mv_adapter_pipe(
-         "high quality",
-         height=height,
-         width=width,
-         num_inference_steps=15,
-         guidance_scale=3.0,
-         num_images_per_prompt=NUM_VIEWS,
-         control_image=control_images,
-         control_conditioning_scale=1.0,
-         reference_image=image,
-         reference_conditioning_scale=1.0,
-         negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
-         cross_attention_kwargs={"scale": 1.0},
-         **pipe_kwargs,
-     ).images
-
-     torch.cuda.empty_cache()
-
-     mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
-     make_image_grid(images, rows=1).save(mv_image_path)
-
-     from texture import TexturePipeline, ModProcessConfig
-     texture_pipe = TexturePipeline(
-         upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
-         inpaint_ckpt_path="checkpoints/big-lama.pt",
-         device=DEVICE,
-     )
+         image = Image.open(image)
+         image = remove_bg_fn(image)
+         image = preprocess_image(image, height, width)
+
+         pipe_kwargs = {}
+         if seed != -1 and isinstance(seed, int):
+             pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
+
+         images = mv_adapter_pipe(
+             "high quality",
+             height=height,
+             width=width,
+             num_inference_steps=15,
+             guidance_scale=3.0,
+             num_images_per_prompt=NUM_VIEWS,
+             control_image=control_images,
+             control_conditioning_scale=1.0,
+             reference_image=image,
+             reference_conditioning_scale=1.0,
+             negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
+             cross_attention_kwargs={"scale": 1.0},
+             **pipe_kwargs,
+         ).images
+
+         torch.cuda.empty_cache()
+
+         mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
+         make_image_grid(images, rows=1).save(mv_image_path)
+
+         from texture import TexturePipeline, ModProcessConfig
+         texture_pipe = TexturePipeline(
+             upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
+             inpaint_ckpt_path="checkpoints/big-lama.pt",
+             device=DEVICE,
+         )

-     textured_glb_path = texture_pipe(
-         mesh_path=mesh_path,
-         save_dir=save_dir,
-         save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
-         uv_unwarp=True,
-         uv_size=4096,
-         rgb_path=mv_image_path,
-         rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
-         camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
-     )
+         textured_glb_path = texture_pipe(
+             mesh_path=mesh_path,
+             save_dir=save_dir,
+             save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
+             uv_unwarp=True,
+             uv_size=4096,
+             rgb_path=mv_image_path,
+             rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
+             camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
+         )

-     return image_seg, mesh_path, textured_glb_path
+         return image_seg, mesh_path, textured_glb_path
+     except Exception as e:
+         logger.error(f"Error in run_full: {str(e)}")
+         raise

  def gradio_generate(image: str, seed: int = 0, num_inference_steps: int = 50, guidance_scale: float = 7.5, simplify: bool = True, target_face_num: int = DEFAULT_FACE_NUMBER):
-     # Verify API key
-     api_key = os.getenv("POLYGENIX_API_KEY", "your-secret-api-key")
-     request = gr.Request()
-     if not request.headers.get("x-api-key") == api_key:
-         raise ValueError("Invalid API key")
-
-     # Handle base64 image
-     if image.startswith("data:image"):
-         # Extract base64 data (e.g., "data:image/jpeg;base64,...")
-         base64_string = image.split(",")[1]
-         image_data = base64.b64decode(base64_string)
-         # Save to temporary file
-         temp_image_path = os.path.join(TMP_DIR, f"input_{get_random_hex()}.png")
-         with open(temp_image_path, "wb") as f:
-             f.write(image_data)
-     else:
-         temp_image_path = image
-         if not os.path.exists(temp_image_path):
-             raise ValueError("Invalid or missing image file")
-
-     image_seg, mesh_path, textured_glb_path = run_full(temp_image_path, seed, num_inference_steps, guidance_scale, simplify, target_face_num, req=None)
-     session_hash = os.path.basename(os.path.dirname(textured_glb_path))
-     return {"file_url": f"/files/{session_hash}/{os.path.basename(textured_glb_path)}"}
+     try:
+         logger.info("Starting gradio_generate")
+         # Verify API key
+         api_key = os.getenv("POLYGENIX_API_KEY", "your-secret-api-key")
+         request = gr.Request()
+         if not request.headers.get("x-api-key") == api_key:
+             logger.error("Invalid API key")
+             raise ValueError("Invalid API key")
+
+         # Handle base64 image or file path
+         if image.startswith("data:image"):
+             logger.info("Processing base64 image")
+             base64_string = image.split(",")[1]
+             image_data = base64.b64decode(base64_string)
+             temp_image_path = os.path.join(TMP_DIR, f"input_{get_random_hex()}.png")
+             with open(temp_image_path, "wb") as f:
+                 f.write(image_data)
+         else:
+             temp_image_path = image
+             if not os.path.exists(temp_image_path):
+                 logger.error(f"Image file not found: {temp_image_path}")
+                 raise ValueError("Invalid or missing image file")
+
+         image_seg, mesh_path, textured_glb_path = run_full(temp_image_path, seed, num_inference_steps, guidance_scale, simplify, target_face_num, req=None)
+         session_hash = os.path.basename(os.path.dirname(textured_glb_path))
+         logger.info(f"Generated model at /files/{session_hash}/{os.path.basename(textured_glb_path)}")
+         return {"file_url": f"/files/{session_hash}/{os.path.basename(textured_glb_path)}"}
+     except Exception as e:
+         logger.error(f"Error in gradio_generate: {str(e)}")
+         raise

  def start_session(req: gr.Request):
-     save_dir = os.path.join(TMP_DIR, str(req.session_hash))
-     os.makedirs(save_dir, exist_ok=True)
-     print("start session, mkdir", save_dir)
+     try:
+         save_dir = os.path.join(TMP_DIR, str(req.session_hash))
+         os.makedirs(save_dir, exist_ok=True)
+         logger.info(f"Started session, created directory: {save_dir}")
+     except Exception as e:
+         logger.error(f"Error in start_session: {str(e)}")
+         raise

  def end_session(req: gr.Request):
-     save_dir = os.path.join(TMP_DIR, str(req.session_hash))
-     shutil.rmtree(save_dir)
+     try:
+         save_dir = os.path.join(TMP_DIR, str(req.session_hash))
+         shutil.rmtree(save_dir)
+         logger.info(f"Ended session, removed directory: {save_dir}")
+     except Exception as e:
+         logger.error(f"Error in end_session: {str(e)}")
+         raise

  def get_random_seed(randomize_seed, seed):
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-     return seed
+     try:
+         if randomize_seed:
+             seed = random.randint(0, MAX_SEED)
+         logger.info(f"Generated seed: {seed}")
+         return seed
+     except Exception as e:
+         logger.error(f"Error in get_random_seed: {str(e)}")
+         raise

  @spaces.GPU()
  @torch.no_grad()
  def run_segmentation(image: str):
-     image = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
-     return image
+     try:
+         logger.info("Running segmentation")
+         image = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
+         logger.info("Segmentation complete")
+         return image
+     except Exception as e:
+         logger.error(f"Error in run_segmentation: {str(e)}")
+         raise

  @spaces.GPU(duration=90)
  @torch.no_grad()
  def image_to_3d(
-     image: Image.Image,
+     image,  # Changed to accept FileData dict or PIL Image
      seed: int,
      num_inference_steps: int,
      guidance_scale: float,
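Note: on both sides of this hunk, gradio_generate constructs gr.Request() inside the function body. Gradio only populates a gr.Request that is declared as a handler parameter, so the x-api-key check likely never sees the caller's real header. A sketch of the injected-parameter pattern; guarded_handler is a hypothetical handler, not part of the diff:

import os
import gradio as gr

def guarded_handler(image: str, req: gr.Request):
    # req is injected by Gradio when the handler runs and carries the real HTTP headers.
    expected = os.getenv("POLYGENIX_API_KEY", "your-secret-api-key")
    if req.headers.get("x-api-key") != expected:
        raise ValueError("Invalid API key")
    return image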
@@ -260,130 +314,162 @@ def image_to_3d(
      target_face_num: int,
      req: gr.Request
  ):
-     outputs = triposg_pipe(
-         image=image,
-         generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
-         num_inference_steps=num_inference_steps,
-         guidance_scale=guidance_scale
-     ).samples[0]
-     print("mesh extraction done")
-     mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
-
-     if simplify:
-         print("start simplify")
-         from utils import simplify_mesh
-         mesh = simplify_mesh(mesh, target_face_num)
-
-     save_dir = os.path.join(TMP_DIR, str(req.session_hash))
-     mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
-     mesh.export(mesh_path)
-     print("save to ", mesh_path)
-
-     torch.cuda.empty_cache()
-
-     return mesh_path
+     try:
+         logger.info("Running image_to_3d")
+         # Handle FileData dict from gradio_client
+         if isinstance(image, dict):
+             image_path = image.get("path") or image.get("url")
+             if not image_path:
+                 logger.error("Invalid image input: no path or URL provided")
+                 raise ValueError("Invalid image input: no path or URL provided")
+             image = Image.open(image_path)
+         elif not isinstance(image, Image.Image):
+             logger.error(f"Invalid image type: {type(image)}")
+             raise ValueError(f"Expected PIL Image or FileData dict, got {type(image)}")
+
+         outputs = triposg_pipe(
+             image=image,
+             generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
+             num_inference_steps=num_inference_steps,
+             guidance_scale=guidance_scale
+         ).samples[0]
+         logger.info("Mesh extraction done")
+         mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
+
+         if simplify:
+             logger.info("Starting mesh simplification")
+             try:
+                 from utils import simplify_mesh
+                 mesh = simplify_mesh(mesh, target_face_num)
+             except ImportError as e:
+                 logger.error(f"Failed to import simplify_mesh: {str(e)}")
+                 raise
+
+         save_dir = os.path.join(TMP_DIR, str(req.session_hash))
+         os.makedirs(save_dir, exist_ok=True)
+         mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
+         mesh.export(mesh_path)
+         logger.info(f"Saved mesh to {mesh_path}")
+
+         torch.cuda.empty_cache()
+         return {"path": mesh_path}
+     except Exception as e:
+         logger.error(f"Error in image_to_3d: {str(e)}")
+         raise

  @spaces.GPU(duration=120)
  @torch.no_grad()
  def run_texture(image: Image, mesh_path: str, seed: int, req: gr.Request):
-     height, width = 768, 768
-     cameras = get_orthogonal_camera(
-         elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
-         distance=[1.8] * NUM_VIEWS,
-         left=-0.55,
-         right=0.55,
-         bottom=-0.55,
-         top=0.55,
-         azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
-         device=DEVICE,
-     )
-     ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
-
-     mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
-     render_out = render(
-         ctx,
-         mesh,
-         cameras,
-         height=height,
-         width=width,
-         render_attr=False,
-         normal_background=0.0,
-     )
-     control_images = (
-         torch.cat(
-             [
-                 (render_out.pos + 0.5).clamp(0, 1),
-                 (render_out.normal / 2 + 0.5).clamp(0, 1),
-             ],
-             dim=-1,
+     try:
+         logger.info("Running texture generation")
+         height, width = 768, 768
+         cameras = get_orthogonal_camera(
+             elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
+             distance=[1.8] * NUM_VIEWS,
+             left=-0.55,
+             right=0.55,
+             bottom=-0.55,
+             top=0.55,
+             azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
+             device=DEVICE,
+         )
+         ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
+
+         mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
+         render_out = render(
+             ctx,
+             mesh,
+             cameras,
+             height=height,
+             width=width,
+             render_attr=False,
+             normal_background=0.0,
+         )
+         control_images = (
+             torch.cat(
+                 [
+                     (render_out.pos + 0.5).clamp(0, 1),
+                     (render_out.normal / 2 + 0.5).clamp(0, 1),
+                 ],
+                 dim=-1,
+             )
+             .permute(0, 3, 1, 2)
+             .to(DEVICE)
          )
-         .permute(0, 3, 1, 2)
-         .to(DEVICE)
-     )

-     image = Image.open(image)
-     image = remove_bg_fn(image)
-     image = preprocess_image(image, height, width)
-
-     pipe_kwargs = {}
-     if seed != -1 and isinstance(seed, int):
-         pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
-
-     images = mv_adapter_pipe(
-         "high quality",
-         height=height,
-         width=width,
-         num_inference_steps=15,
-         guidance_scale=3.0,
-         num_images_per_prompt=NUM_VIEWS,
-         control_image=control_images,
-         control_conditioning_scale=1.0,
-         reference_image=image,
-         reference_conditioning_scale=1.0,
-         negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
-         cross_attention_kwargs={"scale": 1.0},
-         **pipe_kwargs,
-     ).images
-
-     torch.cuda.empty_cache()
-
-     mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
-     make_image_grid(images, rows=1).save(mv_image_path)
-
-     from texture import TexturePipeline, ModProcessConfig
-     texture_pipe = TexturePipeline(
-         upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
-         inpaint_ckpt_path="checkpoints/big-lama.pt",
-         device=DEVICE,
-     )
+         image = Image.open(image)
+         image = remove_bg_fn(image)
+         image = preprocess_image(image, height, width)
+
+         pipe_kwargs = {}
+         if seed != -1 and isinstance(seed, int):
+             pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
+
+         images = mv_adapter_pipe(
+             "high quality",
+             height=height,
+             width=width,
+             num_inference_steps=15,
+             guidance_scale=3.0,
+             num_images_per_prompt=NUM_VIEWS,
+             control_image=control_images,
+             control_conditioning_scale=1.0,
+             reference_image=image,
+             reference_conditioning_scale=1.0,
+             negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
+             cross_attention_kwargs={"scale": 1.0},
+             **pipe_kwargs,
+         ).images
+
+         torch.cuda.empty_cache()
+
+         mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
+         make_image_grid(images, rows=1).save(mv_image_path)
+
+         from texture import TexturePipeline, ModProcessConfig
+         texture_pipe = TexturePipeline(
+             upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
+             inpaint_ckpt_path="checkpoints/big-lama.pt",
+             device=DEVICE,
+         )

-     textured_glb_path = texture_pipe(
-         mesh_path=mesh_path,
-         save_dir=save_dir,
-         save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
-         uv_unwarp=True,
-         uv_size=4096,
-         rgb_path=mv_image_path,
-         rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
-         camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
-     )
+         textured_glb_path = texture_pipe(
+             mesh_path=mesh_path,
+             save_dir=save_dir,
+             save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
+             uv_unwarp=True,
+             uv_size=4096,
+             rgb_path=mv_image_path,
+             rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
+             camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
+         )

-     return textured_glb_path
-
- # Define Gradio API endpoint separately
- api_interface = gr.Interface(
-     fn=gradio_generate,
-     inputs=[
-         gr.Image(type="filepath", label="Image"),  # Accepts base64 strings or file paths
-         gr.Number(label="Seed", value=0, precision=0),
-         gr.Number(label="Inference Steps", value=50, precision=0),
-         gr.Number(label="Guidance Scale", value=7.5),
-         gr.Checkbox(label="Simplify Mesh", value=True),
-         gr.Number(label="Target Face Number", value=DEFAULT_FACE_NUMBER, precision=0)
-     ],
-     outputs="json",
-     api_name="/api/generate"
- )
+         logger.info(f"Textured model saved to {textured_glb_path}")
+         return {"path": textured_glb_path}
+     except Exception as e:
+         logger.error(f"Error in run_texture: {str(e)}")
+         raise
+
+ # Define Gradio API endpoint
+ try:
+     logger.info("Initializing Gradio API interface")
+     api_interface = gr.Interface(
+         fn=gradio_generate,
+         inputs=[
+             gr.Image(type="filepath", label="Image"),
+             gr.Number(label="Seed", value=0, precision=0),
+             gr.Number(label="Inference Steps", value=50, precision=0),
+             gr.Number(label="Guidance Scale", value=7.5),
+             gr.Checkbox(label="Simplify Mesh", value=True),
+             gr.Number(label="Target Face Number", value=DEFAULT_FACE_NUMBER, precision=0)
+         ],
+         outputs="json",
+         api_name="/api/generate"
+     )
+     logger.info("Gradio API interface initialized successfully")
+ except Exception as e:
+     logger.error(f"Failed to initialize Gradio API interface: {str(e)}")
+     raise

  HEADER = """
  # 🌌 PolyGenixAI: Craft 3D Worlds with Cosmic Precision
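Note: run_texture reads save_dir before ever assigning it, on both sides of this hunk, which would raise NameError at runtime; the session-scoped directory that image_to_3d builds is presumably what was intended. A sketch of that missing step as a hypothetical helper:

import os

def session_save_dir(tmp_dir: str, session_hash: str) -> str:
    # Per-session output directory, matching what image_to_3d creates.
    save_dir = os.path.join(tmp_dir, str(session_hash))
    os.makedirs(save_dir, exist_ok=True)
    return save_dir

# e.g. save_dir = session_save_dir(TMP_DIR, req.session_hash)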
@@ -469,105 +555,116 @@ HEADER = """
  """

  # Gradio web interface
- with gr.Blocks(title="PolyGenixAI", css="body { background-color: #1A1A1A; } .gr-panel { background-color: #2D2D2D; }") as demo:
-     gr.Markdown(HEADER)
-     with gr.Tabs(elem_classes="gr-tab"):
-         with gr.Tab("Create 3D Model"):
-             with gr.Row():
-                 with gr.Column(scale=1):
-                     image_prompts = gr.Image(label="Upload Image", type="filepath", height=300, elem_classes="gr-panel")
-                     seg_image = gr.Image(label="Preview Segmentation", type="pil", format="png", interactive=False, height=300, elem_classes="gr-panel")
-                     with gr.Accordion("Style & Settings", open=True, elem_classes="gr-accordion"):
-                         style_filter = gr.Dropdown(
-                             choices=["None", "Realistic", "Fantasy", "Cartoon", "Sci-Fi", "Vintage", "Cosmic", "Neon"],
-                             label="Style Filter",
-                             value="None",
-                             info="Select a style to inspire your 3D model (optional)",
-                             elem_classes="gr-dropdown"
-                         )
-                         seed = gr.Slider(
-                             label="Seed",
-                             minimum=0,
-                             maximum=MAX_SEED,
-                             step=1,
-                             value=0,
-                             elem_classes="gr-slider"
-                         )
-                         randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-                         num_inference_steps = gr.Slider(
-                             label="Inference Steps",
-                             minimum=8,
-                             maximum=50,
-                             step=1,
-                             value=50,
-                             info="Higher steps enhance detail but increase processing time",
-                             elem_classes="gr-slider"
-                         )
-                         guidance_scale = gr.Slider(
-                             label="Guidance Scale",
-                             minimum=0.0,
-                             maximum=20.0,
-                             step=0.1,
-                             value=7.0,
-                             info="Controls adherence to input image",
-                             elem_classes="gr-slider"
-                         )
-                         reduce_face = gr.Checkbox(label="Simplify Mesh", value=True)
-                         target_face_num = gr.Slider(
-                             maximum=1000000,
-                             minimum=10000,
-                             value=DEFAULT_FACE_NUMBER,
-                             label="Target Face Number",
-                             info="Adjust mesh complexity for performance",
-                             elem_classes="gr-slider"
-                         )
-                     gen_button = gr.Button("Generate 3D Model", variant="primary", elem_classes="gr-button-primary")
-                     gen_texture_button = gr.Button("Apply Texture", variant="secondary", interactive=False, elem_classes="gr-button-secondary")
-                 with gr.Column(scale=1):
-                     model_output = gr.Model3D(label="3D Model Preview", interactive=False, height=400, elem_classes="gr-panel")
-                     textured_model_output = gr.Model3D(label="Textured 3D Model", interactive=False, height=400, elem_classes="gr-panel")
-                     download_button = gr.Button("Download GLB", variant="secondary", elem_classes="gr-button-secondary")
-         with gr.Tab("Cosmic Gallery"):
-             gr.Markdown("### Discover Stellar Creations")
-             gr.Examples(
-                 examples=[
-                     f"{TRIPOSG_CODE_DIR}/assets/example_data/{image}"
-                     for image in os.listdir(f"{TRIPOSG_CODE_DIR}/assets/example_data")
-                 ],
-                 fn=run_full,
-                 inputs=[image_prompts],
-                 outputs=[seg_image, model_output, textured_model_output],
-                 cache_examples=True,
-             )
-     gr.Markdown("Connect with creators in our <a href='https://www.anvilinteractive.com/community' style='color: #A78BFA; text-decoration: none;'>PolyGenixAI Cosmic Community</a>!")
-     gen_button.click(
-         run_segmentation,
-         inputs=[image_prompts],
-         outputs=[seg_image]
-     ).then(
-         get_random_seed,
-         inputs=[randomize_seed, seed],
-         outputs=[seed],
-     ).then(
-         image_to_3d,
-         inputs=[
-             seg_image,
-             seed,
-             num_inference_steps,
-             guidance_scale,
-             reduce_face,
-             target_face_num
-         ],
-         outputs=[model_output]
-     ).then(lambda: gr.Button(interactive=True), outputs=[gen_texture_button])
-     gen_texture_button.click(
-         run_texture,
-         inputs=[image_prompts, model_output, seed],
-         outputs=[textured_model_output]
-     )
-     demo.load(start_session)
-     demo.unload(end_session)
+ try:
+     logger.info("Initializing Gradio Blocks interface")
+     with gr.Blocks(title="PolyGenixAI", css="body { background-color: #1A1A1A; } .gr-panel { background-color: #2D2D2D; }") as demo:
+         gr.Markdown(HEADER)
+         with gr.Tabs(elem_classes="gr-tab"):
+             with gr.Tab("Create 3D Model"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         image_prompts = gr.Image(label="Upload Image", type="filepath", height=300, elem_classes="gr-panel")
+                         seg_image = gr.Image(label="Preview Segmentation", type="pil", format="png", interactive=False, height=300, elem_classes="gr-panel")
+                         with gr.Accordion("Style & Settings", open=True, elem_classes="gr-accordion"):
+                             style_filter = gr.Dropdown(
+                                 choices=["None", "Realistic", "Fantasy", "Cartoon", "Sci-Fi", "Vintage", "Cosmic", "Neon"],
+                                 label="Style Filter",
+                                 value="None",
+                                 info="Select a style to inspire your 3D model (optional)",
+                                 elem_classes="gr-dropdown"
+                             )
+                             seed = gr.Slider(
+                                 label="Seed",
+                                 minimum=0,
+                                 maximum=MAX_SEED,
+                                 step=1,
+                                 value=0,
+                                 elem_classes="gr-slider"
+                             )
+                             randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                             num_inference_steps = gr.Slider(
+                                 label="Inference Steps",
+                                 minimum=8,
+                                 maximum=50,
+                                 step=1,
+                                 value=50,
+                                 info="Higher steps enhance detail but increase processing time",
+                                 elem_classes="gr-slider"
+                             )
+                             guidance_scale = gr.Slider(
+                                 label="Guidance Scale",
+                                 minimum=0.0,
+                                 maximum=20.0,
+                                 step=0.1,
+                                 value=7.0,
+                                 info="Controls adherence to input image",
+                                 elem_classes="gr-slider"
+                             )
+                             reduce_face = gr.Checkbox(label="Simplify Mesh", value=True)
+                             target_face_num = gr.Slider(
+                                 maximum=1000000,
+                                 minimum=10000,
+                                 value=DEFAULT_FACE_NUMBER,
+                                 label="Target Face Number",
+                                 info="Adjust mesh complexity for performance",
+                                 elem_classes="gr-slider"
+                             )
+                         gen_button = gr.Button("Generate 3D Model", variant="primary", elem_classes="gr-button-primary")
+                         gen_texture_button = gr.Button("Apply Texture", variant="secondary", interactive=False, elem_classes="gr-button-secondary")
+                     with gr.Column(scale=1):
+                         model_output = gr.Model3D(label="3D Model Preview", interactive=False, height=400, elem_classes="gr-panel")
+                         textured_model_output = gr.Model3D(label="Textured 3D Model", interactive=False, height=400, elem_classes="gr-panel")
+                         download_button = gr.Button("Download GLB", variant="secondary", elem_classes="gr-button-secondary")
+             with gr.Tab("Cosmic Gallery"):
+                 gr.Markdown("### Discover Stellar Creations")
+                 gr.Examples(
+                     examples=[
+                         f"{TRIPOSG_CODE_DIR}/assets/example_data/{image}"
+                         for image in os.listdir(f"{TRIPOSG_CODE_DIR}/assets/example_data")
+                     ],
+                     fn=run_full,
+                     inputs=[image_prompts],
+                     outputs=[seg_image, model_output, textured_model_output],
+                     cache_examples=True,
+                 )
+         gr.Markdown("Connect with creators in our <a href='https://www.anvilinteractive.com/community' style='color: #A78BFA; text-decoration: none;'>PolyGenixAI Cosmic Community</a>!")
+         gen_button.click(
+             run_segmentation,
+             inputs=[image_prompts],
+             outputs=[seg_image]
+         ).then(
+             get_random_seed,
+             inputs=[randomize_seed, seed],
+             outputs=[seed],
+         ).then(
+             image_to_3d,
+             inputs=[
+                 seg_image,
+                 seed,
+                 num_inference_steps,
+                 guidance_scale,
+                 reduce_face,
+                 target_face_num
+             ],
+             outputs=[model_output]
+         ).then(lambda: gr.Button(interactive=True), outputs=[gen_texture_button])
+         gen_texture_button.click(
+             run_texture,
+             inputs=[image_prompts, model_output, seed],
+             outputs=[textured_model_output]
+         )
+         demo.load(start_session)
+         demo.unload(end_session)
+     logger.info("Gradio Blocks interface initialized successfully")
+ except Exception as e:
+     logger.error(f"Failed to initialize Gradio Blocks interface: {str(e)}")
+     raise

  if __name__ == "__main__":
-     # Launch both the API and web interface
-     demo.launch(server_name="0.0.0.0", server_port=7860)
+     try:
+         logger.info("Launching Gradio application")
+         demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
+         logger.info("Gradio application launched successfully")
+     except Exception as e:
+         logger.error(f"Failed to launch Gradio application: {str(e)}")
+         raise
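For reference, a hedged sketch of how a client might call the endpoint gradio_generate is meant to expose. The diff defines api_interface with api_name="/api/generate" but only ever launches demo, so whether this route is actually served depends on wiring not shown here; the URL and file name below are placeholders, and depending on the gradio_client version the image argument may need the handle_file wrapper shown.

from gradio_client import Client, handle_file

client = Client("http://localhost:7860")  # placeholder Space URL
result = client.predict(
    handle_file("input.png"),  # image (placeholder file)
    0,       # seed
    50,      # num_inference_steps
    7.5,     # guidance_scale
    True,    # simplify
    100000,  # target_face_num
    api_name="/api/generate",
)
print(result)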