anvilinteractiv committed on
Commit c484979 · verified · 1 Parent(s): 15f252d

Update app.py

Files changed (1): app.py +300 -168
app.py CHANGED
@@ -3,10 +3,9 @@ import os
 import gradio as gr
 import numpy as np
 import torch
-from torch.cuda.amp import autocast
+from PIL import Image
 import trimesh
 import random
-from PIL import Image
 from transformers import AutoModelForImageSegmentation
 from torchvision import transforms
 from huggingface_hub import hf_hub_download, snapshot_download
@@ -14,9 +13,6 @@ import subprocess
 import shutil
 import base64
 import logging
-import time
-import traceback
-import requests
 
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -26,7 +22,7 @@ logger = logging.getLogger(__name__)
 try:
     subprocess.run("pip install spandrel==0.4.1 --no-deps", shell=True, check=True)
 except Exception as e:
-    logger.error(f"Failed to install spandrel: {str(e)}\n{traceback.format_exc()}")
+    logger.error(f"Failed to install spandrel: {str(e)}")
     raise
 
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -34,7 +30,7 @@ DTYPE = torch.float16
 
 logger.info(f"Using device: {DEVICE}")
 
-DEFAULT_FACE_NUMBER = 20000 # Reduced for memory efficiency
+DEFAULT_FACE_NUMBER = 100000
 MAX_SEED = np.iinfo(np.int32).max
 TRIPOSG_REPO_URL = "https://github.com/VAST-AI-Research/TripoSG.git"
 MV_ADAPTER_REPO_URL = "https://github.com/huanngzh/MV-Adapter.git"
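
MAX_SEED caps randomized seeds at the int32 limit. A sketch of the randomize-seed pattern that get_random_seed (further down) appears to implement; most of that function's body sits outside the hunks shown, so this is an assumption:

    import random
    import numpy as np

    MAX_SEED = np.iinfo(np.int32).max

    def pick_seed(randomize_seed: bool, seed: int) -> int:
        # Assumed to mirror get_random_seed: draw a fresh seed on request.
        return random.randint(0, MAX_SEED) if randomize_seed else seed

    print(pick_seed(True, 0), pick_seed(False, 42))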
@@ -62,20 +58,22 @@ sys.path.append(MV_ADAPTER_CODE_DIR)
 sys.path.append(os.path.join(MV_ADAPTER_CODE_DIR, "scripts"))
 
 try:
+    # triposg
     from image_process import prepare_image
     from briarmbg import BriaRMBG
     snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL)
-    rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE, dtype=DTYPE)
+    rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE)
     rmbg_net.eval()
     from triposg.pipelines.pipeline_triposg import TripoSGPipeline
     snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_PRETRAINED_MODEL)
-    triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_PRETRAINED_MODEL).to(DEVICE, dtype=DTYPE)
+    triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_PRETRAINED_MODEL).to(DEVICE, DTYPE)
 except Exception as e:
-    logger.error(f"Failed to load TripoSG models: {str(e)}\n{traceback.format_exc()}")
+    logger.error(f"Failed to load TripoSG models: {str(e)}")
     raise
 
 try:
-    NUM_VIEWS = 4 # Reduced for memory efficiency
+    # mv adapter
+    NUM_VIEWS = 6
     from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
     from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid
     from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render
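
The hunk above drops the fp16 cast for the RMBG net (.to(DEVICE)) while keeping it for the TripoSG pipeline (.to(DEVICE, DTYPE)). A minimal sketch of the torch semantics involved, using a stand-in nn.Linear rather than the real models:

    import torch

    # Module.to() accepts a device, a dtype, or both.
    fp32_net = torch.nn.Linear(4, 4).to("cpu")                 # move only; weights stay float32
    fp16_net = torch.nn.Linear(4, 4).to("cpu", torch.float16)  # move and cast
    print(fp32_net.weight.dtype, fp16_net.weight.dtype)        # torch.float32 torch.float16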
@@ -92,17 +90,17 @@ try:
     )
     birefnet = AutoModelForImageSegmentation.from_pretrained(
         "ZhengPeng7/BiRefNet", trust_remote_code=True
-    ).to(DEVICE, dtype=DTYPE)
+    ).to(DEVICE)
     transform_image = transforms.Compose(
         [
-            transforms.Resize((512, 512)), # Reduced resolution
+            transforms.Resize((1024, 1024)),
             transforms.ToTensor(),
             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
         ]
     )
     remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, DEVICE)
 except Exception as e:
-    logger.error(f"Failed to load MV-Adapter models: {str(e)}\n{traceback.format_exc()}")
+    logger.error(f"Failed to load MV-Adapter models: {str(e)}")
     raise
 
 try:
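
For reference, a self-contained sketch of the 1024x1024 ImageNet-normalized preprocessing this hunk switches to; the blank image is a stand-in input:

    from PIL import Image
    from torchvision import transforms

    transform_image = transforms.Compose([
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),                 # HWC uint8 -> CHW float in [0, 1]
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    img = Image.new("RGB", (800, 600), "white")
    batch = transform_image(img).unsqueeze(0)  # shape: (1, 3, 1024, 1024)
    print(batch.shape)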
@@ -111,201 +109,139 @@ try:
     if not os.path.exists("checkpoints/big-lama.pt"):
         subprocess.run("wget -P checkpoints/ https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt", shell=True, check=True)
 except Exception as e:
-    logger.error(f"Failed to download checkpoints: {str(e)}\n{traceback.format_exc()}")
+    logger.error(f"Failed to download checkpoints: {str(e)}")
     raise
 
-def log_gpu_memory():
-    if torch.cuda.is_available():
-        allocated = torch.cuda.memory_allocated() / 1024**3
-        reserved = torch.cuda.memory_reserved() / 1024**3
-        logger.info(f"GPU Memory: Allocated {allocated:.2f} GB, Reserved {reserved:.2f} GB")
-
 def get_random_hex():
     random_bytes = os.urandom(8)
     random_hex = random_bytes.hex()
     return random_hex
 
-def retry_on_failure(func, max_attempts=3, delay=1):
-    for attempt in range(max_attempts):
-        try:
-            return func()
-        except RuntimeError as e:
-            logger.warning(f"Attempt {attempt + 1} failed: {str(e)}\n{traceback.format_exc()}")
-            if attempt == max_attempts - 1:
-                raise
-            time.sleep(delay)
-
-@spaces.GPU(duration=2)
-@torch.no_grad()
-def run_segmentation(image):
+@spaces.GPU(duration=5)
+def run_full(image: str, seed: int = 0, num_inference_steps: int = 50, guidance_scale: float = 7.5, simplify: bool = True, target_face_num: int = DEFAULT_FACE_NUMBER, req=None):
     try:
-        log_gpu_memory()
-        if isinstance(image, dict):
-            image_path = image.get("path") or image.get("url")
-            if not image_path:
-                raise ValueError("Invalid image input: no path or URL provided")
-            if image_path.startswith("http"):
-                temp_image_path = os.path.join(TMP_DIR, f"input_{get_random_hex()}.png")
-                image_path = download_image(image_path, temp_image_path)
-        elif isinstance(image, str) and image.startswith("http"):
-            temp_image_path = os.path.join(TMP_DIR, f"input_{get_random_hex()}.png")
-            image_path = download_image(image, temp_image_path)
-        else:
-            image_path = image
-            if not isinstance(image, (str, bytes)) or (isinstance(image, str) and not os.path.exists(image)):
-                raise ValueError(f"Expected str (path/URL), bytes, or FileData dict, got {type(image)}")
-
-        with autocast():
-            image_seg = prepare_image(image_path, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
-        rmbg_net.to("cpu")
-        torch.cuda.empty_cache()
-        log_gpu_memory()
-        return image_seg
-    except Exception as e:
-        logger.error(f"Error in run_segmentation: {str(e)}\n{traceback.format_exc()}")
-        raise
-
-@spaces.GPU(duration=3)
-@torch.no_grad()
-def image_to_3d(image, seed, num_inference_steps=30, guidance_scale=7.0, simplify=True, target_face_num=DEFAULT_FACE_NUMBER, req=None):
-    try:
-        log_gpu_memory()
-        triposg_pipe.to(DEVICE, dtype=DTYPE)
-        with autocast():
-            outputs = triposg_pipe(
-                image=image,
-                generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
-                num_inference_steps=num_inference_steps,
-                guidance_scale=guidance_scale
-            ).samples[0]
+        image_seg = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
+
+        outputs = triposg_pipe(
+            image=image_seg,
+            generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale
+        ).samples[0]
+        logger.info("Mesh extraction done")
         mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
+
         if simplify:
+            logger.info("Starting mesh simplification")
             from utils import simplify_mesh
             mesh = simplify_mesh(mesh, target_face_num)
-        save_dir = os.path.join(TMP_DIR, str(req.session_hash) if req else "examples")
+
+        save_dir = os.path.join(TMP_DIR, "examples")
         os.makedirs(save_dir, exist_ok=True)
         mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
         mesh.export(mesh_path)
-        triposg_pipe.to("cpu")
+        logger.info(f"Saved mesh to {mesh_path}")
+
         torch.cuda.empty_cache()
-        log_gpu_memory()
-        return mesh_path
-    except Exception as e:
-        logger.error(f"Error in image_to_3d: {str(e)}\n{traceback.format_exc()}")
-        raise
 
-@spaces.GPU(duration=3)
-@torch.no_grad()
-def run_texture(image, mesh_path, seed, req=None):
-    try:
-        log_gpu_memory()
-        height, width = 512, 512
+        height, width = 768, 768
         cameras = get_orthogonal_camera(
-            elevation_deg=[0, 0, 0, 89.99],
+            elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
             distance=[1.8] * NUM_VIEWS,
             left=-0.55,
             right=0.55,
             bottom=-0.55,
             top=0.55,
-            azimuth_deg=[x - 90 for x in [0, 90, 180, 180]],
+            azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
             device=DEVICE,
         )
         ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
+
         mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
-        with autocast():
-            render_out = render(
-                ctx,
-                mesh,
-                cameras,
-                height=height,
-                width=width,
-                render_attr=False,
-                normal_background=0.0,
-            )
+        render_out = render(
+            ctx,
+            mesh,
+            cameras,
+            height=height,
+            width=width,
+            render_attr=False,
+            normal_background=0.0,
+        )
         control_images = (
             torch.cat(
-                [(render_out.pos + 0.5).clamp(0, 1), (render_out.normal / 2 + 0.5).clamp(0, 1)],
+                [
+                    (render_out.pos + 0.5).clamp(0, 1),
+                    (render_out.normal / 2 + 0.5).clamp(0, 1),
+                ],
                 dim=-1,
             )
             .permute(0, 3, 1, 2)
             .to(DEVICE)
         )
-        del render_out
+
        image = Image.open(image)
-        birefnet.to(DEVICE, dtype=DTYPE)
-        with autocast():
-            image = remove_bg_fn(image)
-        birefnet.to("cpu")
+        image = remove_bg_fn(image)
         image = preprocess_image(image, height, width)
-        pipe_kwargs = {"generator": torch.Generator(device=DEVICE).manual_seed(seed)} if seed != -1 else {}
-        mv_adapter_pipe.to(DEVICE, dtype=DTYPE)
-        with autocast():
-            images = mv_adapter_pipe(
-                "high quality",
-                height=height,
-                width=width,
-                num_inference_steps=10,
-                guidance_scale=3.0,
-                num_images_per_prompt=NUM_VIEWS,
-                control_image=control_images,
-                control_conditioning_scale=1.0,
-                reference_image=image,
-                reference_conditioning_scale=1.0,
-                negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
-                cross_attention_kwargs={"scale": 1.0},
-                **pipe_kwargs,
-            ).images
-        mv_adapter_pipe.to("cpu")
-        del control_images
-        save_dir = os.path.join(TMP_DIR, str(req.session_hash) if req else "examples")
+
+        pipe_kwargs = {}
+        if seed != -1 and isinstance(seed, int):
+            pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
+
+        images = mv_adapter_pipe(
+            "high quality",
+            height=height,
+            width=width,
+            num_inference_steps=15,
+            guidance_scale=3.0,
+            num_images_per_prompt=NUM_VIEWS,
+            control_image=control_images,
+            control_conditioning_scale=1.0,
+            reference_image=image,
+            reference_conditioning_scale=1.0,
+            negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
+            cross_attention_kwargs={"scale": 1.0},
+            **pipe_kwargs,
+        ).images
+
+        torch.cuda.empty_cache()
         os.makedirs(save_dir, exist_ok=True)
         mv_image_path = os.path.join(save_dir, f"mv_adapter_{get_random_hex()}.png")
         make_image_grid(images, rows=1).save(mv_image_path)
+
         from texture import TexturePipeline, ModProcessConfig
         texture_pipe = TexturePipeline(
             upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
             inpaint_ckpt_path="checkpoints/big-lama.pt",
             device=DEVICE,
         )
+
         textured_glb_path = texture_pipe(
             mesh_path=mesh_path,
             save_dir=save_dir,
             save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
             uv_unwarp=True,
-            uv_size=2048,
+            uv_size=4096,
             rgb_path=mv_image_path,
             rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
-            camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 180]],
+            camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
         )
-        torch.cuda.empty_cache()
-        log_gpu_memory()
-        return textured_glb_path
-    except Exception as e:
-        logger.error(f"Error in run_texture: {str(e)}\n{traceback.format_exc()}")
-        raise
 
-@spaces.GPU(duration=3)
-@torch.no_grad()
-def run_full(image, seed=0, num_inference_steps=30, guidance_scale=7.0, simplify=True, target_face_num=DEFAULT_FACE_NUMBER, req=None):
-    try:
-        log_gpu_memory()
-        image_seg = run_segmentation(image)
-        mesh_path = image_to_3d(image_seg, seed, num_inference_steps, guidance_scale, simplify, target_face_num, req)
-        textured_glb_path = run_texture(image, mesh_path, seed, req)
         return image_seg, mesh_path, textured_glb_path
     except Exception as e:
-        logger.error(f"Error in run_full: {str(e)}\n{traceback.format_exc()}")
+        logger.error(f"Error in run_full: {str(e)}")
         raise
 
-def gradio_generate(image, seed=0, num_inference_steps=30, guidance_scale=7.0, simplify=True, target_face_num=DEFAULT_FACE_NUMBER):
+def gradio_generate(image: str, seed: int = 0, num_inference_steps: int = 50, guidance_scale: float = 7.5, simplify: bool = True, target_face_num: int = DEFAULT_FACE_NUMBER):
     try:
         logger.info("Starting gradio_generate")
+        # Verify API key
         api_key = os.getenv("POLYGENIX_API_KEY", "your-secret-api-key")
         request = gr.Request()
         if not request.headers.get("x-api-key") == api_key:
             logger.error("Invalid API key")
             raise ValueError("Invalid API key")
 
+        # Handle base64 image or file path
         if image.startswith("data:image"):
             logger.info("Processing base64 image")
             base64_string = image.split(",")[1]
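
The rewritten run_full above goes from four views to six: four side views 90 degrees apart plus near-vertical top and bottom views, all shifted by the -90 offset used in the call. A small standalone sketch of what the two lists enumerate:

    NUM_VIEWS = 6
    elevation_deg = [0, 0, 0, 0, 89.99, -89.99]
    azimuth_deg = [x - 90 for x in [0, 90, 180, 270, 180, 180]]
    for i, (elev, azim) in enumerate(zip(elevation_deg, azimuth_deg)):
        print(f"view {i}: elevation {elev:+7.2f} deg, azimuth {azim:+7.2f} deg")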
@@ -319,12 +255,12 @@ def gradio_generate(image, seed=0, num_inference_steps=30, guidance_scale=7.0, s
             logger.error(f"Image file not found: {temp_image_path}")
             raise ValueError("Invalid or missing image file")
 
-        image_seg, mesh_path, textured_glb_path = run_full(temp_image_path, seed, num_inference_steps, guidance_scale, simplify, target_face_num, request)
+        image_seg, mesh_path, textured_glb_path = run_full(temp_image_path, seed, num_inference_steps, guidance_scale, simplify, target_face_num, req=None)
         session_hash = os.path.basename(os.path.dirname(textured_glb_path))
         logger.info(f"Generated model at /files/{session_hash}/{os.path.basename(textured_glb_path)}")
         return {"file_url": f"/files/{session_hash}/{os.path.basename(textured_glb_path)}"}
     except Exception as e:
-        logger.error(f"Error in gradio_generate: {str(e)}\n{traceback.format_exc()}")
+        logger.error(f"Error in gradio_generate: {str(e)}")
         raise
 
 def start_session(req: gr.Request):
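
The data-URL branch in gradio_generate above boils down to the following; the byte string here is a stand-in payload, not a valid image:

    import base64

    data_url = "data:image/png;base64," + base64.b64encode(b"stand-in bytes").decode()
    payload = data_url.split(",")[1]   # strip the "data:image/...;base64," prefix
    with open("input_example.png", "wb") as f:
        f.write(base64.b64decode(payload))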
@@ -333,7 +269,7 @@ def start_session(req: gr.Request):
         os.makedirs(save_dir, exist_ok=True)
         logger.info(f"Started session, created directory: {save_dir}")
     except Exception as e:
-        logger.error(f"Error in start_session: {str(e)}\n{traceback.format_exc()}")
+        logger.error(f"Error in start_session: {str(e)}")
         raise
 
 def end_session(req: gr.Request):
@@ -342,7 +278,7 @@ def end_session(req: gr.Request):
         shutil.rmtree(save_dir)
         logger.info(f"Ended session, removed directory: {save_dir}")
     except Exception as e:
-        logger.error(f"Error in end_session: {str(e)}\n{traceback.format_exc()}")
+        logger.error(f"Error in end_session: {str(e)}")
         raise
 
 def get_random_seed(randomize_seed, seed):
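
start_session and end_session above pair a per-session scratch directory with the Gradio session hash. A compact sketch of that lifecycle; TMP_DIR and the hash value are stand-ins:

    import os
    import shutil

    TMP_DIR = "/tmp/polygenix"     # stand-in for the app's TMP_DIR
    session_hash = "abc123"        # stand-in for req.session_hash

    save_dir = os.path.join(TMP_DIR, session_hash)
    os.makedirs(save_dir, exist_ok=True)   # start_session
    # ... generated files are written here during the session ...
    shutil.rmtree(save_dir)                # end_session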
@@ -352,10 +288,12 @@ def get_random_seed(randomize_seed, seed):
         logger.info(f"Generated seed: {seed}")
         return seed
     except Exception as e:
-        logger.error(f"Error in get_random_seed: {str(e)}\n{traceback.format_exc()}")
+        logger.error(f"Error in get_random_seed: {str(e)}")
         raise
 
+
 def download_image(url: str, save_path: str) -> str:
+    """Download an image from a URL and save it locally."""
     try:
         logger.info(f"Downloading image from {url}")
         response = requests.get(url, stream=True)
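
A hedged variant of download_image with a timeout and explicit HTTP status checking; raise_for_status() and iter_content() are standard requests APIs. Note that the import hunk above removes the import requests line this helper still relies on:

    import requests

    def download_image_safe(url: str, save_path: str, timeout: float = 30.0) -> str:
        response = requests.get(url, stream=True, timeout=timeout)
        response.raise_for_status()    # surface HTTP errors instead of saving an error page
        with open(save_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        return save_path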
@@ -366,21 +304,216 @@ def download_image(url: str, save_path: str) -> str:
         logger.info(f"Saved image to {save_path}")
         return save_path
     except Exception as e:
-        logger.error(f"Failed to download image from {url}: {str(e)}\n{traceback.format_exc()}")
+        logger.error(f"Failed to download image from {url}: {str(e)}")
         raise
 
-@spaces.GPU(duration=3)
-@torch.no_grad()
-def run_full_api(image, seed=0, num_inference_steps=30, guidance_scale=7.0, simplify=True, target_face_num=DEFAULT_FACE_NUMBER, req=None):
+@spaces.GPU()
+@torch.no_grad()
+def run_segmentation(image):
+    try:
+        logger.info("Running segmentation")
+        # Handle FileData dict or URL
+        if isinstance(image, dict):
+            image_path = image.get("path") or image.get("url")
+            if not image_path:
+                logger.error("Invalid image input: no path or URL provided")
+                raise ValueError("Invalid image input: no path or URL provided")
+            if image_path.startswith("http"):
+                temp_image_path = os.path.join(TMP_DIR, f"input_{get_random_hex()}.png")
+                image_path = download_image(image_path, temp_image_path)
+        elif isinstance(image, str) and image.startswith("http"):
+            temp_image_path = os.path.join(TMP_DIR, f"input_{get_random_hex()}.png")
+            image_path = download_image(image, temp_image_path)
+        else:
+            image_path = image
+            if not isinstance(image, (str, bytes)) or (isinstance(image, str) and not os.path.exists(image)):
+                logger.error(f"Invalid image type or path: {type(image)}")
+                raise ValueError(f"Expected str (path/URL), bytes, or FileData dict, got {type(image)}")
+
+        image = prepare_image(image_path, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
+        logger.info("Segmentation complete")
+        return image
+    except Exception as e:
+        logger.error(f"Error in run_segmentation: {str(e)}")
+        raise
+
+@spaces.GPU(duration=5)
+@torch.no_grad()
+def image_to_3d(
+    image,  # Changed to accept FileData dict or PIL Image
+    seed: int,
+    num_inference_steps: int,
+    guidance_scale: float,
+    simplify: bool,
+    target_face_num: int,
+    req: gr.Request
+):
+    try:
+        logger.info("Running image_to_3d")
+        # Handle FileData dict from gradio_client
+        if isinstance(image, dict):
+            image_path = image.get("path") or image.get("url")
+            if not image_path:
+                logger.error("Invalid image input: no path or URL provided")
+                raise ValueError("Invalid image input: no path or URL provided")
+            image = Image.open(image_path)
+        elif not isinstance(image, Image.Image):
+            logger.error(f"Invalid image type: {type(image)}")
+            raise ValueError(f"Expected PIL Image or FileData dict, got {type(image)}")
+
+        outputs = triposg_pipe(
+            image=image,
+            generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale
+        ).samples[0]
+        logger.info("Mesh extraction done")
+        mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
+
+        if simplify:
+            logger.info("Starting mesh simplification")
+            try:
+                from utils import simplify_mesh
+                mesh = simplify_mesh(mesh, target_face_num)
+            except ImportError as e:
+                logger.error(f"Failed to import simplify_mesh: {str(e)}")
+                raise
+
+        save_dir = os.path.join(TMP_DIR, str(req.session_hash))
+        os.makedirs(save_dir, exist_ok=True)
+        mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
+        mesh.export(mesh_path)
+        logger.info(f"Saved mesh to {mesh_path}")
+
+        torch.cuda.empty_cache()
+        return mesh_path
+    except Exception as e:
+        logger.error(f"Error in image_to_3d: {str(e)}")
+        raise
+
+@spaces.GPU(duration=5)
+@torch.no_grad()
+def run_texture(image: Image, mesh_path: str, seed: int, req: gr.Request):
+    try:
+        logger.info("Running texture generation")
+        height, width = 768, 768
+        cameras = get_orthogonal_camera(
+            elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
+            distance=[1.8] * NUM_VIEWS,
+            left=-0.55,
+            right=0.55,
+            bottom=-0.55,
+            top=0.55,
+            azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
+            device=DEVICE,
+        )
+        ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
+
+        mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
+        render_out = render(
+            ctx,
+            mesh,
+            cameras,
+            height=height,
+            width=width,
+            render_attr=False,
+            normal_background=0.0,
+        )
+        control_images = (
+            torch.cat(
+                [
+                    (render_out.pos + 0.5).clamp(0, 1),
+                    (render_out.normal / 2 + 0.5).clamp(0, 1),
+                ],
+                dim=-1,
+            )
+            .permute(0, 3, 1, 2)
+            .to(DEVICE)
+        )
+
+        image = Image.open(image)
+        image = remove_bg_fn(image)
+        image = preprocess_image(image, height, width)
+
+        pipe_kwargs = {}
+        if seed != -1 and isinstance(seed, int):
+            pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
+
+        images = mv_adapter_pipe(
+            "high quality",
+            height=height,
+            width=width,
+            num_inference_steps=15,
+            guidance_scale=3.0,
+            num_images_per_prompt=NUM_VIEWS,
+            control_image=control_images,
+            control_conditioning_scale=1.0,
+            reference_image=image,
+            reference_conditioning_scale=1.0,
+            negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
+            cross_attention_kwargs={"scale": 1.0},
+            **pipe_kwargs,
+        ).images
+
+        torch.cuda.empty_cache()
+        save_dir = os.path.join(TMP_DIR, str(req.session_hash))
+        os.makedirs(save_dir, exist_ok=True)
+        mv_image_path = os.path.join(save_dir, f"mv_adapter_{get_random_hex()}.png")
+        make_image_grid(images, rows=1).save(mv_image_path)
+
+        from texture import TexturePipeline, ModProcessConfig
+        texture_pipe = TexturePipeline(
+            upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
+            inpaint_ckpt_path="checkpoints/big-lama.pt",
+            device=DEVICE,
+        )
+
+        textured_glb_path = texture_pipe(
+            mesh_path=mesh_path,
+            save_dir=save_dir,
+            save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
+            uv_unwarp=True,
+            uv_size=4096,
+            rgb_path=mv_image_path,
+            rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
+            camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
+        )
+
+        logger.info(f"Textured model saved to {textured_glb_path}")
+        return textured_glb_path
+    except Exception as e:
+        logger.error(f"Error in run_texture: {str(e)}")
+        raise
+
+@spaces.GPU(duration=5)
+@torch.no_grad()
+def run_full_api(image, seed: int = 0, num_inference_steps: int = 50, guidance_scale: float = 7.5, simplify: bool = True, target_face_num: int = DEFAULT_FACE_NUMBER, req: gr.Request = None):
     try:
         logger.info("Running run_full_api")
-        def execute():
-            image_seg, mesh_path, textured_glb_path = run_full(image, seed, num_inference_steps, guidance_scale, simplify, target_face_num, req)
-            session_hash = os.path.basename(os.path.dirname(textured_glb_path))
-            return {"file_url": f"/files/{session_hash}/{os.path.basename(textured_glb_path)}"}
-        return retry_on_failure(execute)
+        # Handle FileData dict or URL
+        if isinstance(image, dict):
+            image_path = image.get("path") or image.get("url")
+            if not image_path:
+                logger.error("Invalid image input: no path or URL provided")
+                raise ValueError("Invalid image input: no path or URL provided")
+            if image_path.startswith("http"):
+                temp_image_path = os.path.join(TMP_DIR, f"input_{get_random_hex()}.png")
+                image_path = download_image(image_path, temp_image_path)
+        elif isinstance(image, str) and image.startswith("http"):
+            temp_image_path = os.path.join(TMP_DIR, f"input_{get_random_hex()}.png")
+            image_path = download_image(image, temp_image_path)
+        else:
+            image_path = image
+            if not isinstance(image, str) or not os.path.exists(image_path):
+                logger.error(f"Invalid image path: {image_path}")
+                raise ValueError(f"Invalid image path: {image_path}")
+
+        image_seg, mesh_path, textured_glb_path = run_full(image_path, seed, num_inference_steps, guidance_scale, simplify, target_face_num, req)
+        session_hash = os.path.basename(os.path.dirname(textured_glb_path))
+        logger.info(f"Generated textured model at /files/{session_hash}/{os.path.basename(textured_glb_path)}")
+        return {"file_url": f"/files/{session_hash}/{os.path.basename(textured_glb_path)}"}
     except Exception as e:
-        logger.error(f"Error in run_full_api: {str(e)}\n{traceback.format_exc()}")
+        logger.error(f"Error in run_full_api: {str(e)}")
         raise
 
 # Define Gradio API endpoint
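
A sketch of calling the resulting endpoint from Python with gradio_client; the Space URL and the api_name are assumptions, since the gr.Interface(...) call itself sits outside the hunks shown:

    from gradio_client import Client

    client = Client("http://localhost:7860")   # assumed URL of the running app
    result = client.predict(
        "input.png",   # image (filepath)
        0,             # seed
        50,            # inference steps
        7.5,           # guidance scale
        True,          # simplify mesh
        100000,        # target face number
        api_name="/predict",  # assumed; depends on how the Interface is registered
    )
    print(result)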
@@ -391,8 +524,8 @@ try:
         inputs=[
             gr.Image(type="filepath", label="Image"),
             gr.Number(label="Seed", value=0, precision=0),
-            gr.Number(label="Inference Steps", value=30, precision=0),
-            gr.Number(label="Guidance Scale", value=7.0),
+            gr.Number(label="Inference Steps", value=50, precision=0),
+            gr.Number(label="Guidance Scale", value=7.5),
             gr.Checkbox(label="Simplify Mesh", value=True),
             gr.Number(label="Target Face Number", value=DEFAULT_FACE_NUMBER, precision=0)
         ],
@@ -401,7 +534,7 @@
     )
     logger.info("Gradio API interface initialized successfully")
 except Exception as e:
-    logger.error(f"Failed to initialize Gradio API interface: {str(e)}\n{traceback.format_exc()}")
+    logger.error(f"Failed to initialize Gradio API interface: {str(e)}")
     raise
 
 HEADER = """
@@ -487,6 +620,7 @@ HEADER = """
 </style>
 """
 
+# Gradio web interface
 try:
     logger.info("Initializing Gradio Blocks interface")
     with gr.Blocks(title="PolyGenixAI", css="body { background-color: #1A1A1A; } .gr-panel { background-color: #2D2D2D; }") as demo:
@@ -519,7 +653,7 @@ try:
                     minimum=8,
                     maximum=50,
                     step=1,
-                    value=30,
+                    value=50,
                     info="Higher steps enhance detail but increase processing time",
                     elem_classes="gr-slider"
                 )
@@ -534,7 +668,7 @@
                 )
                 reduce_face = gr.Checkbox(label="Simplify Mesh", value=True)
                 target_face_num = gr.Slider(
-                    maximum=100000,
+                    maximum=1000000,
                     minimum=10000,
                     value=DEFAULT_FACE_NUMBER,
                     label="Target Face Number",
@@ -554,7 +688,7 @@
                     f"{TRIPOSG_CODE_DIR}/assets/example_data/{image}"
                     for image in os.listdir(f"{TRIPOSG_CODE_DIR}/assets/example_data")
                 ],
-                fn=run_full_api,
+                fn=run_full,
                 inputs=[image_prompts],
                 outputs=[seg_image, model_output, textured_model_output],
                 cache_examples=True,
@@ -579,9 +713,7 @@ try:
                     target_face_num
                 ],
                 outputs=[model_output]
-            ).then(
-                lambda: gr.Button(interactive=True), outputs=[gen_texture_button]
-            )
+            ).then(lambda: gr.Button(interactive=True), outputs=[gen_texture_button])
             gen_texture_button.click(
                 run_texture,
                 inputs=[image_prompts, model_output, seed],
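
The .then() chain collapsed to one line above follows Gradio's standard event-chaining pattern: the second callback fires after the first completes. A minimal self-contained sketch:

    import gradio as gr

    with gr.Blocks() as demo:
        run_btn = gr.Button("Generate")
        texture_btn = gr.Button("Texture", interactive=False)
        out = gr.Textbox()
        # Re-enable the follow-up button once generation finishes.
        run_btn.click(lambda: "done", outputs=[out]).then(
            lambda: gr.Button(interactive=True), outputs=[texture_btn]
        )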
@@ -591,7 +723,7 @@
         demo.unload(end_session)
     logger.info("Gradio Blocks interface initialized successfully")
 except Exception as e:
-    logger.error(f"Failed to initialize Gradio Blocks interface: {str(e)}\n{traceback.format_exc()}")
+    logger.error(f"Failed to initialize Gradio Blocks interface: {str(e)}")
     raise
 
 if __name__ == "__main__":
  if __name__ == "__main__":
@@ -600,5 +732,5 @@ if __name__ == "__main__":
600
  demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
601
  logger.info("Gradio application launched successfully")
602
  except Exception as e:
603
- logger.error(f"Failed to launch Gradio application: {str(e)}\n{traceback.format_exc()}")
604
  raise
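
Since gradio_generate checks an x-api-key header, a caller has to supply it explicitly. A hedged sketch using the default key from the code above; the REST route (/call/{api_name} in recent Gradio releases) and the api_name are assumptions:

    import requests

    resp = requests.post(
        "http://localhost:7860/call/predict",  # assumed route and api_name
        headers={"x-api-key": "your-secret-api-key"},
        json={"data": ["input.png", 0, 50, 7.5, True, 100000]},
        timeout=60,
    )
    print(resp.status_code, resp.text)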
 