PolyGenixAI6.0

Running on Zero

App Files Files

anvilinteractiv committed 5 days ago

Commit

e0d7b74

verified ·

1 Parent(s): c484979

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -26

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ import subprocess
 import shutil
 import base64
 import logging
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -58,7 +59,6 @@ sys.path.append(MV_ADAPTER_CODE_DIR)
 sys.path.append(os.path.join(MV_ADAPTER_CODE_DIR, "scripts"))
 try:
-    # triposg
     from image_process import prepare_image
     from briarmbg import BriaRMBG
     snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL)
@@ -72,7 +72,6 @@ except Exception as e:
     raise
 try:
-    # mv adapter
     NUM_VIEWS = 6
     from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
     from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid
@@ -144,7 +143,7 @@ def run_full(image: str, seed: int = 0, num_inference_steps: int = 50, guidance_
         torch.cuda.empty_cache()
-        height, width = 768, 768
         cameras = get_orthogonal_camera(
             elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
             distance=[1.8] * NUM_VIEWS,
@@ -168,13 +167,7 @@ def run_full(image: str, seed: int = 0, num_inference_steps: int = 50, guidance_
             normal_background=0.0,
         )
         control_images = (
-            torch.cat(
-                [
-                    (render_out.pos + 0.5).clamp(0, 1),
-                    (render_out.normal / 2 + 0.5).clamp(0, 1),
-                ],
-                dim=-1,
-            )
             .permute(0, 3, 1, 2)
             .to(DEVICE)
         )
@@ -234,14 +227,12 @@ def run_full(image: str, seed: int = 0, num_inference_steps: int = 50, guidance_
 def gradio_generate(image: str, seed: int = 0, num_inference_steps: int = 50, guidance_scale: float = 7.5, simplify: bool = True, target_face_num: int = DEFAULT_FACE_NUMBER):
     try:
         logger.info("Starting gradio_generate")
-        # Verify API key
         api_key = os.getenv("POLYGENIX_API_KEY", "your-secret-api-key")
         request = gr.Request()
         if not request.headers.get("x-api-key") == api_key:
             logger.error("Invalid API key")
             raise ValueError("Invalid API key")
-        # Handle base64 image or file path
         if image.startswith("data:image"):
             logger.info("Processing base64 image")
             base64_string = image.split(",")[1]
@@ -291,9 +282,7 @@ def get_random_seed(randomize_seed, seed):
         logger.error(f"Error in get_random_seed: {str(e)}")
         raise
 def download_image(url: str, save_path: str) -> str:
-    """Download an image from a URL and save it locally."""
     try:
         logger.info(f"Downloading image from {url}")
         response = requests.get(url, stream=True)
@@ -312,7 +301,6 @@ def download_image(url: str, save_path: str) -> str:
 def run_segmentation(image):
     try:
         logger.info("Running segmentation")
-        # Handle FileData dict or URL
         if isinstance(image, dict):
             image_path = image.get("path") or image.get("url")
             if not image_path:
@@ -340,7 +328,7 @@ def run_segmentation(image):
 @spaces.GPU(duration=5)
 @torch.no_grad()
 def image_to_3d(
-    image,  # Changed to accept FileData dict or PIL Image
     seed: int,
     num_inference_steps: int,
     guidance_scale: float,
@@ -350,7 +338,6 @@ def image_to_3d(
 ):
     try:
         logger.info("Running image_to_3d")
-        # Handle FileData dict from gradio_client
         if isinstance(image, dict):
             image_path = image.get("path") or image.get("url")
             if not image_path:
@@ -396,7 +383,7 @@ def image_to_3d(
 def run_texture(image: Image, mesh_path: str, seed: int, req: gr.Request):
     try:
         logger.info("Running texture generation")
-        height, width = 768, 768
         cameras = get_orthogonal_camera(
             elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
             distance=[1.8] * NUM_VIEWS,
@@ -420,13 +407,7 @@ def run_texture(image: Image, mesh_path: str, seed: int, req: gr.Request):
             normal_background=0.0,
         )
         control_images = (
-            torch.cat(
-                [
-                    (render_out.pos + 0.5).clamp(0, 1),
-                    (render_out.normal / 2 + 0.5).clamp(0, 1),
-                ],
-                dim=-1,
-            )
             .permute(0, 3, 1, 2)
             .to(DEVICE)
         )
@@ -490,7 +471,6 @@ def run_texture(image: Image, mesh_path: str, seed: int, req: gr.Request):
 def run_full_api(image, seed: int = 0, num_inference_steps: int = 50, guidance_scale: float = 7.5, simplify: bool = True, target_face_num: int = DEFAULT_FACE_NUMBER, req: gr.Request = None):
     try:
         logger.info("Running run_full_api")
-        # Handle FileData dict or URL
         if isinstance(image, dict):
             image_path = image.get("path") or image.get("url")
             if not image_path:

 import shutil
 import base64
 import logging
+import requests
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 sys.path.append(os.path.join(MV_ADAPTER_CODE_DIR, "scripts"))
 try:
     from image_process import prepare_image
     from briarmbg import BriaRMBG
     snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL)
     raise
 try:
     NUM_VIEWS = 6
     from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
     from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid
         torch.cuda.empty_cache()
+        height, width = 1920, 1080  # Set resolution for YouTube Shorts, TikTok, Reels
         cameras = get_orthogonal_camera(
             elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
             distance=[1.8] * NUM_VIEWS,
             normal_background=0.0,
         )
         control_images = (
+            (render_out.pos + 0.5).clamp(0, 1)  # Use only position map, remove normal map
             .permute(0, 3, 1, 2)
             .to(DEVICE)
         )
 def gradio_generate(image: str, seed: int = 0, num_inference_steps: int = 50, guidance_scale: float = 7.5, simplify: bool = True, target_face_num: int = DEFAULT_FACE_NUMBER):
     try:
         logger.info("Starting gradio_generate")
         api_key = os.getenv("POLYGENIX_API_KEY", "your-secret-api-key")
         request = gr.Request()
         if not request.headers.get("x-api-key") == api_key:
             logger.error("Invalid API key")
             raise ValueError("Invalid API key")
         if image.startswith("data:image"):
             logger.info("Processing base64 image")
             base64_string = image.split(",")[1]
         logger.error(f"Error in get_random_seed: {str(e)}")
         raise
 def download_image(url: str, save_path: str) -> str:
     try:
         logger.info(f"Downloading image from {url}")
         response = requests.get(url, stream=True)
 def run_segmentation(image):
     try:
         logger.info("Running segmentation")
         if isinstance(image, dict):
             image_path = image.get("path") or image.get("url")
             if not image_path:
 @spaces.GPU(duration=5)
 @torch.no_grad()
 def image_to_3d(
+    image,
     seed: int,
     num_inference_steps: int,
     guidance_scale: float,
 ):
     try:
         logger.info("Running image_to_3d")
         if isinstance(image, dict):
             image_path = image.get("path") or image.get("url")
             if not image_path:
 def run_texture(image: Image, mesh_path: str, seed: int, req: gr.Request):
     try:
         logger.info("Running texture generation")
+        height, width = 1920, 1080  # Set resolution for YouTube Shorts, TikTok, Reels
         cameras = get_orthogonal_camera(
             elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
             distance=[1.8] * NUM_VIEWS,
             normal_background=0.0,
         )
         control_images = (
+            (render_out.pos + 0.5).clamp(0, 1)  # Use only position map, remove normal map
             .permute(0, 3, 1, 2)
             .to(DEVICE)
         )
 def run_full_api(image, seed: int = 0, num_inference_steps: int = 50, guidance_scale: float = 7.5, simplify: bool = True, target_face_num: int = DEFAULT_FACE_NUMBER, req: gr.Request = None):
     try:
         logger.info("Running run_full_api")
         if isinstance(image, dict):
             image_path = image.get("path") or image.get("url")
             if not image_path: