import spaces
import os
import gradio as gr
import numpy as np
import torch
from PIL import Image
import trimesh
import random
from transformers import AutoModelForImageSegmentation
from torchvision import transforms
from huggingface_hub import hf_hub_download, snapshot_download
import subprocess
import shutil

# install others
subprocess.run("pip install spandrel==0.4.1 --no-deps", shell=True, check=True)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16

print("DEVICE: ", DEVICE)

DEFAULT_FACE_NUMBER = 100000
MAX_SEED = np.iinfo(np.int32).max
TRIPOSG_REPO_URL = "https://github.com/VAST-AI-Research/TripoSG.git"
MV_ADAPTER_REPO_URL = "https://github.com/huanngzh/MV-Adapter.git"

RMBG_PRETRAINED_MODEL = "checkpoints/RMBG-1.4"
TRIPOSG_PRETRAINED_MODEL = "checkpoints/TripoSG"

TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp")
os.makedirs(TMP_DIR, exist_ok=True)

TRIPOSG_CODE_DIR = "./triposg"
if not os.path.exists(TRIPOSG_CODE_DIR):
    os.system(f"git clone {TRIPOSG_REPO_URL} {TRIPOSG_CODE_DIR}")

MV_ADAPTER_CODE_DIR = "./mv_adapter"
if not os.path.exists(MV_ADAPTER_CODE_DIR):
    os.system(
        f"git clone {MV_ADAPTER_REPO_URL} {MV_ADAPTER_CODE_DIR} && "
        f"cd {MV_ADAPTER_CODE_DIR} && git checkout 7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d"
    )

import sys
sys.path.append(TRIPOSG_CODE_DIR)
sys.path.append(os.path.join(TRIPOSG_CODE_DIR, "scripts"))
sys.path.append(MV_ADAPTER_CODE_DIR)
sys.path.append(os.path.join(MV_ADAPTER_CODE_DIR, "scripts"))

HEADER = """
# 🌌 PolyGenixAI: Craft 3D Worlds with Cosmic Precision

## Unleash Infinite Creativity with AI-Powered 3D Generation by AnvilInteractive Solutions
## 🚀 Launch Your Creation:
1. **Upload an Image** (clear, single-object images shine brightest)
2. **Choose a Style Filter** to infuse your unique vision
3. Click **Generate 3D Model** to sculpt your mesh
4. Click **Apply Texture** to bring your model to life
5. **Download GLB** to share your masterpiece

Powered by cutting-edge AI and multi-view technology from AnvilInteractive Solutions. Join our PolyGenixAI Community to connect with creators and spark inspiration.
""" # triposg from image_process import prepare_image from briarmbg import BriaRMBG snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL) rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE) rmbg_net.eval() from triposg.pipelines.pipeline_triposg import TripoSGPipeline snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_PRETRAINED_MODEL) triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_PRETRAINED_MODEL).to(DEVICE, DTYPE) # mv adapter NUM_VIEWS = 6 from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render mv_adapter_pipe = prepare_pipeline( base_model="stabilityai/stable-diffusion-xl-base-1.0", vae_model="madebyollin/sdxl-vae-fp16-fix", unet_model=None, lora_model=None, adapter_path="huanngzh/mv-adapter", scheduler=None, num_views=NUM_VIEWS, device=DEVICE, dtype=torch.float16, ) birefnet = AutoModelForImageSegmentation.from_pretrained( "ZhengPeng7/BiRefNet", trust_remote_code=True ) birefnet.to(DEVICE) transform_image = transforms.Compose( [ transforms.Resize((1024, 1024)), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), ] ) remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, DEVICE) if not os.path.exists("checkpoints/RealESRGAN_x2plus.pth"): hf_hub_download("dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir="checkpoints") if not os.path.exists("checkpoints/big-lama.pt"): subprocess.run("wget -P checkpoints/ https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt", shell=True, check=True) def start_session(req: gr.Request): save_dir = os.path.join(TMP_DIR, str(req.session_hash)) os.makedirs(save_dir, exist_ok=True) print("start session, mkdir", save_dir) def end_session(req: gr.Request): save_dir = os.path.join(TMP_DIR, str(req.session_hash)) shutil.rmtree(save_dir) def get_random_hex(): random_bytes = os.urandom(8) random_hex = random_bytes.hex() return random_hex def get_random_seed(randomize_seed, seed): if randomize_seed: seed = random.randint(0, MAX_SEED) return seed @spaces.GPU(duration=180) def run_full(image: str, req: gr.Request): seed = 0 num_inference_steps = 50 guidance_scale = 7.5 simplify = True target_face_num = DEFAULT_FACE_NUMBER image_seg = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net) outputs = triposg_pipe( image=image_seg, generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed), num_inference_steps=num_inference_steps, guidance_scale=guidance_scale ).samples[0] print("mesh extraction done") mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1])) if simplify: print("start simplify") from utils import simplify_mesh mesh = simplify_mesh(mesh, target_face_num) save_dir = os.path.join(TMP_DIR, "examples") os.makedirs(save_dir, exist_ok=True) mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb") mesh.export(mesh_path) print("save to ", mesh_path) torch.cuda.empty_cache() height, width = 768, 768 # Prepare cameras cameras = get_orthogonal_camera( elevation_deg=[0, 0, 0, 0, 89.99, -89.99], distance=[1.8] * NUM_VIEWS, left=-0.55, right=0.55, bottom=-0.55, top=0.55, azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]], device=DEVICE, ) ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda") mesh = load_mesh(mesh_path, rescale=True, device=DEVICE) render_out 
    render_out = render(
        ctx,
        mesh,
        cameras,
        height=height,
        width=width,
        render_attr=False,
        normal_background=0.0,
    )
    # Stack position and normal maps channel-wise as geometric control images.
    control_images = (
        torch.cat(
            [
                (render_out.pos + 0.5).clamp(0, 1),
                (render_out.normal / 2 + 0.5).clamp(0, 1),
            ],
            dim=-1,
        )
        .permute(0, 3, 1, 2)
        .to(DEVICE)
    )

    image = Image.open(image)
    image = remove_bg_fn(image)
    image = preprocess_image(image, height, width)

    pipe_kwargs = {}
    if seed != -1 and isinstance(seed, int):
        pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)

    images = mv_adapter_pipe(
        "high quality",
        height=height,
        width=width,
        num_inference_steps=15,
        guidance_scale=3.0,
        num_images_per_prompt=NUM_VIEWS,
        control_image=control_images,
        control_conditioning_scale=1.0,
        reference_image=image,
        reference_conditioning_scale=1.0,
        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
        cross_attention_kwargs={"scale": 1.0},
        **pipe_kwargs,
    ).images

    torch.cuda.empty_cache()

    mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
    make_image_grid(images, rows=1).save(mv_image_path)

    from texture import TexturePipeline, ModProcessConfig
    texture_pipe = TexturePipeline(
        upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
        inpaint_ckpt_path="checkpoints/big-lama.pt",
        device=DEVICE,
    )

    textured_glb_path = texture_pipe(
        mesh_path=mesh_path,
        save_dir=save_dir,
        save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
        uv_unwarp=True,
        uv_size=4096,
        rgb_path=mv_image_path,
        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
        camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
    )

    return image_seg, mesh_path, textured_glb_path


@spaces.GPU()
@torch.no_grad()
def run_segmentation(image: str):
    image = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
    return image


@spaces.GPU(duration=90)
@torch.no_grad()
def image_to_3d(
    image: Image.Image,
    seed: int,
    num_inference_steps: int,
    guidance_scale: float,
    simplify: bool,
    target_face_num: int,
    req: gr.Request,
):
    outputs = triposg_pipe(
        image=image,
        generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).samples[0]
    print("mesh extraction done")
    mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))

    if simplify:
        print("start simplify")
        from utils import simplify_mesh
        mesh = simplify_mesh(mesh, target_face_num)

    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
    mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
    mesh.export(mesh_path)
    print("save to ", mesh_path)

    torch.cuda.empty_cache()

    return mesh_path


@spaces.GPU(duration=120)
@torch.no_grad()
def run_texture(image: str, mesh_path: str, seed: int, req: gr.Request):
    height, width = 768, 768
    # Prepare cameras: same six-view layout as in run_full.
    cameras = get_orthogonal_camera(
        elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
        distance=[1.8] * NUM_VIEWS,
        left=-0.55,
        right=0.55,
        bottom=-0.55,
        top=0.55,
        azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
        device=DEVICE,
    )
    ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")

    mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
    render_out = render(
        ctx,
        mesh,
        cameras,
        height=height,
        width=width,
        render_attr=False,
        normal_background=0.0,
    )
    control_images = (
        torch.cat(
            [
                (render_out.pos + 0.5).clamp(0, 1),
                (render_out.normal / 2 + 0.5).clamp(0, 1),
            ],
            dim=-1,
        )
        .permute(0, 3, 1, 2)
        .to(DEVICE)
    )

    image = Image.open(image)
    image = remove_bg_fn(image)
    image = preprocess_image(image, height, width)

    pipe_kwargs = {}
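    # Seed the SDXL generator only when a valid fixed seed is supplied
    # (-1 is treated as "no fixed seed").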
    if seed != -1 and isinstance(seed, int):
        pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)

    images = mv_adapter_pipe(
        "high quality",
        height=height,
        width=width,
        num_inference_steps=15,
        guidance_scale=3.0,
        num_images_per_prompt=NUM_VIEWS,
        control_image=control_images,
        control_conditioning_scale=1.0,
        reference_image=image,
        reference_conditioning_scale=1.0,
        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
        cross_attention_kwargs={"scale": 1.0},
        **pipe_kwargs,
    ).images

    torch.cuda.empty_cache()

    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
    mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
    make_image_grid(images, rows=1).save(mv_image_path)

    from texture import TexturePipeline, ModProcessConfig
    texture_pipe = TexturePipeline(
        upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
        inpaint_ckpt_path="checkpoints/big-lama.pt",
        device=DEVICE,
    )

    textured_glb_path = texture_pipe(
        mesh_path=mesh_path,
        save_dir=save_dir,
        save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
        uv_unwarp=True,
        uv_size=4096,
        rgb_path=mv_image_path,
        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
        camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
    )

    return textured_glb_path


with gr.Blocks(
    title="PolyGenixAI",
    css="body { background-color: #1A1A1A; } .gr-panel { background-color: #2D2D2D; }",
) as demo:
    gr.Markdown(HEADER)
    with gr.Tabs(elem_classes="gr-tab"):
        with gr.Tab("Create 3D Model"):
            with gr.Row():
                with gr.Column(scale=1):
                    image_prompts = gr.Image(label="Upload Image", type="filepath", height=300, elem_classes="gr-panel")
                    seg_image = gr.Image(
                        label="Preview Segmentation",
                        type="pil",
                        format="png",
                        interactive=False,
                        height=300,
                        elem_classes="gr-panel",
                    )
                    with gr.Accordion("Style & Settings", open=True, elem_classes="gr-accordion"):
                        style_filter = gr.Dropdown(
                            choices=["None", "Realistic", "Fantasy", "Cartoon", "Sci-Fi", "Vintage", "Cosmic", "Neon"],
                            label="Style Filter",
                            value="None",
                            info="Select a style to inspire your 3D model (optional)",
                            elem_classes="gr-dropdown",
                        )
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=0,
                            elem_classes="gr-slider",
                        )
                        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                        num_inference_steps = gr.Slider(
                            label="Inference Steps",
                            minimum=8,
                            maximum=50,
                            step=1,
                            value=50,
                            info="Higher steps enhance detail but increase processing time",
                            elem_classes="gr-slider",
                        )
                        guidance_scale = gr.Slider(
                            label="Guidance Scale",
                            minimum=0.0,
                            maximum=20.0,
                            step=0.1,
                            value=7.0,
                            info="Controls adherence to input image",
                            elem_classes="gr-slider",
                        )
                        reduce_face = gr.Checkbox(label="Simplify Mesh", value=True)
                        target_face_num = gr.Slider(
                            minimum=10000,
                            maximum=1000000,
                            value=DEFAULT_FACE_NUMBER,
                            label="Target Face Number",
                            info="Adjust mesh complexity for performance",
                            elem_classes="gr-slider",
                        )
                    gen_button = gr.Button("Generate 3D Model", variant="primary", elem_classes="gr-button-primary")
                    gen_texture_button = gr.Button(
                        "Apply Texture", variant="secondary", interactive=False, elem_classes="gr-button-secondary"
                    )
                with gr.Column(scale=1):
                    model_output = gr.Model3D(label="3D Model Preview", interactive=False, height=400, elem_classes="gr-panel")
                    textured_model_output = gr.Model3D(label="Textured 3D Model", interactive=False, height=400, elem_classes="gr-panel")
                    download_button = gr.Button("Download GLB", variant="secondary", elem_classes="gr-button-secondary")
        with gr.Tab("Cosmic Gallery"):
            gr.Markdown("### Discover Stellar Creations")
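            # Run the bundled example images through the full pipeline once and
            # cache the results (cache_examples=True), so gallery clicks return instantly.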
            gr.Examples(
                examples=[
                    f"{TRIPOSG_CODE_DIR}/assets/example_data/{image}"
                    for image in os.listdir(f"{TRIPOSG_CODE_DIR}/assets/example_data")
                ],
                fn=run_full,
                inputs=[image_prompts],
                outputs=[seg_image, model_output, textured_model_output],
                cache_examples=True,
            )
            gr.Markdown("Connect with creators in our PolyGenixAI Cosmic Community!")

    gen_button.click(
        run_segmentation,
        inputs=[image_prompts],
        outputs=[seg_image],
    ).then(
        get_random_seed,
        inputs=[randomize_seed, seed],
        outputs=[seed],
    ).then(
        image_to_3d,
        inputs=[
            seg_image,
            seed,
            num_inference_steps,
            guidance_scale,
            reduce_face,
            target_face_num,
        ],
        outputs=[model_output],
    ).then(lambda: gr.Button(interactive=True), outputs=[gen_texture_button])

    gen_texture_button.click(
        run_texture,
        inputs=[image_prompts, model_output, seed],
        outputs=[textured_model_output],
    )

    demo.load(start_session)
    demo.unload(end_session)

demo.launch()