# PolyGenixAI6.0 / app.py
import spaces
import os
import gradio as gr
import numpy as np
import torch
from PIL import Image
import trimesh
import random
from transformers import AutoModelForImageSegmentation
from torchvision import transforms
from huggingface_hub import hf_hub_download, snapshot_download
import subprocess
import shutil
# Install additional runtime dependencies that are not listed in requirements.txt
subprocess.run("pip install spandrel==0.4.1 --no-deps", shell=True, check=True)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16
print("DEVICE: ", DEVICE)
DEFAULT_FACE_NUMBER = 100000
MAX_SEED = np.iinfo(np.int32).max
TRIPOSG_REPO_URL = "https://github.com/VAST-AI-Research/TripoSG.git"
MV_ADAPTER_REPO_URL = "https://github.com/huanngzh/MV-Adapter.git"
RMBG_PRETRAINED_MODEL = "checkpoints/RMBG-1.4"
TRIPOSG_PRETRAINED_MODEL = "checkpoints/TripoSG"
TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp")
os.makedirs(TMP_DIR, exist_ok=True)
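# Clone the TripoSG and MV-Adapter repos (MV-Adapter pinned to a known commit) and add them
# to sys.path so their pipelines and helper scripts can be imported below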
TRIPOSG_CODE_DIR = "./triposg"
if not os.path.exists(TRIPOSG_CODE_DIR):
os.system(f"git clone {TRIPOSG_REPO_URL} {TRIPOSG_CODE_DIR}")
MV_ADAPTER_CODE_DIR = "./mv_adapter"
if not os.path.exists(MV_ADAPTER_CODE_DIR):
os.system(f"git clone {MV_ADAPTER_REPO_URL} {MV_ADAPTER_CODE_DIR} && cd {MV_ADAPTER_CODE_DIR} && git checkout 7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d")
import sys
sys.path.append(TRIPOSG_CODE_DIR)
sys.path.append(os.path.join(TRIPOSG_CODE_DIR, "scripts"))
sys.path.append(MV_ADAPTER_CODE_DIR)
sys.path.append(os.path.join(MV_ADAPTER_CODE_DIR, "scripts"))
HEADER = """
# 🎨 PolyGenixAI: Transform Ideas into 3D Masterpieces
## Unleash Your Creativity with AI-Powered 3D Generation by AnvilInteractive Solutions
<p style="font-size: 1.1em;">By <a href="https://www.anvilinteractive.com/" style="color: #3B82F6; text-decoration: none; font-weight: bold;">AnvilInteractive Solutions</a></p>
## 🚀 Get Started:
1. **Upload an Image** (clear, single-object images work best)
2. **Select Filters** to customize styles
3. Click **Generate 3D Model** to create your mesh
4. Click **Apply Texture** to enhance with realistic textures
5. **Download GLB** to save your creation
<p style="font-size: 0.9em; margin-top: 10px;">Powered by advanced AI and multi-view technology from AnvilInteractive Solutions. Join our community at <a href="https://www.anvilinteractive.com/polygenixai" style="color: #3B82F6; text-decoration: none;">PolyGenixAI</a> for tips and inspiration.</p>
<style>
.gr-button-primary {
background-color: #3B82F6 !important;
color: white !important;
border-radius: 8px !important;
padding: 10px 20px !important;
font-weight: 600 !important;
}
.gr-button-secondary {
background-color: #E5E7EB !important;
color: #1F2937 !important;
border-radius: 8px !important;
padding: 10px 20px !important;
}
.gr-panel {
border-radius: 12px !important;
box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
padding: 20px !important;
}
.gr-accordion {
background-color: #F9FAFB !important;
border-radius: 8px !important;
}
</style>
"""
# TripoSG: RMBG-1.4 background removal + image-to-mesh pipeline
from image_process import prepare_image
from briarmbg import BriaRMBG
snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL)
rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE)
rmbg_net.eval()
from triposg.pipelines.pipeline_triposg import TripoSGPipeline
snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_PRETRAINED_MODEL)
triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_PRETRAINED_MODEL).to(DEVICE, DTYPE)
# MV-Adapter: multi-view image generation used for texturing
NUM_VIEWS = 6
from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid
from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render
mv_adapter_pipe = prepare_pipeline(
base_model="stabilityai/stable-diffusion-xl-base-1.0",
vae_model="madebyollin/sdxl-vae-fp16-fix",
unet_model=None,
lora_model=None,
adapter_path="huanngzh/mv-adapter",
scheduler=None,
num_views=NUM_VIEWS,
device=DEVICE,
dtype=torch.float16,
)
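# BiRefNet removes the background from the reference image in the texturing stage
# (RMBG-1.4 above handles background removal for the shape stage)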
birefnet = AutoModelForImageSegmentation.from_pretrained(
"ZhengPeng7/BiRefNet", trust_remote_code=True
)
birefnet.to(DEVICE)
transform_image = transforms.Compose(
[
transforms.Resize((1024, 1024)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
)
remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, DEVICE)
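# Download the texture-pipeline checkpoints if missing: Real-ESRGAN x2 upscaler and the big-lama inpainting model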
if not os.path.exists("checkpoints/RealESRGAN_x2plus.pth"):
hf_hub_download("dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir="checkpoints")
if not os.path.exists("checkpoints/big-lama.pt"):
subprocess.run("wget -P checkpoints/ https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt", shell=True, check=True)
def start_session(req: gr.Request):
save_dir = os.path.join(TMP_DIR, str(req.session_hash))
os.makedirs(save_dir, exist_ok=True)
print("start session, mkdir", save_dir)
def end_session(req: gr.Request):
save_dir = os.path.join(TMP_DIR, str(req.session_hash))
shutil.rmtree(save_dir)
def get_random_hex():
random_bytes = os.urandom(8)
random_hex = random_bytes.hex()
return random_hex
def get_random_seed(randomize_seed, seed):
if randomize_seed:
seed = random.randint(0, MAX_SEED)
return seed
@spaces.GPU(duration=180)
def run_full(image: str, req: gr.Request):
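    # One-click pipeline used for the cached examples: segment the input, generate a mesh with TripoSG,
    # render control views, generate multi-view images with MV-Adapter, and bake a textured GLB.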
seed = 0
num_inference_steps = 50
guidance_scale = 7.5
simplify = True
target_face_num = DEFAULT_FACE_NUMBER
image_seg = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
outputs = triposg_pipe(
image=image_seg,
generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale
).samples[0]
print("mesh extraction done")
mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
if simplify:
print("start simplify")
from utils import simplify_mesh
mesh = simplify_mesh(mesh, target_face_num)
save_dir = os.path.join(TMP_DIR, "examples")
os.makedirs(save_dir, exist_ok=True)
mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
mesh.export(mesh_path)
print("save to ", mesh_path)
torch.cuda.empty_cache()
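    # Texturing stage: render position/normal maps from six orthogonal cameras to condition MV-Adapter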
height, width = 768, 768
# Prepare cameras
cameras = get_orthogonal_camera(
elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
distance=[1.8] * NUM_VIEWS,
left=-0.55,
right=0.55,
bottom=-0.55,
top=0.55,
azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
device=DEVICE,
)
ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
render_out = render(
ctx,
mesh,
cameras,
height=height,
width=width,
render_attr=False,
normal_background=0.0,
)
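    # Concatenate position and normal renders channel-wise to form the control images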
control_images = (
torch.cat(
[
(render_out.pos + 0.5).clamp(0, 1),
(render_out.normal / 2 + 0.5).clamp(0, 1),
],
dim=-1,
)
.permute(0, 3, 1, 2)
.to(DEVICE)
)
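    # Reference image: reload the uploaded file, strip its background with BiRefNet, and resize to the render resolution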
image = Image.open(image)
image = remove_bg_fn(image)
image = preprocess_image(image, height, width)
pipe_kwargs = {}
if seed != -1 and isinstance(seed, int):
pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
images = mv_adapter_pipe(
"high quality",
height=height,
width=width,
num_inference_steps=15,
guidance_scale=3.0,
num_images_per_prompt=NUM_VIEWS,
control_image=control_images,
control_conditioning_scale=1.0,
reference_image=image,
reference_conditioning_scale=1.0,
negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
cross_attention_kwargs={"scale": 1.0},
**pipe_kwargs,
).images
torch.cuda.empty_cache()
mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
make_image_grid(images, rows=1).save(mv_image_path)
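    # Bake the generated views onto the mesh UVs (per-view upscaling and inpainting) to produce the textured GLB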
from texture import TexturePipeline, ModProcessConfig
texture_pipe = TexturePipeline(
upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
inpaint_ckpt_path="checkpoints/big-lama.pt",
device=DEVICE,
)
textured_glb_path = texture_pipe(
mesh_path=mesh_path,
save_dir=save_dir,
save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
uv_unwarp=True,
uv_size=4096,
rgb_path=mv_image_path,
rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
)
return image_seg, mesh_path, textured_glb_path
@spaces.GPU()
@torch.no_grad()
def run_segmentation(image: str):
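    # Remove the background from the uploaded image and composite it on white before mesh generation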
image = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
return image
@spaces.GPU(duration=90)
@torch.no_grad()
def image_to_3d(
image: Image.Image,
seed: int,
num_inference_steps: int,
guidance_scale: float,
simplify: bool,
target_face_num: int,
req: gr.Request
):
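    # Generate a mesh from the segmented image with TripoSG, optionally simplify it,
    # and export a GLB into the session directory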
outputs = triposg_pipe(
image=image,
generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale
).samples[0]
print("mesh extraction done")
mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
if simplify:
print("start simplify")
from utils import simplify_mesh
mesh = simplify_mesh(mesh, target_face_num)
save_dir = os.path.join(TMP_DIR, str(req.session_hash))
mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
mesh.export(mesh_path)
print("save to ", mesh_path)
torch.cuda.empty_cache()
return mesh_path
@spaces.GPU(duration=120)
@torch.no_grad()
def run_texture(image: str, mesh_path: str, seed: int, req: gr.Request):
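    # Texture an existing mesh: render control views, generate multi-view images with MV-Adapter,
    # and bake them into a textured GLB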
height, width = 768, 768
# Prepare cameras
cameras = get_orthogonal_camera(
elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
distance=[1.8] * NUM_VIEWS,
left=-0.55,
right=0.55,
bottom=-0.55,
top=0.55,
azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
device=DEVICE,
)
ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
render_out = render(
ctx,
mesh,
cameras,
height=height,
width=width,
render_attr=False,
normal_background=0.0,
)
control_images = (
torch.cat(
[
(render_out.pos + 0.5).clamp(0, 1),
(render_out.normal / 2 + 0.5).clamp(0, 1),
],
dim=-1,
)
.permute(0, 3, 1, 2)
.to(DEVICE)
)
image = Image.open(image)
image = remove_bg_fn(image)
image = preprocess_image(image, height, width)
pipe_kwargs = {}
if seed != -1 and isinstance(seed, int):
pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
images = mv_adapter_pipe(
"high quality",
height=height,
width=width,
num_inference_steps=15,
guidance_scale=3.0,
num_images_per_prompt=NUM_VIEWS,
control_image=control_images,
control_conditioning_scale=1.0,
reference_image=image,
reference_conditioning_scale=1.0,
negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
cross_attention_kwargs={"scale": 1.0},
**pipe_kwargs,
).images
torch.cuda.empty_cache()
save_dir = os.path.join(TMP_DIR, str(req.session_hash))
mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
make_image_grid(images, rows=1).save(mv_image_path)
from texture import TexturePipeline, ModProcessConfig
texture_pipe = TexturePipeline(
upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
inpaint_ckpt_path="checkpoints/big-lama.pt",
device=DEVICE,
)
textured_glb_path = texture_pipe(
mesh_path=mesh_path,
save_dir=save_dir,
save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
uv_unwarp=True,
uv_size=4096,
rgb_path=mv_image_path,
rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
)
return textured_glb_path
with gr.Blocks(title="PolyGenixAI", css="body { background-color: #F3F4F6; } .gr-panel { background-color: white; }") as demo:
gr.Markdown(HEADER)
with gr.Tabs():
with gr.Tab("Create 3D Model"):
with gr.Row():
with gr.Column(scale=1):
image_prompts = gr.Image(label="Upload Image", type="filepath", height=300)
seg_image = gr.Image(label="Preview Segmentation", type="pil", format="png", interactive=False, height=300)
with gr.Accordion("Style & Settings", open=True):
style_filter = gr.Dropdown(
choices=["None", "Realistic", "Fantasy", "Cartoon", "Sci-Fi", "Vintage"],
label="Style Filter",
value="None",
info="Select a style to enhance your 3D model (optional)"
)
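                        # Note: this dropdown is not yet consumed by the generation handlers below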
seed = gr.Slider(
label="Seed",
minimum=0,
maximum=MAX_SEED,
step=1,
value=0
)
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
num_inference_steps = gr.Slider(
label="Inference Steps",
minimum=8,
maximum=50,
step=1,
value=50,
info="Higher steps improve quality but take longer"
)
guidance_scale = gr.Slider(
label="Guidance Scale",
minimum=0.0,
maximum=20.0,
step=0.1,
value=7.0,
info="Controls how closely the model follows the input"
)
reduce_face = gr.Checkbox(label="Simplify Mesh", value=True)
target_face_num = gr.Slider(
maximum=1000000,
minimum=10000,
value=DEFAULT_FACE_NUMBER,
label="Target Face Number",
info="Adjust mesh complexity"
)
gen_button = gr.Button("Generate 3D Model", variant="primary")
gen_texture_button = gr.Button("Apply Texture", variant="secondary", interactive=False)
with gr.Column(scale=1):
model_output = gr.Model3D(label="3D Model Preview", interactive=False, height=400)
textured_model_output = gr.Model3D(label="Textured 3D Model", interactive=False, height=400)
download_button = gr.Button("Download GLB", variant="secondary")
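                # Note: this button is not wired to a click handler; the Model3D viewers expose their own download controls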
with gr.Tab("Gallery & Community"):
gr.Markdown("### Explore Creations")
gr.Examples(
examples=[
f"{TRIPOSG_CODE_DIR}/assets/example_data/{image}"
for image in os.listdir(f"{TRIPOSG_CODE_DIR}/assets/example_data")
],
fn=run_full,
inputs=[image_prompts],
outputs=[seg_image, model_output, textured_model_output],
cache_examples=True,
)
gr.Markdown("Join our [PolyGenixAI Community](https://www.anvilinteractive.com/community) to share your creations and get inspired!")
gen_button.click(
run_segmentation,
inputs=[image_prompts],
outputs=[seg_image]
).then(
get_random_seed,
inputs=[randomize_seed, seed],
outputs=[seed],
).then(
image_to_3d,
inputs=[
seg_image,
seed,
num_inference_steps,
guidance_scale,
reduce_face,
target_face_num
],
outputs=[model_output]
).then(lambda: gr.Button(interactive=True), outputs=[gen_texture_button])
gen_texture_button.click(
run_texture,
inputs=[image_prompts, model_output, seed],
outputs=[textured_model_output]
)
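    # Per-session temp directories are created on page load and removed on unload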
demo.load(start_session)
demo.unload(end_session)
demo.launch()