# PolyGenixAI6.0 / app.py
import spaces
import os
import gradio as gr
import numpy as np
import torch
from PIL import Image
import trimesh
import random
from transformers import AutoModelForImageSegmentation
from torchvision import transforms
from huggingface_hub import hf_hub_download, snapshot_download
import subprocess
import shutil
# Install additional runtime dependencies that are not listed in requirements.txt
subprocess.run("pip install spandrel==0.4.1 --no-deps", shell=True, check=True)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16
print("DEVICE: ", DEVICE)
DEFAULT_FACE_NUMBER = 100000
MAX_SEED = np.iinfo(np.int32).max
TRIPOSG_REPO_URL = "https://github.com/VAST-AI-Research/TripoSG.git"
MV_ADAPTER_REPO_URL = "https://github.com/huanngzh/MV-Adapter.git"
RMBG_PRETRAINED_MODEL = "checkpoints/RMBG-1.4"
TRIPOSG_PRETRAINED_MODEL = "checkpoints/TripoSG"
TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp")
os.makedirs(TMP_DIR, exist_ok=True)
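# Clone the TripoSG and MV-Adapter repos (MV-Adapter pinned to a known commit) and add them
# to sys.path so their pipelines and helper scripts can be imported below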
TRIPOSG_CODE_DIR = "./triposg"
if not os.path.exists(TRIPOSG_CODE_DIR):
os.system(f"git clone {TRIPOSG_REPO_URL} {TRIPOSG_CODE_DIR}")
MV_ADAPTER_CODE_DIR = "./mv_adapter"
if not os.path.exists(MV_ADAPTER_CODE_DIR):
os.system(f"git clone {MV_ADAPTER_REPO_URL} {MV_ADAPTER_CODE_DIR} && cd {MV_ADAPTER_CODE_DIR} && git checkout 7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d")
import sys
sys.path.append(TRIPOSG_CODE_DIR)
sys.path.append(os.path.join(TRIPOSG_CODE_DIR, "scripts"))
sys.path.append(MV_ADAPTER_CODE_DIR)
sys.path.append(os.path.join(MV_ADAPTER_CODE_DIR, "scripts"))
HEADER = """
# 🎨 PolyGenixAI: Transform Ideas into 3D Masterpieces
## Unleash Your Creativity with AI-Powered 3D Generation by AnvilInteractive Solutions
<p style="font-size: 1.1em;">By <a href="https://www.anvilinteractive.com/" style="color: #3B82F6; text-decoration: none; font-weight: bold;">AnvilInteractive Solutions</a></p>
## 🚀 Get Started:
1. **Upload an Image** (clear, single-object images work best)
2. **Select Filters** to customize styles
3. Click **Generate 3D Model** to create your mesh
4. Click **Apply Texture** to enhance with realistic textures
5. **Download GLB** to save your creation
<p style="font-size: 0.9em; margin-top: 10px;">Powered by advanced AI and multi-view technology from AnvilInteractive Solutions. Join our community at <a href="https://www.anvilinteractive.com/polygenixai" style="color: #3B82F6; text-decoration: none;">PolyGenixAI</a> for tips and inspiration.</p>
<style>
.gr-button-primary {
background-color: #3B82F6 !important;
color: white !important;
border-radius: 8px !important;
padding: 10px 20px !important;
font-weight: 600 !important;
}
.gr-button-secondary {
background-color: #E5E7EB !important;
color: #1F2937 !important;
border-radius: 8px !important;
padding: 10px 20px !important;
}
.gr-panel {
border-radius: 12px !important;
box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
padding: 20px !important;
}
.gr-accordion {
background-color: #F9FAFB !important;
border-radius: 8px !important;
}
</style>
"""
# TripoSG: RMBG-1.4 background removal + image-to-mesh pipeline
from image_process import prepare_image
from briarmbg import BriaRMBG
snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL)
rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE)
rmbg_net.eval()
from triposg.pipelines.pipeline_triposg import TripoSGPipeline
snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_PRETRAINED_MODEL)
triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_PRETRAINED_MODEL).to(DEVICE, DTYPE)
# MV-Adapter: multi-view image generation used for texturing
NUM_VIEWS = 6
from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid
from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render
mv_adapter_pipe = prepare_pipeline(
base_model="stabilityai/stable-diffusion-xl-base-1.0",
vae_model="madebyollin/sdxl-vae-fp16-fix",
unet_model=None,
lora_model=None,
adapter_path="huanngzh/mv-adapter",
scheduler=None,
num_views=NUM_VIEWS,
device=DEVICE,
dtype=torch.float16,
)
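# BiRefNet removes the background from the reference image in the texturing stage
# (RMBG-1.4 above handles background removal for the shape stage)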
birefnet = AutoModelForImageSegmentation.from_pretrained(
"ZhengPeng7/BiRefNet", trust_remote_code=True
)
birefnet.to(DEVICE)
transform_image = transforms.Compose(
[
transforms.Resize((1024, 1024)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
)
remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, DEVICE)
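# Download the texture-pipeline checkpoints if missing: Real-ESRGAN x2 upscaler and the big-lama inpainting model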
if not os.path.exists("checkpoints/RealESRGAN_x2plus.pth"):
hf_hub_download("dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir="checkpoints")
if not os.path.exists("checkpoints/big-lama.pt"):
subprocess.run("wget -P checkpoints/ https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt", shell=True, check=True)
def start_session(req: gr.Request):
save_dir = os.path.join(TMP_DIR, str(req.session_hash))
os.makedirs(save_dir, exist_ok=True)
print("start session, mkdir", save_dir)
def end_session(req: gr.Request):
save_dir = os.path.join(TMP_DIR, str(req.session_hash))
shutil.rmtree(save_dir)
def get_random_hex():
random_bytes = os.urandom(8)
random_hex = random_bytes.hex()
return random_hex
def get_random_seed(randomize_seed, seed):
if randomize_seed:
seed = random.randint(0, MAX_SEED)
return seed
@spaces.GPU(duration=180)
def run_full(image: str, req: gr.Request):
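    # One-click pipeline used for the cached examples: segment the input, generate a mesh with TripoSG,
    # render control views, generate multi-view images with MV-Adapter, and bake a textured GLB.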
seed = 0
num_inference_steps = 50
guidance_scale = 7.5
simplify = True
target_face_num = DEFAULT_FACE_NUMBER
image_seg = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
outputs = triposg_pipe(
image=image_seg,
generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale
).samples[0]
print("mesh extraction done")
mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
if simplify:
print("start simplify")
from utils import simplify_mesh
mesh = simplify_mesh(mesh, target_face_num)
save_dir = os.path.join(TMP_DIR, "examples")
os.makedirs(save_dir, exist_ok=True)
mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
mesh.export(mesh_path)
print("save to ", mesh_path)
torch.cuda.empty_cache()
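    # Texturing stage: render position/normal maps from six orthogonal cameras to condition MV-Adapter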
height, width = 768, 768
# Prepare cameras
cameras = get_orthogonal_camera(
elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
distance=[1.8] * NUM_VIEWS,
left=-0.55,
right=0.55,
bottom=-0.55,
top=0.55,
azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
device=DEVICE,
)
ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
render_out = render(
ctx,
mesh,
cameras,
height=height,
width=width,
render_attr=False,
normal_background=0.0,
)
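    # Concatenate position and normal renders channel-wise to form the control images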
control_images = (
torch.cat(
[
(render_out.pos + 0.5).clamp(0, 1),
(render_out.normal / 2 + 0.5).clamp(0, 1),
],
dim=-1,
)
.permute(0, 3, 1, 2)
.to(DEVICE)
)
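    # Reference image: reload the uploaded file, strip its background with BiRefNet, and resize to the render resolution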
image = Image.open(image)
image = remove_bg_fn(image)
image = preprocess_image(image, height, width)
pipe_kwargs = {}
if seed != -1 and isinstance(seed, int):
pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
images = mv_adapter_pipe(
"high quality",
height=height,
width=width,
num_inference_steps=15,
guidance_scale=3.0,
num_images_per_prompt=NUM_VIEWS,
control_image=control_images,
control_conditioning_scale=1.0,
reference_image=image,
reference_conditioning_scale=1.0,
negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
cross_attention_kwargs={"scale": 1.0},
**pipe_kwargs,
).images
torch.cuda.empty_cache()
mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
make_image_grid(images, rows=1).save(mv_image_path)
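    # Bake the generated views onto the mesh UVs (per-view upscaling and inpainting) to produce the textured GLB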
from texture import TexturePipeline, ModProcessConfig
texture_pipe = TexturePipeline(
upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
inpaint_ckpt_path="checkpoints/big-lama.pt",
device=DEVICE,
)
textured_glb_path = texture_pipe(
mesh_path=mesh_path,
save_dir=save_dir,
save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
uv_unwarp=True,
uv_size=4096,
rgb_path=mv_image_path,
rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
)
return image_seg, mesh_path, textured_glb_path
@spaces.GPU()
@torch.no_grad()
def run_segmentation(image: str):
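    # Remove the background from the uploaded image and composite it on white before mesh generation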
image = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
return image
@spaces.GPU(duration=90)
@torch.no_grad()
def image_to_3d(
image: Image.Image,
seed: int,
num_inference_steps: int,
guidance_scale: float,
simplify: bool,
target_face_num: int,
req: gr.Request
):
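    # Generate a mesh from the segmented image with TripoSG, optionally simplify it,
    # and export a GLB into the session directory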
outputs = triposg_pipe(
image=image,
generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale
).samples[0]
print("mesh extraction done")
mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
if simplify:
print("start simplify")
from utils import simplify_mesh
mesh = simplify_mesh(mesh, target_face_num)
save_dir = os.path.join(TMP_DIR, str(req.session_hash))
mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
mesh.export(mesh_path)
print("save to ", mesh_path)
torch.cuda.empty_cache()
return mesh_path
@spaces.GPU(duration=120)
@torch.no_grad()
def run_texture(image: str, mesh_path: str, seed: int, req: gr.Request):
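    # Texture an existing mesh: render control views, generate multi-view images with MV-Adapter,
    # and bake them into a textured GLB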
height, width = 768, 768
# Prepare cameras
cameras = get_orthogonal_camera(
elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
distance=[1.8] * NUM_VIEWS,
left=-0.55,
right=0.55,
bottom=-0.55,
top=0.55,
azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
device=DEVICE,
)
ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
render_out = render(
ctx,
mesh,
cameras,
height=height,
width=width,
render_attr=False,
normal_background=0.0,
)
control_images = (
torch.cat(
[
(render_out.pos + 0.5).clamp(0, 1),
(render_out.normal / 2 + 0.5).clamp(0, 1),
],
dim=-1,
)
.permute(0, 3, 1, 2)
.to(DEVICE)
)
image = Image.open(image)
image = remove_bg_fn(image)
image = preprocess_image(image, height, width)
pipe_kwargs = {}
if seed != -1 and isinstance(seed, int):
pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
images = mv_adapter_pipe(
"high quality",
height=height,
width=width,
num_inference_steps=15,
guidance_scale=3.0,
num_images_per_prompt=NUM_VIEWS,
control_image=control_images,
control_conditioning_scale=1.0,
reference_image=image,
reference_conditioning_scale=1.0,
negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
cross_attention_kwargs={"scale": 1.0},
**pipe_kwargs,
).images
torch.cuda.empty_cache()
save_dir = os.path.join(TMP_DIR, str(req.session_hash))
mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
make_image_grid(images, rows=1).save(mv_image_path)
from texture import TexturePipeline, ModProcessConfig
texture_pipe = TexturePipeline(
upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
inpaint_ckpt_path="checkpoints/big-lama.pt",
device=DEVICE,
)
textured_glb_path = texture_pipe(
mesh_path=mesh_path,
save_dir=save_dir,
save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
uv_unwarp=True,
uv_size=4096,
rgb_path=mv_image_path,
rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
)
return textured_glb_path
with gr.Blocks(title="PolyGenixAI", css="body { background-color: #F3F4F6; } .gr-panel { background-color: white; }") as demo:
gr.Markdown(HEADER)
with gr.Tabs():
with gr.Tab("Create 3D Model"):
with gr.Row():
with gr.Column(scale=1):
image_prompts = gr.Image(label="Upload Image", type="filepath", height=300)
seg_image = gr.Image(label="Preview Segmentation", type="pil", format="png", interactive=False, height=300)
with gr.Accordion("Style & Settings", open=True):
style_filter = gr.Dropdown(
choices=["None", "Realistic", "Fantasy", "Cartoon", "Sci-Fi", "Vintage"],
label="Style Filter",
value="None",
info="Select a style to enhance your 3D model (optional)"
)
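                        # Note: this dropdown is not yet consumed by the generation handlers below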
seed = gr.Slider(
label="Seed",
minimum=0,
maximum=MAX_SEED,
step=1,
value=0
)
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
num_inference_steps = gr.Slider(
label="Inference Steps",
minimum=8,
maximum=50,
step=1,
value=50,
info="Higher steps improve quality but take longer"
)
guidance_scale = gr.Slider(
label="Guidance Scale",
minimum=0.0,
maximum=20.0,
step=0.1,
value=7.0,
info="Controls how closely the model follows the input"
)
reduce_face = gr.Checkbox(label="Simplify Mesh", value=True)
target_face_num = gr.Slider(
maximum=1000000,
minimum=10000,
value=DEFAULT_FACE_NUMBER,
label="Target Face Number",
info="Adjust mesh complexity"
)
gen_button = gr.Button("Generate 3D Model", variant="primary")
gen_texture_button = gr.Button("Apply Texture", variant="secondary", interactive=False)
with gr.Column(scale=1):
model_output = gr.Model3D(label="3D Model Preview", interactive=False, height=400)
textured_model_output = gr.Model3D(label="Textured 3D Model", interactive=False, height=400)
download_button = gr.Button("Download GLB", variant="secondary")
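                # Note: this button is not wired to a click handler; the Model3D viewers expose their own download controls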
with gr.Tab("Gallery & Community"):
gr.Markdown("### Explore Creations")
gr.Examples(
examples=[
f"{TRIPOSG_CODE_DIR}/assets/example_data/{image}"
for image in os.listdir(f"{TRIPOSG_CODE_DIR}/assets/example_data")
],
fn=run_full,
inputs=[image_prompts],
outputs=[seg_image, model_output, textured_model_output],
cache_examples=True,
)
gr.Markdown("Join our [PolyGenixAI Community](https://www.anvilinteractive.com/community) to share your creations and get inspired!")
gen_button.click(
run_segmentation,
inputs=[image_prompts],
outputs=[seg_image]
).then(
get_random_seed,
inputs=[randomize_seed, seed],
outputs=[seed],
).then(
image_to_3d,
inputs=[
seg_image,
seed,
num_inference_steps,
guidance_scale,
reduce_face,
target_face_num
],
outputs=[model_output]
).then(lambda: gr.Button(interactive=True), outputs=[gen_texture_button])
gen_texture_button.click(
run_texture,
inputs=[image_prompts, model_output, seed],
outputs=[textured_model_output]
)
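    # Per-session temp directories are created on page load and removed on unload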
demo.load(start_session)
demo.unload(end_session)
demo.launch()