Spaces:

zhangxiaosong18
/

X-Omni-Zh

Running on Zero

App Files Files Community

X-Omni-Zh / app.py

zhangxiaosong18

Update app.py

4b93eec verified 12 days ago

raw

history blame contribute delete

5.71 kB

	import gradio as gr
	import numpy as np
	import random

	import spaces #[uncomment to use ZeroGPU]
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from transformers.generation import GenerationConfig


	# Runtime configuration: target device, weight dtype, and the Hub repos to load.
	device = "cuda"
	torch_dtype = torch.bfloat16
	model_name_or_path = "X-Omni/X-Omni-Zh"
	flux_model_name_or_path = "zhangxiaosong18/FLUX.1-dev-VAE"

	# Load the tokenizer and model once at import time so every request reuses them.
	tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
	# trust_remote_code=True lets the checkpoint's own modeling code run; the
	# init_vision / set_generation_mode / mmdecode methods used in this file are
	# presumably defined there -- confirm against the model repo.
	model = AutoModelForCausalLM.from_pretrained(
	    model_name_or_path,
	    torch_dtype=torch_dtype,
	    trust_remote_code=True,
	).to(device)  # use the `device` constant instead of a hard-coded .cuda()
	# Attach the vision component from the FLUX VAE repo and switch to image mode.
	model.init_vision(flux_model_name_or_path)
	model.set_generation_mode('image')
	model.eval()

	@spaces.GPU(duration=199)  # ZeroGPU: run each call on a GPU (duration is in seconds per the `spaces` docs)
	def generate_image(
	    image_prompt,
	    image_size,
	    top_p,
	    min_p,
	    seed,
	):
	    """Generate one image for a text prompt.

	    Args:
	        image_prompt: Text describing the image; surrounding whitespace is
	            stripped before use.
	        image_size: String of two integers separated by ``x`` (for example
	            ``"1152x768"``). The first number sizes the token grid's height
	            and the second its width; each is floor-divided by 16.
	        top_p: Nucleus-sampling threshold passed to ``GenerationConfig``.
	        min_p: Min-p sampling threshold passed to ``GenerationConfig``.
	        seed: Value passed to ``torch.manual_seed``. ``None`` (an emptied
	            Seed field) falls back to 0, the default shown in the UI.

	    Returns:
	        The first image returned by ``model.mmdecode``.

	    Raises:
	        gr.Error: If the prompt is empty or ``image_size`` is malformed.
	    """
	    image_prompt = (image_prompt or '').strip()
	    if not image_prompt:
	        # Fail fast instead of spending a GPU slot on an empty request.
	        raise gr.Error('Please enter a text prompt.')

	    try:
	        size_h, size_w = (int(part) for part in image_size.split('x'))
	    except (AttributeError, ValueError) as err:
	        raise gr.Error(
	            f'Invalid image size {image_size!r}; expected a value like "1152x1152".'
	        ) from err

	    # An emptied Number field arrives as None, which torch.manual_seed rejects.
	    seed = 0 if seed is None else int(seed)

	    # One generated token per 16x16 pixel patch.
	    token_h, token_w = size_h // 16, size_w // 16
	    image_prefix = f'<SOM>{token_h} {token_w}<IMAGE>'
	    generation_config = GenerationConfig(
	        max_new_tokens=token_h * token_w,
	        do_sample=True,
	        temperature=1.0,
	        min_p=min_p,
	        top_p=top_p,
	        guidance_scale=1.0,
	        # Keep the multimodal special tokens out of the sampled sequence.
	        suppress_tokens=tokenizer.convert_tokens_to_ids(model.config.mm_special_tokens),
	    )

	    encoded = tokenizer(
	        [image_prompt + image_prefix],
	        return_tensors='pt',
	        padding='longest',
	        padding_side='left',
	    )
	    input_ids = encoded.input_ids.to(device)
	    attention_mask = encoded.attention_mask.to(device)

	    # Inference only: make sure neither sampling nor decoding tracks gradients.
	    with torch.no_grad():
	        torch.manual_seed(seed)
	        generated = model.generate(
	            inputs=input_ids,
	            attention_mask=attention_mask,
	            generation_config=generation_config,
	        )
	        # Re-seed before decoding, as the original did; presumably mmdecode
	        # draws random numbers too -- confirm against the model code.
	        torch.manual_seed(seed)
	        _, images = model.mmdecode(tokenizer, generated[0], skip_special_tokens=False)

	    return images[0]


	# Showcase prompts for the gr.Examples gallery. Each raw prompt is stripped and
	# paired with the fixed settings used for every example row:
	# image size '1152x1152', top_p 1.0, min_p 0.03, seed 0.
	_raw_example_prompts = (
	    '''
	生成一张雪中的紫禁城全景封面图，作为北京冬季旅游指南的主题。画面以近景构图展现建筑，红墙金瓦被皑皑白雪覆盖，朱红色宫墙，金黄色瓦片与洁白雪色形成强烈对比，琉璃瓦顶的积雪在阳光下折射出晶莹光泽。前景一枝腊梅花正在盛开，背景为灰蓝色冬日天空，飘落细雪，远处角楼轮廓若隐若现，增添朦胧诗意感。图片上有标题“雪落北平·穿越600年”，另有副标题“北京古建筑雪景深度游”。文字艺术感极强，与图片良好融合起来
	''',
	    '''
	画面的中心摆放着一个复古花瓶，瓶身主体为浓郁的蓝色，这种蓝色深邃而典雅，仿佛带着岁月的沉淀。花瓶设计极具复古风格，瓶颈处环绕着细致的金色雕花，宛如华丽的项链点缀其上；瓶身绘制着精美的花卉图案，笔触细腻，色彩过渡自然，展现出极高的工艺水准，整体彰显出优雅的古典韵味。花瓶放置在深色木质的圆桌上，旁边搭配了一束新鲜绽放的百合花，为画面增添了几分生机与活力。背景是一幅淡蓝色的壁纸，上面有着若隐若现的花纹，营造出一种静谧而温馨的氛围。图片中的文字信息十分醒目。“家居美学盛典”位于顶部中央，字体较大，在视觉上十分突出，吸引观众的目光；左下角写着“下单直降 100”，下方紧跟数字“399”，强调了价格优惠；右下角有“限量抢购速来咨询”的提示，引导观众进一步咨询；最底部中央，“前 50 名买一送一”的字样突出促销活动的紧迫性和吸引力。这些文字信息通过巧妙的颜色、大小和背景设计，在空间布局上层次分明，重点突出，有效地引导观众关注促销信息和价格优势。
	''',
	)
	examples = [
	    [raw_prompt.strip(), '1152x1152', 1.0, 0.03, 0]
	    for raw_prompt in _raw_example_prompts
	]


	# Custom stylesheet passed to gr.Blocks(css=...): caps elements with class
	# "app" at 800px wide and centers them horizontally.
	# NOTE(review): ".app" is presumably Gradio's root container -- confirm.
	css = """
	.app {
	max-width: 800px !important;
	margin: 0 auto !important;
	}
	"""

	# Build the Gradio UI: header, prompt box + output image, generate button,
	# advanced sampling settings, and clickable examples.
	# NOTE(review): the nesting of the `with` blocks below was reconstructed from
	# source whose indentation had been flattened -- confirm the intended layout.
	with gr.Blocks(css=css) as demo:
	    # Link separators are plain "|"; "\|" is an invalid Python escape sequence
	    # and would also render a literal backslash on the page.
	    gr.HTML('''
	<h1 style="text-align:center">🎨X-Omni: Reinforcement Learning Makes Discrete Autoregressive Image Generative Models Great Again</h1>
	<h3 style="text-align:center">Model: <a href="https://huggingface.co/X-Omni/X-Omni-Zh">X-Omni-Zh</a> (support Chinese text rendering)</h3>
	<p align="center">
	<a href="https://x-omni-team.github.io">🏠 Project Page</a> |
	<a href="https://arxiv.org/pdf/2507.22058">📄 Paper</a> |
	<a href="https://github.com/X-Omni-Team/X-Omni">💻 Code</a> |
	<a href="https://huggingface.co/collections/X-Omni/x-omni-models-6888aadcc54baad7997d7982">🤗 HuggingFace Model</a>
	</p>
	'''.strip())
	    with gr.Row():
	        textbox = gr.Textbox(lines=2, placeholder='text prompt for image generation', show_label=False)
	        image = gr.Image(show_label=False, type='pil')
	    with gr.Row():
	        button = gr.Button("Generate", variant="primary")
	    with gr.Accordion("Advanced Settings", open=False):
	        image_size = gr.Dropdown(label="Image Size", choices=["1152x1152", "1152x768", "768x1152"], value="1152x1152")
	        top_p = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=1.0, step=0.01)
	        min_p = gr.Slider(label="Min P", minimum=0.0, maximum=1.0, value=0.03, step=0.01)
	        seed_input = gr.Number(label="Seed", value=0, precision=0)

	    # Single source of truth for the components fed to generate_image, in the
	    # order of its parameters; shared by the examples and the button.
	    generation_inputs = [textbox, image_size, top_p, min_p, seed_input]

	    with gr.Row():
	        gr.Examples(
	            examples=examples,
	            inputs=generation_inputs,
	            outputs=image,
	            fn=generate_image,
	            cache_examples=False,
	            run_on_click=True,  # clicking an example runs generation immediately
	        )
	    button.click(
	        generate_image,
	        inputs=generation_inputs,
	        outputs=image,
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(ssr_mode=False)