"""Gradio demo for X-Omni-Zh text-to-image generation.

At import time this script loads the tokenizer and the causal LM, initialises
the model's vision component, switches it to image-generation mode, and builds
a small Blocks UI whose "Generate" button calls ``generate_image``.
"""

import gradio as gr
import numpy as np
import random
import spaces  # ZeroGPU support; provides the @spaces.GPU decorator used below
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.generation import GenerationConfig

device = "cuda"
torch_dtype = torch.bfloat16

model_name_or_path = "X-Omni/X-Omni-Zh"
flux_model_name_or_path = "zhangxiaosong18/FLUX.1-dev-VAE"

# Each requested image dimension is floor-divided by this value to obtain the
# size of the token grid the model has to generate.
IMAGE_TOKEN_STRIDE = 16

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype=torch_dtype,
    trust_remote_code=True,
).to(device)
model.init_vision(flux_model_name_or_path)
model.set_generation_mode('image')
model.eval()


@spaces.GPU(duration=199)  # request a ZeroGPU slot for each call
def generate_image(
    image_prompt,
    image_size,
    top_p,
    min_p,
    seed,
):
    """Generate one image for a text prompt.

    Args:
        image_prompt: Text description of the image. Surrounding whitespace
            is stripped before tokenisation.
        image_size: Two integers joined by ``x`` (e.g. ``"1152x1152"``). The
            first and second values are each floor-divided by 16 to get the
            token grid; their product is the number of tokens generated.
        top_p: Value forwarded as ``top_p`` to ``GenerationConfig``.
        min_p: Value forwarded as ``min_p`` to ``GenerationConfig``.
        seed: Seed passed to ``torch.manual_seed``. ``None`` (for example a
            cleared number field) falls back to 0, the UI default; any other
            value is coerced with ``int()``.

    Returns:
        The first element of the image list returned by ``model.mmdecode``.

    Raises:
        ValueError: If ``image_size`` contains a part that is not an integer.
        IndexError: If ``image_size`` has fewer than two ``x``-separated parts.
    """
    image_prompt = image_prompt.strip()

    dims = tuple(map(int, image_size.split('x')))
    token_h = dims[0] // IMAGE_TOKEN_STRIDE
    token_w = dims[1] // IMAGE_TOKEN_STRIDE
    # NOTE(review): the prompt and this size prefix are concatenated with no
    # separator or marker tokens - confirm this matches the prompt format the
    # model's remote code expects.
    image_prefix = f'{token_h} {token_w}'

    # torch.manual_seed rejects None and floats, so normalise first.
    seed = 0 if seed is None else int(seed)

    generation_config = GenerationConfig(
        max_new_tokens=token_h * token_w,
        do_sample=True,
        temperature=1.0,
        min_p=min_p,
        top_p=top_p,
        guidance_scale=1.0,
        suppress_tokens=tokenizer.convert_tokens_to_ids(model.config.mm_special_tokens),
    )

    encoded = tokenizer(
        [image_prompt + image_prefix],
        return_tensors='pt',
        padding='longest',
        padding_side='left',
    )
    input_ids = encoded.input_ids.to(device)
    attention_mask = encoded.attention_mask.to(device)

    # mmdecode comes from the model's remote code; whether it disables
    # autograd itself is not visible here, so guard the whole inference path.
    with torch.no_grad():
        torch.manual_seed(seed)
        generated = model.generate(
            inputs=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
        )
        # Re-seed so decoding starts from the same RNG state no matter how
        # much randomness sampling consumed.
        torch.manual_seed(seed)
        _, images = model.mmdecode(tokenizer, generated[0], skip_special_tokens=False)
    return images[0]


example_prompts = [
    '''
生成一张雪中的紫禁城全景封面图,作为北京冬季旅游指南的主题。画面以近景构图展现建筑,红墙金瓦被皑皑白雪覆盖,朱红色宫墙,金黄色瓦片与洁白雪色形成强烈对比,琉璃瓦顶的积雪在阳光下折射出晶莹光泽。前景一枝腊梅花正在盛开,背景为灰蓝色冬日天空,飘落细雪,远处角楼轮廓若隐若现,增添朦胧诗意感。图片上有标题“雪落北平·穿越600年”,另有副标题“北京古建筑雪景深度游”。文字艺术感极强,与图片良好融合起来
'''.strip(),
    '''
画面的中心摆放着一个复古花瓶,瓶身主体为浓郁的蓝色,这种蓝色深邃而典雅,仿佛带着岁月的沉淀。花瓶设计极具复古风格,瓶颈处环绕着细致的金色雕花,宛如华丽的项链点缀其上;瓶身绘制着精美的花卉图案,笔触细腻,色彩过渡自然,展现出极高的工艺水准,整体彰显出优雅的古典韵味。花瓶放置在深色木质的圆桌上,旁边搭配了一束新鲜绽放的百合花,为画面增添了几分生机与活力。背景是一幅淡蓝色的壁纸,上面有着若隐若现的花纹,营造出一种静谧而温馨的氛围。图片中的文字信息十分醒目。“家居美学盛典”位于顶部中央,字体较大,在视觉上十分突出,吸引观众的目光;左下角写着“下单直降 100”,下方紧跟数字“399”,强调了价格优惠;右下角有“限量抢购 速来咨询”的提示,引导观众进一步咨询;最底部中央,“前 50 名买一送一”的字样突出促销活动的紧迫性和吸引力。这些文字信息通过巧妙的颜色、大小和背景设计,在空间布局上层次分明,重点突出,有效地引导观众关注促销信息和价格优势。
'''.strip(),
]
# One row per prompt, in the order of the Examples inputs:
# prompt, image size, top_p, min_p, seed.
examples = [[prompt, '1152x1152', 1.0, 0.03, 0] for prompt in example_prompts]

css = """
.app {
    max-width: 800px !important;
    margin: 0 auto !important;
}
"""

# Header shown at the top of the page. Kept at column 0 so the text handed to
# gr.HTML is exactly the literal below with outer whitespace stripped.
HEADER_HTML = '''

🎨X-Omni: Reinforcement Learning Makes Discrete Autoregressive Image Generative Models Great Again

Model: X-Omni-Zh (support Chinese text rendering)

🏠 Project Page | 📄 Paper | 💻​ Code | 🤗 HuggingFace Model

'''.strip()

# NOTE(review): the nesting of the layout blocks below was reconstructed from
# a whitespace-collapsed source - confirm it matches the intended layout.
with gr.Blocks(css=css) as demo:
    gr.HTML(HEADER_HTML)
    with gr.Row():
        textbox = gr.Textbox(lines=2, placeholder='text prompt for image generation', show_label=False)
        image = gr.Image(show_label=False, type='pil')
    with gr.Row():
        button = gr.Button("Generate", variant="primary")
    with gr.Accordion("Advanced Settings", open=False):
        image_size = gr.Dropdown(
            label="Image Size",
            choices=["1152x1152", "1152x768", "768x1152"],
            value="1152x1152",
        )
        top_p = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=1.0, step=0.01)
        min_p = gr.Slider(label="Min P", minimum=0.0, maximum=1.0, value=0.03, step=0.01)
        seed_input = gr.Number(label="Seed", value=0, precision=0)
    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=(textbox, image_size, top_p, min_p, seed_input),
            outputs=image,
            fn=generate_image,
            cache_examples=False,
            run_on_click=True,
        )

    button.click(
        generate_image,
        inputs=(textbox, image_size, top_p, min_p, seed_input),
        outputs=image,
    )

if __name__ == "__main__":
    demo.launch(ssr_mode=False)