"""Gradio demo: refine a product idea with Mistral, then render it with FLUX.1-schnell.

Flow: the user types a short product request, ``refine_prompt`` asks a Mistral
instruct model to turn it into a visual product description, and ``infer``
feeds that description to the FLUX.1-schnell diffusion pipeline.
"""

import os
import random

import gradio as gr
import numpy as np
# NOTE: `spaces` is deliberately imported before torch/diffusers/transformers,
# preserving the original import order.
import spaces
import torch
from diffusers import DiffusionPipeline
from transformers import pipeline
from huggingface_hub import login

# Authenticate only when a token is configured; `login(token=None)` would
# otherwise fall back to an interactive prompt, which cannot work in a
# headless deployment.
hf_token = os.getenv("hf_token")
if hf_token:
    login(token=hf_token)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 2048

# FLUX.1-schnell accepts at most 256 prompt tokens; the pipeline rejects
# values above 512 outright, so the previous value of 2048 always failed.
FLUX_MAX_SEQUENCE_LENGTH = 256

IMAGE_MODEL_ID = "black-forest-labs/FLUX.1-schnell"
TEXT_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"

SYSTEM_PROMPT = (
    "You are a product designer specialized in text-to-image generation. "
    "You will get a basic prompt of product request and you need to imagine "
    "a new product design to satisfy that need. Generate a visual product "
    "description that will then be used by a text-to-image AI (Flux) to "
    "suggest a visual. The prompt should be in the form of a strict product "
    "description, not a story, maximum 2048 tokens. You need to mention "
    "explicitly the visual aesthetics (ex: photo realistic, high quality, "
    "etc). Background should be a full white background."
)

# Lazily created model handles, shared by every request in this process.
_text_gen_pipeline = None
_image_gen_pipeline = None


# NOTE(review): the getters keep their original @spaces.GPU() decorators.
# Confirm that the module-level cache actually persists between calls on
# ZeroGPU hardware; if it does not, the models are reloaded on each request.
@spaces.GPU()
def get_image_gen_pipeline():
    """Return the cached FLUX pipeline, loading it on first use.

    Returns:
        The diffusion pipeline, or ``None`` when loading failed (the error is
        printed and the next call retries the load).
    """
    global _image_gen_pipeline
    if _image_gen_pipeline is None:
        try:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            _image_gen_pipeline = DiffusionPipeline.from_pretrained(
                IMAGE_MODEL_ID,
                torch_dtype=torch.bfloat16,
            ).to(device)
        except Exception as e:
            print(f"Error loading image generation model: {e}")
            return None
    return _image_gen_pipeline


@spaces.GPU()
def get_text_gen_pipeline():
    """Return the cached Mistral text-generation pipeline, loading it on first use.

    Returns:
        The text-generation pipeline, or ``None`` when loading failed (the
        error is printed and the next call retries the load).
    """
    global _text_gen_pipeline
    if _text_gen_pipeline is None:
        try:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            _text_gen_pipeline = pipeline(
                "text-generation",
                model=TEXT_MODEL_ID,
                max_new_tokens=2048,
                # Load in bfloat16 like the image pipeline instead of the
                # float32 default, which needs twice the memory.
                torch_dtype=torch.bfloat16,
                device=device,
            )
        except Exception as e:
            print(f"Error loading text generation model: {e}")
            return None
    return _text_gen_pipeline


def _build_generation_kwargs(temperature=None, max_new_tokens=None,
                             top_p=None, repetition_penalty=None):
    """Translate the optional UI settings into text-generation keyword arguments.

    A value of ``None`` means "keep the pipeline default", so calling this
    with no arguments returns an empty dict.
    """
    kwargs = {}
    if max_new_tokens is not None and int(max_new_tokens) > 0:
        kwargs["max_new_tokens"] = int(max_new_tokens)
    if repetition_penalty is not None:
        kwargs["repetition_penalty"] = float(repetition_penalty)
    if temperature is not None:
        if temperature > 0:
            kwargs["do_sample"] = True
            kwargs["temperature"] = float(temperature)
            if top_p is not None:
                # Cast explicitly: the slider can deliver the int 1.
                kwargs["top_p"] = float(top_p)
        else:
            # Sampling with temperature 0 is invalid; decode greedily instead.
            kwargs["do_sample"] = False
    return kwargs


@spaces.GPU()
def refine_prompt(prompt, temperature=None, max_new_tokens=None, top_p=None,
                  repetition_penalty=None):
    """Ask Mistral to expand a short product request into a visual description.

    Args:
        prompt: The user's raw product request.
        temperature: Optional sampling temperature; ``0`` selects greedy
            decoding and ``None`` keeps the pipeline default.
        max_new_tokens: Optional cap on generated tokens.
        top_p: Optional nucleus-sampling threshold (used only when sampling).
        repetition_penalty: Optional penalty applied to repeated tokens.

    Returns:
        The assistant's reply text.

    Raises:
        gr.Error: If the prompt is empty, the model is unavailable, generation
            fails, or the response has an unexpected shape. Raising (instead
            of returning the message) keeps error text out of the refined
            prompt box, where it would be used as the image prompt.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please provide a valid prompt.")

    text_gen = get_text_gen_pipeline()
    if text_gen is None:
        raise gr.Error("Text generation model is unavailable.")

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    generation_kwargs = _build_generation_kwargs(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
    )

    try:
        outputs = text_gen(messages, **generation_kwargs)
    except Exception as e:
        raise gr.Error(f"Error refining prompt: {e}") from e

    # For chat-style input the pipeline returns the whole conversation; keep
    # only the content of the last assistant turn.
    try:
        conversation = outputs[0]["generated_text"]
        assistant_replies = [
            msg["content"] for msg in conversation if msg["role"] == "assistant"
        ]
    except (KeyError, IndexError, TypeError) as e:
        raise gr.Error("Error: Unexpected response format from the model") from e

    if not assistant_replies:
        raise gr.Error("Error: No assistant response found")
    return assistant_replies[-1]


def validate_dimensions(width, height):
    """Check that the requested image size is positive and within the pixel budget.

    Returns:
        ``(True, None)`` when the size is acceptable, otherwise
        ``(False, message)`` describing the problem.
    """
    if width <= 0 or height <= 0:
        return False, "Image dimensions must be positive"
    if width * height > MAX_IMAGE_SIZE * MAX_IMAGE_SIZE:
        return False, "Image dimensions too large"
    return True, None


@spaces.GPU()
def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024,
          num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
    """Generate an image for ``prompt`` with FLUX.1-schnell.

    Args:
        prompt: Text description to render.
        seed: Seed for the random generator (ignored when ``randomize_seed``).
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        width: Output width in pixels.
        height: Output height in pixels.
        num_inference_steps: Number of denoising steps.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        ``(image, seed)``: the generated image and the seed actually used.

    Raises:
        gr.Error: On invalid input, an unavailable model, or a generation
            failure. The second output is bound to the seed slider, so an
            error message must never be returned in its place.
    """
    progress(0, desc="Starting generation...")

    # Validate the cheap things first so bad input never triggers a model load.
    if not prompt or not prompt.strip():
        raise gr.Error("Please provide a valid prompt.")

    width, height = int(width), int(height)
    is_valid, error_msg = validate_dimensions(width, height)
    if not is_valid:
        raise gr.Error(error_msg)

    pipe = get_image_gen_pipeline()
    if pipe is None:
        raise gr.Error("Image generation model is unavailable.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    seed = int(seed)

    try:
        progress(0.2, desc="Setting up generator...")
        generator = torch.Generator().manual_seed(seed)

        progress(0.4, desc="Generating image...")
        # No autocast wrapper: the pipeline is already loaded in bfloat16, and
        # the deprecated torch.cuda.amp.autocast() defaults to float16.
        image = pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
            guidance_scale=0.0,  # documented setting for FLUX.1-schnell
            max_sequence_length=FLUX_MAX_SEQUENCE_LENGTH,
        ).images[0]
    except Exception as e:
        raise gr.Error(f"Error generating image: {e}") from e
    finally:
        # Release cached GPU memory whether or not generation succeeded.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    progress(1.0, desc="Done!")
    return image, seed


examples = [
    "a tiny astronaut hatching from an egg on the moon",
    "a cat holding a sign that says hello world",
    "an anime illustration of a wiener schnitzel",
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 520px;
}
"""

# Models are loaded lazily on first use, so no load-status message is emitted
# while the interface is being built.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Text to Product Using Mistral + Flux + Trellis ")

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            prompt_button = gr.Button("Refine prompt", scale=0)

        refined_prompt = gr.Text(
            label="Refined Prompt",
            show_label=False,
            max_lines=10,
            placeholder="Prompt refined by Mistral",
            container=False,
            max_length=2048,
        )
        run_button = gr.Button("Create visual", scale=0)

        generated_image = gr.Image(label="Generated Image", show_label=False)

        # These controls are passed to refine_prompt as generation settings.
        with gr.Accordion("Advanced Settings Mistral", open=False):
            temperature = gr.Slider(
                label="Temperature",
                value=0.9,
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                interactive=True,
                info="Higher values produce more diverse outputs",
            )
            max_new_tokens = gr.Slider(
                label="Max new tokens",
                value=256,
                minimum=64,
                maximum=1024,
                step=64,
                interactive=True,
                info="The maximum numbers of new tokens",
            )
            top_p = gr.Slider(
                label="Top-p (nucleus sampling)",
                value=0.90,
                minimum=0.0,
                maximum=1,
                step=0.05,
                interactive=True,
                info="Higher values sample more low-probability tokens",
            )
            repetition_penalty = gr.Slider(
                label="Repetition penalty",
                value=1.2,
                minimum=1.0,
                maximum=2.0,
                step=0.05,
                interactive=True,
                info="Penalize repeated tokens",
            )

        with gr.Accordion("Advanced Settings Flux", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )

            with gr.Row():
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=4,
                )

        gr.Examples(
            examples=examples,
            fn=infer,
            inputs=[prompt],
            outputs=[generated_image, seed],
            cache_examples="lazy",
        )

    gr.on(
        triggers=[prompt_button.click, prompt.submit],
        fn=refine_prompt,
        inputs=[prompt, temperature, max_new_tokens, top_p, repetition_penalty],
        outputs=[refined_prompt],
    )

    gr.on(
        triggers=[run_button.click],
        fn=infer,
        inputs=[refined_prompt, seed, randomize_seed, width, height, num_inference_steps],
        outputs=[generated_image, seed],
    )

if __name__ == "__main__":
    demo.launch()