Spaces:

yonnel
/

text-to-3d_flux_trellis

Runtime error

lionelgarnier

message assistent content

16d258e 4 months ago

9.72 kB

	import gradio as gr
	import numpy as np
	import random
	import os
	import spaces
	import torch
	from diffusers import DiffusionPipeline
	from transformers import pipeline
	from huggingface_hub import login

	# Hugging Face access token, read from the "hf_token" environment variable.
	hf_token = os.getenv("hf_token")
	if hf_token:
		login(token=hf_token)
	else:
		# NOTE(review): login(token=None) presumably falls back to an interactive
		# prompt, which cannot work in a headless process - confirm against the
		# huggingface_hub docs. Skipping keeps startup alive; gated models will
		# then fail inside the loaders, which already report load errors.
		print("Warning: 'hf_token' is not set; skipping Hugging Face login.")

	# Upper bound for random seeds (largest signed 32-bit integer).
	MAX_SEED = np.iinfo(np.int32).max
	# Largest width/height (in pixels) offered by the UI sliders.
	MAX_IMAGE_SIZE = 2048

	# Lazily-populated pipeline caches, filled by the get_*_pipeline() helpers.
	_text_gen_pipeline = None
	_image_gen_pipeline = None

	@spaces.GPU()
	def get_image_gen_pipeline():
		"""Return the cached FLUX.1-schnell pipeline, loading it on first use.

		The pipeline is stored in the module-level ``_image_gen_pipeline`` so
		later calls reuse it. It is loaded in bfloat16 and moved to CUDA when
		available, otherwise to the CPU.

		Returns:
			The loaded pipeline, or ``None`` if loading raised (the error is
			printed and nothing is cached).
		"""
		global _image_gen_pipeline

		# Fast path: already loaded by an earlier call.
		if _image_gen_pipeline is not None:
			return _image_gen_pipeline

		try:
			target_device = "cuda" if torch.cuda.is_available() else "cpu"
			loaded = DiffusionPipeline.from_pretrained(
				"black-forest-labs/FLUX.1-schnell",
				torch_dtype=torch.bfloat16,
			)
			# Only publish to the cache once the device move has succeeded.
			_image_gen_pipeline = loaded.to(target_device)
		except Exception as e:
			print(f"Error loading image generation model: {e}")
			return None
		return _image_gen_pipeline

	@spaces.GPU()
	def get_text_gen_pipeline():
		"""Return the cached Mistral text-generation pipeline, loading it on first use.

		The pipeline is stored in the module-level ``_text_gen_pipeline`` so
		later calls reuse it. It targets CUDA when available, otherwise the CPU,
		and is configured with ``max_new_tokens=2048``.

		Returns:
			The loaded pipeline, or ``None`` if loading raised (the error is
			printed and nothing is cached).
		"""
		global _text_gen_pipeline

		# Fast path: already loaded by an earlier call.
		if _text_gen_pipeline is not None:
			return _text_gen_pipeline

		try:
			target_device = "cuda" if torch.cuda.is_available() else "cpu"
			_text_gen_pipeline = pipeline(
				"text-generation",
				model="mistralai/Mistral-7B-Instruct-v0.3",
				max_new_tokens=2048,
				device=target_device,
			)
		except Exception as e:
			print(f"Error loading text generation model: {e}")
			return None
		return _text_gen_pipeline

	# Single system instruction sent to the text model when refining a prompt.
	_REFINE_SYSTEM_PROMPT = (
		"You are a product designer specialized in text-to-image generation. "
		"You will get a basic prompt of product request and you need to imagine "
		"a new product design to satisfy that need. "
		"Generate a visual product description that will then be used by a "
		"text-to-image AI (Flux) to suggest a visual. "
		"The prompt should be in the form of a strict product description, "
		"not a story, maximum 2048 tokens. "
		"You need to mention explicitly the visual aesthetics "
		"(ex: photo realistic, high quality, etc). "
		"Background should be a full white background."
	)


	def _extract_assistant_content(response):
		"""Return the text of the last assistant message in a chat response.

		``response[0]['generated_text']`` is read as a list of
		``{"role": ..., "content": ...}`` messages. An error string is returned
		when the response does not have that shape or holds no assistant turn.
		"""
		try:
			conversation = response[0]['generated_text']
			assistant_messages = [msg for msg in conversation if msg['role'] == 'assistant']
			if not assistant_messages:
				return "Error: No assistant response found"
			return assistant_messages[-1]['content']
		except (KeyError, IndexError, TypeError):
			# TypeError covers a plain-string 'generated_text' (non-chat output).
			return "Error: Unexpected response format from the model"


	@spaces.GPU()
	def refine_prompt(prompt):
		"""Expand a short product request into a detailed text-to-image prompt.

		Args:
			prompt: The raw user request.

		Returns:
			The assistant's refined prompt text, or a human-readable error
			string when the prompt is empty, the model is unavailable, or
			generation/parsing fails. This function never raises.
		"""
		# Reject blank input before paying for a model load.
		if not prompt or not prompt.strip():
			return "Please provide a valid prompt."

		text_gen = get_text_gen_pipeline()
		if text_gen is None:
			return "Text generation model is unavailable."

		# Exactly one system turn followed by the user turn.
		# NOTE(review): chat templates commonly accept at most one leading
		# system message - confirm for the Mistral template in use.
		messages = [
			{"role": "system", "content": _REFINE_SYSTEM_PROMPT},
			{"role": "user", "content": prompt},
		]
		try:
			response = text_gen(messages)
		except Exception as e:
			return f"Error refining prompt: {str(e)}"
		return _extract_assistant_content(response)

	def validate_dimensions(width, height):
		"""Check that the requested pixel count fits within the size budget.

		Args:
			width: Requested image width.
			height: Requested image height.

		Returns:
			``(True, None)`` when ``width * height`` does not exceed
			``MAX_IMAGE_SIZE`` squared, otherwise ``(False, <error message>)``.
		"""
		pixel_budget = MAX_IMAGE_SIZE * MAX_IMAGE_SIZE
		too_large = width * height > pixel_budget
		return (False, "Image dimensions too large") if too_large else (True, None)

	@spaces.GPU()
	def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
		"""Generate one image from ``prompt`` with the FLUX pipeline.

		Args:
			prompt: Text description of the image to generate.
			seed: Seed for the random generator (ignored when ``randomize_seed``).
			randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
			width: Output width in pixels.
			height: Output height in pixels.
			num_inference_steps: Number of denoising steps.
			progress: Gradio progress tracker used for status updates.

		Returns:
			``(image, seed)`` - the generated image and the seed actually used.

		Raises:
			gr.Error: If the prompt is blank, the dimensions are rejected, the
				model is unavailable, or generation fails. Raising (rather than
				returning the message) keeps error text out of the numeric
				``seed`` output this function is wired to.
		"""
		# NOTE(review): FluxPipeline is believed to reject max_sequence_length
		# above 512, and the FLUX.1-schnell docs recommend at most 256 -
		# confirm against the diffusers documentation.
		max_sequence_length = 256

		progress(0, desc="Starting generation...")

		# Cheap input checks run before the (expensive) model load.
		if not prompt or not prompt.strip():
			raise gr.Error("Please provide a valid prompt.")

		is_valid, error_msg = validate_dimensions(width, height)
		if not is_valid:
			raise gr.Error(error_msg)

		pipe = get_image_gen_pipeline()
		if pipe is None:
			raise gr.Error("Image generation model is unavailable.")

		# UI sliders may deliver floats; the generator and pipeline expect ints.
		seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

		try:
			progress(0.2, desc="Setting up generator...")
			generator = torch.Generator().manual_seed(seed)

			progress(0.4, desc="Generating image...")
			with torch.autocast(device_type="cuda"):
				image = pipe(
					prompt=prompt,
					width=int(width),
					height=int(height),
					num_inference_steps=int(num_inference_steps),
					generator=generator,
					guidance_scale=5.0,
					max_sequence_length=max_sequence_length,
				).images[0]
		except Exception as e:
			raise gr.Error(f"Error generating image: {str(e)}") from e
		finally:
			# Release cached GPU memory whether or not generation succeeded.
			torch.cuda.empty_cache()

		progress(1.0, desc="Done!")
		return image, seed

	# Sample prompts passed to the gr.Examples widget in the UI below.
	examples = [
	"a tiny astronaut hatching from an egg on the moon",
	"a cat holding a sign that says hello world",
	"an anime illustration of a wiener schnitzel",
	]

	# Custom CSS for the Blocks app: horizontally centers the element with
	# id "col-container" and caps its width at 520px.
	css="""
	#col-container {
	margin: 0 auto;
	max-width: 520px;
	}
	"""

	# Gradio UI: prompt entry -> prompt refinement (Mistral) -> image generation (Flux).
	# NOTE(review): the component nesting below was reconstructed from a copy of
	# the file whose indentation was lost - confirm the layout against the
	# running app.
	with gr.Blocks(css=css) as demo:

		# No "models loaded" status is emitted here: both pipelines load lazily
		# on first use, and gr.Info is meant to be called from inside an event
		# handler rather than while the layout is being built.

		with gr.Column(elem_id="col-container"):
			gr.Markdown("""# Text to Product
			Using Mistral + Flux + Trellis
			""")

			with gr.Row():

				prompt = gr.Text(
					label="Prompt",
					show_label=False,
					max_lines=1,
					placeholder="Enter your prompt",
					container=False,
				)

				prompt_button = gr.Button("Refine prompt", scale=0)

			refined_prompt = gr.Text(
				label="Refined Prompt",
				show_label=False,
				max_lines=10,
				placeholder="Prompt refined by Mistral",
				container=False,
				max_length=2048,
			)

			run_button = gr.Button("Create visual", scale=0)

			generated_image = gr.Image(label="Generated Image", show_label=False)

			# NOTE: these sliders are displayed but not yet passed to
			# refine_prompt, which only accepts the prompt text.
			with gr.Accordion("Advanced Settings Mistral", open=False):
				temperature = gr.Slider(
					label="Temperature",
					value=0.9,
					minimum=0.0,
					maximum=1.0,
					step=0.05,
					interactive=True,
					info="Higher values produce more diverse outputs",
				)
				max_new_tokens = gr.Slider(
					label="Max new tokens",
					value=256,
					minimum=0,
					maximum=1048,
					step=64,
					interactive=True,
					info="The maximum numbers of new tokens",
				)
				top_p = gr.Slider(
					label="Top-p (nucleus sampling)",
					value=0.90,
					minimum=0.0,
					maximum=1,
					step=0.05,
					interactive=True,
					info="Higher values sample more low-probability tokens",
				)
				repetition_penalty = gr.Slider(
					label="Repetition penalty",
					value=1.2,
					minimum=1.0,
					maximum=2.0,
					step=0.05,
					interactive=True,
					info="Penalize repeated tokens",
				)

			with gr.Accordion("Advanced Settings Flux", open=False):

				seed = gr.Slider(
					label="Seed",
					minimum=0,
					maximum=MAX_SEED,
					step=1,
					value=0,
				)

				randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

				with gr.Row():

					width = gr.Slider(
						label="Width",
						minimum=256,
						maximum=MAX_IMAGE_SIZE,
						step=32,
						value=1024,
					)

					height = gr.Slider(
						label="Height",
						minimum=256,
						maximum=MAX_IMAGE_SIZE,
						step=32,
						value=1024,
					)

				with gr.Row():

					num_inference_steps = gr.Slider(
						label="Number of inference steps",
						minimum=1,
						maximum=50,
						step=1,
						value=4,
					)

			# Clicking an example runs infer directly on the raw example prompt.
			gr.Examples(
				examples=examples,
				fn=infer,
				inputs=[prompt],
				outputs=[generated_image, seed],
				cache_examples="lazy",
			)

		# Refine the raw prompt on button click or when Enter is pressed.
		gr.on(
			triggers=[prompt_button.click, prompt.submit],
			fn=refine_prompt,
			inputs=[prompt],
			outputs=[refined_prompt],
		)

		# Generate the image from the refined prompt and echo back the seed used.
		gr.on(
			triggers=[run_button.click],
			fn=infer,
			inputs=[refined_prompt, seed, randomize_seed, width, height, num_inference_steps],
			outputs=[generated_image, seed],
		)

	# Start the server only when executed as a script, so importing this module
	# (e.g. from tooling) does not launch the app.
	if __name__ == "__main__":
		demo.launch()