LeX-FLUX

Running on Zero

stzhao committed on Mar 26

Commit

2092a85

verified ·

1 Parent(s): c6d11ec

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,9 +5,6 @@ import spaces
 from diffusers import Lumina2Pipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer
-# # Set up environment
-# os.environ['CUDA_VISIBLE_DEVICES'] = "0"
 if torch.cuda.is_available():
     torch_dtype = torch.bfloat16
 else:
@@ -29,12 +26,20 @@ def load_models():
         torch_dtype=torch.bfloat16
     )
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    # pipe.to(device, torch_dtype)
     return model, tokenizer, pipe
 model, tokenizer, pipe = load_models()
 @spaces.GPU(duration=200)
 def generate_enhanced_caption(image_caption, text_caption, progress=gr.Progress(track_tqdm=True)):
     """Generate enhanced caption using the LeX-Enhancer model"""
@@ -71,6 +76,9 @@ Below is the simple caption of an image with text. Please deduce the detailed de
 @spaces.GPU(duration=200)
 def generate_image(enhanced_caption, seed, num_inference_steps, guidance_scale, progress=gr.Progress(track_tqdm=True)):
     """Generate image using LeX-Lumina"""
     generator = torch.Generator("cpu").manual_seed(seed) if seed != 0 else None
     image = pipe(

 from diffusers import Lumina2Pipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer
 if torch.cuda.is_available():
     torch_dtype = torch.bfloat16
 else:
         torch_dtype=torch.bfloat16
     )
     device = "cuda" if torch.cuda.is_available() else "cpu"
     return model, tokenizer, pipe
 model, tokenizer, pipe = load_models()
+def truncate_caption_by_tokens(caption, max_tokens=256):
+    """Truncate the caption to fit within the max token limit.
+
+    Args:
+        caption: Text to check against the limit.
+        max_tokens: Maximum number of token ids to keep (default 256).
+
+    Returns:
+        The caption unchanged when it encodes to at most ``max_tokens``
+        ids; otherwise the text decoded from the first ``max_tokens`` ids.
+    """
+    # Uses the module-level `tokenizer` unpacked from load_models().
+    # NOTE(review): whether encode() adds special tokens (and so counts them
+    # toward the limit) depends on the tokenizer's configuration - confirm.
+    tokens = tokenizer.encode(caption)
+    if len(tokens) > max_tokens:
+        # Keep only the leading ids; everything past the limit is dropped.
+        truncated_tokens = tokens[:max_tokens]
+        # Special tokens are stripped so they do not appear in the returned text.
+        caption = tokenizer.decode(truncated_tokens, skip_special_tokens=True)
+        print(f"Caption was truncated from {len(tokens)} tokens to {max_tokens} tokens")
+    return caption
 @spaces.GPU(duration=200)
 def generate_enhanced_caption(image_caption, text_caption, progress=gr.Progress(track_tqdm=True)):
     """Generate enhanced caption using the LeX-Enhancer model"""
 @spaces.GPU(duration=200)
 def generate_image(enhanced_caption, seed, num_inference_steps, guidance_scale, progress=gr.Progress(track_tqdm=True)):
     """Generate image using LeX-Lumina"""
+    # Truncate the caption if it's too long
+    enhanced_caption = truncate_caption_by_tokens(enhanced_caption, max_tokens=256)
     generator = torch.Generator("cpu").manual_seed(seed) if seed != 0 else None
     image = pipe(