Update app.py
Browse files
app.py
CHANGED
@@ -35,8 +35,8 @@ def load_models():
|
|
35 |
|
36 |
model, tokenizer, pipe = load_models()
|
37 |
|
38 |
-
@spaces.GPU(duration=
|
39 |
-
def generate_enhanced_caption(image_caption, text_caption):
|
40 |
"""Generate enhanced caption using the LeX-Enhancer model"""
|
41 |
combined_caption = f"{image_caption}, with the text on it: {text_caption}."
|
42 |
instruction = """
|
@@ -51,7 +51,8 @@ Below is the simple caption of an image with text. Please deduce the detailed de
|
|
51 |
tokenize=False,
|
52 |
add_generation_prompt=True
|
53 |
)
|
54 |
-
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
|
|
|
55 |
|
56 |
generated_ids = model.generate(
|
57 |
**model_inputs,
|
@@ -68,8 +69,8 @@ Below is the simple caption of an image with text. Please deduce the detailed de
|
|
68 |
|
69 |
return combined_caption, enhanced_caption
|
70 |
|
71 |
-
@spaces.GPU(duration=
|
72 |
-
def generate_image(enhanced_caption, seed, num_inference_steps, guidance_scale):
|
73 |
"""Generate image using LeX-Lumina"""
|
74 |
generator = torch.Generator("cpu").manual_seed(seed) if seed != 0 else None
|
75 |
|
@@ -86,11 +87,11 @@ def generate_image(enhanced_caption, seed, num_inference_steps, guidance_scale):
|
|
86 |
system_prompt="You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts.",
|
87 |
).images[0]
|
88 |
|
89 |
-
|
90 |
|
91 |
return image
|
92 |
|
93 |
-
|
94 |
def run_pipeline(image_caption, text_caption, seed, num_inference_steps, guidance_scale):
|
95 |
"""Run the complete pipeline from captions to final image"""
|
96 |
combined_caption, enhanced_caption = generate_enhanced_caption(image_caption, text_caption)
|
|
|
35 |
|
36 |
model, tokenizer, pipe = load_models()
|
37 |
|
38 |
+
@spaces.GPU(duration=200)
|
39 |
+
def generate_enhanced_caption(image_caption, text_caption, progress=gr.Progress(track_tqdm=True)):
|
40 |
"""Generate enhanced caption using the LeX-Enhancer model"""
|
41 |
combined_caption = f"{image_caption}, with the text on it: {text_caption}."
|
42 |
instruction = """
|
|
|
51 |
tokenize=False,
|
52 |
add_generation_prompt=True
|
53 |
)
|
54 |
+
# model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
|
55 |
+
model_inputs = tokenizer([text], return_tensors="pt")
|
56 |
|
57 |
generated_ids = model.generate(
|
58 |
**model_inputs,
|
|
|
69 |
|
70 |
return combined_caption, enhanced_caption
|
71 |
|
72 |
+
@spaces.GPU(duration=200)
|
73 |
+
def generate_image(enhanced_caption, seed, num_inference_steps, guidance_scale, progress=gr.Progress(track_tqdm=True)):
|
74 |
"""Generate image using LeX-Lumina"""
|
75 |
generator = torch.Generator("cpu").manual_seed(seed) if seed != 0 else None
|
76 |
|
|
|
87 |
system_prompt="You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts.",
|
88 |
).images[0]
|
89 |
|
90 |
+
torch.cuda.empty_cache()
|
91 |
|
92 |
return image
|
93 |
|
94 |
+
@spaces.GPU(duration=200)
|
95 |
def run_pipeline(image_caption, text_caption, seed, num_inference_steps, guidance_scale):
|
96 |
"""Run the complete pipeline from captions to final image"""
|
97 |
combined_caption, enhanced_caption = generate_enhanced_caption(image_caption, text_caption)
|