File size: 3,233 Bytes
fa9cc42 697f4c6 2892d42 fa9cc42 2892d42 fa9cc42 17cec15 fa9cc42 437e10c 2892d42 fa9cc42 40895e5 2892d42 437e10c 0d7789c 2892d42 2d292a8 2c75d59 2892d42 2c75d59 2892d42 40895e5 fa9cc42 5e2bbd7 fa9cc42 5e2bbd7 fa9cc42 d2a1b04 fa9cc42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import os
import textwrap
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
import numpy as np
import torch
from lavis.models import load_model_and_preprocess
import openai
# Run the captioning model on the GPU when torch can see one, otherwise on CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = "cpu"

# Load the BLIP-2 (OPT-2.7b) checkpoint in eval mode together with its image
# preprocessors. The third returned value is not used by this app.
model, vis_processors, _ = load_model_and_preprocess(
    name="blip2_opt",
    model_type="pretrain_opt2.7b",
    is_eval=True,
    device=device,
)

# The OpenAI key is read from the environment; a missing variable raises
# KeyError here, at import time, rather than on the first request.
openai.api_key = os.environ["OPENAI_API_KEY"]
def _draw_meme_text(pil_image, meme_text, font_size=60, top_padding=10):
    """Draw ``meme_text`` in white Impact at the top of ``pil_image``, in place.

    The text is wrapped to the image width using an average glyph width and
    every wrapped line is centred horizontally.

    Args:
        pil_image: PIL image to draw on; it is modified in place.
        meme_text: Text to render.
        font_size: Point size passed to ``ImageFont.truetype``.
        top_padding: Vertical offset, in pixels, of the first line.
    """
    draw = ImageDraw.Draw(pil_image)
    font = ImageFont.truetype("impact.ttf", font_size)

    # Estimate how many characters fit on one line from the mean width of a
    # sample of upper-case glyphs.
    alphabet = "ABCEMOPQRSTWXZ"
    total_char_width = sum(draw.textlength(char, font=font) for char in alphabet)
    average_char_width = total_char_width / len(alphabet)

    # textwrap.fill() rejects width <= 0, which would happen for an image
    # narrower than one average glyph, so never go below one character.
    chars_per_line = max(1, int(pil_image.width / average_char_width))

    y = top_padding
    for line in textwrap.fill(meme_text, width=chars_per_line).split("\n"):
        line_width = draw.textlength(line, font=font)
        # The rendered mask gives the pixel height of this particular line.
        _, line_height = font.getmask(line).size
        x = (pil_image.width - line_width) // 2
        draw.text((x, y), line, fill=(255, 255, 255), font=font)
        # Advance by the line height plus 10% spacing.
        y += line_height + int(line_height * 0.1)


def generate_caption(image):
    """Turn an input picture into a Spanish meme.

    The image is captioned with the module-level ``model``, the caption is
    sent to GPT-4 with a Spanish system prompt asking for a short funny meme
    text, and that text is drawn onto a copy of the image.

    Args:
        image: PIL image supplied by the Gradio input, or ``None`` when the
            button is pressed before an image has been provided.

    Returns:
        A new RGB PIL image with the meme text drawn at the top, or ``None``
        when no input image was given.
    """
    # Nothing to caption: return an empty result instead of failing on
    # ``None.copy()``.
    if image is None:
        return None

    # convert() returns a new image, so the caller's object is left untouched.
    # Converting up front means the RGB fill colour used when drawing is valid
    # whatever mode the input had.
    # NOTE(review): the RGB copy is also what gets preprocessed; presumably the
    # processor expects 3-channel input - confirm against LAVIS.
    pil_image = image.convert("RGB")

    image_tensor = vis_processors["eval"](pil_image).unsqueeze(0).to(device)
    caption = "\n".join(model.generate({"image": image_tensor}))

    # Use GPT-4 to write the meme text from the caption.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "Escribe un meme chistoso en español a partir de la descripción de una imagen dada por el usuario. No uses emojis, ni comillas, ni saltos de línea. No es necesario que empieces con 'cuando'. El output del asistente solo debe ser el texto del meme. Debe ser corto pero chistoso."},
            {"role": "user", "content": caption},
        ],
        temperature=0.6,
    )
    meme_text = response.choices[0].message.content
    print(meme_text)

    _draw_meme_text(pil_image, meme_text)

    # Release cached GPU memory between requests.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return pil_image
# Build the Gradio UI: one column with the input image, the trigger button and
# the generated meme.
# NOTE(review): the original indentation was lost; the nesting below assumes
# all three widgets live inside the single column - confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(
        "### Memero - Generador de Memes"
    )
    gr.Markdown(
        "Genera un meme en español a partir de una imagen."
    )
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Imagen", type="pil")
            btn_caption = gr.Button("Generar meme")
            # Despite its name this component shows the resulting image.
            # ``lines`` is a Textbox option, not an Image one, so it is no
            # longer passed here.
            output_text = gr.Image(label="Meme")
    # Clicking the button runs the caption -> meme pipeline on the input image.
    btn_caption.click(
        generate_caption, inputs=[input_image], outputs=[output_text]
    )

demo.launch()