Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import uuid | |
| from typing import List, Tuple, Optional, Union | |
| from PIL import Image | |
| import google.generativeai as genai | |
| import gradio as gr | |
| from dotenv import load_dotenv | |
# Load environment variables from the .env file.
load_dotenv()
API_KEY = os.getenv("GOOGLE_API_KEY")
if not API_KEY:
    # Fail fast at import time: nothing below can work without the key.
    raise ValueError("La clave de API 'GOOGLE_API_KEY' no está configurada en el archivo .env")

# Gemini model configuration.
genai.configure(api_key=API_KEY)
generation_config = {
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
)

# Module-level chat session, started with an empty history.
chat = model.start_chat(history=[])

# Constants for image handling.
IMAGE_CACHE_DIRECTORY = "/tmp"  # directory where cached JPEG files are written
IMAGE_WIDTH = 512  # target width, in pixels, for resized images
# Type alias for a chat history: a list of (user, model) pairs where the user
# entry is either a string or a one-element tuple of strings, and either side
# may be None.
CHAT_HISTORY = List[Tuple[Optional[Union[Tuple[str], str]], Optional[str]]]
# Resize helper for uploaded images.
def preprocess_image(image: Optional[Image.Image]) -> Optional[Image.Image]:
    """Resize an image to ``IMAGE_WIDTH`` pixels wide, keeping its aspect ratio.

    Args:
        image: The image to resize, or ``None``.

    Returns:
        The result of ``image.resize`` with width ``IMAGE_WIDTH`` and a
        proportionally scaled height, or ``None`` when ``image`` is ``None``.
    """
    if image is None:
        return None
    # For extremely wide images the scaled height truncates to 0, which is not
    # a valid resize target; clamp it to at least one pixel.
    image_height = max(1, int(image.height * IMAGE_WIDTH / image.width))
    return image.resize((IMAGE_WIDTH, image_height))
# Cache helper: write an image to disk as a JPEG file.
def cache_pil_image(image: Image.Image) -> str:
    """Save ``image`` as a uniquely named JPEG in ``IMAGE_CACHE_DIRECTORY``.

    The directory is created if it does not exist. The file name is a random
    UUID with a ``.jpeg`` extension.

    Args:
        image: The image to write.

    Returns:
        The path of the file that was written.
    """
    image_filename = f"{uuid.uuid4()}.jpeg"
    os.makedirs(IMAGE_CACHE_DIRECTORY, exist_ok=True)
    image_path = os.path.join(IMAGE_CACHE_DIRECTORY, image_filename)
    # JPEG cannot store alpha or palette data, so saving e.g. an RGBA or P
    # image fails. Normalise everything except the plain JPEG modes to RGB.
    if image.mode not in ("RGB", "L", "CMYK"):
        image = image.convert("RGB")
    image.save(image_path, "JPEG")
    return image_path
# Convert the Gradio chat history into the history format sent to Gemini.
def transform_history(history):
    """Convert a Gradio pair-style history into Gemini content dicts.

    Args:
        history: Sequence of ``(user_message, model_reply)`` pairs (tuples or
            lists). Either element may be ``None`` or empty. As the
            ``CHAT_HISTORY`` alias in this file indicates, a user entry may
            also be a tuple of strings rather than plain text; such non-text
            entries are skipped because they cannot be expressed as a
            ``text`` part. ``None`` is treated as an empty history.

    Returns:
        A list of dicts of the form
        ``{"parts": [{"text": <str>}], "role": "user" | "model"}``,
        in conversation order.
    """
    new_history = []
    for turn in history or []:
        user_message, model_reply = turn[0], turn[1]
        # Only non-empty plain strings are valid text parts.
        if isinstance(user_message, str) and user_message:
            new_history.append({"parts": [{"text": user_message}], "role": "user"})
        if isinstance(model_reply, str) and model_reply:
            new_history.append({"parts": [{"text": model_reply}], "role": "model"})
    return new_history
# Main chat handler: forwards text and images to Gemini and streams the reply.
def response(message, history):
    """Send the user's text and uploaded images to Gemini and stream the reply.

    Each uploaded file is opened as an image, converted to RGB, resized with
    ``preprocess_image`` and sent exactly once. A new chat session is started
    for every call, seeded from ``history``, so requests do not share or
    overwrite each other's conversation state.

    Args:
        message: Mapping with a ``"text"`` entry (the prompt) and a
            ``"files"`` entry (the uploaded files, each passed to
            ``Image.open``).
        history: Gradio chat history; converted with ``transform_history``.

    Yields:
        The reply text accumulated so far, one more character per step.
        Nothing is yielded when the message has neither text nor files.
    """
    # Per-request session instead of mutating the module-level ``chat``.
    session = model.start_chat(history=transform_history(history))

    text_prompt = message["text"]
    files = message["files"] or []

    # Skip an empty text prompt rather than sending an empty part.
    prompts = [text_prompt] if text_prompt else []
    for file in files:
        # ``convert`` returns an independent image, so the source file can be
        # closed as soon as the conversion is done.
        with Image.open(file) as source:
            prompts.append(preprocess_image(source.convert("RGB")))

    if not prompts:
        return

    reply = session.send_message(prompts)
    reply.resolve()

    # Emit growing prefixes with a short pause to simulate incremental
    # generation in the UI.
    reply_text = reply.text
    for end in range(1, len(reply_text) + 1):
        time.sleep(0.01)
        yield reply_text[:end]
# Build the chat user interface around the ``response`` handler.
demo = gr.ChatInterface(
    fn=response,
    multimodal=True,
    textbox=gr.MultimodalTextbox(
        sources=["upload", "microphone"],
        file_types=["image"],
        file_count="multiple",
    ),
    examples=[{"text": "Describe the image:", "files": []}],
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(show_error=True, debug=True)