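"""Chat Space for "Tu Aliado Momentum": a Gradio ChatInterface that streams
replies from Groq's chat completions API, applies a small local rate limit
with retries on transient errors, and optionally logs conversations to
Supabase."""
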
import gradio as gr
import os
import json
import requests
import time
from datetime import datetime
from collections import deque
from supabase import create_client, Client
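
# Configuration: credentials and the system prompt are read from environment
# variables; model, sampling, and rate-limit settings are fixed constants.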
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
SYSTEM_MESSAGE = os.environ.get("System_Prompt", "")  # fall back to an empty prompt if the variable is unset
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
MODEL_NAME = "meta-llama/llama-4-scout-17b-16e-instruct"
MAX_TOKENS = 2048
TEMPERATURE = 0.7
TOP_P = 0.95
MAX_REQUESTS_PER_MINUTE = 15
REQUEST_WINDOW = 60

class RateLimiter:
    """Sliding-window limiter: allows at most max_requests per window seconds."""

    def __init__(self, max_requests=MAX_REQUESTS_PER_MINUTE, window=REQUEST_WINDOW):
        self.max_requests = max_requests
        self.window = window
        self.requests = deque()

    def can_make_request(self):
        # Drop timestamps that have fallen outside the window, then check capacity.
        now = time.time()
        while self.requests and self.requests[0] <= now - self.window:
            self.requests.popleft()
        return len(self.requests) < self.max_requests

    def add_request(self):
        self.requests.append(time.time())

    def time_until_next_request(self):
        # Seconds until the oldest recorded request leaves the window.
        if not self.requests:
            return 0
        oldest_request = self.requests[0]
        time_passed = time.time() - oldest_request
        if time_passed >= self.window:
            return 0
        return self.window - time_passed
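
# One module-level limiter shared by every chat session served by this process.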
rate_limiter = RateLimiter()

def persist_data(session_data, user_identifier=None):
    """Store the finished conversation in Supabase; a no-op if credentials are missing."""
    if not SUPABASE_URL or not SUPABASE_KEY:
        return
    try:
        supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
        formatted_messages = []
        for user_msg, assistant_msg in session_data:
            if user_msg:
                formatted_messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                formatted_messages.append({"role": "assistant", "content": assistant_msg})
        data_to_insert = {
            "timestamp": datetime.now().isoformat(),
            "user_id": user_identifier,
            "messages": formatted_messages
        }
        table_name = "conversations"
        supabase.table(table_name).insert(data_to_insert).execute()
    except Exception:
        # Persistence is best-effort: never let a storage error break the chat.
        pass

def respond(message, history: list[tuple[str, str]]):
    # Enforce the local rate limit before calling the API.
    if not rate_limiter.can_make_request():
        wait_time = rate_limiter.time_until_next_request()
        yield f"Límite local alcanzado. Espera {int(wait_time)} segundos."
        return
    rate_limiter.add_request()
    # Build the prompt: system message, the last 5 exchanges (assistant turns
    # truncated to 1000 characters), then the new user message.
    messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
    truncated_history = history[-5:] if len(history) > 5 else history
    for user_msg, assistant_msg in truncated_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg[:1000]})
    messages.append({"role": "user", "content": message})
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GROQ_API_KEY}"
    }
    payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "max_tokens": MAX_TOKENS,
        "temperature": TEMPERATURE,
        "top_p": TOP_P,
        "stream": True
    }
    # Retry with exponential backoff on transient failures.
    max_retries = 3
    base_delay = 2
    for attempt in range(max_retries):
        try:
            if attempt > 0:
                delay = base_delay * (2 ** (attempt - 1))
                yield f"Reintentando en {delay} segundos... (intento {attempt + 1}/{max_retries})"
                time.sleep(delay)
            response = requests.post(
                GROQ_API_URL,
                headers=headers,
                json=payload,
                stream=True,
                timeout=30
            )
            response.raise_for_status()
            # Read the server-sent event stream and yield the growing reply token by token.
            accumulated_response = ""
            for line in response.iter_lines():
                if line:
                    line_text = line.decode('utf-8')
                    if line_text.startswith("data: "):
                        data_str = line_text[6:]
                        if data_str == "[DONE]":
                            break
                        try:
                            data = json.loads(data_str)
                            if 'choices' in data and len(data['choices']) > 0:
                                delta = data['choices'][0].get('delta', {})
                                if 'content' in delta and delta['content']:
                                    token = delta['content']
                                    accumulated_response += token
                                    yield accumulated_response
                        except json.JSONDecodeError:
                            print(f"Error decodificando JSON del stream: {data_str}")
                            continue
            if not accumulated_response:
                if attempt < max_retries - 1:
                    continue
                yield "Lo siento, no recibí una respuesta. Inténtalo de nuevo."
            else:
                # Persist the completed exchange before finishing.
                current_session = history + [(message, accumulated_response)]
                persist_data(current_session)
            break
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 429:
                error_text = e.response.text
                if "TPM" in error_text or "tokens per minute" in error_text:
                    # Token-per-minute limit: back off for a fixed 30 seconds.
                    if attempt < max_retries - 1:
                        yield "Límite de tokens por minuto alcanzado. Reintentando en 30 segundos..."
                        time.sleep(30)
                        continue
                    else:
                        yield "Límite de tokens por minuto excedido. Espera 1 minuto antes de continuar."
                        break
                else:
                    # Generic 429: honor Retry-After, capped at 30 seconds.
                    if attempt < max_retries - 1:
                        retry_after = e.response.headers.get('retry-after', '10')
                        wait_time = min(int(retry_after), 30)
                        yield f"Servidor ocupado. Reintentando en {wait_time} segundos..."
                        time.sleep(wait_time)
                        continue
                    else:
                        yield "El servidor está muy ocupado. Inténtalo en unos minutos."
                        break
            else:
                print(f"HTTP Error: {e}")
                yield f"Error del servidor: {e.response.status_code}. Inténtalo de nuevo."
                break
        except requests.exceptions.Timeout:
            if attempt < max_retries - 1:
                continue
            print("Timeout en la solicitud a la API de Groq")
            yield "La solicitud tardó demasiado tiempo. Por favor inténtalo de nuevo."
            break
        except requests.exceptions.RequestException as e:
            print(f"Error en la solicitud a la API de Groq: {e}")
            yield "Lo siento, ocurrió un error de conexión al procesar tu solicitud."
            break

demo = gr.ChatInterface(
    respond,
    examples=[
        ["¡Bienvenido a Tu Aliado Momentum!"],
        ["¿En qué consiste el programa y para quién es?"],
        ["¿Qué beneficios obtengo y con qué empresas me conecto?"],
        ["¿Cómo puedo participar o registrarme?"]
    ]
)

if __name__ == "__main__":
    demo.launch()