Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -13,12 +13,12 @@ SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
|
|
13 |
SYSTEM_MESSAGE = os.environ.get("System_Prompt")
|
14 |
|
15 |
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
|
16 |
-
MODEL_NAME = "meta-llama/llama-4-…" [previous model suffix lost in diff extraction]
|
17 |
-
MAX_TOKENS = … [previous value lost in diff extraction]
|
18 |
TEMPERATURE = 0.7
|
19 |
TOP_P = 0.95
|
20 |
|
21 |
-
MAX_REQUESTS_PER_MINUTE = … [previous value lost in diff extraction]
|
22 |
REQUEST_WINDOW = 60
|
23 |
|
24 |
class RateLimiter:
|
@@ -86,11 +86,13 @@ def respond(message, history: list[tuple[str, str]]):
|
|
86 |
|
87 |
messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
|
88 |
|
89 |
-
|
|
|
|
|
90 |
if user_msg:
|
91 |
messages.append({"role": "user", "content": user_msg})
|
92 |
if assistant_msg:
|
93 |
-
messages.append({"role": "assistant", "content": assistant_msg})
|
94 |
|
95 |
messages.append({"role": "user", "content": message})
|
96 |
|
@@ -160,15 +162,25 @@ def respond(message, history: list[tuple[str, str]]):
|
|
160 |
|
161 |
except requests.exceptions.HTTPError as e:
|
162 |
if e.response.status_code == 429:
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
|
|
|
|
|
|
169 |
else:
|
170 |
-
|
171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
else:
|
173 |
print(f"HTTP Error: {e}")
|
174 |
yield f"Error del servidor: {e.response.status_code}. Inténtalo de nuevo."
|
|
|
13 |
SYSTEM_MESSAGE = os.environ.get("System_Prompt")
|
14 |
|
15 |
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
|
16 |
+
MODEL_NAME = "meta-llama/llama-4-scout-17b-16e-instruct"
|
17 |
+
MAX_TOKENS = 2048
|
18 |
TEMPERATURE = 0.7
|
19 |
TOP_P = 0.95
|
20 |
|
21 |
+
MAX_REQUESTS_PER_MINUTE = 15
|
22 |
REQUEST_WINDOW = 60
|
23 |
|
24 |
class RateLimiter:
|
|
|
86 |
|
87 |
messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
|
88 |
|
89 |
+
truncated_history = history[-5:] if len(history) > 5 else history
|
90 |
+
|
91 |
+
for user_msg, assistant_msg in truncated_history:
|
92 |
if user_msg:
|
93 |
messages.append({"role": "user", "content": user_msg})
|
94 |
if assistant_msg:
|
95 |
+
messages.append({"role": "assistant", "content": assistant_msg[:1000]})
|
96 |
|
97 |
messages.append({"role": "user", "content": message})
|
98 |
|
|
|
162 |
|
163 |
except requests.exceptions.HTTPError as e:
|
164 |
if e.response.status_code == 429:
|
165 |
+
error_text = e.response.text
|
166 |
+
if "TPM" in error_text or "tokens per minute" in error_text:
|
167 |
+
if attempt < max_retries - 1:
|
168 |
+
yield f"Límite de tokens por minuto alcanzado. Reintentando en 30 segundos..."
|
169 |
+
time.sleep(30)
|
170 |
+
continue
|
171 |
+
else:
|
172 |
+
yield "Límite de tokens por minuto excedido. Espera 1 minuto antes de continuar."
|
173 |
+
break
|
174 |
else:
|
175 |
+
if attempt < max_retries - 1:
|
176 |
+
retry_after = e.response.headers.get('retry-after', '10')
|
177 |
+
wait_time = min(int(retry_after), 30)
|
178 |
+
yield f"Servidor ocupado. Reintentando en {wait_time} segundos..."
|
179 |
+
time.sleep(wait_time)
|
180 |
+
continue
|
181 |
+
else:
|
182 |
+
yield "El servidor está muy ocupado. Inténtalo en unos minutos."
|
183 |
+
break
|
184 |
else:
|
185 |
print(f"HTTP Error: {e}")
|
186 |
yield f"Error del servidor: {e.response.status_code}. Inténtalo de nuevo."
|