Danielbrdz committed on
Commit
168ef78
·
verified ·
1 Parent(s): 03016eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -13
app.py CHANGED
@@ -13,12 +13,12 @@ SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
13
  SYSTEM_MESSAGE = os.environ.get("System_Prompt")
14
 
15
  GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
16
- MODEL_NAME = "meta-llama/llama-4-maverick-17b-128e-instruct"
17
- MAX_TOKENS = 4096
18
  TEMPERATURE = 0.7
19
  TOP_P = 0.95
20
 
21
- MAX_REQUESTS_PER_MINUTE = 25
22
  REQUEST_WINDOW = 60
23
 
24
  class RateLimiter:
@@ -86,11 +86,13 @@ def respond(message, history: list[tuple[str, str]]):
86
 
87
  messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
88
 
89
- for user_msg, assistant_msg in history:
 
 
90
  if user_msg:
91
  messages.append({"role": "user", "content": user_msg})
92
  if assistant_msg:
93
- messages.append({"role": "assistant", "content": assistant_msg})
94
 
95
  messages.append({"role": "user", "content": message})
96
 
@@ -160,15 +162,25 @@ def respond(message, history: list[tuple[str, str]]):
160
 
161
  except requests.exceptions.HTTPError as e:
162
  if e.response.status_code == 429:
163
- if attempt < max_retries - 1:
164
- retry_after = e.response.headers.get('retry-after', '60')
165
- wait_time = min(int(retry_after), 60)
166
- yield f"Servidor ocupado. Reintentando en {wait_time} segundos..."
167
- time.sleep(wait_time)
168
- continue
 
 
 
169
  else:
170
- yield "El servidor está muy ocupado. Inténtalo en unos minutos."
171
- break
 
 
 
 
 
 
 
172
  else:
173
  print(f"HTTP Error: {e}")
174
  yield f"Error del servidor: {e.response.status_code}. Inténtalo de nuevo."
 
13
  SYSTEM_MESSAGE = os.environ.get("System_Prompt")
14
 
15
  GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
16
+ MODEL_NAME = "meta-llama/llama-4-scout-17b-16e-instruct"
17
+ MAX_TOKENS = 2048
18
  TEMPERATURE = 0.7
19
  TOP_P = 0.95
20
 
21
+ MAX_REQUESTS_PER_MINUTE = 15
22
  REQUEST_WINDOW = 60
23
 
24
  class RateLimiter:
 
86
 
87
  messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
88
 
89
+ truncated_history = history[-5:] if len(history) > 5 else history
90
+
91
+ for user_msg, assistant_msg in truncated_history:
92
  if user_msg:
93
  messages.append({"role": "user", "content": user_msg})
94
  if assistant_msg:
95
+ messages.append({"role": "assistant", "content": assistant_msg[:1000]})
96
 
97
  messages.append({"role": "user", "content": message})
98
 
 
162
 
163
  except requests.exceptions.HTTPError as e:
164
  if e.response.status_code == 429:
165
+ error_text = e.response.text
166
+ if "TPM" in error_text or "tokens per minute" in error_text:
167
+ if attempt < max_retries - 1:
168
+ yield f"Límite de tokens por minuto alcanzado. Reintentando en 30 segundos..."
169
+ time.sleep(30)
170
+ continue
171
+ else:
172
+ yield "Límite de tokens por minuto excedido. Espera 1 minuto antes de continuar."
173
+ break
174
  else:
175
+ if attempt < max_retries - 1:
176
+ retry_after = e.response.headers.get('retry-after', '10')
177
+ wait_time = min(int(retry_after), 30)
178
+ yield f"Servidor ocupado. Reintentando en {wait_time} segundos..."
179
+ time.sleep(wait_time)
180
+ continue
181
+ else:
182
+ yield "El servidor está muy ocupado. Inténtalo en unos minutos."
183
+ break
184
  else:
185
  print(f"HTTP Error: {e}")
186
  yield f"Error del servidor: {e.response.status_code}. Inténtalo de nuevo."