File size: 7,489 Bytes
f6d0c29
 
 
 
f40a3fa
 
 
e973399
8ff6f1c
f6d0c29
e973399
 
989f678
e973399
 
2cfcdc2
168ef78
f6d0c29
 
3f3f86a
168ef78
f40a3fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a599d3b
 
 
3f5d163
 
a599d3b
ab4fe30
e8660fd
 
 
 
 
 
 
a599d3b
 
696624e
ab4fe30
a599d3b
500b4f9
e973399
ab4fe30
bd9258b
a599d3b
ab4fe30
e973399
a599d3b
f40a3fa
 
03016eb
f40a3fa
 
 
 
8ff6f1c
a599d3b
168ef78
 
 
f6d0c29
 
 
168ef78
a599d3b
f6d0c29
bd9258b
f6d0c29
 
 
 
a599d3b
f6d0c29
 
 
 
 
 
 
 
bd9258b
03016eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168ef78
 
 
 
 
 
 
 
 
03016eb
168ef78
 
 
 
 
 
 
 
 
03016eb
 
 
 
 
 
 
 
 
 
 
 
f40a3fa
03016eb
e973399
f6d0c29
f143728
8ff6f1c
 
 
 
696624e
f6d0c29
 
 
e28bb22
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import gradio as gr
import os
import json
import requests
import time
from datetime import datetime, timedelta
from collections import deque
from supabase import create_client, Client

# Secrets and deployment settings come from the environment; each is None when unset.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")      # bearer token sent to the Groq API
SUPABASE_URL = os.getenv("SUPABASE_URL")      # persistence is skipped when this is missing
SUPABASE_KEY = os.getenv("SUPABASE_KEY")      # persistence is skipped when this is missing
SYSTEM_MESSAGE = os.getenv("System_Prompt")   # content of the "system" chat message

# Chat-completion endpoint and the generation parameters sent with every request.
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
MODEL_NAME = "meta-llama/llama-4-scout-17b-16e-instruct"
MAX_TOKENS = 2048
TEMPERATURE = 0.7
TOP_P = 0.95

# Defaults for the local sliding-window rate limiter: requests allowed per window (seconds).
MAX_REQUESTS_PER_MINUTE = 15
REQUEST_WINDOW = 60

class RateLimiter:
    """Sliding-window limiter: at most ``max_requests`` per ``window`` seconds.

    Timestamps are taken from ``time.monotonic()`` rather than the wall clock,
    so system clock adjustments cannot stretch or shrink the window.

    Args:
        max_requests: Number of requests allowed inside one window.
        window: Length of the window in seconds.
    """

    def __init__(self, max_requests=MAX_REQUESTS_PER_MINUTE, window=REQUEST_WINDOW):
        self.max_requests = max_requests
        self.window = window
        # Monotonic timestamps of recorded requests, oldest first.
        self.requests = deque()

    def _discard_expired(self, now):
        """Drop recorded timestamps that have aged out of the window ending at *now*."""
        cutoff = now - self.window
        while self.requests and self.requests[0] <= cutoff:
            self.requests.popleft()

    def can_make_request(self):
        """Return True when another request fits in the current window."""
        self._discard_expired(time.monotonic())
        return len(self.requests) < self.max_requests

    def add_request(self):
        """Record that a request is being made now."""
        self.requests.append(time.monotonic())

    def time_until_next_request(self):
        """Return the seconds to wait before a request is allowed (0 if allowed now)."""
        now = time.monotonic()
        self._discard_expired(now)
        if not self.requests or len(self.requests) < self.max_requests:
            return 0

        # The window is full: a slot frees up when the oldest timestamp expires.
        return max(0, self.window - (now - self.requests[0]))

# Single module-level limiter consulted by respond() before each API call.
rate_limiter = RateLimiter(
    max_requests=MAX_REQUESTS_PER_MINUTE,
    window=REQUEST_WINDOW,
)

def persist_data(session_data, user_identifier=None):
    """Best-effort save of a conversation to the Supabase ``conversations`` table.

    Does nothing when ``SUPABASE_URL`` or ``SUPABASE_KEY`` is not configured.
    Any failure is logged and swallowed so that persistence problems never
    interrupt the chat.

    Args:
        session_data: Iterable of ``(user_msg, assistant_msg)`` pairs; empty or
            ``None`` members of a pair are skipped.
        user_identifier: Optional value stored in the ``user_id`` column.
    """
    if not SUPABASE_URL or not SUPABASE_KEY:
        return

    try:
        supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

        # Flatten the turn pairs into role-tagged messages.
        formatted_messages = []
        for user_msg, assistant_msg in session_data:
            if user_msg:
                formatted_messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                formatted_messages.append({"role": "assistant", "content": assistant_msg})

        data_to_insert = {
            # NOTE(review): naive local time, no timezone — confirm this is what
            # consumers of the table expect.
            "timestamp": datetime.now().isoformat(),
            "user_id": user_identifier,
            "messages": formatted_messages,
        }

        supabase.table("conversations").insert(data_to_insert).execute()

    except Exception as e:
        # Deliberately broad: this is a best-effort boundary. Report the failure
        # instead of hiding it, but never propagate it to the chat handler.
        print(f"Error guardando la conversación en Supabase: {e}")

def _build_messages(message, history):
    """Build the chat-completion message list from recent history plus *message*."""
    history_turns = 5            # only the most recent turns are replayed
    assistant_char_limit = 1000  # long assistant replies are truncated when replayed

    messages = []
    # Skip the system message when the prompt env var is unset, so a null
    # content value is never sent to the API.
    if SYSTEM_MESSAGE:
        messages.append({"role": "system", "content": SYSTEM_MESSAGE})

    for user_msg, assistant_msg in history[-history_turns:]:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append(
                {"role": "assistant", "content": assistant_msg[:assistant_char_limit]}
            )

    messages.append({"role": "user", "content": message})
    return messages


def _parse_retry_after(header_value, default=10, cap=30):
    """Turn a ``retry-after`` header into whole seconds clamped to ``[0, cap]``."""
    try:
        seconds = int(float(header_value))
    except (TypeError, ValueError, OverflowError):
        # Header missing or not a plain number of seconds (e.g. an HTTP date).
        seconds = default
    return max(0, min(seconds, cap))


def _iter_stream_tokens(response):
    """Yield the content tokens carried by the ``data: `` lines of a streamed response."""
    for line in response.iter_lines():
        if not line:
            continue
        line_text = line.decode('utf-8')
        if not line_text.startswith("data: "):
            continue

        data_str = line_text[6:]
        if data_str == "[DONE]":
            break

        try:
            data = json.loads(data_str)
        except json.JSONDecodeError:
            print(f"Error decodificando JSON del stream: {data_str}")
            continue

        choices = data.get('choices') if isinstance(data, dict) else None
        if not choices:
            continue
        delta = choices[0].get('delta') or {}
        token = delta.get('content')
        if token:
            yield token


def respond(message, history: list[tuple[str, str]]):
    """Stream the assistant's reply to *message* from the Groq chat API.

    This is a generator: it yields the reply accumulated so far each time a new
    token arrives, and yields user-facing status or error strings while waiting,
    retrying or failing. A successful exchange is handed to ``persist_data``.

    Args:
        message: The new user message.
        history: Previous turns as ``(user_msg, assistant_msg)`` pairs; only the
            last five are sent to the API.

    Yields:
        str: The growing reply text, or a status/error message.
    """
    if not rate_limiter.can_make_request():
        wait_time = rate_limiter.time_until_next_request()
        # Show at least one second so the user is never told to wait "0" seconds.
        yield f"Límite local alcanzado. Espera {max(1, int(wait_time))} segundos."
        return

    rate_limiter.add_request()

    history = history or []

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GROQ_API_KEY}"
    }

    payload = {
        "model": MODEL_NAME,
        "messages": _build_messages(message, history),
        "max_tokens": MAX_TOKENS,
        "temperature": TEMPERATURE,
        "top_p": TOP_P,
        "stream": True
    }

    max_retries = 3
    base_delay = 2
    # Set after a 429 handler has already slept, so the next attempt does not
    # add the exponential backoff (and a second "retrying" message) on top.
    already_waited = False

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        response = None
        try:
            if attempt > 0 and not already_waited:
                delay = base_delay * (2 ** (attempt - 1))
                yield f"Reintentando en {delay} segundos... (intento {attempt + 1}/{max_retries})"
                time.sleep(delay)
            already_waited = False

            response = requests.post(
                GROQ_API_URL,
                headers=headers,
                json=payload,
                stream=True,
                timeout=30
            )
            response.raise_for_status()

            accumulated_response = ""
            for token in _iter_stream_tokens(response):
                accumulated_response += token
                yield accumulated_response

            if accumulated_response:
                persist_data(list(history) + [(message, accumulated_response)])
                break
            if not is_last_attempt:
                continue
            yield "Lo siento, no recibí una respuesta. Inténtalo de nuevo."

        except requests.exceptions.HTTPError as e:
            status_code = e.response.status_code
            if status_code != 429:
                print(f"HTTP Error: {e}")
                yield f"Error del servidor: {status_code}. Inténtalo de nuevo."
                break

            # The error body must be read here, before the response is closed
            # in the ``finally`` clause below.
            error_text = e.response.text
            if "TPM" in error_text or "tokens per minute" in error_text:
                if is_last_attempt:
                    yield "Límite de tokens por minuto excedido. Espera 1 minuto antes de continuar."
                    break
                yield "Límite de tokens por minuto alcanzado. Reintentando en 30 segundos..."
                time.sleep(30)
            else:
                if is_last_attempt:
                    yield "El servidor está muy ocupado. Inténtalo en unos minutos."
                    break
                wait_time = _parse_retry_after(e.response.headers.get('retry-after'))
                yield f"Servidor ocupado. Reintentando en {wait_time} segundos..."
                time.sleep(wait_time)
            already_waited = True
        except requests.exceptions.Timeout:
            if not is_last_attempt:
                continue
            print("Timeout en la solicitud a la API de Groq")
            yield "La solicitud tardó demasiado tiempo. Por favor inténtalo de nuevo."
            break
        except requests.exceptions.RequestException as e:
            print(f"Error en la solicitud a la API de Groq: {e}")
            yield "Lo siento, ocurrió un error de conexión al procesar tu solicitud."
            break
        finally:
            # A streamed response holds its connection until closed; release it
            # on every exit path (success, retry, error, or generator close).
            if response is not None:
                response.close()

# Starter prompts passed to the chat UI as examples; each entry is a one-item list.
_example_prompts = [
    ["¡Bienvenido a Tu Aliado Momentum!"],
    ["¿En qué consiste el programa y para quién es?"],
    ["¿Qué beneficios obtengo y con qué empresas me conecto?"],
    ["¿Cómo puedo participar o registrarme?"],
]

# Chat interface driven by the streaming `respond` generator.
demo = gr.ChatInterface(respond, examples=_example_prompts)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()