jomasego committed on
Commit
ce6bfac
·
verified ·
1 Parent(s): 227c7ac

Upload llm_assistant.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. llm_assistant.py +87 -45
llm_assistant.py CHANGED
@@ -81,6 +81,15 @@ class TradeAssistant:
81
  Returns:
82
  Dict containing the LLM response
83
  """
 
 
 
 
 
 
 
 
 
84
  if chat_history is None:
85
  chat_history = []
86
 
@@ -100,87 +109,120 @@ class TradeAssistant:
100
  # Add the current question
101
  messages.append({"role": "user", "content": user_question})
102
 
103
- try:
104
- # Send the request to the HuggingFace API
105
- payload = {
106
- "inputs": messages,
107
- "parameters": {
108
- "max_new_tokens": 500,
109
- "temperature": 0.7,
110
- "top_p": 0.9,
111
- "do_sample": True
112
- }
113
  }
114
-
115
- # Implement retry mechanism for model loading
116
- max_retries = 2
117
- retry_delay = 1 # seconds
118
-
119
- for attempt in range(max_retries):
 
 
 
 
 
 
120
  response = requests.post(
121
  self.api_url,
122
  headers=self.headers,
123
  json=payload,
124
- timeout=10 # Add timeout to prevent hanging requests
125
  )
126
 
127
- # If request succeeded, process the response
128
  if response.status_code == 200:
129
  try:
130
  result = response.json()
131
  if isinstance(result, list) and len(result) > 0:
132
- # Extract the assistant's response
133
  generated_text = result[0].get("generated_text", "")
134
-
135
- # Format for return
136
  return {
137
  "success": True,
138
  "response": generated_text,
139
  "message": "Successfully generated response"
140
  }
141
  else:
 
142
  return {
143
  "success": False,
144
  "response": self.get_fallback_response(user_question),
145
- "message": f"Unexpected API response format: {result}"
146
  }
147
- except (json.JSONDecodeError, KeyError, IndexError) as e:
148
- print(f"Error processing response: {str(e)}, Response: {response.text}")
149
  return {
150
- "success": True, # Return as success but with fallback response
151
  "response": self.get_fallback_response(user_question),
152
  "message": f"Error processing response: {str(e)}"
153
  }
154
-
155
- # If model is loading (status code 503), wait and retry
156
  elif response.status_code == 503:
157
- print(f"Model is loading or temporarily unavailable. Attempt {attempt+1}/{max_retries}.")
158
- if attempt < max_retries - 1: # Don't wait after the last attempt
159
  import time
160
  time.sleep(retry_delay)
161
  else:
162
- # If we've exhausted all retries, use fallback
163
  return {
164
- "success": True, # Mark as successful but using fallback
165
- "response": self.get_fallback_response(user_question),
166
- "message": f"Model unavailable (status: {response.status_code}). Using fallback response."
167
  }
 
 
168
  else:
169
- # Other errors - try fallback immediately
170
- error_message = f"API request failed with status code {response.status_code}"
171
- try:
172
- error_detail = response.json()
173
- error_message += f": {json.dumps(error_detail)}"
174
- except:
175
- error_message += f": {response.text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
- print(error_message) # Log the error for debugging
 
 
 
 
 
 
 
 
 
 
178
 
179
- # Return fallback response instead of error
 
 
 
 
 
180
  return {
181
- "success": True, # Mark as successful but using fallback
182
- "response": self.get_fallback_response(user_question),
183
- "message": error_message
184
  }
185
 
186
  except Exception as e:
 
81
  Returns:
82
  Dict containing the LLM response
83
  """
84
+ # Check if API token is available
85
+ if not self.api_token:
86
+ print("Error: No Hugging Face API token found in environment variables or initialization")
87
+ return {
88
+ "success": False,
89
+ "response": "I'm unable to connect to my language model due to missing API credentials. Please check the HUGGINGFACE_API_TOKEN environment variable.",
90
+ "message": "Missing API token"
91
+ }
92
+
93
  if chat_history is None:
94
  chat_history = []
95
 
 
109
  # Add the current question
110
  messages.append({"role": "user", "content": user_question})
111
 
112
+ # Prepare payload for the API request
113
+ payload = {
114
+ "inputs": messages,
115
+ "parameters": {
116
+ "max_new_tokens": 500,
117
+ "temperature": 0.7,
118
+ "top_p": 0.9,
119
+ "do_sample": True
 
 
120
  }
121
+ }
122
+
123
+ # Implement retry mechanism
124
+ max_retries = 3
125
+ retry_delay = 2 # seconds
126
+
127
+ for attempt in range(max_retries):
128
+ try:
129
+ print(f"Attempt {attempt+1} of {max_retries} to query LLM at {self.api_url}")
130
+ print(f"API token begins with: {self.api_token[:5]}...")
131
+
132
+ # Make the API request
133
  response = requests.post(
134
  self.api_url,
135
  headers=self.headers,
136
  json=payload,
137
+ timeout=15 # Extended timeout for Spaces environment
138
  )
139
 
140
+ # Process successful responses
141
  if response.status_code == 200:
142
  try:
143
  result = response.json()
144
  if isinstance(result, list) and len(result) > 0:
 
145
  generated_text = result[0].get("generated_text", "")
 
 
146
  return {
147
  "success": True,
148
  "response": generated_text,
149
  "message": "Successfully generated response"
150
  }
151
  else:
152
+ print(f"Unexpected response format: {result}")
153
  return {
154
  "success": False,
155
  "response": self.get_fallback_response(user_question),
156
+ "message": "Invalid response format"
157
  }
158
+ except Exception as e:
159
+ print(f"Error processing response: {str(e)}")
160
  return {
161
+ "success": False,
162
  "response": self.get_fallback_response(user_question),
163
  "message": f"Error processing response: {str(e)}"
164
  }
165
+
166
+ # Handle model still loading
167
  elif response.status_code == 503:
168
+ print(f"Model is loading. Attempt {attempt+1}/{max_retries}")
169
+ if attempt < max_retries - 1:
170
  import time
171
  time.sleep(retry_delay)
172
  else:
 
173
  return {
174
+ "success": False,
175
+ "response": "The AI model is currently initializing. Please try again in a moment.",
176
+ "message": "Model loading"
177
  }
178
+
179
+ # Handle other error status codes
180
  else:
181
+ print(f"Request failed with status code {response.status_code}: {response.text}")
182
+ if attempt < max_retries - 1:
183
+ import time
184
+ time.sleep(retry_delay)
185
+ else:
186
+ return {
187
+ "success": False,
188
+ "response": "I'm having trouble connecting to my knowledge base. Please try again later.",
189
+ "message": f"API error: {response.status_code}"
190
+ }
191
+
192
+ except requests.exceptions.Timeout:
193
+ print(f"Request timed out. Attempt {attempt+1}/{max_retries}")
194
+ if attempt < max_retries - 1:
195
+ import time
196
+ time.sleep(retry_delay)
197
+ else:
198
+ return {
199
+ "success": False,
200
+ "response": "The request to the AI service timed out. Please try again later.",
201
+ "message": "Request timeout"
202
+ }
203
 
204
+ except requests.exceptions.ConnectionError:
205
+ print(f"Connection error. Attempt {attempt+1}/{max_retries}")
206
+ if attempt < max_retries - 1:
207
+ import time
208
+ time.sleep(retry_delay)
209
+ else:
210
+ return {
211
+ "success": False,
212
+ "response": "I'm having trouble connecting to the server. This might be due to network restrictions in the deployment environment.",
213
+ "message": "Connection error"
214
+ }
215
 
216
+ except Exception as e:
217
+ print(f"Unexpected error: {str(e)}")
218
+ if attempt < max_retries - 1:
219
+ import time
220
+ time.sleep(retry_delay)
221
+ else:
222
  return {
223
+ "success": False,
224
+ "response": "An unexpected error occurred while processing your request.",
225
+ "message": f"Unexpected error: {str(e)}"
226
  }
227
 
228
  except Exception as e: