daniel-wojahn committed on
Commit
6067356
·
1 Parent(s): 5a33af1

feat(llm): update fallback models based on user preference

Browse files
Files changed (1) hide show
  1. pipeline/llm_service.py +50 -32
pipeline/llm_service.py CHANGED
@@ -26,7 +26,12 @@ except ImportError:
26
 
27
  # Constants
28
  DEFAULT_MAX_TOKENS = 4000
29
- DEFAULT_MODEL = "moonshotai/kimi-k2:free"
 
 
 
 
 
30
  DEFAULT_TEMPERATURE = 0.3
31
  DEFAULT_TOP_P = 0.9
32
 
@@ -43,7 +48,7 @@ class LLMService:
43
  api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
44
  """
45
  self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
46
- self.model = DEFAULT_MODEL
47
  self.temperature = DEFAULT_TEMPERATURE
48
  self.top_p = DEFAULT_TOP_P
49
 
@@ -129,34 +134,53 @@ class LLMService:
129
 
130
  def _analyze_with_llm(self, df: pd.DataFrame, max_tokens: int) -> str:
131
  """
132
- Analyze metrics using an LLM via OpenRouter API.
133
-
134
  Args:
135
  df: Prepared DataFrame with metrics
136
  max_tokens: Maximum tokens for the response
137
-
138
  Returns:
139
  str: LLM analysis in markdown format
140
  """
141
- # Prepare the prompt with data and instructions
142
  prompt = self._create_llm_prompt(df)
143
-
144
- try:
145
- # Call the LLM API
146
- response = self._call_openrouter_api(
147
- prompt=prompt,
148
- system_message=self._get_system_prompt(),
149
- max_tokens=max_tokens,
150
- temperature=self.temperature,
151
- top_p=self.top_p
152
- )
153
-
154
- # Process and format the response
155
- return self._format_llm_response(response, df)
156
-
157
- except Exception as e:
158
- logger.error(f"Error in LLM analysis: {str(e)}")
159
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  def _analyze_with_rules(self, df: pd.DataFrame) -> str:
162
  """
@@ -398,18 +422,12 @@ Here is the CSV data:
398
  """Get the system prompt for the LLM."""
399
  return """You are a senior scholar of Tibetan Buddhist texts, specializing in textual criticism. Your task is to analyze the provided similarity metrics and provide expert insights into the relationships between these texts. Ground your analysis in the data, be precise, and focus on what the metrics reveal about the texts' transmission and history."""
400
 
401
- def _call_openrouter_api(
402
- self,
403
- prompt: str,
404
- system_message: str = None,
405
- max_tokens: int = None,
406
- temperature: float = None,
407
- top_p: float = None
408
- ) -> str:
409
  """
410
  Call the OpenRouter API.
411
 
412
  Args:
 
413
  prompt: The user prompt
414
  system_message: Optional system message
415
  max_tokens: Maximum tokens for the response
@@ -444,7 +462,7 @@ Here is the CSV data:
444
  messages.append({"role": "user", "content": prompt})
445
 
446
  data = {
447
- "model": self.model,
448
  "messages": messages,
449
  "max_tokens": max_tokens or DEFAULT_MAX_TOKENS,
450
  "temperature": temperature or self.temperature,
 
26
 
27
# Constants
DEFAULT_MAX_TOKENS = 4000
# Models to try in order of preference: the first one that returns a
# successful response is used (see LLMService._analyze_with_llm). All entries
# are OpenRouter ":free" tiers, so transient rate limits are expected.
PREFERRED_MODELS = [
    "google/gemma-2-9b-it:free",
    "deepseek/deepseek-r1-0528:free",
    "qwen/qwen3-235b-a22b-07-25:free",
    "moonshotai/kimi-k2:free",  # Keep as a final fallback
]
DEFAULT_TEMPERATURE = 0.3
DEFAULT_TOP_P = 0.9
37
 
 
48
  api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
49
  """
50
  self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
51
+ self.models = PREFERRED_MODELS
52
  self.temperature = DEFAULT_TEMPERATURE
53
  self.top_p = DEFAULT_TOP_P
54
 
 
134
 
135
  def _analyze_with_llm(self, df: pd.DataFrame, max_tokens: int) -> str:
136
  """
137
+ Analyze metrics using an LLM via OpenRouter API, with fallback models.
138
+
139
  Args:
140
  df: Prepared DataFrame with metrics
141
  max_tokens: Maximum tokens for the response
142
+
143
  Returns:
144
  str: LLM analysis in markdown format
145
  """
 
146
  prompt = self._create_llm_prompt(df)
147
+ last_error = None
148
+
149
+ for model in self.models:
150
+ try:
151
+ logger.info(f"Attempting analysis with model: {model}")
152
+ response = self._call_openrouter_api(
153
+ model=model,
154
+ prompt=prompt,
155
+ system_message=self._get_system_prompt(),
156
+ max_tokens=max_tokens,
157
+ temperature=self.temperature,
158
+ top_p=self.top_p
159
+ )
160
+ logger.info(f"Successfully received response from {model}.")
161
+ return self._format_llm_response(response, df)
162
+
163
+ except requests.exceptions.HTTPError as e:
164
+ last_error = e
165
+ # Check for server-side errors (5xx) to decide whether to retry
166
+ if 500 <= e.response.status_code < 600:
167
+ logger.warning(f"Model {model} failed with server error {e.response.status_code}. Trying next model.")
168
+ continue # Try the next model
169
+ else:
170
+ # For client-side errors (4xx), fail immediately
171
+ logger.error(f"LLM analysis failed with client error: {e}")
172
+ raise e
173
+ except Exception as e:
174
+ last_error = e
175
+ logger.error(f"An unexpected error occurred with model {model}: {e}")
176
+ continue # Try next model on other errors too
177
+
178
+ # If all models failed, raise the last recorded error
179
+ logger.error("All LLM models failed.")
180
+ if last_error:
181
+ raise last_error
182
+ else:
183
+ raise Exception("LLM analysis failed for all available models.")
184
 
185
  def _analyze_with_rules(self, df: pd.DataFrame) -> str:
186
  """
 
422
  """Get the system prompt for the LLM."""
423
  return """You are a senior scholar of Tibetan Buddhist texts, specializing in textual criticism. Your task is to analyze the provided similarity metrics and provide expert insights into the relationships between these texts. Ground your analysis in the data, be precise, and focus on what the metrics reveal about the texts' transmission and history."""
424
 
425
+ def _call_openrouter_api(self, model: str, prompt: str, system_message: str = None, max_tokens: int = None, temperature: float = None, top_p: float = None) -> str:
 
 
 
 
 
 
 
426
  """
427
  Call the OpenRouter API.
428
 
429
  Args:
430
+ model: Model to use for the API call
431
  prompt: The user prompt
432
  system_message: Optional system message
433
  max_tokens: Maximum tokens for the response
 
462
  messages.append({"role": "user", "content": prompt})
463
 
464
  data = {
465
+ "model": model,
466
  "messages": messages,
467
  "max_tokens": max_tokens or DEFAULT_MAX_TOKENS,
468
  "temperature": temperature or self.temperature,