Commit 6067356 · 1 Parent(s): 5a33af1
feat(llm): update fallback models based on user preference

pipeline/llm_service.py CHANGED (+50 -32)
@@ -26,7 +26,12 @@ except ImportError:
 
 # Constants
 DEFAULT_MAX_TOKENS = 4000
-
+PREFERRED_MODELS = [
+    "google/gemma-2-9b-it:free",
+    "deepseek/deepseek-r1-0528:free",
+    "qwen/qwen3-235b-a22b-07-25:free",
+    "moonshotai/kimi-k2:free" # Keep as a final fallback
+]
 DEFAULT_TEMPERATURE = 0.3
 DEFAULT_TOP_P = 0.9
 
@@ -43,7 +48,7 @@ class LLMService:
             api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
         """
         self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
-        self.
+        self.models = PREFERRED_MODELS
         self.temperature = DEFAULT_TEMPERATURE
         self.top_p = DEFAULT_TOP_P
 
@@ -129,34 +134,53 @@ class LLMService:
 
     def _analyze_with_llm(self, df: pd.DataFrame, max_tokens: int) -> str:
         """
-        Analyze metrics using an LLM via OpenRouter API.
-
+        Analyze metrics using an LLM via OpenRouter API, with fallback models.
+
         Args:
             df: Prepared DataFrame with metrics
             max_tokens: Maximum tokens for the response
-
+
         Returns:
             str: LLM analysis in markdown format
         """
-        # Prepare the prompt with data and instructions
         prompt = self._create_llm_prompt(df)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        last_error = None
+
+        for model in self.models:
+            try:
+                logger.info(f"Attempting analysis with model: {model}")
+                response = self._call_openrouter_api(
+                    model=model,
+                    prompt=prompt,
+                    system_message=self._get_system_prompt(),
+                    max_tokens=max_tokens,
+                    temperature=self.temperature,
+                    top_p=self.top_p
+                )
+                logger.info(f"Successfully received response from {model}.")
+                return self._format_llm_response(response, df)
+
+            except requests.exceptions.HTTPError as e:
+                last_error = e
+                # Check for server-side errors (5xx) to decide whether to retry
+                if 500 <= e.response.status_code < 600:
+                    logger.warning(f"Model {model} failed with server error {e.response.status_code}. Trying next model.")
+                    continue  # Try the next model
+                else:
+                    # For client-side errors (4xx), fail immediately
+                    logger.error(f"LLM analysis failed with client error: {e}")
+                    raise e
+            except Exception as e:
+                last_error = e
+                logger.error(f"An unexpected error occurred with model {model}: {e}")
+                continue  # Try next model on other errors too
+
+        # If all models failed, raise the last recorded error
+        logger.error("All LLM models failed.")
+        if last_error:
+            raise last_error
+        else:
+            raise Exception("LLM analysis failed for all available models.")
 
     def _analyze_with_rules(self, df: pd.DataFrame) -> str:
         """
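Note: the retry policy introduced above treats server-side failures (HTTP 5xx) as transient, moving on to the next entry in self.models, while client-side failures (HTTP 4xx, e.g. an invalid API key) abort immediately, since retrying with another model cannot fix them. Below is a self-contained sketch of the same control flow, runnable without network access; the names call_model, fake_http_error, and analyze_with_fallback are illustrative, not part of this commit.

import requests

PREFERRED_MODELS = ["model-a", "model-b", "model-c"]  # illustrative stand-ins

def fake_http_error(status: int) -> requests.exceptions.HTTPError:
    # Build an HTTPError carrying a response with the given status code.
    response = requests.models.Response()
    response.status_code = status
    return requests.exceptions.HTTPError(response=response)

def call_model(model: str) -> str:
    # Stub standing in for _call_openrouter_api: pretend the first two models are down.
    if model in ("model-a", "model-b"):
        raise fake_http_error(503)
    return f"analysis from {model}"

def analyze_with_fallback() -> str:
    last_error = None
    for model in PREFERRED_MODELS:
        try:
            return call_model(model)
        except requests.exceptions.HTTPError as e:
            last_error = e
            if 500 <= e.response.status_code < 600:
                continue  # transient server error: try the next model
            raise         # client error: retrying another model won't help
        except Exception as e:
            last_error = e
            continue
    raise last_error or RuntimeError("All models failed.")

print(analyze_with_fallback())  # -> "analysis from model-c"

In the real method, call_model corresponds to _call_openrouter_api and a successful response is passed through _format_llm_response before returning.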
@@ -398,18 +422,12 @@ Here is the CSV data:
         """Get the system prompt for the LLM."""
         return """You are a senior scholar of Tibetan Buddhist texts, specializing in textual criticism. Your task is to analyze the provided similarity metrics and provide expert insights into the relationships between these texts. Ground your analysis in the data, be precise, and focus on what the metrics reveal about the texts' transmission and history."""
 
-    def _call_openrouter_api(
-        self,
-        prompt: str,
-        system_message: str = None,
-        max_tokens: int = None,
-        temperature: float = None,
-        top_p: float = None
-    ) -> str:
+    def _call_openrouter_api(self, model: str, prompt: str, system_message: str = None, max_tokens: int = None, temperature: float = None, top_p: float = None) -> str:
        """
         Call the OpenRouter API.
 
         Args:
+            model: Model to use for the API call
             prompt: The user prompt
             system_message: Optional system message
             max_tokens: Maximum tokens for the response
@@ -444,7 +462,7 @@ Here is the CSV data:
         messages.append({"role": "user", "content": prompt})
 
         data = {
-            "model":
+            "model": model,
             "messages": messages,
             "max_tokens": max_tokens or DEFAULT_MAX_TOKENS,
             "temperature": temperature or self.temperature,
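The body of _call_openrouter_api beyond the payload shown above is not part of this diff. For context, here is a minimal sketch of how such a payload is typically posted to OpenRouter; the endpoint URL, headers, and response shape are assumptions based on OpenRouter's OpenAI-compatible chat-completions API, not taken from this file.

import requests

def send_chat_request(api_key: str, data: dict) -> str:
    # Hypothetical helper: POST a chat-completion payload like `data` above.
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",  # assumed endpoint
        headers={"Authorization": f"Bearer {api_key}"},
        json=data,
        timeout=120,
    )
    # raise_for_status() turns 4xx/5xx responses into requests.exceptions.HTTPError,
    # which is exactly what the fallback loop in _analyze_with_llm catches.
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]  # OpenAI-style shape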
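One caveat in the unchanged payload code: max_tokens or DEFAULT_MAX_TOKENS and temperature or self.temperature treat any falsy argument as "not provided", so an explicit temperature=0.0 silently falls back to the default. A quick demonstration, with the usual is-not-None fix (hypothetical, not part of this commit):

DEFAULT_TEMPERATURE = 0.3
requested = 0.0  # caller explicitly wants deterministic sampling

# `or` discards the falsy 0.0 and substitutes the default:
assert (requested or DEFAULT_TEMPERATURE) == 0.3

# An explicit None check preserves the caller's value:
assert (requested if requested is not None else DEFAULT_TEMPERATURE) == 0.0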