daniel-wojahn committed on
Commit
6067356
·
1 Parent(s): 5a33af1

feat(llm): update fallback models based on user preference

Browse files
Files changed (1) hide show
  1. pipeline/llm_service.py +50 -32
pipeline/llm_service.py CHANGED
@@ -26,7 +26,12 @@ except ImportError:
26
 
27
  # Constants
28
  DEFAULT_MAX_TOKENS = 4000
29
- DEFAULT_MODEL = "moonshotai/kimi-k2:free"
 
 
 
 
 
30
  DEFAULT_TEMPERATURE = 0.3
31
  DEFAULT_TOP_P = 0.9
32
 
@@ -43,7 +48,7 @@ class LLMService:
43
  api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
44
  """
45
  self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
46
- self.model = DEFAULT_MODEL
47
  self.temperature = DEFAULT_TEMPERATURE
48
  self.top_p = DEFAULT_TOP_P
49
 
@@ -129,34 +134,53 @@ class LLMService:
129
 
130
  def _analyze_with_llm(self, df: pd.DataFrame, max_tokens: int) -> str:
131
  """
132
- Analyze metrics using an LLM via OpenRouter API.
133
-
134
  Args:
135
  df: Prepared DataFrame with metrics
136
  max_tokens: Maximum tokens for the response
137
-
138
  Returns:
139
  str: LLM analysis in markdown format
140
  """
141
- # Prepare the prompt with data and instructions
142
  prompt = self._create_llm_prompt(df)
143
-
144
- try:
145
- # Call the LLM API
146
- response = self._call_openrouter_api(
147
- prompt=prompt,
148
- system_message=self._get_system_prompt(),
149
- max_tokens=max_tokens,
150
- temperature=self.temperature,
151
- top_p=self.top_p
152
- )
153
-
154
- # Process and format the response
155
- return self._format_llm_response(response, df)
156
-
157
- except Exception as e:
158
- logger.error(f"Error in LLM analysis: {str(e)}")
159
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  def _analyze_with_rules(self, df: pd.DataFrame) -> str:
162
  """
@@ -398,18 +422,12 @@ Here is the CSV data:
398
  """Get the system prompt for the LLM."""
399
  return """You are a senior scholar of Tibetan Buddhist texts, specializing in textual criticism. Your task is to analyze the provided similarity metrics and provide expert insights into the relationships between these texts. Ground your analysis in the data, be precise, and focus on what the metrics reveal about the texts' transmission and history."""
400
 
401
- def _call_openrouter_api(
402
- self,
403
- prompt: str,
404
- system_message: str = None,
405
- max_tokens: int = None,
406
- temperature: float = None,
407
- top_p: float = None
408
- ) -> str:
409
  """
410
  Call the OpenRouter API.
411
 
412
  Args:
 
413
  prompt: The user prompt
414
  system_message: Optional system message
415
  max_tokens: Maximum tokens for the response
@@ -444,7 +462,7 @@ Here is the CSV data:
444
  messages.append({"role": "user", "content": prompt})
445
 
446
  data = {
447
- "model": self.model,
448
  "messages": messages,
449
  "max_tokens": max_tokens or DEFAULT_MAX_TOKENS,
450
  "temperature": temperature or self.temperature,
 
26
 
27
# Constants
DEFAULT_MAX_TOKENS = 4000
# Models to try in order of preference: the first one that returns a
# successful response is used (see LLMService._analyze_with_llm). All entries
# are OpenRouter ":free" tiers, so transient rate limits are expected.
PREFERRED_MODELS = [
    "google/gemma-2-9b-it:free",
    "deepseek/deepseek-r1-0528:free",
    "qwen/qwen3-235b-a22b-07-25:free",
    "moonshotai/kimi-k2:free",  # Keep as a final fallback
]
DEFAULT_TEMPERATURE = 0.3
DEFAULT_TOP_P = 0.9
37
 
 
48
  api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
49
  """
50
  self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
51
+ self.models = PREFERRED_MODELS
52
  self.temperature = DEFAULT_TEMPERATURE
53
  self.top_p = DEFAULT_TOP_P
54
 
 
134
 
135
  def _analyze_with_llm(self, df: pd.DataFrame, max_tokens: int) -> str:
136
  """
137
+ Analyze metrics using an LLM via OpenRouter API, with fallback models.
138
+
139
  Args:
140
  df: Prepared DataFrame with metrics
141
  max_tokens: Maximum tokens for the response
142
+
143
  Returns:
144
  str: LLM analysis in markdown format
145
  """
 
146
  prompt = self._create_llm_prompt(df)
147
+ last_error = None
148
+
149
+ for model in self.models:
150
+ try:
151
+ logger.info(f"Attempting analysis with model: {model}")
152
+ response = self._call_openrouter_api(
153
+ model=model,
154
+ prompt=prompt,
155
+ system_message=self._get_system_prompt(),
156
+ max_tokens=max_tokens,
157
+ temperature=self.temperature,
158
+ top_p=self.top_p
159
+ )
160
+ logger.info(f"Successfully received response from {model}.")
161
+ return self._format_llm_response(response, df)
162
+
163
+ except requests.exceptions.HTTPError as e:
164
+ last_error = e
165
+ # Check for server-side errors (5xx) to decide whether to retry
166
+ if 500 <= e.response.status_code < 600:
167
+ logger.warning(f"Model {model} failed with server error {e.response.status_code}. Trying next model.")
168
+ continue # Try the next model
169
+ else:
170
+ # For client-side errors (4xx), fail immediately
171
+ logger.error(f"LLM analysis failed with client error: {e}")
172
+ raise e
173
+ except Exception as e:
174
+ last_error = e
175
+ logger.error(f"An unexpected error occurred with model {model}: {e}")
176
+ continue # Try next model on other errors too
177
+
178
+ # If all models failed, raise the last recorded error
179
+ logger.error("All LLM models failed.")
180
+ if last_error:
181
+ raise last_error
182
+ else:
183
+ raise Exception("LLM analysis failed for all available models.")
184
 
185
  def _analyze_with_rules(self, df: pd.DataFrame) -> str:
186
  """
 
422
  """Get the system prompt for the LLM."""
423
  return """You are a senior scholar of Tibetan Buddhist texts, specializing in textual criticism. Your task is to analyze the provided similarity metrics and provide expert insights into the relationships between these texts. Ground your analysis in the data, be precise, and focus on what the metrics reveal about the texts' transmission and history."""
424
 
425
+ def _call_openrouter_api(self, model: str, prompt: str, system_message: str = None, max_tokens: int = None, temperature: float = None, top_p: float = None) -> str:
 
 
 
 
 
 
 
426
  """
427
  Call the OpenRouter API.
428
 
429
  Args:
430
+ model: Model to use for the API call
431
  prompt: The user prompt
432
  system_message: Optional system message
433
  max_tokens: Maximum tokens for the response
 
462
  messages.append({"role": "user", "content": prompt})
463
 
464
  data = {
465
+ "model": model,
466
  "messages": messages,
467
  "max_tokens": max_tokens or DEFAULT_MAX_TOKENS,
468
  "temperature": temperature or self.temperature,