Final_Assignment_Template

Sleeping

App Files Files Community

dygoo committed on Apr 29

Commit

a668c60

verified ·

1 Parent(s): a983eab

Update app.py

Browse files

Files changed (1) hide show

app.py +162 -64

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import requests
 import inspect
 import pandas as pd
 import smolagents
 from smolagents import DuckDuckGoSearchTool, VisitWebpageTool
 import time
 from functools import lru_cache
@@ -82,7 +83,14 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # Cache Wrapper
 @lru_cache(maxsize=100)
 def cached_search(query):
-    return search_tool(query)
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
@@ -97,105 +105,195 @@ class BasicAgent:
         self.history = []
         print(f"BasicAgent initialized with model: {model} and {len(self.tools)} tools.")
     def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        # Implement your agent logic here using self.model and self.tools
-        final_answer = self.process_question(question)
-        print(f"Agent returning answer: {final_answer[:50]}...")
-        return final_answer
-    def process_question(self, question:str) -> str:
         try:
             # Check if this is a request about a YouTube video
             youtube_patterns = ["youtube.com", "youtu.be", "watch youtube", "youtube video"]
             use_youtube_tool = any(pattern in question.lower() for pattern in youtube_patterns)
             if use_youtube_tool and any(isinstance(tool, YouTubeVideoTool) for tool in self.tools):
                 # Extract potential YouTube URL or ID
                 url_match = re.search(r'(?:https?:\/\/)?(?:www\.)?(?:youtube\.com|youtu\.be)\/[^\s]+', question)
                 youtube_url = url_match.group(0) if url_match else question
                 # Use YouTube tool
-                youtube_info = next(tool for tool in self.tools
-                                   if isinstance(tool, YouTubeVideoTool))(youtube_url)
-                relevant_info = self._extract_key_info(youtube_info, question)
-                return self._formulate_direct_answer(relevant_info, question)
             else:
-                # Use regular search
-                search_results = cached_search(question) if any(isinstance(tool, DuckDuckGoSearchTool) for tool in self.tools) else "No search results available."
                 relevant_info = self._extract_key_info(search_results, question)
                 return self._formulate_direct_answer(relevant_info, question)
-        except Exception as e:
-            if "too many requests" in str(e).lower():
-                time.sleep(2)
-                try:
-                    search_results = cached_search(question)
-                    relevant_info = self._extract_key_info(search_results, question)
-                    return self._formulate_direct_answer(relevant_info, question)
-                except:
-                    return self._get_fallback_answer(question)
-            return self._get_fallback_answer(question)
     def _extract_key_info(self, search_results, question):
         # Split results into sentences and find most relevant
         sentences = search_results.split('. ')
         if len(sentences) <= 3:
-            return search_results[:250]  # If few sentences, return first portion
-        # Try to find sentence with keywords from question
         keywords = [w for w in question.lower().split() if len(w) > 3]
         for sentence in sentences:
             sentence_lower = sentence.lower()
             if any(keyword in sentence_lower for keyword in keywords):
-                return sentence
         # Fallback to first few sentences
-        return '. '.join(sentences[:2])
     def _formulate_direct_answer(self, relevant_info, question):
-        if self.model and self.model.startswith('gemini'):
-            try:
-                # Configure the model
-                generation_config = {
-                    "temperature": 0.7,
-                    "top_p": 0.95,
-                    "top_k": 40,
-                    "max_output_tokens": 1024,
-                }
-                safety_settings = {
-                    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
-                    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
-                    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
-                    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
-                }
-                # Initialize the model
-                model = genai.GenerativeModel(
-                    model_name="gemini-pro",  # Adjust as needed based on your model string
-                    generation_config=generation_config,
-                    safety_settings=safety_settings
-                )
-                # Prepare prompt and generate response
-                prompt = f"Question: {question}\n\nRelevant information: {relevant_info}\n\nProvide a concise answer based only on the given information."
-                response = model.generate_content(prompt)
                 return response.text
-            except Exception as e:
-                print(f"Error using Gemini model: {e}")
-                return f"Based on the search: {relevant_info}"
-        return relevant_info
     def _get_fallback_answer(self, question):

 import inspect
 import pandas as pd
 import smolagents
+import traceback
 from smolagents import DuckDuckGoSearchTool, VisitWebpageTool
 import time
 from functools import lru_cache
 # Cache Wrapper
 @lru_cache(maxsize=100)
 def cached_search(query):
+    try:
+        print(f"Performing search for: {query[:50]}...")
+        result = search_tool(query)
+        print(f"Search successful, returned {len(result)} characters")
+        return result
+    except Exception as e:
+        print(f"Search error: {str(e)}")
+        return f"Search error: {str(e)}"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
         self.history = []
         print(f"BasicAgent initialized with model: {model} and {len(self.tools)} tools.")
+        if self.model and self.model.startswith('gemini'):
+            try:
+                self._init_gemini_model()
+                print("Successfully initialized Gemini model")
+            except Exception as e:
+                print(f"Error initializing Gemini model: {e}")
+                print("Will try again when needed")
+                self.gemini_model = None
+        else:
+            self.gemini_model = None
+    def _init_gemini_model(self):
+        """Initialize the Gemini model with appropriate settings"""
+        generation_config = {
+            "temperature": 0.7,
+            "top_p": 0.95,
+            "top_k": 40,
+            "max_output_tokens": 1024,
+        }
+        safety_settings = {
+            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+        }
+        model_name = "gemini-pro"
+        if "gemini-2.0" in self.model:
+            model_name = "gemini-1.5-pro"
+        self.gemini_model = genai.GenerativeModel(
+            model_name=model_name,
+            generation_config=generation_config,
+            safety_settings=safety_settings
+        )
     def __call__(self, question: str) -> str:
+        print(f"Agent received question: {question[:50]}...")
+        try:
+            final_answer = self.process_question(question)
+            print(f"Agent returning answer: {final_answer[:50]}...")
+            return final_answer
+        except Exception as e:
+            print(f"Agent error: {str(e)}")
+            traceback.print_exc()
+            return f"I apologize, but I encountered an error while processing your question. Error: {str(e)}"
+    def process_question(self, question: str) -> str:
         try:
             # Check if this is a request about a YouTube video
             youtube_patterns = ["youtube.com", "youtu.be", "watch youtube", "youtube video"]
             use_youtube_tool = any(pattern in question.lower() for pattern in youtube_patterns)
+            search_results = ""
+            youtube_info = ""
+            # Step 1: Gather information
             if use_youtube_tool and any(isinstance(tool, YouTubeVideoTool) for tool in self.tools):
                 # Extract potential YouTube URL or ID
                 url_match = re.search(r'(?:https?:\/\/)?(?:www\.)?(?:youtube\.com|youtu\.be)\/[^\s]+', question)
                 youtube_url = url_match.group(0) if url_match else question
+                print(f"Using YouTube tool with URL: {youtube_url}")
                 # Use YouTube tool
+                youtube_tool_instance = next((tool for tool in self.tools if isinstance(tool, YouTubeVideoTool)), None)
+                if youtube_tool_instance:
+                    youtube_info = youtube_tool_instance(youtube_url)
+                    print(f"YouTube info retrieved: {len(youtube_info)} characters")
+            #  Always search as backup or additional context
+            if any(isinstance(tool, DuckDuckGoSearchTool) for tool in self.tools):
+                search_results = cached_search(question)
+                print(f"Search results: {len(search_results)} characters")
+            #  Determine what information to use
+            if youtube_info and "Error processing YouTube video" not in youtube_info:
+                primary_info = youtube_info
+                print("Using YouTube info as primary source")
             else:
+                primary_info = search_results
+                print("Using search results as primary source")
+            # Extract key information
+            relevant_info = self._extract_key_info(primary_info, question)
+            print(f"Extracted relevant info: {len(relevant_info)} characters")
+            # Formulate an answer
+            return self._formulate_direct_answer(relevant_info, question)
+    except Exception as e:
+        print(f"Error in process_question: {str(e)}")
+        traceback.print_exc()
+        if "too many requests" in str(e).lower():
+            time.sleep(2)
+            try:
+                search_results = cached_search(question)
                 relevant_info = self._extract_key_info(search_results, question)
                 return self._formulate_direct_answer(relevant_info, question)
+            except Exception as retry_error:
+                print(f"Error in retry: {str(retry_error)}")
+                return self._get_fallback_answer(question)
+        return self._get_fallback_answer(question)
     def _extract_key_info(self, search_results, question):
+        # Basic check for empty results
+        if not search_results or len(search_results) < 10:
+            return "No relevant information found."
+        # For YouTube transcripts, extract the most relevant portion
+        if "Transcript from YouTube video" in search_results:
+            # Return a bounded chunk of the transcript instead of the full text
+            max_chars = 500  # Keep a reasonable chunk size
+            if len(search_results) > max_chars:
+                # Take the first max_chars characters that follow the header line
+                start_idx = search_results.find("\n") + 1  # Skip the first line which is the header
+                # Get content chunk
+                return search_results[start_idx:start_idx+max_chars]
+            return search_results
+        # For search results
         # Split results into sentences and find most relevant
         sentences = search_results.split('. ')
         if len(sentences) <= 3:
+            return search_results[:300]
+        # Try to find sentences with keywords from question
         keywords = [w for w in question.lower().split() if len(w) > 3]
+        relevant_sentences = []  # NEW LINE
         for sentence in sentences:
             sentence_lower = sentence.lower()
             if any(keyword in sentence_lower for keyword in keywords):
+                relevant_sentences.append(sentence)
+                if len(relevant_sentences) >= 3:  # Get up to 3 relevant sentences
+                    break
+        # If we found relevant sentences, use them
+        if relevant_sentences:
+            return '. '.join(relevant_sentences)
         # Fallback to first few sentences
+        return '. '.join(sentences[:3])
     def _formulate_direct_answer(self, relevant_info, question):
+    if not self.model:
+        return f"Based on available information: {relevant_info}"
+    if self.model.startswith('gemini'):
+        try:
+            if not hasattr(self, 'gemini_model') or self.gemini_model is None:
+                self._init_gemini_model()
+            prompt = f"""
+            Question: {question}
+            Relevant information: {relevant_info}
+            Instructions:
+            1. Provide a concise answer based only on the given information
+            2. If the information doesn't contain the answer, say so honestly
+            3. Use only facts from the provided information
+            4. Format your response as a direct answer to the user
+            """
+            response = self.gemini_model.generate_content(prompt)
+            if response and hasattr(response, 'text'):
                 return response.text
+            else:
+                print("Gemini response was empty or invalid")
+                return f"Based on the information: {relevant_info[:200]}..."
+        except Exception as e:
+            print(f"Error using Gemini model: {e}")
+            traceback.print_exc()
+            return f"Based on the search: {relevant_info[:200]}..."
+    return f"Based on the information: {relevant_info[:200]}..."
     def _get_fallback_answer(self, question):