Final_Assignment_Project

Sleeping

App Files Files Community

wt002 committed 27 days ago

Commit

2b9b092

verified ·

1 Parent(s): 622f98e

Update app.py

Browse files

Files changed (1) hide show

app.py +397 -50

app.py CHANGED Viewed

@@ -1,70 +1,417 @@
 import os
-import time
 import gradio as gr
-import pandas as pd
 import requests
-from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, LiteLLMModel, HfApiModel, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
-from agent import (
-    analyze_audio_file,
-    analyze_image_file,
-    analyze_xlsx_file,
-    analyze_youtube_video,
-    download_file_of_task_id,
 )
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
-        print("BasicAgent initialized.")
-        # Initialize the model
-        model = LiteLLMModel(model_id=os.getenv("MODEL_ID"),
-                             api_key=os.getenv("GOOGLE_API_KEY"))
-        # Initialize the searchs tool
-        duck_duck_go_search_tool = DuckDuckGoSearchTool()
-        wikipedia_search_tool = WikipediaSearchTool()
-        # Initialize Agent
         self.agent = CodeAgent(
-            model = model,
-            tools=[download_file_of_task_id, analyze_audio_file, analyze_image_file,
-                   analyze_xlsx_file, duck_duck_go_search_tool, wikipedia_search_tool,
-                   analyze_youtube_video, FinalAnswerTool()]
         )
-    def __call__(self, question: str, task_id: str) -> str:
-        task = f"""
-            You are a general AI assistant.
-            I will ask you a question and you can use 8 steps to answer it.
-            You can use the tools I provided to you to answer the question.
-            Every time you use a tool, the number of steps will decrease by one.
-            If you have a list of possible pages to visit, prefer the wikipedia ones.
-            If a page does not allow visit, skip it.
-            Report your thoughts, and finish your answer with the following template:
-            FINAL ANSWER: [YOUR FINAL ANSWER].
-            YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
-            If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
-            If the answer is a number, represent it with digits.
-            If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-            If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
-            The taskid is {task_id} in case you need to get extra files, use taskid and not name of the file
-            and the question is {question}
-            """
-        fixed_answer = self.agent.run(task)
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        time.sleep(50)
-        return fixed_answer

 import os
 import gradio as gr
 import requests
+import inspect
+import pandas as pd
+from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
+from dotenv import load_dotenv
+import heapq
+from collections import Counter
+import re
+from io import BytesIO
+from youtube_transcript_api import YouTubeTranscriptApi
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_community.document_loaders import WikipediaLoader
+from langchain_community.utilities import WikipediaAPIWrapper
+from langchain_community.document_loaders import ArxivLoader
+# (Keep Constants as is)
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+#Load environment variables
+load_dotenv()
+import io
+import contextlib
+import traceback
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from smolagents import Tool, CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, HfApiModel
+class CodeLlamaTool(Tool):
+    name = "code_llama_tool"
+    description = "Solves reasoning/code questions using Meta Code Llama 7B Instruct"
+    inputs = {
+        "question": {
+            "type": "string",
+            "description": "The question requiring code-based or reasoning-based solution"
+        }
+    }
+    output_type = "string"
+    def __init__(self):
+        self.model_id = "codellama/CodeLlama-7b-Instruct-hf"
+        token = os.getenv("HF_TOKEN")
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=token)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_id,
+            device_map="auto",
+            torch_dtype="auto",
+            token=token
+        )
+        self.pipeline = pipeline(
+            "text-generation",
+            model=self.model,
+            tokenizer=self.tokenizer,
+            max_new_tokens=512,
+            temperature=0.2,
+            truncation=True
+        )
+    def forward(self, question: str) -> str:
+        prompt = f"""You are an AI that uses Python code to answer questions.
+Question: {question}
+Instructions:
+- If solving requires code, use a block like <tool>code</tool>.
+- Always end with <final>FINAL ANSWER</final> containing the final number or string.
+Example:
+Question: What is 5 * sqrt(36)?
+Answer:
+<tool>
+import math
+print(5 * math.sqrt(36))
+</tool>
+<final>30.0</final>
+Answer:"""
+        response = self.pipeline(prompt)[0]["generated_text"]
+        return self.parse_and_execute(response)
+    def parse_and_execute(self, response: str) -> str:
+        try:
+            # Extract and run code if exists
+            if "<tool>" in response and "</tool>" in response:
+                code = response.split("<tool>")[1].split("</tool>")[0].strip()
+                result = self._run_code(code)
+                return f"FINAL ANSWER (code output): {result}"
+            # Extract final result directly
+            elif "<final>" in response and "</final>" in response:
+                final = response.split("<final>")[1].split("</final>")[0].strip()
+                return f"FINAL ANSWER: {final}"
+            return f"Could not extract final answer.\n\n{response}"
+        except Exception as e:
+            return f"Error in parse_and_execute: {str(e)}\n\nFull response:\n{response}"
+    def _run_code(self, code: str) -> str:
+        buffer = io.StringIO()
+        try:
+            with contextlib.redirect_stdout(buffer):
+                exec(code, {})
+            return buffer.getvalue().strip()
+        except Exception:
+            return f"Error executing code:\n{traceback.format_exc()}"
+#from smolagents import Tool
+#from langchain_community.document_loaders import WikipediaLoader
+class WikiSearchTool(Tool):
+    name = "wiki_search"
+    description = "Search Wikipedia for a query and return up to 2 results."
+    inputs = {
+        "query": {"type": "string", "description": "The search term for Wikipedia."}
+    }
+    output_type = "string"
+    def forward(self, query: str) -> str:
+        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+        formatted_search_docs = "\n\n---\n\n".join(
+            [
+                f'<Document source="{doc.metadata.get("source", "Wikipedia")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+                for doc in search_docs
+            ]
+        )
+        return formatted_search_docs
+class StringReverseTool(Tool):
+    name = "reverse_message"
+    description = "When you received a strange text, try to reverse it and perform action described in reversed message."
+    inputs = {
+        "message": {
+            "type": "string",
+            "description": "A message, which looks like strange and can be reversed to get actions to execute."
+        }
+    }
+    output_type = "string"
+    def __init__(self):
+        return
+    def forward(self, message: str):
+        return message[::-1]
+class KeywordsExtractorTool(Tool):
+    """Extracts top 5 keywords from a given text based on frequency."""
+    name = "keywords_extractor"
+    description = "This tool returns the 5 most frequent keywords occur in provided block of text."
+    inputs = {
+        "text": {
+            "type": "string",
+            "description": "Text to analyze for keywords.",
+        }
+    }
+    output_type = "string"
+    def forward(self, text: str) -> str:
+        try:
+            all_words = re.findall(r'\b\w+\b', text.lower())
+            conjunctions = {'a', 'and', 'of', 'is', 'in', 'to', 'the'}
+            filtered_words = []
+            for w in all_words:
+                if w not in conjunctions:
+                    filtered_words.push(w)
+            word_counts = Counter(filtered_words)
+            k = 5
+            return heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])
+        except Exception as e:
+            return f"Error during extracting most common words: {e}"
+@tool
+def parse_excel_to_json(task_id: str) -> dict:
+    """
+    For a given task_id fetch and parse an Excel file and save parsed data in structured JSON file.
+    Args:
+        task_id: An task ID to fetch.
+    Returns:
+        {
+            "task_id": str,
+            "sheets": {
+                "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
+                ...
+            },
+            "status": "Success" | "Error"
+        }
+    """
+    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+    try:
+        response = requests.get(url, timeout=100)
+        if response.status_code != 200:
+            return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}
+        xls_content = pd.ExcelFile(BytesIO(response.content))
+        json_sheets = {}
+        for sheet in xls_content.sheet_names:
+            df = xls_content.parse(sheet)
+            df = df.dropna(how="all")
+            rows = df.head(20).to_dict(orient="records")
+            json_sheets[sheet] = rows
+        return {
+            "task_id": task_id,
+            "sheets": json_sheets,
+            "status": "Success"
+        }
+    except Exception as e:
+        return {
+            "task_id": task_id,
+            "sheets": {},
+            "status": f"Error in parsing Excel file: {str(e)}"
+        }
+class VideoTranscriptionTool(Tool):
+    """Fetch transcripts from YouTube videos"""
+    name = "transcript_video"
+    description = "Fetch text transcript from YouTube movies with optional timestamps"
+    inputs = {
+        "url": {"type": "string", "description": "YouTube video URL or ID"},
+        "include_timestamps": {"type": "boolean", "description": "If timestamps should be included in output", "nullable": True}
+    }
+    output_type = "string"
+    def forward(self, url: str, include_timestamps: bool = False) -> str:
+        if "youtube.com/watch" in url:
+            video_id = url.split("v=")[1].split("&")[0]
+        elif "youtu.be/" in url:
+            video_id = url.split("youtu.be/")[1].split("?")[0]
+        elif len(url.strip()) == 11:  # Direct ID
+            video_id = url.strip()
+        else:
+            return f"YouTube URL or ID: {url} is invalid!"
+        try:
+            transcription = YouTubeTranscriptApi.get_transcript(video_id)
+            if include_timestamps:
+                formatted_transcription = []
+                for part in transcription:
+                    timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
+                    formatted_transcription.append(f"[{timestamp}] {part['text']}")
+                return "\n".join(formatted_transcription)
+            else:
+                return " ".join([part['text'] for part in transcription])
+        except Exception as e:
+            return f"Error in extracting YouTube transcript: {str(e)}"
+import os
+import base64
+import requests
+import google.generativeai as genai
+from PIL import Image
+from io import BytesIO
+from smolagents import (
+    CodeAgent,
+    ToolCallingAgent,
+    InferenceClientModel,
+    WebSearchTool,
+    HfApiModel,
+    DuckDuckGoSearchTool,
+    FinalAnswerTool,
+    tool
 )
+# Configure Gemini
+genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+# Define image analysis tool
+@tool
+def analyze_image(image_input: str) -> str:
+    """
+    Analyzes images using AI vision. Input can be:
+    - Image URL (http/https)
+    - Base64 encoded image
+    - Local file path
+    Returns detailed image analysis.
+    """
+    try:
+        # Handle URL input
+        if image_input.startswith(('http://', 'https://')):
+            response = requests.get(image_input)
+            response.raise_for_status()
+            img = Image.open(BytesIO(response.content))
+            buffer = BytesIO()
+            img.save(buffer, format="JPEG")
+            image_data = base64.b64encode(buffer.getvalue()).decode('utf-8')
+        # Handle base64 input
+        elif image_input.startswith('data:image'):
+            image_data = image_input.split(',')[1]
+        # Handle local file path
+        elif os.path.exists(image_input):
+            with open(image_input, "rb") as img_file:
+                image_data = base64.b64encode(img_file.read()).decode('utf-8')
+        else:
+            return "Invalid image input"
+        # Analyze with Gemini
+        model = genai.GenerativeModel('gemini-pro-vision')
+        response = model.generate_content([
+            "Analyze this image thoroughly. Describe all significant elements, text, objects, and context.",
+            genai.types.Part.from_data(
+                data=base64.b64decode(image_data),
+                mime_type="image/jpeg"
+            )
+        ])
+        return response.text
+    except Exception as e:
+        return f"Image analysis error: {str(e)}"
 class BasicAgent:
     def __init__(self):
+        token = os.environ.get("HF_API_TOKEN")
+        model = HfApiModel(
+            temperature=0.1,
+            token=token
+        )
+        # Existing tools
+        search_tool = DuckDuckGoSearchTool()
+        wiki_search_tool = WikiSearchTool()
+        str_reverse_tool = StringReverseTool()
+        keywords_extract_tool = KeywordsExtractorTool()
+        speech_to_text_tool = SpeechToTextTool()
+        visit_webpage_tool = VisitWebpageTool()
+        final_answer_tool = FinalAnswerTool()
+        video_transcription_tool = VideoTranscriptionTool()
+        code_llama_tool = CodeLlamaTool()
+        system_prompt = f"""
+You are my general AI assistant. Your task is to answer the question I asked.
+First, provide an explanation of your reasoning, step by step, to arrive at the answer.
+Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
+[YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
+If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
+If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
+If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
+"""
+        # Create web agent with image analysis capability
+        self.web_agent = ToolCallingAgent(
+            tools=[
+                WebSearchTool(),
+                visit_webpage_tool,
+                analyze_image  # Add image analysis to web agent
+            ],
+            model=model,
+            max_steps=10,
+            name="web_search_agent",
+            description="Runs web searches and analyzes images",
+        )
+        # Create main agent with image analysis
         self.agent = CodeAgent(
+            model=model,
+            tools=[
+                search_tool,
+                wiki_search_tool,
+                str_reverse_tool,
+                keywords_extract_tool,
+                speech_to_text_tool,
+                visit_webpage_tool,
+                final_answer_tool,
+                video_transcription_tool,
+                code_llama_tool,
+                analyze_image  # Add to main agent too
+            ],
+            add_base_tools=True
         )
+        # Update system prompt
+        self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt
+    def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # First try web agent for image-based queries
+        if any(keyword in question.lower() for keyword in ["image", "picture", "photo", "screenshot", "diagram"]):
+            print("Using web agent for image-related query")
+            answer = self.web_agent.run(question)
+        else:
+            print("Using main agent")
+            answer = self.agent.run(question)
+        print(f"Agent returning answer: {answer}")
+        return answer