wt002 committed on
Commit
84992c5
·
verified ·
1 Parent(s): 3985578

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -166
app.py CHANGED
@@ -1,210 +1,223 @@
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
- from smolagents import CodeAgent, OpenAIServerModel, DuckDuckGoSearchTool, VisitWebpageTool, tool, \
6
- FinalAnswerTool, PythonInterpreterTool, SpeechToTextTool, ToolCallingAgent
7
- import yaml
8
- import importlib
 
9
  from io import BytesIO
10
- import tempfile
11
- import base64
12
  from youtube_transcript_api import YouTubeTranscriptApi
13
- from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
14
- from urllib.parse import urlparse, parse_qs
15
- import json
16
- import whisper
17
- import re
18
-
19
-
20
 
21
  # (Keep Constants as is)
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
 
 
25
 
26
@tool
def transcribe_audio_file(file_path: str) -> str:
    """
    Transcribe a local MP3 audio file using Whisper.

    Args:
        file_path: Full path to the .mp3 audio file.

    Returns:
        A JSON-formatted string: on success
        {"success": true, "transcript": [{"start": 0.0, "end": 5.2, "text": "..."}, ...]}
        otherwise
        {"success": false, "error": "Reason why transcription failed"}.
    """
    try:
        if not os.path.exists(file_path):
            return json.dumps({"success": False, "error": "File does not exist."})

        if not file_path.lower().endswith(".mp3"):
            return json.dumps({"success": False, "error": "Invalid file type. Only MP3 files are supported."})

        # 'base' balances speed and accuracy; 'tiny'/'small'/'medium'/'large' also work.
        model = whisper.load_model("base")
        result = model.transcribe(file_path, verbose=False, word_timestamps=False)

        segments = [
            {"start": seg["start"], "end": seg["end"], "text": seg["text"].strip()}
            for seg in result["segments"]
        ]
        return json.dumps({"success": True, "transcript": segments})

    except Exception as exc:
        # Surface any Whisper / IO failure as a JSON error payload.
        return json.dumps({"success": False, "error": str(exc)})
 
 
 
 
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
@tool
def get_youtube_transcript(video_url: str) -> str:
    """
    Retrieve the transcript of a YouTube video, including timestamps.

    Fetches the English transcript (automatically generated subtitles are
    also supported); each snippet carries its start time, duration, and text.

    Args:
        video_url: The full URL of the YouTube video (e.g., https://www.youtube.com/watch?v=12345)

    Returns:
        A JSON-formatted string: on success
        {"success": true, "transcript": [{"start": 0.0, "duration": 1.54, "text": "..."}, ...]}
        otherwise
        {"success": false, "error": "Reason why the transcript could not be retrieved"}.
    """
    try:
        # The video ID travels in the "v" query parameter of the URL.
        query = parse_qs(urlparse(video_url).query)
        video_id = query.get("v", [None])[0]

        if not video_id:
            return json.dumps({"success": False, "error": "Invalid YouTube URL. Could not extract video ID."})

        snippets = YouTubeTranscriptApi().fetch(video_id)
        transcript = [
            {"start": s.start, "duration": s.duration, "text": s.text}
            for s in snippets
        ]
        return json.dumps({"success": True, "transcript": transcript})

    except VideoUnavailable:
        return json.dumps({"success": False, "error": "The video is unavailable."})
    except TranscriptsDisabled:
        return json.dumps({"success": False, "error": "Transcripts are disabled for this video."})
    except NoTranscriptFound:
        return json.dumps({"success": False, "error": "No transcript found for this video."})
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
125
 
126
# --- Basic Agent Definition ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
class BasicAgent:
    """CodeAgent wrapper that answers scoring-service questions, optionally
    enriching the question with the task's attached file before running."""

    def __init__(self):
        # NOTE(review): `import importlib` alone does not guarantee the
        # `importlib.resources` submodule attribute is loaded; import it
        # explicitly so prompt loading below cannot raise AttributeError.
        import importlib.resources

        model = OpenAIServerModel(api_key=os.environ.get("OPENAI_API_KEY"), model_id="gpt-4o")

        self.code_agent = CodeAgent(
            tools=[PythonInterpreterTool(), DuckDuckGoSearchTool(), VisitWebpageTool(),
                   transcribe_audio_file, get_youtube_transcript, FinalAnswerTool()],
            model=model,
            max_steps=20,
            name="hf_agent_course_final_assignment_solver",
            prompt_templates=yaml.safe_load(
                importlib.resources.files("prompts").joinpath("code_agent.yaml").read_text()
            )
        )
        print("BasicAgent initialized.")

    def __call__(self, task_id: str, question: str, file_name: str) -> str:
        """Run the agent on a question; returns the text after 'FINAL ANSWER:'."""
        if file_name:
            question = self.enrich_question_with_associated_file_details(task_id, question, file_name)

        final_result = self.code_agent.run(question)

        # Extract text after "FINAL ANSWER:" (case-insensitive, trims whitespace).
        match = re.search(r'final answer:\s*(.*)', str(final_result), re.IGNORECASE | re.DOTALL)
        if match:
            return match.group(1).strip()

        # Fallback in case the pattern is not found.
        return str(final_result).strip()

    # Bug fix: this method was defined twice with identical bodies; the dead
    # duplicate has been removed.
    def enrich_question_with_associated_file_details(self, task_id: str, question: str, file_name: str) -> str:
        """Download the task's attached file and inline its content/location
        into the question text (mp3 -> temp-file path, py -> source, xlsx ->
        CSV dump, png -> base64)."""
        api_url = DEFAULT_API_URL
        get_associated_files_url = f"{api_url}/files/{task_id}"
        response = requests.get(get_associated_files_url, timeout=15)
        response.raise_for_status()

        if file_name.endswith(".mp3"):
            # Keep the file on disk (delete=False) so the transcription tool
            # can open it later by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                tmp_file.write(response.content)
                file_path = tmp_file.name
            return question + "\n\nMentioned .mp3 file local path is: " + file_path
        elif file_name.endswith(".py"):
            file_content = response.text
            return question + "\n\nBelow is mentioned Python file:\n\n```python\n" + file_content + "\n```\n"
        elif file_name.endswith(".xlsx"):
            xlsx_io = BytesIO(response.content)
            df = pd.read_excel(xlsx_io)
            file_content = df.to_csv(index=False)
            return question + "\n\nBelow is mentioned excel file in CSV format:\n\n```csv\n" + file_content + "\n```\n"
        elif file_name.endswith(".png"):
            base64_str = base64.b64encode(response.content).decode('utf-8')
            return question + "\n\nBelow is the .png image in base64 format:\n\n```base64\n" + base64_str + "\n```\n"

        # Robustness fix: previously an unrecognized extension fell off the end
        # and returned None; pass the question through unchanged instead.
        return question
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  def run_and_submit_all( profile: gr.OAuthProfile | None):
210
  """
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import inspect
5
  import pandas as pd
6
+ from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
7
+ from dotenv import load_dotenv
8
+ import heapq
9
+ from collections import Counter
10
+ import re
11
  from io import BytesIO
 
 
12
  from youtube_transcript_api import YouTubeTranscriptApi
13
+ from langchain_community.tools.tavily_search import TavilySearchResults
14
+ from langchain_community.document_loaders import WikipediaLoader
15
+ from langchain_community.utilities import WikipediaAPIWrapper
16
+ from langchain_community.document_loaders import ArxivLoader
 
 
 
17
 
18
  # (Keep Constants as is)
19
  # --- Constants ---
20
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
22
+ #Load environment variables
23
+ load_dotenv()
24
 
25
+
26
+
27
+
28
# Consistency fix: `Tool` and `WikipediaLoader` were redundantly re-imported
# here although both are already imported at the top of the file.
class WikiSearchTool(Tool):
    """Smolagents tool that searches Wikipedia and returns up to two hits."""

    name = "wiki_search"
    description = "Search Wikipedia for a query and return up to 2 results."
    inputs = {
        "query": {"type": "string", "description": "The search term for Wikipedia."}
    }
    output_type = "string"

    def forward(self, query: str) -> str:
        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()

        # Wrap each hit in a pseudo-XML <Document> envelope so the agent can
        # tell the results apart.
        formatted_search_docs = "\n\n---\n\n".join(
            [
                f'<Document source="{doc.metadata.get("source", "Wikipedia")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
                for doc in search_docs
            ]
        )
        return formatted_search_docs
49
+
50
+
51
+
52
+
53
class StringReverseTool(Tool):
    """Reverses a garbled-looking message so its instructions become readable."""

    name = "reverse_message"
    description = "When you received a strange text, try to reverse it and perform action described in reversed message."
    inputs = {
        "message": {
            "type": "string",
            "description": "A message, which looks like strange and can be reversed to get actions to execute."
        }
    }
    output_type = "string"

    def __init__(self):
        # Bug fix: the original override was a bare `return`, which skipped
        # smolagents' Tool.__init__ setup/validation entirely.
        super().__init__()

    def forward(self, message: str):
        return message[::-1]
69
 
70
class KeywordsExtractorTool(Tool):
    """Extracts top 5 keywords from a given text based on frequency."""

    name = "keywords_extractor"
    description = "This tool returns the 5 most frequent keywords occur in provided block of text."

    inputs = {
        "text": {
            "type": "string",
            "description": "Text to analyze for keywords.",
        }
    }
    output_type = "string"

    def forward(self, text: str) -> str:
        try:
            all_words = re.findall(r'\b\w+\b', text.lower())
            # Common function words excluded from the count.
            stop_words = {'a', 'and', 'of', 'is', 'in', 'to', 'the'}
            # Bug fix: the original called filtered_words.push(w) — Python
            # lists have no `push` method, so every call raised
            # AttributeError; filter while counting instead.
            word_counts = Counter(w for w in all_words if w not in stop_words)
            # Bug fix: the original returned heapq.nlargest(...) — a list of
            # (word, count) tuples — although output_type declares "string";
            # format the top five as a readable string.
            top_five = word_counts.most_common(5)
            return ", ".join(f"{word} ({count})" for word, count in top_five)
        except Exception as e:
            return f"Error during extracting most common words: {e}"
97
 
98
@tool
def parse_excel_to_json(task_id: str) -> dict:
    """
    For a given task_id, fetch the task's Excel file and return its parsed
    content as a JSON-serializable dict (nothing is written to disk).

    Args:
        task_id: A task ID to fetch.

    Returns:
        {
            "task_id": str,
            "sheets": {
                "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
                ...
            },
            "status": "Success" | "<error description>"
        }
    """
    # Consistency fix: reuse the module-level DEFAULT_API_URL constant rather
    # than duplicating the scoring-service URL as a hard-coded literal.
    url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        response = requests.get(url, timeout=100)
        if response.status_code != 200:
            return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}

        workbook = pd.ExcelFile(BytesIO(response.content))
        json_sheets = {}

        for sheet in workbook.sheet_names:
            df = workbook.parse(sheet)
            df = df.dropna(how="all")
            # Cap each sheet at 20 rows to keep the agent's context small.
            json_sheets[sheet] = df.head(20).to_dict(orient="records")

        return {
            "task_id": task_id,
            "sheets": json_sheets,
            "status": "Success"
        }

    except Exception as e:
        return {
            "task_id": task_id,
            "sheets": {},
            "status": f"Error in parsing Excel file: {str(e)}"
        }
143
 
 
 
 
 
 
 
 
 
 
 
144
 
 
 
145
 
146
class VideoTranscriptionTool(Tool):
    """Fetch transcripts from YouTube videos"""

    name = "transcript_video"
    description = "Fetch text transcript from YouTube movies with optional timestamps"
    inputs = {
        "url": {"type": "string", "description": "YouTube video URL or ID"},
        "include_timestamps": {"type": "boolean", "description": "If timestamps should be included in output", "nullable": True}
    }
    output_type = "string"

    def forward(self, url: str, include_timestamps: bool = False) -> str:
        # Accept a full watch URL, a youtu.be short link, or a bare 11-char ID.
        if "youtube.com/watch" in url:
            video_id = url.split("v=")[1].split("&")[0]
        elif "youtu.be/" in url:
            video_id = url.split("youtu.be/")[1].split("?")[0]
        elif len(url.strip()) == 11:
            video_id = url.strip()
        else:
            return f"YouTube URL or ID: {url} is invalid!"

        try:
            parts = YouTubeTranscriptApi.get_transcript(video_id)

            if not include_timestamps:
                return " ".join(part['text'] for part in parts)

            lines = []
            for part in parts:
                # mm:ss label derived from the snippet's start offset.
                timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
                lines.append(f"[{timestamp}] {part['text']}")
            return "\n".join(lines)

        except Exception as e:
            return f"Error in extracting YouTube transcript: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
class BasicAgent:
    """CodeAgent configured with the custom tool set defined above and an
    appended FINAL-ANSWER formatting instruction in its system prompt."""

    def __init__(self):
        token = os.environ.get("HF_API_TOKEN")
        model = HfApiModel(
            temperature=0.1,
            token=token
        )

        search_tool = DuckDuckGoSearchTool()
        wiki_search_tool = WikiSearchTool()
        str_reverse_tool = StringReverseTool()
        keywords_extract_tool = KeywordsExtractorTool()
        speech_to_text_tool = SpeechToTextTool()
        visit_webpage_tool = VisitWebpageTool()
        final_answer_tool = FinalAnswerTool()
        video_transcription_tool = VideoTranscriptionTool()

        # Plain string literal: the original used an f-string although the
        # text contains no placeholders.
        system_prompt = """
You are my general AI assistant. Your task is to answer the question I asked.
First, provide an explanation of your reasoning, step by step, to arrive at the answer.
Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
[YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
"""
        self.agent = CodeAgent(
            model=model,
            tools=[search_tool, wiki_search_tool, str_reverse_tool, keywords_extract_tool, speech_to_text_tool, visit_webpage_tool, final_answer_tool, parse_excel_to_json, video_transcription_tool],
            add_base_tools=True
        )
        self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt

    def __call__(self, question: str) -> str:
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        answer = self.agent.run(question)
        print(f"Agent returning answer: {answer}")
        # Bug fix: honor the declared `-> str` return type — agent.run may
        # return a non-string object.
        return str(answer)
220
+
221
 
222
  def run_and_submit_all( profile: gr.OAuthProfile | None):
223
  """