Final_Assignment_Project

Sleeping

App Files Files Community

wt002 committed on May 9

Commit

11ac316

verified ·

1 Parent(s): 89fddc8

Update app.py

Browse files

Files changed (1) hide show

app.py +166 -15

app.py CHANGED Viewed

@@ -1,10 +1,20 @@
 import os
-import inspect
 import gradio as gr
 import requests
 import pandas as pd
-from langchain_core.messages import HumanMessage
-from agent import build_graph
@@ -12,23 +22,164 @@ from agent import build_graph
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
-    """A langgraph agent."""
     def __init__(self):
         print("BasicAgent initialized.")
-        self.graph = build_graph()
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        # Wrap the question in a HumanMessage from langchain_core
-        messages = [HumanMessage(content=question)]
-        messages = self.graph.invoke({"messages": messages})
-        answer = messages['messages'][-1].content
-        return answer[14:]

 import os
 import gradio as gr
 import requests
 import pandas as pd
+from smolagents import CodeAgent, OpenAIServerModel, DuckDuckGoSearchTool, VisitWebpageTool, tool, \
+    FinalAnswerTool, PythonInterpreterTool, SpeechToTextTool, ToolCallingAgent
+import yaml
+import importlib
+from io import BytesIO
+import tempfile
+import base64
+from youtube_transcript_api import YouTubeTranscriptApi
+from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
+from urllib.parse import urlparse, parse_qs
+import json
+import whisper
+import re
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+@tool
+def transcribe_audio_file(file_path: str) -> str:
+    """
+    Transcribes a local MP3 audio file using Whisper.
+    Args:
+        file_path: Full path to the .mp3 audio file.
+    Returns:
+        A JSON-formatted string containing either the transcript or an error message.
+        {
+            "success": true,
+            "transcript": [
+                {"start": 0.0, "end": 5.2, "text": "Hello and welcome"},
+                ...
+            ]
+        }
+        OR
+        {
+            "success": false,
+            "error": "Reason why transcription failed"
+        }
+    """
+    # NOTE(review): the docstring above is presumably consumed by the @tool
+    # decorator as the tool description, so it is kept verbatim.
+    try:
+        # Validate the input up front; every failure is reported as JSON, never raised.
+        if not os.path.exists(file_path):
+            return json.dumps({"success": False, "error": "File does not exist."})
+        is_mp3 = file_path.lower().endswith(".mp3")
+        if not is_mp3:
+            return json.dumps({"success": False, "error": "Invalid file type. Only MP3 files are supported."})
+        # Other Whisper sizes: 'tiny', 'base', 'small', 'medium', or 'large'.
+        whisper_model = whisper.load_model("base")
+        transcription = whisper_model.transcribe(file_path, verbose=False, word_timestamps=False)
+        # Keep only the timing and the trimmed text of each segment.
+        segments = []
+        for seg in transcription["segments"]:
+            segments.append(
+                {
+                    "start": seg["start"],
+                    "end": seg["end"],
+                    "text": seg["text"].strip(),
+                }
+            )
+        payload = {"success": True, "transcript": segments}
+        return json.dumps(payload)
+    except Exception as exc:
+        # Deliberate catch-all: the calling agent expects a JSON error, not a traceback.
+        return json.dumps({"success": False, "error": str(exc)})
+@tool
+def get_youtube_transcript(video_url: str) -> str:
+    """
+    Retrieves the transcript from a YouTube video URL, including timestamps.
+    This tool fetches the English transcript for a given YouTube video. Automatically generated subtitles
+    are also supported. The result includes each snippet's start time, duration, and text.
+    Both standard watch URLs and short/embedded forms (youtu.be/<id>, /shorts/<id>, /embed/<id>, /live/<id>)
+    are accepted.
+    Args:
+        video_url: The full URL of the YouTube video (e.g., https://www.youtube.com/watch?v=12345)
+    Returns:
+        A JSON-formatted string containing either the transcript with timestamps or an error message.
+        {
+            "success": true,
+            "transcript": [
+                {"start": 0.0, "duration": 1.54, "text": "Hey there"},
+                {"start": 1.54, "duration": 4.16, "text": "how are you"},
+                ...
+            ]
+        }
+        OR
+        {
+            "success": false,
+            "error": "Reason why the transcript could not be retrieved"
+        }
+    """
+    try:
+        # Reject non-string / empty input with the same message as an unparsable URL.
+        if not isinstance(video_url, str) or not video_url:
+            return json.dumps({"success": False, "error": "Invalid YouTube URL. Could not extract video ID."})
+        # Extract video ID from URL: the `v` query parameter takes precedence
+        # (standard watch URLs), exactly as before.
+        parsed_url = urlparse(video_url)
+        query_params = parse_qs(parsed_url.query)
+        video_id = query_params.get("v", [None])[0]
+        if not video_id:
+            # Fall back to path-based forms, which carry the ID in the URL path.
+            host = (parsed_url.hostname or "").lower()
+            path_parts = [part for part in parsed_url.path.split("/") if part]
+            if path_parts and (host == "youtu.be" or host.endswith(".youtu.be")):
+                # https://youtu.be/<id>
+                video_id = path_parts[0]
+            elif len(path_parts) > 1 and path_parts[0] in ("embed", "shorts", "live", "v"):
+                # https://www.youtube.com/embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
+                video_id = path_parts[1]
+        if not video_id:
+            return json.dumps({"success": False, "error": "Invalid YouTube URL. Could not extract video ID."})
+        fetched_transcript = YouTubeTranscriptApi().fetch(video_id)
+        transcript_data = [
+            {
+                "start": snippet.start,
+                "duration": snippet.duration,
+                "text": snippet.text
+            }
+            for snippet in fetched_transcript
+        ]
+        return json.dumps({"success": True, "transcript": transcript_data})
+    except VideoUnavailable:
+        return json.dumps({"success": False, "error": "The video is unavailable."})
+    except TranscriptsDisabled:
+        return json.dumps({"success": False, "error": "Transcripts are disabled for this video."})
+    except NoTranscriptFound:
+        return json.dumps({"success": False, "error": "No transcript found for this video."})
+    except Exception as e:
+        # Deliberate catch-all: the calling agent expects a JSON error, not a traceback.
+        return json.dumps({"success": False, "error": str(e)})
+# --- Basic Agent Definition ---
+# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
+        """Create the CodeAgent used to answer questions.
+
+        Reads the OpenAI API key from the OPENAI_API_KEY environment variable and
+        loads the prompt templates from `code_agent.yaml` in the `prompts` package.
+        """
+        # `import importlib` alone does not guarantee that the `importlib.resources`
+        # submodule has been loaded, so import it explicitly before using it.
+        from importlib import resources as importlib_resources
+
+        model = OpenAIServerModel(api_key=os.environ.get("OPENAI_API_KEY"), model_id="gpt-4o")
+        # Read the YAML as UTF-8 explicitly instead of relying on the platform default encoding.
+        prompt_templates = yaml.safe_load(
+            importlib_resources.files("prompts").joinpath("code_agent.yaml").read_text(encoding="utf-8")
+        )
+        self.code_agent = CodeAgent(
+            tools=[PythonInterpreterTool(), DuckDuckGoSearchTool(), VisitWebpageTool(), transcribe_audio_file,
+                   get_youtube_transcript,
+                   FinalAnswerTool()],
+            model=model,
+            max_steps=20,
+            name="hf_agent_course_final_assignment_solver",
+            prompt_templates=prompt_templates,
+        )
         print("BasicAgent initialized.")
+    def __call__(self, task_id: str, question: str, file_name: str) -> str:
+        """Run the agent on one question and return the cleaned-up answer text.
+
+        When `file_name` is non-empty, the question is first extended with the
+        details of the file associated with `task_id`. If the agent output
+        contains a "final answer:" marker (any letter case), only the text after
+        the first marker is returned; otherwise the whole output is returned.
+        Surrounding whitespace is stripped in both cases.
+        """
+        prompt = question
+        if file_name:
+            prompt = self.enrich_question_with_associated_file_details(task_id, question, file_name)
+        raw_output = str(self.code_agent.run(prompt))
+        # DOTALL lets the captured answer span multiple lines.
+        marker = re.search(r'final answer:\s*(.*)', raw_output, re.IGNORECASE | re.DOTALL)
+        answer = marker.group(1) if marker else raw_output
+        return answer.strip()
+    def enrich_question_with_associated_file_details(self, task_id:str, question: str, file_name: str) -> str:
+        api_url = DEFAULT_API_URL
+        get_associated_files_url = f"{api_url}/files/{task_id}"
+        response = requests.get(get_associated_files_url, timeout=15)
+        response.raise_for_status()
+        if file_name.endswith(".mp3"):
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+                tmp_file.write(response.content)
+                file_path = tmp_file.name
+                return question + "\n\nMentioned .mp3 file local path is: " + file_path
+        elif file_name.endswith(".py"):
+            file_content = response.text
+            return question + "\n\nBelow is mentioned Python file:\n\n```python\n" + file_content + "\n```\n"
+        elif file_name.endswith(".xlsx"):
+            xlsx_io = BytesIO(response.content)
+            df = pd.read_excel(xlsx_io)
+            file_content = df.to_csv(index=False)
+            return question + "\n\nBelow is mentioned excel file in CSV format:\n\n```csv\n" + file_content + "\n```\n"
+        elif file_name.endswith(".png"):
+            base64_str = base64.b64encode(response.content).decode('utf-8')
+            return question + "\n\nBelow is the .png image in base64 format:\n\n```base64\n" + base64_str + "\n```\n"