Final_Assignment_Project

Sleeping

App Files Files Community

wt002 committed on May 15

Commit

2c66cb4

verified ·

1 Parent(s): 73ff364

Update app.py

Browse files

Files changed (1) hide show

app.py +170 -15

app.py CHANGED Viewed

@@ -1,34 +1,189 @@
 import os
 import gradio as gr
-from dotenv import load_dotenv
-import inspect
 import requests
 import pandas as pd
-from langchain_core.messages import HumanMessage
-from agent import build_graph
-load_dotenv()
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
 class BasicAgent:
-    """A langgraph agent."""
     def __init__(self):
-        print("BasicAgent initialized.")
-        self.graph = build_graph()
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        # Wrap the question in a HumanMessage from langchain_core
-        messages = [HumanMessage(content=question)]
-        messages = self.graph.invoke({"messages": messages})
-        answer = messages['messages'][-1].content
-        return answer[14:]
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

 import os
 import gradio as gr
 import requests
+import inspect
 import pandas as pd
+from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, WikipediaSearchTool, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
+from dotenv import load_dotenv
+import heapq
+from collections import Counter
+import re
+from io import BytesIO
+from youtube_transcript_api import YouTubeTranscriptApi
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+#Load environment variables
+load_dotenv()
class StringReverseTool(Tool):
    """Tool that hands back its input text with the characters in reverse order."""

    name = "reverse_message"
    description = "When you received a strange text, try to reverse it and perform action described in reversed message."
    inputs = {
        "message": {
            "type": "string",
            "description": "A message, which looks like strange and can be reversed to get actions to execute."
        }
    }
    output_type = "string"

    # Deliberately no custom __init__: an override that returns without calling
    # super().__init__() skips whatever state the base Tool initializer sets up.

    def forward(self, message: str) -> str:
        """Return ``message`` reversed character by character."""
        return message[::-1]
class KeywordsExtractorTool(Tool):
    """Extracts top 5 keywords from a given text based on frequency."""

    name = "keywords_extractor"
    description = "This tool returns the 5 most frequent keywords occur in provided block of text."
    inputs = {
        "text": {
            "type": "string",
            "description": "Text to analyze for keywords.",
        }
    }
    output_type = "string"

    def forward(self, text: str) -> str:
        """Return the five most frequent words of ``text`` as one string.

        Words are matched case-insensitively and a small set of stop words is
        ignored. The result looks like ``"word (count), other (count)"``,
        ordered from most to least frequent; it is empty when no word remains.
        If ``text`` is not a string, an error message is returned instead of
        raising.
        """
        stop_words = {'a', 'and', 'of', 'is', 'in', 'to', 'the'}
        top_k = 5
        try:
            all_words = re.findall(r'\b\w+\b', text.lower())
        except (AttributeError, TypeError) as e:
            # Non-string input: keep the tool best-effort and report the problem.
            return f"Error during extracting most common words: {e}"
        word_counts = Counter(w for w in all_words if w not in stop_words)
        # The tool declares a string output, so format the pairs rather than
        # returning the raw list of tuples.
        return ", ".join(
            f"{word} ({count})" for word, count in word_counts.most_common(top_k)
        )
@tool
def parse_excel_to_json(task_id: str) -> dict:
    """
    For a given task_id fetch the attached Excel file and return its sheets as JSON-like records.
    Only the first 20 rows of each sheet that are not completely empty are returned. Nothing is written to disk.

    Args:
        task_id: An task ID to fetch.

    Returns:
        {
            "task_id": str,
            "sheets": {
                "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
                ...
            },
            "status": "Success" | error description
        }
    """
    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    try:
        response = requests.get(url, timeout=100)
        if response.status_code != 200:
            return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}

        # The context manager closes the workbook reader once every sheet is parsed.
        with pd.ExcelFile(BytesIO(response.content)) as workbook:
            json_sheets = {
                sheet: (
                    workbook.parse(sheet)
                    .dropna(how="all")  # discard rows where every cell is empty
                    .head(20)           # cap the payload handed back to the agent
                    .to_dict(orient="records")
                )
                for sheet in workbook.sheet_names
            }

        return {
            "task_id": task_id,
            "sheets": json_sheets,
            "status": "Success"
        }
    except Exception as e:
        # Tool boundary: report download/parsing failures in the status field
        # instead of raising into the agent loop.
        return {
            "task_id": task_id,
            "sheets": {},
            "status": f"Error in parsing Excel file: {str(e)}"
        }
class VideoTranscriptionTool(Tool):
    """Fetch transcripts from YouTube videos"""

    name = "transcript_video"
    description = "Fetch text transcript from YouTube movies with optional timestamps"
    inputs = {
        "url": {"type": "string", "description": "YouTube video URL or ID"},
        "include_timestamps": {"type": "boolean", "description": "If timestamps should be included in output", "nullable": True}
    }
    output_type = "string"

    def forward(self, url: str, include_timestamps: bool = False) -> str:
        """Return the transcript of the video identified by ``url``.

        ``url`` may be a ``youtube.com/watch`` URL, a ``youtu.be/`` short URL,
        or a bare 11-character video ID. With ``include_timestamps`` each
        transcript part is emitted on its own line prefixed by ``[m:ss]``;
        otherwise the parts are joined with spaces. Invalid input and
        transcript-fetch failures are reported as a message string rather
        than raised.
        """
        invalid_message = f"YouTube URL or ID: {url} is invalid!"

        if "youtube.com/watch" in url:
            # partition() instead of split()[1]: a watch URL without "v="
            # must yield the invalid-URL message, not an IndexError.
            _, marker, remainder = url.partition("v=")
            if not marker:
                return invalid_message
            video_id = remainder.split("&")[0]
        elif "youtu.be/" in url:
            video_id = url.split("youtu.be/")[1].split("?")[0]
        elif len(url.strip()) == 11:  # Direct ID
            video_id = url.strip()
        else:
            return invalid_message

        if not video_id:
            return invalid_message

        try:
            # NOTE(review): get_transcript is called as a class-level API here;
            # confirm it matches the installed youtube-transcript-api version.
            transcription = YouTubeTranscriptApi.get_transcript(video_id)
            if include_timestamps:
                formatted_transcription = []
                for part in transcription:
                    minutes, seconds = divmod(part['start'], 60)
                    timestamp = f"{int(minutes)}:{int(seconds):02d}"
                    formatted_transcription.append(f"[{timestamp}] {part['text']}")
                return "\n".join(formatted_transcription)
            return " ".join(part['text'] for part in transcription)
        except Exception as e:
            return f"Error in extracting YouTube transcript: {str(e)}"
 class BasicAgent:
     def __init__(self):
+        token = os.environ.get("HF_API_TOKEN")
+        model = HfApiModel(
+            temperature=0.1,
+            token=token
+        )
+        search_tool = DuckDuckGoSearchTool()
+        wiki_search_tool = WikipediaSearchTool()
+        str_reverse_tool = StringReverseTool()
+        keywords_extract_tool = KeywordsExtractorTool()
+        speech_to_text_tool = SpeechToTextTool()
+        visit_webpage_tool = VisitWebpageTool()
+        final_answer_tool = FinalAnswerTool()
+        video_transcription_tool = VideoTranscriptionTool()
+        system_prompt = f"""
+You are my general AI assistant. Your task is to answer the question I asked.
+First, provide an explanation of your reasoning, step by step, to arrive at the answer.
+Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
+[YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
+If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
+If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
+If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
+"""
+        self.agent = CodeAgent(
+            model=model,
+            tools=[search_tool, wiki_search_tool, str_reverse_tool, keywords_extract_tool, speech_to_text_tool, visit_webpage_tool, final_answer_tool, parse_excel_to_json, video_transcription_tool],
+            add_base_tools=True
+        )
+        self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
+        answer = self.agent.run(question)
+        print(f"Agent returning answer: {answer}")
+        return answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """