Final_Assignment_Project

Sleeping

App Files Files Community

wt002 committed 27 days ago

Commit

6c9430d

verified ·

1 Parent(s): f57a425

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -35

app.py CHANGED Viewed

@@ -231,41 +231,73 @@ def parse_excel_to_json(task_id: str) -> dict:
class VideoTranscriptionTool(Tool):
    """Fetch the transcript of a YouTube video as plain text.

    Accepts a ``youtube.com/watch`` URL, a ``youtu.be`` short link, or a bare
    11-character video ID. Every failure (unrecognised URL, transcript lookup
    error) is reported as a message string instead of being raised.
    """
    name = "transcript_video"
    description = "Fetch text transcript from YouTube movies with optional timestamps"
    inputs = {
        "url": {"type": "string", "description": "YouTube video URL or ID"},
        "include_timestamps": {"type": "boolean", "description": "If timestamps should be included in output", "nullable": True}
    }
    output_type = "string"

    @staticmethod
    def _extract_video_id(url: str) -> "str | None":
        """Return the video ID found in *url*, or None when none can be found."""
        candidate = url.strip()
        if "youtube.com/watch" in candidate:
            _, marker, tail = candidate.partition("v=")
            if not marker:
                # A watch URL without a ``v=`` parameter carries no video ID.
                return None
        elif "youtu.be/" in candidate:
            tail = candidate.split("youtu.be/", 1)[1]
        elif len(candidate) == 11:  # Direct ID
            return candidate
        else:
            return None
        # Drop anything after the ID: further query parameters, a fragment,
        # or extra path segments.
        for delimiter in "&?#/":
            tail = tail.split(delimiter, 1)[0]
        return tail or None

    def forward(self, url: str, include_timestamps: bool = False) -> str:
        """Return the transcript for *url*.

        Args:
            url: YouTube watch URL, short link, or 11-character video ID.
            include_timestamps: When true, each transcript part is placed on
                its own line prefixed with ``[minutes:seconds]``; otherwise
                the parts are joined with single spaces.

        Returns:
            The transcript text, or an error message string on failure.
        """
        video_id = self._extract_video_id(url)
        if video_id is None:
            return f"YouTube URL or ID: {url} is invalid!"
        try:
            transcription = YouTubeTranscriptApi.get_transcript(video_id)
            if include_timestamps:
                formatted_transcription = []
                for part in transcription:
                    timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
                    formatted_transcription.append(f"[{timestamp}] {part['text']}")
                return "\n".join(formatted_transcription)
            return " ".join(part['text'] for part in transcription)
        except Exception as e:
            # Best-effort tool: report the failure to the caller as text.
            return f"Error in extracting YouTube transcript: {str(e)}"
@@ -277,7 +309,7 @@ from io import BytesIO
 from smolagents import (
     CodeAgent,
     ToolCallingAgent,
-    InferenceClientModel,
     WebSearchTool,
     HfApiModel,
     DuckDuckGoSearchTool,
@@ -285,15 +317,61 @@ from smolagents import (
     tool
 )
-# Configure Gemini
-genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 # Define image analysis tool
@tool
def image_analysis(image_url: str) -> str:
    """
    Describe the content of an image using the hosted llava-1.5-7b-hf model.

    Args:
        image_url: URL of the image to analyse.

    Returns:
        The 'generated_text' field of the first element in the API response.

    Raises:
        requests.exceptions.RequestException: If the request fails, times out,
            or the API answers with a 4xx/5xx status.
    """
    API_URL = "https://api-inference.huggingface.co/models/llava-hf/llava-1.5-7b-hf"
    # A timeout keeps a stalled inference endpoint from blocking forever.
    response = requests.post(API_URL, json={"inputs": image_url}, timeout=60)
    # Surface HTTP errors directly instead of failing later while indexing
    # an error payload.
    response.raise_for_status()
    return response.json()[0]['generated_text']
 class BasicAgent:
@@ -313,6 +391,8 @@ class BasicAgent:
         visit_webpage_tool = VisitWebpageTool()
         final_answer_tool = FinalAnswerTool()
         video_transcription_tool = VideoTranscriptionTool()
         code_llama_tool = CodeLlamaTool()
         system_prompt = f"""

+import os
+from langchain_community.document_loaders import PyMuPDFLoader
+from docx import Document as DocxDocument
+import openpyxl
class AnalyseAttachmentTool:
    """
    Extract the text content of a local attachment file.

    Supported extensions (matched case-insensitively): .py, .txt, .pdf,
    .docx and .xlsx. The result is truncated to ``MAX_CHARS`` characters.
    Problems (missing file, unsupported extension, parsing errors) are
    returned as message strings rather than raised.
    """
    name = "analyze_attachment"
    description = (
        "Analyzes attachments including PY, PDF, TXT, DOCX, and XLSX files and returns text content. "
        "Useful for understanding the content of various document types. "
        "The output is limited to the first 3000 characters for readability."
    )
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Local path to the attachment file (e.g., 'documents/report.pdf').",
        }
    }
    # Same attribute/entry-point convention as VideoTranscriptionTool.
    output_type = "string"

    # Maximum number of characters returned, as promised in ``description``.
    MAX_CHARS = 3000

    @staticmethod
    def _read_xlsx(file_path: str) -> str:
        """Return every sheet of the workbook as tab-separated text lines."""
        wb = openpyxl.load_workbook(file_path, data_only=True)
        pieces = []
        for sheet in wb:
            pieces.append(f"Sheet: {sheet.title}\n")
            pieces.extend(
                "\t".join("" if cell is None else str(cell) for cell in row) + "\n"
                for row in sheet.iter_rows(values_only=True)
            )
        # Join once instead of growing a string with ``+=`` per row.
        return "".join(pieces)

    def _run(self, file_path: str) -> str:
        """
        Read *file_path* and return its text, truncated to ``MAX_CHARS``.

        Args:
            file_path: Local path of the file to read.

        Returns:
            The extracted text, or a message string describing why the file
            could not be processed.
        """
        if not os.path.exists(file_path):
            return f"File not found: {file_path}"
        try:
            ext = os.path.splitext(file_path)[1].lower()
            if ext == ".pdf":
                documents = PyMuPDFLoader(file_path).load()
                content = "\n\n".join(doc.page_content for doc in documents)
            elif ext in (".txt", ".py"):
                with open(file_path, "r", encoding="utf-8") as file:
                    # Only the first MAX_CHARS characters are ever returned,
                    # so do not load the rest of a large file.
                    content = file.read(self.MAX_CHARS)
            elif ext == ".docx":
                doc = DocxDocument(file_path)
                content = "\n".join(para.text for para in doc.paragraphs)
            elif ext == ".xlsx":
                content = self._read_xlsx(file_path)
            else:
                return "Unsupported file format. Please use PY, PDF, TXT, DOCX, or XLSX."
            return content[:self.MAX_CHARS]
        except Exception as e:
            # Best-effort tool: report the failure to the caller as text.
            return f"An error occurred while processing the file: {str(e)}"

    def forward(self, file_path: str) -> str:
        """Alias for ``_run`` under the ``forward`` name used by the other tools."""
        return self._run(file_path)

    def __call__(self, file_path: str) -> str:
        """
        Makes the instance callable directly, invoking the _run method.
        """
        return self._run(file_path)
 from smolagents import (
     CodeAgent,
     ToolCallingAgent,
+    #InferenceClientModel,
     WebSearchTool,
     HfApiModel,
     DuckDuckGoSearchTool,
     tool
 )
 # Define image analysis tool
+import requests
class ImageAnalysisTool:
    """
    A tool for analyzing images using a hosted Hugging Face model.

    The image URL is posted to ``API_URL`` and the 'generated_text' of the
    first response element is returned. Request failures and unexpected
    response shapes are returned as message strings rather than raised.
    """
    name = "image_analysis"
    description = (
        "Analyzes an image provided via a URL and returns a textual description of its content. "
        "This tool is useful for understanding the visual content of an image."
    )
    inputs = {
        "image_url": {
            "type": "string",
            "description": "The URL of the image to be analyzed (e.g., 'https://example.com/image.jpg').",
        }
    }
    # Same attribute/entry-point convention as VideoTranscriptionTool.
    output_type = "string"

    # Inference endpoint queried by ``_run``.
    API_URL = "https://api-inference.huggingface.co/models/llava-hf/llava-1.5-7b-hf"
    # Seconds to wait for the endpoint; without a timeout a stalled request
    # would block indefinitely.
    REQUEST_TIMEOUT = 60

    @staticmethod
    def _describe(payload, raw_text: str) -> str:
        """
        Pull the description out of a decoded API response.

        Args:
            payload: The decoded JSON body of the response.
            raw_text: The raw response body, echoed back when the payload
                does not have the expected shape.

        Returns:
            ``payload[0]['generated_text']`` when the payload is a non-empty
            list whose first element is a dict holding that key; otherwise an
            "Unexpected API response format" message.
        """
        if isinstance(payload, list) and payload:
            first = payload[0]
            # Require a dict so that ``in`` is a key lookup, not a substring
            # test on a string element.
            if isinstance(first, dict) and "generated_text" in first:
                return first["generated_text"]
        return f"Unexpected API response format: {raw_text}"

    def _run(self, image_url: str) -> str:
        """
        Executes the image analysis by sending the image URL to the Hugging Face API.

        Args:
            image_url: URL of the image to analyse.

        Returns:
            The generated description, or a message string describing the
            failure.
        """
        try:
            response = requests.post(
                self.API_URL,
                json={"inputs": image_url},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
            # Decode the body once and validate its shape in one place.
            return self._describe(response.json(), response.text)
        except requests.exceptions.RequestException as e:
            return f"An error occurred during the API request: {e}"
        except Exception as e:
            return f"An unexpected error occurred: {e}"

    def forward(self, image_url: str) -> str:
        """Alias for ``_run`` under the ``forward`` name used by the other tools."""
        return self._run(image_url)

    def __call__(self, image_url: str) -> str:
        """
        Makes the instance callable directly, invoking the _run method for convenience.
        """
        return self._run(image_url)
 class BasicAgent:
         visit_webpage_tool = VisitWebpageTool()
         final_answer_tool = FinalAnswerTool()
         video_transcription_tool = VideoTranscriptionTool()
+        Image_Analysis_Tool = ImageAnalysisTool()
+        Analyse_Attachment_Tool = AnalyseAttachmentTool()
         code_llama_tool = CodeLlamaTool()
         system_prompt = f"""