wt002 committed on
Commit
0b4fcaa
·
verified ·
1 Parent(s): 6646d0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -57
app.py CHANGED
@@ -8,6 +8,22 @@ import tempfile
8
  from smolagents import CodeAgent, OpenAIServerModel, tool
9
  from dotenv import load_dotenv
10
  from openai import OpenAI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Load environment variables
13
  load_dotenv()
@@ -26,62 +42,18 @@ model = OpenAIServerModel(
26
  openAiClient = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
27
 
28
  @tool
29
- def VisitWebpageTool(Tool):
30
- name = "visit_webpage"
31
- description = (
32
- "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
33
- )
34
- inputs = {
35
- "url": {
36
- "type": "string",
37
- "description": "The url of the webpage to visit.",
38
- }
39
- }
40
- output_type = "string"
41
-
42
- def __init__(self, max_output_length: int = 40000):
43
- super().__init__()
44
- self.max_output_length = max_output_length
45
-
46
- def _truncate_content(self, content: str, max_length: int) -> str:
47
- if len(content) <= max_length:
48
- return content
49
- return (
50
- content[: max_length // 2]
51
- + f"\n..._This content has been truncated to stay below {max_length} characters_...\n"
52
- + content[-max_length // 2 :]
53
- )
54
-
55
- def forward(self, url: str) -> str:
56
- try:
57
- import re
58
-
59
- import requests
60
- from markdownify import markdownify
61
- from requests.exceptions import RequestException
62
- except ImportError as e:
63
- raise ImportError(
64
- "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
65
- ) from e
66
- try:
67
- # Send a GET request to the URL with a 20-second timeout
68
- response = requests.get(url, timeout=20)
69
- response.raise_for_status() # Raise an exception for bad status codes
70
-
71
- # Convert the HTML content to Markdown
72
- markdown_content = markdownify(response.text).strip()
73
-
74
- # Remove multiple line breaks
75
- markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
76
-
77
- return self._truncate_content(markdown_content, self.max_output_length)
78
-
79
- except requests.exceptions.Timeout:
80
- return "The request timed out. Please try again later or check the URL."
81
- except RequestException as e:
82
- return f"Error fetching the webpage: {str(e)}"
83
- except Exception as e:
84
- return f"An unexpected error occurred: {str(e)}"
85
 
86
 
87
  @tool
@@ -370,7 +342,13 @@ class BasicAgent:
370
  def __init__(self):
371
  print("BasicAgent initialized.")
372
  # Reuse a single CodeAgent instance for all queries
373
- self.agent = CodeAgent(tools=[tavily_search, analyze_image, analyze_sound, analyze_excel, analyze_text, transcribe_youtube, process_file], model=model)
 
 
 
 
 
 
374
 
375
  def __call__(self, question: str) -> str:
376
  print(f"Agent received question (first 50 chars): {question[:50]}...")
 
8
  from smolagents import CodeAgent, OpenAIServerModel, tool
9
  from dotenv import load_dotenv
10
  from openai import OpenAI
11
+ from markdownify import markdownify
12
+ from requests.exceptions import RequestException
13
+
14
+ from typing import Optional, List
15
+ from langchain_core.tools import BaseTool, tool
16
+ from langchain_community.tools import DuckDuckGoSearchResults
17
+ from langchain_experimental.tools import PythonREPLTool
18
+ import requests
19
+ from bs4 import BeautifulSoup
20
+ import markdownify
21
+ import pandas as pd
22
+ from io import BytesIO
23
+ #import pytesseract
24
+ from PIL import Image
25
+ from youtube_transcript_api import YouTubeTranscriptApi
26
+ import re
27
 
28
  # Load environment variables
29
  load_dotenv()
 
42
  openAiClient = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
43
 
44
  @tool
45
def arvix_search(query: str) -> str:
    """Search arXiv for a query and return at most 3 results as text.

    Each result is rendered as a ``<Document .../>`` header carrying the
    document's ``source`` and ``page`` metadata, followed by the first 1000
    characters of its content. Results are separated by ``---`` lines.

    Args:
        query: The search query.

    Returns:
        The formatted results joined into one string; empty when the loader
        yields no documents.
    """
    # Imported here because the file's import block does not bring
    # ArxivLoader into scope; langchain_community is already a dependency.
    from langchain_community.document_loaders import ArxivLoader

    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    rendered = []
    for doc in search_docs:
        # Both metadata fields are optional: a missing key becomes an empty
        # attribute instead of aborting the whole search with a KeyError.
        source = doc.metadata.get("source", "")
        page = doc.metadata.get("page", "")
        rendered.append(
            f'<Document source="{source}" page="{page}"/>\n'
            f"{doc.page_content[:1000]}\n</Document>"
        )

    # The signature promises ``str``, so return the text itself rather than
    # wrapping it in a dict.
    return "\n\n---\n\n".join(rendered)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
 
59
  @tool
 
342
  def __init__(self):
343
  print("BasicAgent initialized.")
344
  # Reuse a single CodeAgent instance for all queries
345
+ self.agent = CodeAgent(tools=[arvix_search,
346
+ analyze_image,
347
+ analyze_sound,
348
+ analyze_excel,
349
+ analyze_text,
350
+ transcribe_youtube,
351
+ process_file], model=model)
352
 
353
  def __call__(self, question: str) -> str:
354
  print(f"Agent received question (first 50 chars): {question[:50]}...")