Commit 7c40d5d (verified), committed by wt002
1 parent: 81bbafd

Update app.py
Files changed (1):
  1. app.py +115 -54
app.py CHANGED
@@ -7,11 +7,12 @@ from typing import List, Dict, Union, Optional
 import pandas as pd
 import wikipediaapi
 import requests
-from bs4 import BeautifulSoup
+#from bs4 import BeautifulSoup
 import random
 import re
 from typing import Optional
 from datetime import datetime
+import google.generativeai as genai
 
 load_dotenv()
 
@@ -22,65 +23,125 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 # --- Basic Agent Definition ---
 
-import requests
-from bs4 import BeautifulSoup
-
 class BasicAgent:
-    def __init__(self):
+    def __init__(self, model_name: str = "gemini-pro"):
+        """
+        Multi-modal agent powered by Google Gemini with:
+        - Web search
+        - Wikipedia access
+        - Document processing
+        """
+        self.model = genai.GenerativeModel(model_name)
+        self.wiki = wikipediaapi.Wikipedia('en')
+        self.searx_url = "https://searx.space/search" # Public Searx instance
+
         print("BasicAgent initialized.")
 
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question: {question[:50]}...")
-        answer = self.basic_search(question)
-        print(f"Answer: {answer[:200]}...") # Truncate long answers
-        return answer
-
-    def basic_search(self, query):
+    def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        fixed_answer = self.process_request(question)
+        print(f"Agent returning answer: {fixed_answer}")
+        return fixed_answer
+
+    def generate_response(self, prompt: str) -> str:
+        """Get response from Gemini"""
+        try:
+            response = self.model.generate_content(prompt)
+            return response.text
+        except Exception as e:
+            return f"Error generating response: {str(e)}"
+
+    def web_search(self, query: str) -> List[Dict]:
+        """Use SearxNG meta-search engine"""
+        params = {
+            "q": query,
+            "format": "json",
+            "engines": "google,bing,duckduckgo"
+        }
+        try:
+            response = requests.get(self.searx_url, params=params)
+            response.raise_for_status()
+            return response.json().get("results", [])
+        except requests.RequestException:
+            return []
+
+    def wikipedia_search(self, query: str) -> str:
+        """Get Wikipedia summary"""
+        page = self.wiki.page(query)
+        return page.summary if page.exists() else "No Wikipedia page found"
+
+    def process_document(self, file_path: str) -> str:
+        """Handle PDF, Word, CSV, Excel files"""
+        if not os.path.exists(file_path):
+            return "File not found"
+
+        ext = os.path.splitext(file_path)[1].lower()
+
         try:
-            # DuckDuckGo search URL
-            url = "https://html.duckduckgo.com/html/"
-
-            # Headers to mimic a browser request
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0',
-                'Accept': 'text/html,application/xhtml+xml',
-                'Accept-Language': 'en-US,en;q=0.5',
-            }
-
-            # Search parameters
-            params = {
-                'q': query,
-                'kl': 'us-en'
-            }
-
-            # Make the POST request
-            response = requests.post(url, headers=headers, data=params)
-            response.raise_for_status() # Raise exception for bad status codes
-
-            # Parse the HTML response
-            soup = BeautifulSoup(response.text, 'html.parser')
-
-            # Find all search results
-            results = soup.find_all('div', class_='result')
-
-            if not results:
-                return "No results found for your query."
-
-            # Prepare the answer with top 3 results
-            answer = "Here are the top search results:\n\n"
-            for i, result in enumerate(results[:3], 1): # Limit to 3 results
-                title = result.find('a', class_='result__a').get_text(strip=True)
-                link = result.find('a', class_='result__a')['href']
-                snippet = result.find('a', class_='result__snippet').get_text(strip=True) if result.find('a', class_='result__snippet') else "No description available"
-
-                answer += f"{i}. {title}\n URL: {link}\n Description: {snippet}\n\n"
-
-            return answer
-
-        except requests.exceptions.RequestException as e:
-            return f"Failed to complete the search request: {str(e)}"
+            if ext == '.pdf':
+                return self._process_pdf(file_path)
+            elif ext in ('.doc', '.docx'):
+                return self._process_word(file_path)
+            elif ext == '.csv':
+                return pd.read_csv(file_path).to_string()
+            elif ext in ('.xls', '.xlsx'):
+                return pd.read_excel(file_path).to_string()
+            else:
+                return "Unsupported file format"
         except Exception as e:
-            return f"An unexpected error occurred: {str(e)}"
+            return f"Error processing document: {str(e)}"
+
+    def _process_pdf(self, file_path: str) -> str:
+        """Process PDF using Gemini's vision capability"""
+        try:
+            # For Gemini 1.5 or later which supports file uploads
+            with open(file_path, "rb") as f:
+                file = genai.upload_file(f)
+                response = self.model.generate_content(
+                    ["Extract and summarize the key points from this document:", file]
+                )
+                return response.text
+        except:
+            # Fallback for older Gemini versions
+            try:
+                import PyPDF2
+                with open(file_path, 'rb') as f:
+                    reader = PyPDF2.PdfReader(f)
+                    return "\n".join([page.extract_text() for page in reader.pages])
+            except ImportError:
+                return "PDF processing requires PyPDF2 (pip install PyPDF2)"
+
+    def _process_word(self, file_path: str) -> str:
+        """Process Word documents"""
+        try:
+            from docx import Document
+            doc = Document(file_path)
+            return "\n".join([para.text for para in doc.paragraphs])
+        except ImportError:
+            return "Word processing requires python-docx (pip install python-docx)"
+
+    def process_request(self, request: Union[str, Dict]) -> str:
+        """
+        Handle different request types:
+        - Direct text queries
+        - File processing requests
+        - Complex multi-step requests
+        """
+        if isinstance(request, dict):
+            if 'steps' in request:
+                results = []
+                for step in request['steps']:
+                    if step['type'] == 'search':
+                        results.append(self.web_search(step['query']))
+                    elif step['type'] == 'process':
+                        results.append(self.process_document(step['file']))
+                return self.generate_response(f"Process these results: {results}")
+            return "Unsupported request format"
+
+        return self.generate_response(request)
+
 
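
A few follow-up notes on the new code. First, the agent constructs genai.GenerativeModel(model_name), but this commit never calls genai.configure(), so the Gemini client has no API key unless it is set up elsewhere. A minimal sketch of that wiring next to the existing load_dotenv() call; the GOOGLE_API_KEY variable name is an assumption, not something shown in this diff:

import os

import google.generativeai as genai
from dotenv import load_dotenv

load_dotenv()  # app.py already does this

# Assumed env var name; the commit does not show where the key actually comes from.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError("GOOGLE_API_KEY is not set; BasicAgent cannot reach Gemini")
genai.configure(api_key=api_key)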
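
Second, wikipediaapi.Wikipedia('en') only works with older releases of the wikipedia-api package; recent releases expect an explicit user agent and reject the bare language-only call. If the Space pins a current version, the constructor would likely need something closer to the sketch below (the user-agent string is an illustrative placeholder):

import wikipediaapi

# Recent wikipedia-api releases take a user_agent; the string here is a placeholder.
wiki = wikipediaapi.Wikipedia(
    user_agent="BasicAgent/0.1 (https://huggingface.co/spaces/<owner>/<space>)",
    language="en",
)

page = wiki.page("Python (programming language)")
print(page.summary[:200] if page.exists() else "No Wikipedia page found")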
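
Third, https://searx.space/search points at the public directory of SearXNG instances rather than at a search API, and many public instances also reject format=json, so web_search as committed will usually return an empty list. A hedged sketch of the same request against an instance with JSON output enabled; the URL below is a placeholder assumption, not a recommendation:

from typing import Dict, List

import requests

SEARX_URL = "https://searx.example.org/search"  # placeholder: a SearXNG instance with JSON enabled

def web_search(query: str) -> List[Dict]:
    """Query a SearXNG instance; return its result list, or [] on any failure."""
    params = {"q": query, "format": "json", "engines": "google,bing,duckduckgo"}
    try:
        response = requests.get(SEARX_URL, params=params, timeout=10)
        response.raise_for_status()
        return response.json().get("results", [])
    except (requests.RequestException, ValueError):
        # ValueError covers instances that answer with an HTML error page instead of JSON
        return []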
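
Finally, for anyone reviewing how the new entry points fit together: __call__ forwards a plain question to process_request, which either sends the text straight to Gemini or walks a 'steps' list that mixes web_search and process_document calls. A small usage sketch, assuming the API key has been configured as above; the file path is hypothetical:

agent = BasicAgent()

# Plain question: __call__ -> process_request -> generate_response (Gemini).
print(agent("What is the capital of France?"))

# Multi-step request: each step is dispatched by its 'type', then the collected
# results are handed back to Gemini for a final answer.
request = {
    "steps": [
        {"type": "search", "query": "latest Mars rover findings"},
        {"type": "process", "file": "reports/mars_summary.pdf"},  # hypothetical path
    ]
}
print(agent.process_request(request))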