wt002 committed
Commit 25e901d · verified · 1 Parent(s): df67b8a

Update app.py

Files changed (1)
  1. app.py +52 -54
app.py CHANGED
@@ -3,13 +3,13 @@ from dotenv import load_dotenv
 import gradio as gr
 import requests
 
-import os
-import requests
 from typing import List, Dict, Union
 import pandas as pd
 import wikipediaapi
-from serpapi import GoogleSearch
-from typing import List, Dict, Optional
+from bs4 import BeautifulSoup
+import urllib.parse
+from typing import List, Dict
+import fake_useragent  # For realistic user-agent rotation
 
 load_dotenv()
 
@@ -21,76 +21,74 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 
 class BasicAgent:
-    def __init__(self, api_key: str = None):
-        self.api_key = api_key or os.getenv("SERP_API_KEY")
-        if not self.api_key:
-            raise ValueError("Missing SERPAPI_API_KEY. Get one at https://serpapi.com/")
-        print("SerpAPI Agent initialized")
-
-    def search(self, query: str, num_results: int = 3) -> List[Dict]:
-        """Execute search and return structured results"""
-        params = {
-            "q": query,
-            "api_key": self.api_key,
-            "num": num_results,
-            "hl": "en",  # Language: English
-            "gl": "us"   # Country: United States
+    def __init__(self):
+        self.user_agent = fake_useragent.UserAgent().random
+        self.headers = {
+            'User-Agent': self.user_agent,
+            'Accept-Language': 'en-US,en;q=0.5',
         }
+        print("BasicAgent initialized with User-Agent:", self.user_agent[:50] + "...")
 
+    def search(self, query: str, num_results: int = 3) -> List[Dict]:
+        """Perform a Google search and return structured results"""
+        encoded_query = urllib.parse.quote_plus(query)
+        url = f"https://www.google.com/search?q={encoded_query}&num={num_results + 2}"  # +2 for buffer
+
         try:
-            search = GoogleSearch(params)
-            results = search.get_dict()
-            return self._format_results(results)
+            response = requests.get(url, headers=self.headers, timeout=10)
+            response.raise_for_status()
+            return self._parse_results(response.text, num_results)
         except Exception as e:
             print(f"Search failed: {str(e)}")
             return []
 
-    def _format_results(self, raw_results: Dict) -> List[Dict]:
-        """Extract and format organic results"""
-        formatted = []
-        for result in raw_results.get("organic_results", []):
-            formatted.append({
-                "position": result.get("position"),
-                "title": result.get("title"),
-                "link": result.get("link"),
-                "snippet": result.get("snippet"),
-                "source": result.get("source")
-            })
-        return formatted
-
-    def __call__(self, query: str) -> str:
-        """Callable interface that returns a string"""
-        results = self.search(query)
-        if not results:
-            return "No results found"
+    def _parse_results(self, html: str, max_results: int) -> List[Dict]:
+        """Parse HTML and extract search results"""
+        soup = BeautifulSoup(html, 'html.parser')
+        results = []
+
+        # Main result blocks (class names may change - this works as of July 2024)
+        for i, result in enumerate(soup.select('.tF2Cxc')[:max_results]):
+            title = result.select_one('h3')
+            link = result.find('a')  # keep the tag itself so blocks without an anchor can be skipped
+            snippet = result.select_one('.IsZvec')
+
+            if title and link:
+                results.append({
+                    'position': i + 1,
+                    'title': title.get_text(),
+                    'link': link['href'],
+                    'snippet': snippet.get_text() if snippet else None
+                })
+
+        return results
+
+    def pretty_print(self, results: List[Dict]) -> str:
+        """Format results for human-readable output"""
         output = []
         for res in results:
             output.append(
                 f"{res['position']}. {res['title']}\n"
                 f"   {res['link']}\n"
-                f"   {res['snippet']}\n"
-                f"   Source: {res['source']}"
+                f"   {res['snippet'] or 'No description available'}\n"
             )
-        return "\n\n".join(output)
-
+        return "\n".join(output)
 
 # Usage Example
 if __name__ == "__main__":
-    # Initialize with API key (or set SERPAPI_API_KEY environment variable)
-    agent = BasicAgent()
+    scraper = BasicAgent()
 
-    # Perform search
-    query = "What is Python programming language?"
-    print(f"Searching for: {query}")
+    # Search for Python programming
+    query = "Python programming language"
+    print(f"Searching Google for: '{query}'")
 
-    # Option 1: Get structured data
-    structured_results = agent.search(query)
-    print("\nStructured Results:", structured_results[0])  # Print first result
+    results = scraper.search(query)
 
-    # Option 2: Get printable string
-    printable_results = agent(query)
-    print("\nFormatted Results:\n", printable_results)
+    if results:
+        print("\nTop Results:")
+        print(scraper.pretty_print(results))
+    else:
+        print("No results found or search failed")
 
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
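
Note that the new _parse_results is tightly coupled to Google's current markup (.tF2Cxc result blocks, h3 titles, .IsZvec snippets), which changes without notice. Below is a minimal sketch for sanity-checking the parser offline against canned HTML, assuming the selectors used in the diff above; the fixture markup is hypothetical test data, not captured Google output, and instantiating BasicAgent still requires the fake-useragent package:

    # Hypothetical fixture that mirrors the selectors _parse_results expects.
    CANNED_HTML = """
    <div class="tF2Cxc">
      <a href="https://www.python.org/"><h3>Welcome to Python.org</h3></a>
      <div class="IsZvec">The official home of the Python programming language.</div>
    </div>
    """

    agent = BasicAgent()
    parsed = agent._parse_results(CANNED_HTML, max_results=3)
    # Expect exactly one result built from the fixture above.
    assert parsed and parsed[0]['title'] == "Welcome to Python.org"
    print(parsed)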
 
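
Since the hard-coded .tF2Cxc selector is the part most likely to rot, one option is to try a short list of known result-block selectors and fall back to the one fairly stable pattern, an h3 wrapped in an anchor. A sketch under that assumption; the selector list is illustrative, not exhaustive, and would need re-checking against live pages:

    from typing import List
    from bs4 import BeautifulSoup

    # Illustrative candidates only; Google rotates these class names over time.
    RESULT_BLOCK_SELECTORS = ['.tF2Cxc', '.g', '.MjjYud']

    def find_result_blocks(html: str) -> List:
        soup = BeautifulSoup(html, 'html.parser')
        for selector in RESULT_BLOCK_SELECTORS:
            blocks = soup.select(selector)
            if blocks:
                return blocks
        # Last resort: treat each anchor that wraps an h3 as a result block.
        return [h3.find_parent('a') for h3 in soup.select('a h3')]

_parse_results could then iterate over find_result_blocks(html) instead of soup.select('.tF2Cxc'), leaving the rest of the extraction unchanged.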