Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -22,7 +22,7 @@ site_config = {
|
|
22 |
|
23 |
|
24 |
@tool
|
25 |
-
def get_latest_news(news_sites: List[str]) -> Dict[str,List[str]]:
|
26 |
"""
|
27 |
Tool returns news headlines from multiple news sites.
|
28 |
|
@@ -31,19 +31,26 @@ def get_latest_news(news_sites: List[str]) -> Dict[str,List[str]]:
|
|
31 |
|
32 |
Returns:
|
33 |
Dict[str, List[str]]: A dictionary where the keys are the news site URLs and the values are lists of headlines.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
"""
|
35 |
-
|
36 |
headlines = {}
|
37 |
for site in news_sites:
|
38 |
try:
|
39 |
-
config = site_config.get(site,{'tag': 'h2', 'class':'headline'})
|
40 |
response = requests.get(site)
|
41 |
response.raise_for_status()
|
42 |
soup = BeautifulSoup(response.content, 'html.parser')
|
43 |
-
site_headlines=soup.find_all(config['tag'], class_=config['class'])
|
44 |
headlines[site] = [headline.text for headline in site_headlines]
|
45 |
except requests.RequestException as e:
|
46 |
-
headlines[site] = f"Error fetching news:{e}"
|
47 |
return headlines
|
48 |
|
49 |
|
|
|
22 |
|
23 |
|
24 |
@tool
|
25 |
+
def get_latest_news(news_sites: List[str]) -> Dict[str, List[str]]:
|
26 |
"""
|
27 |
Tool returns news headlines from multiple news sites.
|
28 |
|
|
|
31 |
|
32 |
Returns:
|
33 |
Dict[str, List[str]]: A dictionary where the keys are the news site URLs and the values are lists of headlines.
|
34 |
+
|
35 |
+
Notes:
|
36 |
+
The function uses a predefined `site_config` dictionary to determine the HTML tag and class to extract headlines from each site.
|
37 |
+
The `site_config` dictionary should have the following structure:
|
38 |
+
{
|
39 |
+
"site_url": {'tag': 'html_tag', 'class': 'css_class'}
|
40 |
+
}
|
41 |
+
If a site is not found in `site_config`, it defaults to {'tag': 'h2', 'class': 'headline'}.
|
42 |
"""
|
|
|
43 |
headlines = {}
|
44 |
for site in news_sites:
|
45 |
try:
|
46 |
+
config = site_config.get(site, {'tag': 'h2', 'class': 'headline'})
|
47 |
response = requests.get(site)
|
48 |
response.raise_for_status()
|
49 |
soup = BeautifulSoup(response.content, 'html.parser')
|
50 |
+
site_headlines = soup.find_all(config['tag'], class_=config['class'])
|
51 |
headlines[site] = [headline.text for headline in site_headlines]
|
52 |
except requests.RequestException as e:
|
53 |
+
headlines[site] = f"Error fetching news: {e}"
|
54 |
return headlines
|
55 |
|
56 |
|