Robin Chiu committed on
Commit
07cc8e5
·
1 Parent(s): e225706

Move model to env and add exception

Browse files
Files changed (1) hide show
  1. app.py +119 -70
app.py CHANGED
@@ -4,30 +4,46 @@ from bs4 import BeautifulSoup
4
  import gradio as gr
5
 
6
  def parse_news_item(html: str) -> dict:
7
- soup = BeautifulSoup(html, "html.parser")
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- # Get the anchor tag containing the link
10
- link_tag = soup.find("a", href=True)
11
- link = link_tag["href"] if link_tag else None
12
 
13
- # Get the headline inside <h3>
14
- headline_tag = soup.find("h3", class_="story__headline")
15
- headline = headline_tag.get_text(strip=True) if headline_tag else None
16
 
17
- # Get the text inside <p>
18
- text_tag = soup.find("p", class_="story__text")
19
- text = text_tag.get_text(strip=True) if text_tag else None
20
 
21
- # Get the time inside <time>
22
- time_tag = soup.find("time")
23
- time = time_tag.get_text(strip=True) if time_tag else None
24
 
25
- return {
26
- "link": link,
27
- "time": time,
28
- "headline": headline,
29
- "text": text,
30
- }
 
 
 
31
 
32
 
33
  # %%
@@ -41,26 +57,40 @@ def search_news(keyword, page=1) -> list:
41
 
42
  Returns:
43
  A list of lists, each holding the link, time, headline and text of one news article (in that order).
 
 
 
 
44
  """
45
- url = f"https://money.udn.com/search/result/1001/{keyword}/{page}"
46
- response = requests.get(url)
47
-
48
- if response.status_code != 200:
49
- print(f"Failed to retrieve data: {response.status_code}")
50
- return []
51
-
52
- soup = BeautifulSoup(response.text, 'html.parser')
53
- articles = soup.select('div > div > main > section > ul > li')
54
-
55
- results = []
56
- for article in articles:
57
- article_html = article.prettify()
58
- data = parse_news_item(article_html)
59
- # change dict to list
60
- data_list = list(data.values())
61
- results.append(data_list)
62
-
63
- return results
 
 
 
 
 
 
 
 
 
 
64
 
65
  # search_news('台積電', 1) # Example usage to fetch news articles related to '台積電'
66
 
@@ -75,33 +105,44 @@ def get_content(url) -> dict:
75
 
76
  Returns:
77
  A dictionary containing the link, title, and text content of the page.
 
 
 
 
78
  """
79
- response = requests.get(url)
80
-
81
- if response.status_code != 200:
82
- print(f"Failed to retrieve {url}: {response.status_code}")
83
- return None
84
-
85
- soup = BeautifulSoup(response.text, 'html.parser')
86
-
87
- # using select to get the text inside the #article_body
88
- # This assumes the content is inside an element with id="article_body"
89
- article_body = soup.select_one('#article_body')
90
- text_content = ''
91
- if article_body:
92
- text_content = article_body.get_text(separator='\n', strip=True)
93
-
94
- return {
95
- 'link': url,
96
- 'title': soup.title.string if soup.title else 'No title',
97
- 'text': text_content
98
- }
 
 
 
 
 
 
99
 
100
  # %%
101
  from smolagents import Tool, CodeAgent, LiteLLMModel, ToolCollection, ActionStep, FinalAnswerStep
102
  import os
103
 
104
- model = LiteLLMModel("openrouter/qwen/qwen-2.5-coder-32b-instruct:free", api_key=os.environ["OPENROUTER_API_KEY"])
 
105
  url = "https://robin0307-newsmcp.hf.space/gradio_api/mcp/sse"
106
  server_parameters = {"url": url, "transport": "sse"}
107
 
@@ -114,18 +155,26 @@ def newsAgent(task: str) -> str:
114
 
115
  Returns:
116
  The result of the Task.
 
 
 
117
  """
118
- result = ""
119
- with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as mcp_tools:
120
- agent = CodeAgent(tools=[*mcp_tools.tools[:2]], model=model)
121
- for event in agent.run(task, stream=True, max_steps=5):
122
- if isinstance(event, ActionStep):
123
- result += f"\n## ======Step {event.step_number}======\n### Action\n```python\n{event.code_action}\n```\n### Observation\n{event.observations}"
124
- # yield result
125
- if isinstance(event, FinalAnswerStep):
126
- result += f"\n## ======Final======\n{event.output}"
127
- # yield result
128
- return result
 
 
 
 
 
129
 
130
  # get_content('https://money.udn.com/money/story/5612/8832289?from=edn_search_result') # Example usage to fetch content from a specific URL
131
 
 
4
  import gradio as gr
5
 
6
  def parse_news_item(html: str) -> dict:
7
+ """
8
+ Parse HTML of a news item to extract link, time, headline, and text.
9
+
10
+ Args:
11
+ html: The HTML string of a news item.
12
+
13
+ Returns:
14
+ A dictionary containing link, time, headline, and text.
15
+
16
+ Raises:
17
+ Exception: For parsing errors or other unexpected errors.
18
+ """
19
+ try:
20
+ soup = BeautifulSoup(html, "html.parser")
21
 
22
+ # Get the anchor tag containing the link
23
+ link_tag = soup.find("a", href=True)
24
+ link = link_tag["href"] if link_tag else None
25
 
26
+ # Get the headline inside <h3>
27
+ headline_tag = soup.find("h3", class_="story__headline")
28
+ headline = headline_tag.get_text(strip=True) if headline_tag else None
29
 
30
+ # Get the text inside <p>
31
+ text_tag = soup.find("p", class_="story__text")
32
+ text = text_tag.get_text(strip=True) if text_tag else None
33
 
34
+ # Get the time inside <time>
35
+ time_tag = soup.find("time")
36
+ time = time_tag.get_text(strip=True) if time_tag else None
37
 
38
+ return {
39
+ "link": link,
40
+ "time": time,
41
+ "headline": headline,
42
+ "text": text,
43
+ }
44
+ except Exception as e:
45
+ print(f"Error parsing news item: {e}")
46
+ raise
47
 
48
 
49
  # %%
 
57
 
58
  Returns:
59
  A list of lists, each holding the link, time, headline and text of one news article (in that order).
60
+
61
+ Raises:
62
+ requests.RequestException: If there's an error fetching data from the URL.
63
+ Exception: For other unexpected errors.
64
  """
65
+ try:
66
+ url = f"https://money.udn.com/search/result/1001/{keyword}/{page}"
67
+ response = requests.get(url)
68
+
69
+ if response.status_code != 200:
70
+ raise requests.RequestException(f"Failed to retrieve data: {response.status_code}")
71
+
72
+ soup = BeautifulSoup(response.text, 'html.parser')
73
+ articles = soup.select('div > div > main > section > ul > li')
74
+
75
+ results = []
76
+ for article in articles:
77
+ try:
78
+ article_html = article.prettify()
79
+ data = parse_news_item(article_html)
80
+ # change dict to list
81
+ data_list = list(data.values())
82
+ results.append(data_list)
83
+ except Exception as e:
84
+ print(f"Error parsing article: {e}")
85
+ continue
86
+
87
+ return results
88
+ except requests.RequestException as e:
89
+ print(f"Network error in search_news: {e}")
90
+ raise
91
+ except Exception as e:
92
+ print(f"Unexpected error in search_news: {e}")
93
+ raise
94
 
95
  # search_news('台積電', 1) # Example usage to fetch news articles related to '台積電'
96
 
 
105
 
106
  Returns:
107
  A dictionary containing the link, title, and text content of the page.
108
+
109
+ Raises:
110
+ requests.RequestException: If there's an error fetching data from the URL.
111
+ Exception: For other unexpected errors.
112
  """
113
+ try:
114
+ response = requests.get(url)
115
+
116
+ if response.status_code != 200:
117
+ raise requests.RequestException(f"Failed to retrieve {url}: {response.status_code}")
118
+
119
+ soup = BeautifulSoup(response.text, 'html.parser')
120
+
121
+ # using select to get the text inside the #article_body
122
+ # This assumes the content is inside an element with id="article_body"
123
+ article_body = soup.select_one('#article_body')
124
+ text_content = ''
125
+ if article_body:
126
+ text_content = article_body.get_text(separator='\n', strip=True)
127
+
128
+ return {
129
+ 'link': url,
130
+ 'title': soup.title.string if soup.title else 'No title',
131
+ 'text': text_content
132
+ }
133
+ except requests.RequestException as e:
134
+ print(f"Network error in get_content: {e}")
135
+ raise
136
+ except Exception as e:
137
+ print(f"Unexpected error in get_content: {e}")
138
+ raise
139
 
140
  # %%
141
  from smolagents import Tool, CodeAgent, LiteLLMModel, ToolCollection, ActionStep, FinalAnswerStep
142
  import os
143
 
144
+ model_name = os.environ.get("AI_MODEL", "openrouter/qwen/qwen-2.5-coder-32b-instruct:free")
145
+ model = LiteLLMModel(model_name, api_key=os.environ["OPENROUTER_API_KEY"])
146
  url = "https://robin0307-newsmcp.hf.space/gradio_api/mcp/sse"
147
  server_parameters = {"url": url, "transport": "sse"}
148
 
 
155
 
156
  Returns:
157
  The result of the Task.
158
+
159
+ Raises:
160
+ Exception: For errors during agent execution.
161
  """
162
+ try:
163
+ result = ""
164
+ with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as mcp_tools:
165
+ agent = CodeAgent(tools=[*mcp_tools.tools[:2]], model=model)
166
+ for event in agent.run(task, stream=True, max_steps=5):
167
+ if isinstance(event, ActionStep):
168
+ result += f"\n## ======Step {event.step_number}======\n### Action\n```python\n{event.code_action}\n```\n### Observation\n{event.observations}"
169
+ # yield result
170
+ if isinstance(event, FinalAnswerStep):
171
+ result += f"\n## ======Final======\n{event.output}"
172
+ # yield result
173
+ return result
174
+ except Exception as e:
175
+ error_msg = f"Error in newsAgent: {e}"
176
+ print(error_msg)
177
+ raise Exception(error_msg) from e
178
 
179
  # get_content('https://money.udn.com/money/story/5612/8832289?from=edn_search_result') # Example usage to fetch content from a specific URL
180