Spaces:

robin0307
/

NewsMCP

Running

App Files Files Community

Robin Chiu committed on Jul 25

Commit

07cc8e5

1 Parent(s): e225706

Move model to env and add exception

Browse files

Files changed (1) hide show

app.py +119 -70

app.py CHANGED Viewed

@@ -4,30 +4,46 @@ from bs4 import BeautifulSoup
 import gradio as gr
 def parse_news_item(html: str) -> dict:
-    soup = BeautifulSoup(html, "html.parser")
-    # Get the anchor tag containing the link
-    link_tag = soup.find("a", href=True)
-    link = link_tag["href"] if link_tag else None
-    # Get the headline inside <h3>
-    headline_tag = soup.find("h3", class_="story__headline")
-    headline = headline_tag.get_text(strip=True) if headline_tag else None
-    # Get the text inside <p>
-    text_tag = soup.find("p", class_="story__text")
-    text = text_tag.get_text(strip=True) if text_tag else None
-    # Get the time inside <time>
-    time_tag = soup.find("time")
-    time = time_tag.get_text(strip=True) if time_tag else None
-    return {
-        "link": link,
-        "time": time,
-        "headline": headline,
-        "text": text,
-    }
 # %%
@@ -41,26 +57,40 @@ def search_news(keyword, page=1) -> list:
     Returns:
         A list of lists, each holding the link, time, headline and text of one news article.
     """
-    url = f"https://money.udn.com/search/result/1001/{keyword}/{page}"
-    response = requests.get(url)
-    if response.status_code != 200:
-        print(f"Failed to retrieve data: {response.status_code}")
-        return []
-    soup = BeautifulSoup(response.text, 'html.parser')
-    articles = soup.select('div > div > main > section > ul > li')
-    results = []
-    for article in articles:
-        article_html = article.prettify()
-        data = parse_news_item(article_html)
-        # change dict to list
-        data_list = list(data.values())
-        results.append(data_list)
-    return results
 # search_news('台積電', 1)  # Example usage to fetch news articles related to '台積電'
@@ -75,33 +105,44 @@ def get_content(url) -> dict:
     Returns:
         A dictionary containing the link, title, and text content of the page.
     """
-    response = requests.get(url)
-    if response.status_code != 200:
-        print(f"Failed to retrieve {url}: {response.status_code}")
-        return None
-    soup = BeautifulSoup(response.text, 'html.parser')
-    # using select to get the text inside the #article_body
-    # This assumes the content is inside an element with id="article_body"
-    article_body = soup.select_one('#article_body')
-    text_content = ''
-    if article_body:
-        text_content = article_body.get_text(separator='\n', strip=True)
-    return {
-        'link': url,
-        'title': soup.title.string if soup.title else 'No title',
-        'text': text_content
-    }
 # %%
 from smolagents import Tool, CodeAgent, LiteLLMModel, ToolCollection, ActionStep, FinalAnswerStep
 import os
-model = LiteLLMModel("openrouter/qwen/qwen-2.5-coder-32b-instruct:free", api_key=os.environ["OPENROUTER_API_KEY"])
 url = "https://robin0307-newsmcp.hf.space/gradio_api/mcp/sse"
 server_parameters = {"url": url, "transport": "sse"}
@@ -114,18 +155,26 @@ def newsAgent(task: str) -> str:
     Returns:
         The result of the Task.
     """
-    result = ""
-    with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as mcp_tools:
-        agent = CodeAgent(tools=[*mcp_tools.tools[:2]], model=model)
-        for event in agent.run(task, stream=True, max_steps=5):
-            if isinstance(event, ActionStep):
-                result += f"\n## ======Step {event.step_number}======\n### Action\n```python\n{event.code_action}\n```\n### Observation\n{event.observations}"
-                # yield result
-            if isinstance(event, FinalAnswerStep):
-                result += f"\n## ======Final======\n{event.output}"
-                # yield result
-    return result
 # get_content('https://money.udn.com/money/story/5612/8832289?from=edn_search_result')  # Example usage to fetch content from a specific URL

 import gradio as gr
 def parse_news_item(html: str) -> dict:
+    """
+    Parse HTML of a news item to extract link, time, headline, and text.
+    Args:
+        html: The HTML string of a news item.
+    Returns:
+        A dictionary containing link, time, headline, and text.
+    Raises:
+        Exception: For parsing errors or other unexpected errors.
+    """
+    try:
+        soup = BeautifulSoup(html, "html.parser")
+        # Get the anchor tag containing the link
+        link_tag = soup.find("a", href=True)
+        link = link_tag["href"] if link_tag else None
+        # Get the headline inside <h3>
+        headline_tag = soup.find("h3", class_="story__headline")
+        headline = headline_tag.get_text(strip=True) if headline_tag else None
+        # Get the text inside <p>
+        text_tag = soup.find("p", class_="story__text")
+        text = text_tag.get_text(strip=True) if text_tag else None
+        # Get the time inside <time>
+        time_tag = soup.find("time")
+        time = time_tag.get_text(strip=True) if time_tag else None
+        return {
+            "link": link,
+            "time": time,
+            "headline": headline,
+            "text": text,
+        }
+    except Exception as e:
+        print(f"Error parsing news item: {e}")
+        raise
 # %%
     Returns:
         A list of lists, each holding the link, time, headline and text of one news article.
+    Raises:
+        requests.RequestException: If there's an error fetching data from the URL.
+        Exception: For other unexpected errors.
     """
+    try:
+        url = f"https://money.udn.com/search/result/1001/{keyword}/{page}"
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise requests.RequestException(f"Failed to retrieve data: {response.status_code}")
+        soup = BeautifulSoup(response.text, 'html.parser')
+        articles = soup.select('div > div > main > section > ul > li')
+        results = []
+        for article in articles:
+            try:
+                article_html = article.prettify()
+                data = parse_news_item(article_html)
+                # change dict to list
+                data_list = list(data.values())
+                results.append(data_list)
+            except Exception as e:
+                print(f"Error parsing article: {e}")
+                continue
+        return results
+    except requests.RequestException as e:
+        print(f"Network error in search_news: {e}")
+        raise
+    except Exception as e:
+        print(f"Unexpected error in search_news: {e}")
+        raise
 # search_news('台積電', 1)  # Example usage to fetch news articles related to '台積電'
     Returns:
         A dictionary containing the link, title, and text content of the page.
+    Raises:
+        requests.RequestException: If there's an error fetching data from the URL.
+        Exception: For other unexpected errors.
     """
+    try:
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise requests.RequestException(f"Failed to retrieve {url}: {response.status_code}")
+        soup = BeautifulSoup(response.text, 'html.parser')
+        # using select to get the text inside the #article_body
+        # This assumes the content is inside an element with id="article_body"
+        article_body = soup.select_one('#article_body')
+        text_content = ''
+        if article_body:
+            text_content = article_body.get_text(separator='\n', strip=True)
+        return {
+            'link': url,
+            'title': soup.title.string if soup.title else 'No title',
+            'text': text_content
+        }
+    except requests.RequestException as e:
+        print(f"Network error in get_content: {e}")
+        raise
+    except Exception as e:
+        print(f"Unexpected error in get_content: {e}")
+        raise
 # %%
 from smolagents import Tool, CodeAgent, LiteLLMModel, ToolCollection, ActionStep, FinalAnswerStep
 import os
+model_name = os.environ.get("AI_MODEL", "openrouter/qwen/qwen-2.5-coder-32b-instruct:free")
+model = LiteLLMModel(model_name, api_key=os.environ["OPENROUTER_API_KEY"])
 url = "https://robin0307-newsmcp.hf.space/gradio_api/mcp/sse"
 server_parameters = {"url": url, "transport": "sse"}
     Returns:
         The result of the Task.
+    Raises:
+        Exception: For errors during agent execution.
     """
+    try:
+        result = ""
+        with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as mcp_tools:
+            agent = CodeAgent(tools=[*mcp_tools.tools[:2]], model=model)
+            for event in agent.run(task, stream=True, max_steps=5):
+                if isinstance(event, ActionStep):
+                    result += f"\n## ======Step {event.step_number}======\n### Action\n```python\n{event.code_action}\n```\n### Observation\n{event.observations}"
+                    # yield result
+                if isinstance(event, FinalAnswerStep):
+                    result += f"\n## ======Final======\n{event.output}"
+                    # yield result
+        return result
+    except Exception as e:
+        error_msg = f"Error in newsAgent: {e}"
+        print(error_msg)
+        raise Exception(error_msg) from e
 # get_content('https://money.udn.com/money/story/5612/8832289?from=edn_search_result')  # Example usage to fetch content from a specific URL