# %% import requests from bs4 import BeautifulSoup import gradio as gr def parse_news_item(html: str) -> dict: """ Parse HTML of a news item to extract link, time, headline, and text. Args: html: The HTML string of a news item. Returns: A dictionary containing link, time, headline, and text. Raises: Exception: For parsing errors or other unexpected errors. """ try: soup = BeautifulSoup(html, "html.parser") # Get the anchor tag containing the link link_tag = soup.find("a", href=True) link = link_tag["href"] if link_tag else None # Get the headline inside

headline_tag = soup.find("h3", class_="story__headline") headline = headline_tag.get_text(strip=True) if headline_tag else None # Get the text inside

text_tag = soup.find("p", class_="story__text") text = text_tag.get_text(strip=True) if text_tag else None # Get the time inside