from langchain.tools import tool import requests from bs4 import BeautifulSoup from typing import Optional import re import sys from constants import TAVILY_KEY import os from tavily import TavilyClient # Initialize Tavily client (set your API key as an environment variable) tavily = TavilyClient(TAVILY_KEY) @tool def get_npb_player_info(player_name: str) -> str: """ Finds a Nippon Professional Baseball (NPB) player's jersey number and team name using Tavily search. Parameters: - player_name (str): Full name of the player (e.g., "Taisho Tamai") Returns: - str: The player's jersey number and team name, or an error message if not found. """ return get_npb_player_info_(player_name) def get_npb_player_info_(player_name: str) -> str: try: # Search using Tavily for the NPB English profile page query = f"site:npb.jp/bis/eng/players {player_name}" search_results = tavily.search(query=query, include_answer=False) # Find first relevant link to the NPB player profile player_url = next( (result["url"] for result in search_results["results"] if re.match(r"https://npb\.jp/bis/eng/players/\d+\.html", result["url"])), None ) if not player_url: return f"No player page found for '{player_name}'." # Fetch the player page and extract number and team headers = {"User-Agent": "Mozilla/5.0"} response = requests.get(player_url, headers=headers) soup = BeautifulSoup(response.text, "html.parser") number = soup.find("li", {"id": "pc_v_no"}) team = soup.find("li", {"id": "pc_v_team"}) if number and team: return f"{player_name} - #{number.text.strip()} - Team: {team.text.strip()}" else: return f"Player page found, but info not parsed properly: {player_url}" except Exception as e: return f"Error fetching info for '{player_name}': {e}" #if __name__ == "__main__": # print(get_npb_player_info("Taisho Tamai")) @tool def get_team_roster(team_name: str, year: int) -> list[dict]: """ Returns the team roster for a given NPB team and season. Each player is represented as a dict with: - name: "Last, First" - number: jersey number (str or None if not found) - role (pitcher, etc.) Parameters: - team_name (str): e.g., "Yomiuri Giants" - year (int): e.g., 2023 Returns: - List of dicts representing players. """ roster = [] player_names = get_team_players_by_season(team_name, year) for player_name in player_names: # Get player info info = get_npb_player_info_(player_name) # Try to extract number from the response match = re.search(r"#(\d+)", info) number = match.group(1) if match else None roster.append({ "name": player_name, "number": number, "role": "Pitcher" # Static for now }) return roster def get_team_players_by_season(team_name: str, year: int) -> list[str]: """ Given a team name and year, returns the list of player names for that NPB season. Parameters: - team_name (str): Team name (e.g., "Yomiuri Giants") - year (int): Season year (e.g., 2023) Returns: - List of player names (strings) in the format last name, first name """ try: query = f"site:npb.jp/bis/eng/{year}/stats {team_name}" search_results = tavily.search(query=query, include_answer=False) # Find the correct stats page URL for that year stats_url = next( (result["url"] for result in search_results["results"] if re.match(rf"https://npb\.jp/bis/eng/{year}/stats/.+\.html", result["url"])), None ) if not stats_url: print(f"No stats page found for {team_name} in {year}.") return [] print(stats_url) headers = {"User-Agent": "Mozilla/5.0"} response = requests.get(stats_url, headers=headers) safe_text = response.text.encode(sys.stdout.encoding, errors='replace').decode(sys.stdout.encoding) response = requests.get(stats_url, headers=headers) soup = BeautifulSoup(response.text, "html.parser") # Find all