from langchain.tools import tool import requests from bs4 import BeautifulSoup from typing import Optional import re import sys from constants import TAVILY_KEY import os from tavily import TavilyClient # Initialize Tavily client (set your API key as an environment variable) tavily = TavilyClient(TAVILY_KEY) @tool def get_npb_player_info(player_name: str) -> str: """ Finds a Nippon Professional Baseball (NPB) player's jersey number and team name using Tavily search. Parameters: - player_name (str): Full name of the player (e.g., "Taisho Tamai") Returns: - str: The player's jersey number and team name, or an error message if not found. """ try: # Search using Tavily for the NPB English profile page query = f"site:npb.jp/bis/eng/players {player_name}" search_results = tavily.search(query=query, include_answer=False) # Find first relevant link to the NPB player profile player_url = next( (result["url"] for result in search_results["results"] if re.match(r"https://npb\.jp/bis/eng/players/\d+\.html", result["url"])), None ) if not player_url: return f"No player page found for '{player_name}'." # Fetch the player page and extract number and team headers = {"User-Agent": "Mozilla/5.0"} response = requests.get(player_url, headers=headers) soup = BeautifulSoup(response.text, "html.parser") number = soup.find("li", {"id": "pc_v_no"}) team = soup.find("li", {"id": "pc_v_team"}) if number and team: return f"{player_name} - #{number.text.strip()} - Team: {team.text.strip()}" else: return f"Player page found, but info not parsed properly: {player_url}" except Exception as e: return f"Error fetching info for '{player_name}': {e}" #if __name__ == "__main__": # print(get_npb_player_info("Taisho Tamai")) @tool def get_team_players_by_season(team_name: str, year: int) -> list[str]: """ Given a team name and year, returns the list of player names for that NPB season. Parameters: - team_name (str): Team name (e.g., "Yomiuri Giants") - year (int): Season year (e.g., 2023) Returns: - List of player names (strings) in the format last name, first name """ try: query = f"site:npb.jp/bis/eng/{year}/stats {team_name}" search_results = tavily.search(query=query, include_answer=False) # Find the correct stats page URL for that year stats_url = next( (result["url"] for result in search_results["results"] if re.match(rf"https://npb\.jp/bis/eng/{year}/stats/.+\.html", result["url"])), None ) if not stats_url: print(f"No stats page found for {team_name} in {year}.") return [] print(stats_url) headers = {"User-Agent": "Mozilla/5.0"} response = requests.get(stats_url, headers=headers) safe_text = response.text.encode(sys.stdout.encoding, errors='replace').decode(sys.stdout.encoding) response = requests.get(stats_url, headers=headers) soup = BeautifulSoup(response.text, "html.parser") # Find all