# NOTE: removed non-code extraction residue (hosting-page status text "Spaces: Sleeping")
from langchain.tools import tool
import requests
from bs4 import BeautifulSoup
from typing import Optional
import re
import sys
from constants import TAVILY_KEY
import os
from tavily import TavilyClient

# Initialize Tavily client (API key is supplied via constants.TAVILY_KEY)
tavily = TavilyClient(TAVILY_KEY)
def get_npb_player_info(player_name: str) -> str:
    """
    Look up a Nippon Professional Baseball (NPB) player's jersey number and
    team name via Tavily search.

    Parameters:
    - player_name (str): Full name of the player (e.g., "Taisho Tamai")

    Returns:
    - str: The player's jersey number and team name, or an error message if not found.
    """
    # Thin public wrapper; all the work happens in the private helper.
    # NOTE(review): `tool` is imported at the top of the file but this function
    # is not decorated — confirm whether @tool was intended here.
    return get_npb_player_info_(player_name)
def get_npb_player_info_(player_name: str) -> str:
    """
    Implementation behind get_npb_player_info.

    Searches Tavily for the player's English NPB profile page
    (npb.jp/bis/eng/players/<id>.html), fetches it, and scrapes the jersey
    number and team name out of the page.

    Parameters:
    - player_name (str): Full name of the player (e.g., "Taisho Tamai")

    Returns:
    - str: "<name> - #<number> - Team: <team>" on success, otherwise a
      human-readable error message. This function never raises.
    """
    try:
        # Restrict the search to the English NPB player-profile pages.
        query = f"site:npb.jp/bis/eng/players {player_name}"
        search_results = tavily.search(query=query, include_answer=False)
        # Take the first result whose URL matches the profile-page pattern.
        player_url = next(
            (result["url"] for result in search_results["results"]
             if re.match(r"https://npb\.jp/bis/eng/players/\d+\.html", result["url"])),
            None
        )
        if not player_url:
            return f"No player page found for '{player_name}'."
        # Fetch the profile page. The timeout keeps an unresponsive server
        # from hanging the tool forever (original had none), and
        # raise_for_status routes HTTP errors into the except branch below.
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(player_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Jersey number and team name live in fixed-id <li> elements.
        number = soup.find("li", {"id": "pc_v_no"})
        team = soup.find("li", {"id": "pc_v_team"})
        if number and team:
            return f"{player_name} - #{number.text.strip()} - Team: {team.text.strip()}"
        else:
            return f"Player page found, but info not parsed properly: {player_url}"
    except Exception as e:
        # Best-effort tool: report failure as a string instead of raising,
        # so an LLM agent can read the error.
        return f"Error fetching info for '{player_name}': {e}"
#if __name__ == "__main__": | |
# print(get_npb_player_info("Taisho Tamai")) | |
def get_team_roster(team_name: str, year: int) -> list[dict]:
    """
    Build the roster for a given NPB team and season.

    Each player is represented as a dict with:
    - name: "Last, First"
    - number: jersey number as a string, or None when it could not be parsed
    - role: currently always "Pitcher" (static placeholder)

    Parameters:
    - team_name (str): e.g., "Yomiuri Giants"
    - year (int): e.g., 2023

    Returns:
    - List of dicts representing players.
    """
    roster = []
    for name in get_team_players_by_season(team_name, year):
        # The info helper returns "... - #<number> - ..."; pull the digits out.
        info_text = get_npb_player_info_(name)
        number_match = re.search(r"#(\d+)", info_text)
        roster.append({
            "name": name,
            "number": number_match.group(1) if number_match else None,
            "role": "Pitcher",  # static for now
        })
    return roster
def get_team_players_by_season(team_name: str, year: int) -> list[str]:
    """
    Given a team name and year, return the list of player names for that NPB season.

    Searches Tavily for the season's English stats page
    (npb.jp/bis/eng/<year>/stats/...), fetches it, and scrapes the player cells.

    Parameters:
    - team_name (str): Team name (e.g., "Yomiuri Giants")
    - year (int): Season year (e.g., 2023)

    Returns:
    - List of player names (strings) in "Last, First" format, deduplicated,
      in page order. Empty list on any failure (the error is printed).
    """
    try:
        # Restrict the search to that season's English stats pages.
        query = f"site:npb.jp/bis/eng/{year}/stats {team_name}"
        search_results = tavily.search(query=query, include_answer=False)
        # Pick the first result that is a stats page for the requested year.
        stats_url = next(
            (result["url"] for result in search_results["results"]
             if re.match(rf"https://npb\.jp/bis/eng/{year}/stats/.+\.html", result["url"])),
            None
        )
        if not stats_url:
            print(f"No stats page found for {team_name} in {year}.")
            return []
        print(stats_url)  # debug: which stats page is being scraped
        # Fetch exactly once, with a timeout so a dead server cannot hang us.
        # (The original fetched the page twice and built an unused
        # stdout-encoding-dependent copy of the body; both removed.)
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(stats_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Player names sit in <td class="stplayer"> cells.
        player_names = [td.get_text(strip=True)
                        for td in soup.find_all("td", class_="stplayer")]
        # Remove duplicates while preserving first-seen order.
        return list(dict.fromkeys(player_names))
    except Exception as e:
        print(f"Error fetching players for {team_name} {year}: {e}")
        return []
if __name__ == "__main__":
    # Quick manual smoke test: list one team's players for one season.
    team = "Hokkaido Nippon-Ham Fighters"
    season_year = 2023
    players = get_team_players_by_season(team, season_year)
    print(f"Players for {team} in {season_year} season:\n", players)