Alexandre Gazola committed on
Commit
15b55f0
·
1 Parent(s): 718426b
Files changed (1) hide show
  1. nb_tool.py +125 -0
nb_tool.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.tools import tool
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ from typing import Optional
5
+ import re
6
+ import sys
7
+ from constants import TAVILY_KEY
8
+
9
+
10
import os
import requests
import re
from bs4 import BeautifulSoup
from tavily import TavilyClient

# SECURITY: the API key must not be committed to source control. Read it from
# the TAVILY_API_KEY environment variable, falling back to the project-level
# TAVILY_KEY constant imported from `constants` at the top of this file.
# NOTE(review): a literal key was previously hard-coded here; since it has been
# published in the repository history it should be revoked and rotated.
TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY") or TAVILY_KEY

# Shared Tavily client used by the search helpers defined below.
tavily = TavilyClient(TAVILY_API_KEY)
19
+
20
def get_npb_player_info(player_name: str) -> str:
    """
    Find a Nippon Professional Baseball (NPB) player's jersey number and team.

    The player's English profile page on npb.jp is located with a Tavily
    search restricted to the players section of the site; the page is then
    downloaded and the jersey number and team name are read from it.

    Parameters:
    - player_name (str): Full name of the player (e.g., "Taisho Tamai")

    Returns:
    - str: "<player_name> - #<number> - Team: <team>" on success, otherwise a
      human-readable message describing the failure. Exceptions raised while
      searching, downloading or parsing are caught and reported through the
      returned string instead of propagating to the caller.
    """
    try:
        # A blank name would degrade the query to a bare "site:" search and
        # could return an arbitrary player's page, so reject it up front
        # without spending a search API call.
        if not player_name or not player_name.strip():
            return f"No player page found for '{player_name}'."

        # Search using Tavily for the NPB English profile page
        query = f"site:npb.jp/bis/eng/players {player_name}"
        search_results = tavily.search(query=query, include_answer=False)

        # Keep the first hit that is an actual player profile page
        # (numeric id + ".html"); other hits are index or listing pages.
        profile_pattern = re.compile(r"https://npb\.jp/bis/eng/players/\d+\.html")
        candidate_urls = (
            result.get("url", "") for result in search_results.get("results", [])
        )
        player_url = next(
            (url for url in candidate_urls if profile_pattern.match(url)),
            None,
        )

        if not player_url:
            return f"No player page found for '{player_name}'."

        # Fetch the player page. The timeout prevents the call from hanging
        # forever on an unresponsive server, and raise_for_status() stops an
        # HTTP error page from being parsed as if it were a profile.
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(player_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # The profile page exposes the jersey number and team in <li> elements
        # with these ids.
        number = soup.find("li", {"id": "pc_v_no"})
        team = soup.find("li", {"id": "pc_v_team"})

        if number and team:
            return f"{player_name} - #{number.text.strip()} - Team: {team.text.strip()}"
        return f"Player page found, but info not parsed properly: {player_url}"

    except Exception as e:
        # Deliberate catch-all boundary: this function reports every failure
        # as a string rather than raising.
        return f"Error fetching info for '{player_name}': {e}"
60
+
61
+ #if __name__ == "__main__":
62
+ # print(get_npb_player_info("Taisho Tamai"))
63
+
64
+
65
+
66
def get_team_players_by_season(team_name: str, year: int) -> list[str]:
    """
    Return the player names listed on an NPB team's stats page for a season.

    A Tavily search restricted to that year's English stats section of npb.jp
    is used to locate the page; the page is then downloaded once and the text
    of every <td class="stplayer"> cell is collected.

    Parameters:
    - team_name (str): Team name (e.g., "Yomiuri Giants")
    - year (int): Season year (e.g., 2023)

    Returns:
    - List of player names (strings) exactly as they appear in the page's
      player cells (last name, first name), de-duplicated with first-seen
      order preserved. An empty list is returned, and a message is printed,
      when no stats page is found or any error occurs.
    """
    try:
        # A blank team name would degrade the query to a bare "site:" search
        # and could return some other team's page, so stop before searching.
        if not team_name or not team_name.strip():
            print(f"No stats page found for {team_name} in {year}.")
            return []

        query = f"site:npb.jp/bis/eng/{year}/stats {team_name}"
        search_results = tavily.search(query=query, include_answer=False)

        # Find the correct stats page URL for that year. The year is escaped
        # so that it is always matched literally inside the pattern.
        stats_pattern = re.compile(
            rf"https://npb\.jp/bis/eng/{re.escape(str(year))}/stats/.+\.html"
        )
        candidate_urls = (
            result.get("url", "") for result in search_results.get("results", [])
        )
        stats_url = next(
            (url for url in candidate_urls if stats_pattern.match(url)),
            None,
        )

        if not stats_url:
            print(f"No stats page found for {team_name} in {year}.")
            return []

        # Download the page a single time. The timeout prevents an indefinite
        # hang and raise_for_status() surfaces HTTP errors instead of silently
        # parsing an error page into an empty result.
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(stats_url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Each player's name sits in a <td class="stplayer"> cell.
        player_names = [
            td.get_text(strip=True)
            for td in soup.find_all("td", class_="stplayer")
        ]

        # Remove duplicates preserving order
        return list(dict.fromkeys(player_names))

    except Exception as e:
        # Deliberate catch-all boundary: callers always receive a list.
        print(f"Error fetching players for {team_name} {year}: {e}")
        return []
120
+
121
if __name__ == "__main__":
    # Manual smoke test: look up one team's roster for one season and print it.
    team, season_year = "Hokkaido Nippon-Ham Fighters", 2023
    print(
        f"Players for {team} in {season_year} season:\n",
        get_team_players_by_season(team, season_year),
    )