Spaces:
Running
Running
Jon Solow
committed on
Commit
·
77fb55b
1
Parent(s):
1f1a2a2
Parse out team names of practice reports to fix bug where only opponent report exists
Browse files
src/queries/nfl_teams/practice_reports.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
|
|
| 1 |
import datetime
|
| 2 |
from multiprocessing import Pool
|
| 3 |
import numpy as np
|
| 4 |
import pandas as pd
|
| 5 |
from pydantic import BaseModel, Field
|
|
|
|
| 6 |
from typing import Optional
|
| 7 |
from urllib.parse import urljoin
|
| 8 |
|
|
@@ -72,11 +74,23 @@ class PracticeReportRawRow(BaseModel):
|
|
| 72 |
return cls(**{DAY_OF_WEEK_STRING_MAPPING.get(k, k): cls.replace_nan(v) for k, v in input_dict.items()})
|
| 73 |
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
def scrape_team_injury_report(team: NFLTeam) -> pd.DataFrame:
|
| 76 |
print(f"Scraping Injury Report for: {team.team_full_name}")
|
| 77 |
-
injury_report_url = urljoin(team.injury_report_url, f"week/REG-{CURRENT_WEEK}")
|
| 78 |
try:
|
| 79 |
-
team_report =
|
| 80 |
except Exception:
|
| 81 |
print(f"Failed to scrape practice report for: {team.team_full_name}")
|
| 82 |
return pd.DataFrame()
|
|
|
|
| 1 |
+
from bs4 import BeautifulSoup
|
| 2 |
import datetime
|
| 3 |
from multiprocessing import Pool
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
from pydantic import BaseModel, Field
|
| 7 |
+
import requests
|
| 8 |
from typing import Optional
|
| 9 |
from urllib.parse import urljoin
|
| 10 |
|
|
|
|
| 74 |
return cls(**{DAY_OF_WEEK_STRING_MAPPING.get(k, k): cls.replace_nan(v) for k, v in input_dict.items()})
|
| 75 |
|
| 76 |
|
| 77 |
+
def get_injury_report_dataframe(team: NFLTeam) -> pd.DataFrame:
    """Download a team's weekly injury report page and return its first table.

    The URL is built by joining ``week/REG-{CURRENT_WEEK}`` onto
    ``team.injury_report_url``. The first table on the page is only returned
    when the first club name on the page equals ``team.team_full_name``;
    otherwise the first table would belong to a different club (e.g. when
    only the opponent's report exists).

    Args:
        team: Team whose ``injury_report_url`` and ``team_full_name`` are used.

    Returns:
        The first table parsed from the page.

    Raises:
        requests.RequestException: The request failed, timed out, or returned
            an HTTP error status.
        ValueError: The page has no club-name elements, the first club name
            is not this team's, or no table could be parsed.
    """
    # Function-scope import so this block does not depend on a module-level
    # import that may not exist in the rest of the file.
    from io import StringIO

    injury_report_url = urljoin(team.injury_report_url, f"week/REG-{CURRENT_WEEK}")

    # Without a timeout a stalled connection would block the worker forever.
    report_request = requests.get(injury_report_url, timeout=30)
    report_request.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    report_soup = BeautifulSoup(report_request.content, "html.parser")

    team_names_spans = report_soup.find_all("span", {"class": "nfl-o-injury-report__club-name"})
    # Explicit raises instead of ``assert``: asserts are removed under
    # ``python -O``, which would silently let the wrong club's table through.
    if not team_names_spans:
        raise ValueError(f"No club names found in injury report at {injury_report_url}")

    first_team_name = team_names_spans[0].get_text()
    if first_team_name != team.team_full_name:
        raise ValueError(
            f"First injury report on page is for {first_team_name!r}, "
            f"expected {team.team_full_name!r}"
        )

    tables = report_soup.find_all("table")
    # Wrap the markup in StringIO: passing literal HTML strings to
    # ``pd.read_html`` is deprecated in recent pandas releases.
    return pd.read_html(StringIO(str(tables)))[0]
|
| 88 |
+
|
| 89 |
+
|
| 90 |
def scrape_team_injury_report(team: NFLTeam) -> pd.DataFrame:
|
| 91 |
print(f"Scraping Injury Report for: {team.team_full_name}")
|
|
|
|
| 92 |
try:
|
| 93 |
+
team_report = get_injury_report_dataframe(team)
|
| 94 |
except Exception:
|
| 95 |
print(f"Failed to scrape practice report for: {team.team_full_name}")
|
| 96 |
return pd.DataFrame()
|