Spaces:
Sleeping
Sleeping
import pandas as pd | |
from oauth2client.service_account import ServiceAccountCredentials | |
import gspread | |
from dotenv import load_dotenv | |
import os | |
import json | |
from enviroments.convert import get_json_from_env_var | |
load_dotenv() | |
def sheet2df(sheet_name:str = "model"): | |
""" | |
Reads data from a specified Google Spreadsheet and converts it into a Pandas DataFrame. | |
Steps: | |
1. Authenticate using a service account JSON key. | |
2. Open the spreadsheet by its URL. | |
3. Select the worksheet to read. | |
4. Convert the worksheet data to a Pandas DataFrame. | |
5. Clean up the DataFrame: | |
- Rename columns using the first row of data. | |
- Drop the first row after renaming columns. | |
Returns: | |
pd.DataFrame: A Pandas DataFrame containing the cleaned data from the spreadsheet. | |
Note: | |
- The following variables must be configured before using this function: | |
- `json_key_path`: Path to the service account JSON key file. | |
- `spreadsheet_url`: URL of the Google Spreadsheet. | |
- `sheet_name`: Name of the worksheet to load. | |
Dependencies: | |
- pandas | |
- gspread | |
- oauth2client | |
""" | |
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive'] | |
json_key_dict =get_json_from_env_var("GOOGLE_CREDENTIALS") | |
credential = ServiceAccountCredentials.from_json_keyfile_dict(json_key_dict, scope) | |
gc = gspread.authorize(credential) | |
spreadsheet_url = os.getenv("SPREADSHEET_URL") | |
doc = gc.open_by_url(spreadsheet_url) | |
sheet = doc.worksheet(sheet_name) | |
# Convert to DataFrame | |
df = pd.DataFrame(sheet.get_all_values()) | |
# Clean DataFrame | |
df.rename(columns=df.iloc[0], inplace=True) | |
df.drop(df.index[0], inplace=True) | |
return df | |