jisujang's picture
first
a005c19
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
import gspread
from dotenv import load_dotenv
import os
import json
from enviroments.convert import get_json_from_env_var
load_dotenv()
def sheet2df(sheet_name:str = "model"):
"""
Reads data from a specified Google Spreadsheet and converts it into a Pandas DataFrame.
Steps:
1. Authenticate using a service account JSON key.
2. Open the spreadsheet by its URL.
3. Select the worksheet to read.
4. Convert the worksheet data to a Pandas DataFrame.
5. Clean up the DataFrame:
- Rename columns using the first row of data.
- Drop the first row after renaming columns.
Returns:
pd.DataFrame: A Pandas DataFrame containing the cleaned data from the spreadsheet.
Note:
- The following variables must be configured before using this function:
- `json_key_path`: Path to the service account JSON key file.
- `spreadsheet_url`: URL of the Google Spreadsheet.
- `sheet_name`: Name of the worksheet to load.
Dependencies:
- pandas
- gspread
- oauth2client
"""
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
json_key_dict =get_json_from_env_var("GOOGLE_CREDENTIALS")
credential = ServiceAccountCredentials.from_json_keyfile_dict(json_key_dict, scope)
gc = gspread.authorize(credential)
spreadsheet_url = os.getenv("SPREADSHEET_URL")
doc = gc.open_by_url(spreadsheet_url)
sheet = doc.worksheet(sheet_name)
# Convert to DataFrame
df = pd.DataFrame(sheet.get_all_values())
# Clean DataFrame
df.rename(columns=df.iloc[0], inplace=True)
df.drop(df.index[0], inplace=True)
return df