Spaces:
Sleeping
Sleeping
File size: 1,775 Bytes
a005c19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
import gspread
from dotenv import load_dotenv
import os
import json
from enviroments.convert import get_json_from_env_var
load_dotenv()
def sheet2df(sheet_name:str = "model"):
"""
Reads data from a specified Google Spreadsheet and converts it into a Pandas DataFrame.
Steps:
1. Authenticate using a service account JSON key.
2. Open the spreadsheet by its URL.
3. Select the worksheet to read.
4. Convert the worksheet data to a Pandas DataFrame.
5. Clean up the DataFrame:
- Rename columns using the first row of data.
- Drop the first row after renaming columns.
Returns:
pd.DataFrame: A Pandas DataFrame containing the cleaned data from the spreadsheet.
Note:
- The following variables must be configured before using this function:
- `json_key_path`: Path to the service account JSON key file.
- `spreadsheet_url`: URL of the Google Spreadsheet.
- `sheet_name`: Name of the worksheet to load.
Dependencies:
- pandas
- gspread
- oauth2client
"""
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
json_key_dict =get_json_from_env_var("GOOGLE_CREDENTIALS")
credential = ServiceAccountCredentials.from_json_keyfile_dict(json_key_dict, scope)
gc = gspread.authorize(credential)
spreadsheet_url = os.getenv("SPREADSHEET_URL")
doc = gc.open_by_url(spreadsheet_url)
sheet = doc.worksheet(sheet_name)
# Convert to DataFrame
df = pd.DataFrame(sheet.get_all_values())
# Clean DataFrame
df.rename(columns=df.iloc[0], inplace=True)
df.drop(df.index[0], inplace=True)
return df
|