dialect-map / scripts /sheets_sync.py
Kakashi75's picture
Modified and added automation
a25ac93
#!/usr/bin/env python3
"""
Google Sheets Sync
Downloads data from Google Sheets and saves as CSV files
"""
import json
import os
import sys
from pathlib import Path
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google.oauth2 import service_account
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# If modifying these scopes, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
# Paths
BASE_DIR = Path(__file__).parent.parent
CONFIG_FILE = BASE_DIR / "config.json"
SHEETS_OUTPUT_DIR = BASE_DIR / "sheets_output"
def load_config():
"""Load configuration from config.json"""
if not CONFIG_FILE.exists():
print(f"❌ Config file not found: {CONFIG_FILE}")
sys.exit(1)
with open(CONFIG_FILE, 'r') as f:
return json.load(f)
def get_credentials(config):
"""Get Google Sheets API credentials"""
creds = None
credentials_file = BASE_DIR / config['google_sheets']['credentials_file']
token_file = BASE_DIR / 'token.json'
# Check if credentials file exists
if not credentials_file.exists():
print(f"❌ Credentials file not found: {credentials_file}")
print("\nπŸ“ To set up Google Sheets API:")
print("1. Go to https://console.cloud.google.com/")
print("2. Create a new project or select existing")
print("3. Enable Google Sheets API")
print("4. Create credentials (OAuth 2.0 or Service Account)")
print(f"5. Download credentials.json to {BASE_DIR}")
sys.exit(1)
# Try service account first (recommended for automation)
try:
creds = service_account.Credentials.from_service_account_file(
str(credentials_file), scopes=SCOPES)
print("βœ… Using service account credentials")
return creds
except Exception:
pass
# Fall back to OAuth flow
if token_file.exists():
creds = Credentials.from_authorized_user_file(str(token_file), SCOPES)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
str(credentials_file), SCOPES)
creds = flow.run_local_server(port=0)
# Save the credentials for the next run
with open(token_file, 'w') as token:
token.write(creds.to_json())
print("βœ… Using OAuth credentials")
return creds
def download_sheet_as_csv(service, spreadsheet_id, sheet_name, output_filename):
"""Download a specific sheet as CSV"""
try:
# Get the sheet data
result = service.spreadsheets().values().get(
spreadsheetId=spreadsheet_id,
range=f"{sheet_name}!A:Z" # Adjust range as needed
).execute()
values = result.get('values', [])
if not values:
print(f"⚠️ No data found in sheet: {sheet_name}")
return False
# Write to CSV
output_file = SHEETS_OUTPUT_DIR / output_filename
SHEETS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
import csv
with open(output_file, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerows(values)
print(f"βœ… Downloaded {sheet_name} β†’ {output_filename}")
print(f" {len(values)} rows")
return True
except HttpError as error:
print(f"❌ Error downloading {sheet_name}: {error}")
return False
def sync_all_sheets():
"""Sync all configured sheets from Google Sheets"""
config = load_config()
if not config['google_sheets']['enabled']:
print("⚠️ Google Sheets sync is disabled in config.json")
return False
# Check if using new multi-spreadsheet config
spreadsheets = config['google_sheets'].get('spreadsheets', [])
if not spreadsheets:
# Fallback to old single spreadsheet config
spreadsheet_id = config['google_sheets'].get('spreadsheet_id', 'YOUR_SHEET_ID_HERE')
if spreadsheet_id == "YOUR_SHEET_ID_HERE":
print("❌ Please update config.json with your spreadsheet IDs")
print("\nπŸ“ To find your spreadsheet ID:")
print("Open your Google Sheet and copy the ID from the URL:")
print("https://docs.google.com/spreadsheets/d/[SPREADSHEET_ID]/edit")
return False
# Convert old format to new format for processing
spreadsheets = [{
'name': 'Main Spreadsheet',
'spreadsheet_id': spreadsheet_id,
'sheets': config['google_sheets'].get('sheets', {})
}]
print("πŸ”„ Starting Google Sheets sync...")
print(f"πŸ“Š Syncing from {len(spreadsheets)} spreadsheet(s)\n")
try:
creds = get_credentials(config)
service = build('sheets', 'v4', credentials=creds)
total_success = 0
total_sheets = 0
# Process each spreadsheet
for spreadsheet in spreadsheets:
spreadsheet_id = spreadsheet.get('spreadsheet_id', '')
spreadsheet_name = spreadsheet.get('name', 'Unnamed')
sheets_config = spreadsheet.get('sheets', {})
if not spreadsheet_id or spreadsheet_id.startswith('YOUR_'):
print(f"⚠️ Skipping '{spreadsheet_name}': ID not configured")
continue
print(f"πŸ“‹ {spreadsheet_name}")
print(f" ID: {spreadsheet_id[:20]}...")
# Download each sheet from this spreadsheet
for csv_name, sheet_name in sheets_config.items():
total_sheets += 1
if download_sheet_as_csv(service, spreadsheet_id, sheet_name, f"{csv_name}.csv"):
total_success += 1
print() # Blank line between spreadsheets
print(f"✨ Sync complete: {total_success}/{total_sheets} sheets downloaded")
return total_success > 0
except Exception as e:
print(f"❌ Error during sync: {e}")
return False
def main():
"""Main entry point"""
sync_all_sheets()
if __name__ == '__main__':
main()