Spaces:
Sleeping
Sleeping
rongo1
Merge branch 'main' of https://huggingface.co/spaces/polygraf-ai/business_card_extractor
a990b85
import os | |
import pickle | |
import base64 | |
from google.auth.transport.requests import Request | |
from google_auth_oauthlib.flow import InstalledAppFlow | |
from googleapiclient.discovery import build | |
from googleapiclient.http import MediaFileUpload, MediaIoBaseUpload | |
import io | |
from pathlib import Path | |
import logging | |
logger = logging.getLogger(__name__)

# --- CONFIGURATION ---
# OAuth client credentials are secrets: they are read from the environment and
# must never be committed to source control. If either is missing, the
# interactive OAuth flow in get_drive_service() cannot start.
# NOTE(security): any client ID/secret that was previously hard-coded in this
# file should be treated as leaked and rotated in the Google Cloud console.
CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET")

# Google Drive folder IDs
EXPORTS_FOLDER_ID = "1k5iP4egzLrGJwnHkMhxt9bAkaCiieojO"  # For Excel exports
IMAGES_FOLDER_ID = "1gd280IqcAzpAFTPeYsZjoBUOU9S7Zx3c"  # For business card images

# Scopes define the level of access you are requesting.
SCOPES = ['https://www.googleapis.com/auth/drive.file']

# Local cache file holding the pickled credentials object.
TOKEN_PICKLE_FILE = 'token.pickle'
def get_drive_service():
    """Authenticate with Google and return a Drive v3 service object.

    Credentials are looked up in this order:
      1. The local token.pickle cache file.
      2. If that file is absent, the GOOGLE_TOKEN_BASE64 environment variable
         (a base64-encoded pickle), which is written out as token.pickle.
      3. If no usable credentials result, expired credentials are refreshed
         when a refresh token is available; otherwise an interactive OAuth
         flow is started on a local server.

    Newly obtained or refreshed credentials are written back to token.pickle.

    Returns:
        The object returned by build('drive', 'v3', credentials=...).

    Raises:
        ValueError: If the interactive OAuth flow is required but CLIENT_ID or
            CLIENT_SECRET is not set.
    """
    creds = None

    # Deployment environments have no token file on disk; rebuild it from the
    # environment instead of shipping the token inside the source code.
    if not os.path.exists(TOKEN_PICKLE_FILE):
        _restore_token_from_env()

    # The file token.pickle stores the user's access and refresh tokens.
    if os.path.exists(TOKEN_PICKLE_FILE):
        # NOTE(security): pickle.load can execute arbitrary code. The token
        # file and GOOGLE_TOKEN_BASE64 must only ever come from a trusted
        # source (i.e. be produced by this application itself).
        with open(TOKEN_PICKLE_FILE, 'rb') as token:
            creds = pickle.load(token)

    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            logger.info("Refreshing expired credentials")
            creds.refresh(Request())
        else:
            if not CLIENT_ID or not CLIENT_SECRET:
                raise ValueError("GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET environment variables are required")
            logger.info("Starting OAuth flow for new credentials")
            # Use client_config dictionary instead of a client_secret.json file
            client_config = {
                "installed": {
                    "client_id": CLIENT_ID,
                    "client_secret": CLIENT_SECRET,
                    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
                    "token_uri": "https://oauth2.googleapis.com/token",
                    "redirect_uris": ["http://localhost"]
                }
            }
            flow = InstalledAppFlow.from_client_config(client_config, SCOPES)
            creds = flow.run_local_server(port=0)

        # Save the credentials for the next run
        with open(TOKEN_PICKLE_FILE, 'wb') as token:
            pickle.dump(creds, token)
        logger.info("Saved new credentials to token.pickle")

    return build('drive', 'v3', credentials=creds)


def _restore_token_from_env():
    """Recreate token.pickle from the GOOGLE_TOKEN_BASE64 environment variable, if set."""
    encoded_token = os.environ.get('GOOGLE_TOKEN_BASE64')
    if not encoded_token:
        return

    logger.info("Found token in environment variable. Recreating token.pickle file.")
    try:
        # Decode before opening the file so a malformed value never leaves a
        # truncated token.pickle behind.
        decoded_token = base64.b64decode(encoded_token)
        with open(TOKEN_PICKLE_FILE, "wb") as token_file:
            token_file.write(decoded_token)
        logger.info("Successfully recreated token.pickle from environment variable")
    except (ValueError, OSError) as e:
        # Best effort: binascii.Error is a ValueError subclass. The caller
        # falls back to the normal authentication path when no file exists.
        logger.error(f"Failed to decode token from environment variable: {e}")
def upload_file_to_drive(service, file_path=None, file_data=None, filename=None, folder_id=None, mimetype='application/octet-stream'):
    """
    Create a file in Google Drive from a local path or from in-memory bytes.

    The local path takes precedence: it is used whenever file_path is set and
    exists on disk. Otherwise file_data is uploaded, which requires filename.

    Args:
        service: Google Drive service object
        file_path: Path to a local file (for file uploads)
        file_data: Bytes data (for in-memory uploads)
        filename: Name for the file in Drive; defaults to the basename of
            file_path for local uploads
        folder_id: ID of the target folder; no parent is set when falsy
        mimetype: MIME type of the file

    Returns:
        dict: 'id', 'name' and 'webViewLink' of the created file, or None if
        the inputs were insufficient or any step raised an exception.
    """
    try:
        if file_path and os.path.exists(file_path):
            # Local file: fall back to the on-disk name when none was given.
            filename = filename or os.path.basename(file_path)
            media = MediaFileUpload(file_path, mimetype=mimetype, resumable=True)
            logger.info(f"Uploading file from path: {file_path}")
        elif file_data and filename:
            # In-memory payload: wrap the bytes in a file-like object.
            buffer = io.BytesIO(file_data)
            media = MediaIoBaseUpload(buffer, mimetype=mimetype, resumable=True)
            logger.info(f"Uploading file from memory: {filename}")
        else:
            logger.error("Either file_path or (file_data + filename) must be provided")
            return None

        # Metadata sent alongside the media body.
        parents = [folder_id] if folder_id else []
        metadata = {'name': filename, 'parents': parents}

        logger.info(f"Uploading '{filename}' to Google Drive folder {folder_id}")

        # Build the create request, then run it.
        request = service.files().create(
            body=metadata,
            media_body=media,
            fields='id, webViewLink, name'
        )
        uploaded = request.execute()

        logger.info("✅ File uploaded successfully!")
        logger.info(f" File ID: {uploaded.get('id')}")
        logger.info(f" File Name: {uploaded.get('name')}")
        logger.info(f" View Link: {uploaded.get('webViewLink')}")

        return {key: uploaded.get(key) for key in ('id', 'name', 'webViewLink')}
    except Exception as e:
        logger.error(f"Failed to upload file to Google Drive: {e}")
        return None
def upload_excel_to_exports_folder(service, file_path=None, file_data=None, filename=None):
    """Send an Excel workbook to the Drive exports folder.

    Delegates to upload_file_to_drive with EXPORTS_FOLDER_ID as the target and
    the .xlsx MIME type, and returns that call's result unchanged.
    """
    upload_kwargs = {
        'file_path': file_path,
        'file_data': file_data,
        'filename': filename,
        'folder_id': EXPORTS_FOLDER_ID,
        'mimetype': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    }
    return upload_file_to_drive(service, **upload_kwargs)
def upload_image_to_images_folder(service, file_path=None, file_data=None, filename=None, mimetype='image/png'):
    """Send an image to the Drive images folder.

    Delegates to upload_file_to_drive with IMAGES_FOLDER_ID as the target and
    the caller's MIME type (PNG by default), and returns that call's result
    unchanged.
    """
    upload_kwargs = {
        'file_path': file_path,
        'file_data': file_data,
        'filename': filename,
        'folder_id': IMAGES_FOLDER_ID,
        'mimetype': mimetype,
    }
    return upload_file_to_drive(service, **upload_kwargs)
def list_files_in_folder(service, folder_id, max_results=100):
    """List files in a specific Google Drive folder.

    Args:
        service: Google Drive v3 service object.
        folder_id: ID of the folder whose direct children are listed.
        max_results: Upper bound on the number of files returned.

    Returns:
        list: Up to max_results file dicts (id, name, size, createdTime,
        webViewLink), or an empty list if the request failed.
    """
    # Drive v3 caps pageSize at 1000 per request.
    max_page_size = 1000
    try:
        query = f"'{folder_id}' in parents"
        files = []
        page_token = None
        while len(files) < max_results:
            # Drive v3 names this parameter pageSize; the v2 name maxResults
            # is rejected by the v3 client, so every call used to fail.
            response = service.files().list(
                q=query,
                pageSize=min(max_results - len(files), max_page_size),
                pageToken=page_token,
                fields="nextPageToken, files(id, name, size, createdTime, webViewLink)"
            ).execute()
            files.extend(response.get('files', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                break
        # Defensive trim in case a page returned more than requested.
        files = files[:max_results]
        logger.info(f"Found {len(files)} files in folder {folder_id}")
        return files
    except Exception as e:
        logger.error(f"Failed to list files in folder {folder_id}: {e}")
        return []
if __name__ == '__main__':
    # Test the Google Drive connection.
    # Without a configured handler the INFO messages below (and those emitted
    # by the helpers) are dropped, so set up basic logging for script runs only.
    logging.basicConfig(level=logging.INFO)
    try:
        drive_service = get_drive_service()
        logger.info("Google Drive service initialized successfully")
        # List files in both folders to verify access
        exports_files = list_files_in_folder(drive_service, EXPORTS_FOLDER_ID)
        images_files = list_files_in_folder(drive_service, IMAGES_FOLDER_ID)
        print(f"Exports folder contains {len(exports_files)} files")
        print(f"Images folder contains {len(images_files)} files")
    except Exception as e:
        logger.error(f"Failed to initialize Google Drive: {e}")