rongo1
commited on
Commit
·
dae9b98
1
Parent(s):
46dec01
feat: added google drive support
Browse files- app.py +153 -275
- env.example +11 -6
- google.py +175 -0
- requirements.txt +3 -1
app.py
CHANGED
@@ -10,11 +10,12 @@ import io
|
|
10 |
import base64
|
11 |
import logging
|
12 |
import sys
|
13 |
-
import
|
14 |
-
|
|
|
|
|
15 |
|
16 |
# Configure logging
|
17 |
-
# Simplified logging for cloud deployment
|
18 |
logging.basicConfig(
|
19 |
level=logging.INFO,
|
20 |
format='%(asctime)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s',
|
@@ -30,72 +31,63 @@ gemini_api_key = os.getenv("Gemini_API")
|
|
30 |
if not gemini_api_key:
|
31 |
logger.error("Gemini_API environment variable not found!")
|
32 |
logger.error("Please set the Gemini_API environment variable with your Google Gemini API key")
|
33 |
-
|
34 |
-
raise ValueError("❌ Gemini_API environment variable is required. Please set it in your environment or Hugging Face Space secrets.")
|
35 |
|
36 |
genai.configure(api_key=gemini_api_key)
|
37 |
logger.info("Gemini API configured successfully")
|
38 |
|
39 |
-
#
|
40 |
-
|
41 |
-
hf_token = os.getenv("HF_TOKEN") # Optional - only needed for Hub uploads
|
42 |
-
|
43 |
-
if hf_token:
|
44 |
-
hf_api = HfApi(token=hf_token)
|
45 |
-
logger.info("Hugging Face Hub API configured for file uploads")
|
46 |
-
logger.info(f"Space repo: {space_repo_id}")
|
47 |
-
else:
|
48 |
-
hf_api = None
|
49 |
-
logger.info("HF_TOKEN not found - files will be saved to persistent storage only")
|
50 |
-
logger.info("To enable Hub uploads: Set HF_TOKEN in Space secrets (optional)")
|
51 |
-
|
52 |
-
# Create output directories
|
53 |
-
logger.info("Setting up output directories")
|
54 |
-
# Try /data for persistent storage, fallback to current directory
|
55 |
try:
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
raise PermissionError("No access to /data")
|
63 |
-
except (PermissionError, OSError):
|
64 |
-
# Fallback to current directory for free tier
|
65 |
-
BASE_PATH = Path(".")
|
66 |
-
logger.info("Using current directory (files will be ephemeral without persistent storage)")
|
67 |
-
|
68 |
-
output_dir = BASE_PATH / "business_card_exports"
|
69 |
-
images_dir = BASE_PATH / "business_cards"
|
70 |
-
output_dir.mkdir(parents=True, exist_ok=True)
|
71 |
-
images_dir.mkdir(parents=True, exist_ok=True)
|
72 |
-
logger.info(f"Export directory created/verified: {output_dir}")
|
73 |
-
logger.info(f"Images directory created/verified: {images_dir}")
|
74 |
|
75 |
# Log startup
|
76 |
-
logger.info("Business Card Data Extractor starting up")
|
77 |
-
logger.info(f"Working directory: {os.getcwd()}")
|
78 |
-
logger.info(f"Export directory: {output_dir.absolute()}")
|
79 |
-
logger.info(f"Images directory: {images_dir.absolute()}")
|
80 |
|
81 |
-
def
|
82 |
-
"""Upload a file to
|
83 |
-
|
84 |
-
|
85 |
-
logger.info(f"Uploading
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
)
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
def extract_business_card_data_batch(images, filenames, model_name="gemini-2.5-flash"):
|
101 |
"""Extract data from multiple business card images in a single API call"""
|
@@ -277,6 +269,7 @@ def process_business_cards(images, model_name="gemini-2.5-flash", save_images=Tr
|
|
277 |
# Load and group images into batches of 5
|
278 |
loaded_images = []
|
279 |
filenames = []
|
|
|
280 |
|
281 |
logger.info(f"Loading {len(images)} images")
|
282 |
for idx, image_path in enumerate(images):
|
@@ -302,10 +295,9 @@ def process_business_cards(images, model_name="gemini-2.5-flash", save_images=Tr
|
|
302 |
|
303 |
logger.info(f"Successfully loaded {len(loaded_images)} out of {len(images)} images")
|
304 |
|
305 |
-
# Save images if requested
|
306 |
-
saved_image_paths = []
|
307 |
if save_images and loaded_images:
|
308 |
-
logger.info(f"Saving {len(loaded_images)} images to
|
309 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
310 |
|
311 |
for i, (image, filename) in enumerate(zip(loaded_images, filenames)):
|
@@ -315,33 +307,27 @@ def process_business_cards(images, model_name="gemini-2.5-flash", save_images=Tr
|
|
315 |
if not ext:
|
316 |
ext = '.png'
|
317 |
unique_filename = f"{timestamp}_{i+1:03d}_{name}{ext}"
|
318 |
-
image_path = images_dir / unique_filename
|
319 |
|
320 |
-
#
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
|
325 |
-
# Upload
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
|
|
|
|
|
|
331 |
|
332 |
except Exception as e:
|
333 |
logger.error(f"Failed to save image {filename}: {e}")
|
|
|
334 |
|
335 |
-
logger.info(f"Successfully
|
336 |
-
|
337 |
-
# List directory contents for debugging
|
338 |
-
try:
|
339 |
-
images_list = list(images_dir.iterdir())
|
340 |
-
logger.info(f"Images directory contains {len(images_list)} files")
|
341 |
-
for img_file in images_list[-5:]: # Show last 5 files
|
342 |
-
logger.debug(f" - {img_file.name}")
|
343 |
-
except Exception as e:
|
344 |
-
logger.error(f"Error listing images directory: {e}")
|
345 |
|
346 |
# Group into batches
|
347 |
logger.info(f"Grouping {len(loaded_images)} images into batches of {batch_size}")
|
@@ -377,13 +363,13 @@ def process_business_cards(images, model_name="gemini-2.5-flash", save_images=Tr
|
|
377 |
data['processed_date'] = timestamp
|
378 |
logger.debug(f"Added timestamp {timestamp} to {card_filename}")
|
379 |
|
380 |
-
# Add
|
381 |
global_index = batch_idx * batch_size + i
|
382 |
-
if save_images and global_index < len(
|
383 |
-
data['
|
384 |
-
logger.debug(f"Added
|
385 |
else:
|
386 |
-
data['
|
387 |
|
388 |
# Handle multiple values (emails, phones) by joining with commas
|
389 |
list_fields_processed = []
|
@@ -454,138 +440,96 @@ def process_business_cards(images, model_name="gemini-2.5-flash", save_images=Tr
|
|
454 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
455 |
logger.debug(f"Generated timestamp: {timestamp}")
|
456 |
|
457 |
-
# Create
|
458 |
-
|
459 |
-
|
|
|
|
|
460 |
|
461 |
-
|
462 |
-
cumulative_filename =
|
463 |
-
logger.info(f"Checking for existing cumulative file: {cumulative_filename}")
|
464 |
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
|
|
|
|
|
|
477 |
cumulative_df = current_df
|
478 |
-
logger.info("Using current data only
|
479 |
-
|
480 |
-
|
|
|
|
|
|
|
481 |
cumulative_df = current_df
|
482 |
|
483 |
# Write current run Excel file
|
484 |
-
logger.info(f"
|
485 |
try:
|
486 |
-
|
487 |
-
current_filename = Path(current_filename)
|
488 |
-
with pd.ExcelWriter(current_filename, engine='openpyxl') as writer:
|
489 |
current_df.to_excel(writer, index=False, sheet_name='Current Run')
|
490 |
logger.debug(f"Written {len(current_df)} rows to 'Current Run' sheet")
|
491 |
|
492 |
# Auto-adjust column widths
|
493 |
logger.debug("Auto-adjusting column widths for current run file")
|
494 |
worksheet = writer.sheets['Current Run']
|
495 |
-
adjusted_columns = []
|
496 |
for column in current_df:
|
497 |
column_length = max(current_df[column].astype(str).map(len).max(), len(column))
|
498 |
col_idx = current_df.columns.get_loc(column)
|
499 |
final_width = min(column_length + 2, 50)
|
500 |
worksheet.column_dimensions[chr(65 + col_idx)].width = final_width
|
501 |
-
adjusted_columns.append(f"{column}:{final_width}")
|
502 |
-
logger.debug(f"Adjusted column widths: {adjusted_columns}")
|
503 |
|
504 |
-
logger.info(f"Current run Excel file
|
505 |
|
506 |
-
# Upload current run file to
|
507 |
-
|
508 |
-
|
509 |
-
f"
|
510 |
-
f"Add current run export {current_filename.name}"
|
511 |
-
)
|
512 |
|
513 |
-
# Create a manifest file for the current run directory
|
514 |
-
manifest_filename = output_dir / f"current_run_{timestamp}_manifest.txt"
|
515 |
-
try:
|
516 |
-
with open(manifest_filename, "w") as f:
|
517 |
-
f.write(f"Current Run Directory: {output_dir}\n")
|
518 |
-
f.write(f"Images Directory: {images_dir}\n")
|
519 |
-
f.write(f"Current Run File: {current_filename}\n")
|
520 |
-
f.write(f"Cumulative File: {cumulative_filename}\n")
|
521 |
-
f.write(f"Total Cards in Database: {len(cumulative_df)}\n")
|
522 |
-
f.write(f"Total Images Saved: {len(saved_image_paths) if saved_image_paths else 0}\n")
|
523 |
-
f.write(f"Total API Calls Made: {len(image_batches)}\n")
|
524 |
-
f.write(f"Model Used: {model_name}\n")
|
525 |
-
f.write(f"Save Images: {save_images}\n")
|
526 |
-
f.write(f"Processing Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
527 |
-
f.write(f"Session ID: {timestamp}\n")
|
528 |
-
f.write(f"Errors: {len(errors) if 'errors' in locals() else 0}\n")
|
529 |
-
logger.info(f"Manifest file created: {manifest_filename}")
|
530 |
-
except Exception as e:
|
531 |
-
logger.error(f"Failed to create manifest file: {e}")
|
532 |
except Exception as e:
|
533 |
-
logger.error(f"Failed to
|
534 |
raise
|
535 |
|
536 |
# Write cumulative Excel file
|
537 |
-
logger.info(f"
|
538 |
try:
|
539 |
-
|
540 |
-
cumulative_filename = Path(cumulative_filename)
|
541 |
-
with pd.ExcelWriter(cumulative_filename, engine='openpyxl') as writer:
|
542 |
cumulative_df.to_excel(writer, index=False, sheet_name='All Business Cards')
|
543 |
logger.debug(f"Written {len(cumulative_df)} rows to 'All Business Cards' sheet")
|
544 |
|
545 |
# Auto-adjust column widths
|
546 |
logger.debug("Auto-adjusting column widths for cumulative file")
|
547 |
worksheet = writer.sheets['All Business Cards']
|
548 |
-
adjusted_columns = []
|
549 |
for column in cumulative_df:
|
550 |
column_length = max(cumulative_df[column].astype(str).map(len).max(), len(column))
|
551 |
col_idx = cumulative_df.columns.get_loc(column)
|
552 |
final_width = min(column_length + 2, 50)
|
553 |
worksheet.column_dimensions[chr(65 + col_idx)].width = final_width
|
554 |
-
adjusted_columns.append(f"{column}:{final_width}")
|
555 |
-
logger.debug(f"Adjusted column widths: {adjusted_columns}")
|
556 |
|
557 |
-
logger.info(f"Cumulative Excel file
|
558 |
|
559 |
-
# Upload cumulative file to
|
560 |
-
|
561 |
-
|
562 |
-
f"
|
563 |
-
f"Update cumulative database - {len(cumulative_df)} total cards"
|
564 |
-
)
|
565 |
|
566 |
-
# Create a manifest file for the cumulative directory
|
567 |
-
manifest_filename = output_dir / f"all_business_cards_total_{timestamp}_manifest.txt"
|
568 |
-
try:
|
569 |
-
with open(manifest_filename, "w") as f:
|
570 |
-
f.write(f"All Business Cards Directory: {output_dir}\n")
|
571 |
-
f.write(f"Images Directory: {images_dir}\n")
|
572 |
-
f.write(f"Current Run File: {current_filename}\n")
|
573 |
-
f.write(f"Cumulative File: {cumulative_filename}\n")
|
574 |
-
f.write(f"Total Cards in Database: {len(cumulative_df)}\n")
|
575 |
-
f.write(f"Total Images Saved: {len(saved_image_paths) if saved_image_paths else 0}\n")
|
576 |
-
f.write(f"Total API Calls Made: {len(image_batches)}\n")
|
577 |
-
f.write(f"Model Used: {model_name}\n")
|
578 |
-
f.write(f"Save Images: {save_images}\n")
|
579 |
-
f.write(f"Processing Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
580 |
-
f.write(f"Session ID: {timestamp}\n")
|
581 |
-
f.write(f"Errors: {len(errors) if 'errors' in locals() else 0}\n")
|
582 |
-
logger.info(f"Manifest file created: {manifest_filename}")
|
583 |
-
except Exception as e:
|
584 |
-
logger.error(f"Failed to create manifest file: {e}")
|
585 |
except Exception as e:
|
586 |
-
logger.error(f"Failed to
|
587 |
raise
|
588 |
|
|
|
|
|
|
|
589 |
# Create summary message
|
590 |
logger.info("Creating summary message")
|
591 |
num_batches = len(image_batches) if 'image_batches' in locals() else 1
|
@@ -594,20 +538,23 @@ def process_business_cards(images, model_name="gemini-2.5-flash", save_images=Tr
|
|
594 |
summary += f"⚡ API calls made: {num_batches} (instead of {len(all_data)})\n"
|
595 |
|
596 |
if save_images:
|
597 |
-
|
598 |
-
summary += f"💾 Images
|
599 |
else:
|
600 |
-
summary += f"💾 Images
|
601 |
|
602 |
-
summary += f"📁 Current run file: {current_filename
|
603 |
-
summary += f"
|
604 |
summary += f"📊 Total cards in database: {len(cumulative_df)}\n\n"
|
605 |
|
606 |
-
# Add
|
607 |
-
summary += "
|
608 |
-
|
609 |
-
|
610 |
-
|
|
|
|
|
|
|
611 |
|
612 |
if errors:
|
613 |
logger.warning(f"Encountered {len(errors)} errors during processing")
|
@@ -625,78 +572,8 @@ def process_business_cards(images, model_name="gemini-2.5-flash", save_images=Tr
|
|
625 |
logger.info("Business card processing session completed successfully")
|
626 |
logger.info(f"Session summary - Cards: {len(all_data)}, Batches: {num_batches}, API calls: {num_batches}, Total DB size: {len(cumulative_df)}")
|
627 |
|
628 |
-
#
|
629 |
-
|
630 |
-
logger.info(f"Current file exists: {current_filename}")
|
631 |
-
else:
|
632 |
-
logger.error(f"Current file NOT found: {current_filename}")
|
633 |
-
|
634 |
-
if cumulative_filename.exists():
|
635 |
-
logger.info(f"Cumulative file exists: {cumulative_filename}")
|
636 |
-
else:
|
637 |
-
logger.error(f"Cumulative file NOT found: {cumulative_filename}")
|
638 |
-
|
639 |
-
# List export directory contents for debugging
|
640 |
-
try:
|
641 |
-
export_list = list(output_dir.iterdir())
|
642 |
-
logger.info(f"Export directory contains {len(export_list)} files")
|
643 |
-
for exp_file in export_list[-5:]: # Show last 5 files
|
644 |
-
logger.debug(f" - {exp_file.name}")
|
645 |
-
except Exception as e:
|
646 |
-
logger.error(f"Error listing export directory: {e}")
|
647 |
-
|
648 |
-
# List root directory Excel/ZIP files for debugging
|
649 |
-
try:
|
650 |
-
root_files = [f for f in Path(".").iterdir() if f.suffix in ['.xlsx', '.zip'] and f.is_file()]
|
651 |
-
logger.info(f"Root directory contains {len(root_files)} Excel/ZIP files")
|
652 |
-
for root_file in root_files[-5:]: # Show last 5 files
|
653 |
-
logger.info(f" - {root_file.name} ({root_file.stat().st_size} bytes)")
|
654 |
-
except Exception as e:
|
655 |
-
logger.error(f"Error listing root directory: {e}")
|
656 |
-
|
657 |
-
# Create a directory listing file in root showing folder contents
|
658 |
-
try:
|
659 |
-
with open("FOLDER_CONTENTS.txt", "w") as f:
|
660 |
-
f.write("=== BUSINESS CARD ANALYZER - FOLDER CONTENTS ===\n")
|
661 |
-
f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
|
662 |
-
|
663 |
-
# List business_card_exports folder
|
664 |
-
f.write("📁 business_card_exports/\n")
|
665 |
-
f.write("-" * 50 + "\n")
|
666 |
-
if output_dir.exists():
|
667 |
-
files = sorted(output_dir.iterdir())
|
668 |
-
for file in files:
|
669 |
-
if file.is_file():
|
670 |
-
size = file.stat().st_size
|
671 |
-
f.write(f" 📄 {file.name} ({size:,} bytes)\n")
|
672 |
-
else:
|
673 |
-
f.write(" [Folder does not exist]\n")
|
674 |
-
f.write(f"\nTotal files: {len(list(output_dir.glob('*')))}\n\n")
|
675 |
-
|
676 |
-
# List business_cards folder
|
677 |
-
f.write("📁 business_cards/\n")
|
678 |
-
f.write("-" * 50 + "\n")
|
679 |
-
if images_dir.exists():
|
680 |
-
files = sorted(images_dir.iterdir())
|
681 |
-
for file in files:
|
682 |
-
if file.is_file():
|
683 |
-
size = file.stat().st_size
|
684 |
-
f.write(f" 🖼️ {file.name} ({size:,} bytes)\n")
|
685 |
-
else:
|
686 |
-
f.write(" [Folder does not exist]\n")
|
687 |
-
f.write(f"\nTotal files: {len(list(images_dir.glob('*')))}\n\n")
|
688 |
-
|
689 |
-
# Add latest processing summary
|
690 |
-
f.write("📊 Latest Processing Summary\n")
|
691 |
-
f.write("-" * 50 + "\n")
|
692 |
-
f.write(summary)
|
693 |
-
|
694 |
-
logger.info("Created FOLDER_CONTENTS.txt in root directory")
|
695 |
-
except Exception as e:
|
696 |
-
logger.error(f"Failed to create folder contents file: {e}")
|
697 |
-
|
698 |
-
# Return string paths for Gradio File components
|
699 |
-
return str(current_filename), str(cumulative_filename), summary, preview_df
|
700 |
|
701 |
# Create Gradio interface
|
702 |
logger.info("Creating Gradio interface")
|
@@ -712,20 +589,21 @@ with gr.Blocks(title="Business Card Data Extractor") as demo:
|
|
712 |
- 📁 **Current Run**: Contains only the cards you just processed
|
713 |
- 📊 **Total Database**: Contains ALL cards ever processed (cumulative)
|
714 |
|
715 |
-
|
716 |
-
- 📂 Excel files:
|
717 |
-
- 🖼️ Images:
|
718 |
-
-
|
719 |
-
-
|
720 |
|
721 |
**📌 File Access:**
|
722 |
-
- ⬇️ Download directly from interface buttons
|
723 |
-
-
|
724 |
-
-
|
725 |
-
-
|
726 |
|
727 |
-
**⚙️
|
728 |
-
-
|
|
|
729 |
"""
|
730 |
)
|
731 |
|
@@ -784,10 +662,10 @@ with gr.Blocks(title="Business Card Data Extractor") as demo:
|
|
784 |
- 📄 **Data Extraction**: Names, emails, phone numbers, addresses, and more
|
785 |
- 📞 **Smart Combination**: Multiple emails/phones combined with commas
|
786 |
- 🏠 **Address Merging**: All phone types and address fields combined
|
787 |
-
-
|
788 |
-
-
|
789 |
- 📊 **Dual Output**: Current run + cumulative database files
|
790 |
-
- 📝 **Full Tracking**: Processing date, filename,
|
791 |
- 🎯 **One Row Per Card**: Each business card becomes one spreadsheet row
|
792 |
"""
|
793 |
)
|
|
|
10 |
import base64
|
11 |
import logging
|
12 |
import sys
|
13 |
+
import tempfile
|
14 |
+
|
15 |
+
# Import Google Drive functionality
|
16 |
+
from google import get_drive_service, upload_excel_to_exports_folder, upload_image_to_images_folder, list_files_in_folder
|
17 |
|
18 |
# Configure logging
|
|
|
19 |
logging.basicConfig(
|
20 |
level=logging.INFO,
|
21 |
format='%(asctime)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s',
|
|
|
31 |
if not gemini_api_key:
|
32 |
logger.error("Gemini_API environment variable not found!")
|
33 |
logger.error("Please set the Gemini_API environment variable with your Google Gemini API key")
|
34 |
+
raise ValueError("❌ Gemini_API environment variable is required. Please set it in your environment.")
|
|
|
35 |
|
36 |
genai.configure(api_key=gemini_api_key)
|
37 |
logger.info("Gemini API configured successfully")
|
38 |
|
39 |
+
# Initialize Google Drive service
|
40 |
+
logger.info("Initializing Google Drive service")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
try:
|
42 |
+
drive_service = get_drive_service()
|
43 |
+
logger.info("Google Drive service initialized successfully")
|
44 |
+
except Exception as e:
|
45 |
+
logger.error(f"Failed to initialize Google Drive service: {e}")
|
46 |
+
logger.error("Please ensure GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET environment variables are set")
|
47 |
+
raise ValueError("❌ Google Drive credentials are required. Please set GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET environment variables.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
# Log startup
|
50 |
+
logger.info("Business Card Data Extractor starting up with Google Drive storage")
|
|
|
|
|
|
|
51 |
|
52 |
+
def upload_to_google_drive(file_path, is_excel=False, filename=None):
|
53 |
+
"""Upload a file to Google Drive"""
|
54 |
+
try:
|
55 |
+
if is_excel:
|
56 |
+
logger.info(f"Uploading Excel file to Google Drive: {filename or file_path}")
|
57 |
+
result = upload_excel_to_exports_folder(drive_service, file_path=file_path, filename=filename)
|
58 |
+
else:
|
59 |
+
logger.info(f"Uploading image file to Google Drive: {filename or file_path}")
|
60 |
+
result = upload_image_to_images_folder(drive_service, file_path=file_path, filename=filename)
|
61 |
+
|
62 |
+
if result:
|
63 |
+
logger.info(f"Successfully uploaded to Google Drive: {result['webViewLink']}")
|
64 |
+
return result
|
65 |
+
else:
|
66 |
+
logger.error("Failed to upload to Google Drive")
|
67 |
+
return None
|
68 |
+
except Exception as e:
|
69 |
+
logger.error(f"Failed to upload to Google Drive: {e}")
|
70 |
+
return None
|
71 |
+
|
72 |
+
def upload_bytes_to_google_drive(file_data, filename, is_excel=False):
|
73 |
+
"""Upload file data (bytes) to Google Drive"""
|
74 |
+
try:
|
75 |
+
if is_excel:
|
76 |
+
logger.info(f"Uploading Excel data to Google Drive: {filename}")
|
77 |
+
result = upload_excel_to_exports_folder(drive_service, file_data=file_data, filename=filename)
|
78 |
+
else:
|
79 |
+
logger.info(f"Uploading image data to Google Drive: {filename}")
|
80 |
+
result = upload_image_to_images_folder(drive_service, file_data=file_data, filename=filename)
|
81 |
+
|
82 |
+
if result:
|
83 |
+
logger.info(f"Successfully uploaded to Google Drive: {result['webViewLink']}")
|
84 |
+
return result
|
85 |
+
else:
|
86 |
+
logger.error("Failed to upload to Google Drive")
|
87 |
+
return None
|
88 |
+
except Exception as e:
|
89 |
+
logger.error(f"Failed to upload to Google Drive: {e}")
|
90 |
+
return None
|
91 |
|
92 |
def extract_business_card_data_batch(images, filenames, model_name="gemini-2.5-flash"):
|
93 |
"""Extract data from multiple business card images in a single API call"""
|
|
|
269 |
# Load and group images into batches of 5
|
270 |
loaded_images = []
|
271 |
filenames = []
|
272 |
+
uploaded_image_links = []
|
273 |
|
274 |
logger.info(f"Loading {len(images)} images")
|
275 |
for idx, image_path in enumerate(images):
|
|
|
295 |
|
296 |
logger.info(f"Successfully loaded {len(loaded_images)} out of {len(images)} images")
|
297 |
|
298 |
+
# Save images to Google Drive if requested
|
|
|
299 |
if save_images and loaded_images:
|
300 |
+
logger.info(f"Saving {len(loaded_images)} images to Google Drive")
|
301 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
302 |
|
303 |
for i, (image, filename) in enumerate(zip(loaded_images, filenames)):
|
|
|
307 |
if not ext:
|
308 |
ext = '.png'
|
309 |
unique_filename = f"{timestamp}_{i+1:03d}_{name}{ext}"
|
|
|
310 |
|
311 |
+
# Convert image to bytes
|
312 |
+
img_buffer = io.BytesIO()
|
313 |
+
image.save(img_buffer, format='PNG')
|
314 |
+
img_bytes = img_buffer.getvalue()
|
315 |
|
316 |
+
# Upload to Google Drive
|
317 |
+
result = upload_bytes_to_google_drive(img_bytes, unique_filename, is_excel=False)
|
318 |
+
|
319 |
+
if result:
|
320 |
+
uploaded_image_links.append(result['webViewLink'])
|
321 |
+
logger.debug(f"Saved image {i+1}: {unique_filename}")
|
322 |
+
else:
|
323 |
+
uploaded_image_links.append(None)
|
324 |
+
logger.error(f"Failed to upload image {unique_filename}")
|
325 |
|
326 |
except Exception as e:
|
327 |
logger.error(f"Failed to save image {filename}: {e}")
|
328 |
+
uploaded_image_links.append(None)
|
329 |
|
330 |
+
logger.info(f"Successfully uploaded {sum(1 for link in uploaded_image_links if link)} images to Google Drive")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
331 |
|
332 |
# Group into batches
|
333 |
logger.info(f"Grouping {len(loaded_images)} images into batches of {batch_size}")
|
|
|
363 |
data['processed_date'] = timestamp
|
364 |
logger.debug(f"Added timestamp {timestamp} to {card_filename}")
|
365 |
|
366 |
+
# Add Google Drive image link if images were saved
|
367 |
global_index = batch_idx * batch_size + i
|
368 |
+
if save_images and global_index < len(uploaded_image_links) and uploaded_image_links[global_index]:
|
369 |
+
data['google_drive_image_link'] = uploaded_image_links[global_index]
|
370 |
+
logger.debug(f"Added Google Drive image link for {card_filename}: {uploaded_image_links[global_index]}")
|
371 |
else:
|
372 |
+
data['google_drive_image_link'] = None
|
373 |
|
374 |
# Handle multiple values (emails, phones) by joining with commas
|
375 |
list_fields_processed = []
|
|
|
440 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
441 |
logger.debug(f"Generated timestamp: {timestamp}")
|
442 |
|
443 |
+
# Create temporary files for Excel generation
|
444 |
+
with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as current_temp:
|
445 |
+
current_temp_path = current_temp.name
|
446 |
+
with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as cumulative_temp:
|
447 |
+
cumulative_temp_path = cumulative_temp.name
|
448 |
|
449 |
+
current_filename = f"current_run_{timestamp}.xlsx"
|
450 |
+
cumulative_filename = "all_business_cards_total.xlsx"
|
|
|
451 |
|
452 |
+
# Try to download existing cumulative data from Google Drive
|
453 |
+
logger.info("Checking for existing cumulative file in Google Drive")
|
454 |
+
try:
|
455 |
+
# List files in exports folder to find existing cumulative file
|
456 |
+
exports_files = list_files_in_folder(drive_service, "1k5iP4egzLrGJwnHkMhxt9bAkaCiieojO")
|
457 |
+
cumulative_file = None
|
458 |
+
for file in exports_files:
|
459 |
+
if file['name'] == 'all_business_cards_total.xlsx':
|
460 |
+
cumulative_file = file
|
461 |
+
break
|
462 |
+
|
463 |
+
if cumulative_file:
|
464 |
+
logger.info("Existing cumulative file found in Google Drive")
|
465 |
+
# For now, we'll just use current data since downloading and merging is complex
|
466 |
+
# In production, you'd want to implement Google Drive file download
|
467 |
cumulative_df = current_df
|
468 |
+
logger.info("Using current data only (Google Drive download not implemented yet)")
|
469 |
+
else:
|
470 |
+
logger.info("No existing cumulative file found, using current data only")
|
471 |
+
cumulative_df = current_df
|
472 |
+
except Exception as e:
|
473 |
+
logger.warning(f"Could not check for existing data in Google Drive: {e}")
|
474 |
cumulative_df = current_df
|
475 |
|
476 |
# Write current run Excel file
|
477 |
+
logger.info(f"Creating current run Excel file: {current_filename}")
|
478 |
try:
|
479 |
+
with pd.ExcelWriter(current_temp_path, engine='openpyxl') as writer:
|
|
|
|
|
480 |
current_df.to_excel(writer, index=False, sheet_name='Current Run')
|
481 |
logger.debug(f"Written {len(current_df)} rows to 'Current Run' sheet")
|
482 |
|
483 |
# Auto-adjust column widths
|
484 |
logger.debug("Auto-adjusting column widths for current run file")
|
485 |
worksheet = writer.sheets['Current Run']
|
|
|
486 |
for column in current_df:
|
487 |
column_length = max(current_df[column].astype(str).map(len).max(), len(column))
|
488 |
col_idx = current_df.columns.get_loc(column)
|
489 |
final_width = min(column_length + 2, 50)
|
490 |
worksheet.column_dimensions[chr(65 + col_idx)].width = final_width
|
|
|
|
|
491 |
|
492 |
+
logger.info(f"Current run Excel file created locally")
|
493 |
|
494 |
+
# Upload current run file to Google Drive
|
495 |
+
current_result = upload_to_google_drive(current_temp_path, is_excel=True, filename=current_filename)
|
496 |
+
if current_result:
|
497 |
+
logger.info(f"Current run file uploaded to Google Drive: {current_result['webViewLink']}")
|
|
|
|
|
498 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
499 |
except Exception as e:
|
500 |
+
logger.error(f"Failed to create current run Excel file: {e}")
|
501 |
raise
|
502 |
|
503 |
# Write cumulative Excel file
|
504 |
+
logger.info(f"Creating cumulative Excel file: {cumulative_filename}")
|
505 |
try:
|
506 |
+
with pd.ExcelWriter(cumulative_temp_path, engine='openpyxl') as writer:
|
|
|
|
|
507 |
cumulative_df.to_excel(writer, index=False, sheet_name='All Business Cards')
|
508 |
logger.debug(f"Written {len(cumulative_df)} rows to 'All Business Cards' sheet")
|
509 |
|
510 |
# Auto-adjust column widths
|
511 |
logger.debug("Auto-adjusting column widths for cumulative file")
|
512 |
worksheet = writer.sheets['All Business Cards']
|
|
|
513 |
for column in cumulative_df:
|
514 |
column_length = max(cumulative_df[column].astype(str).map(len).max(), len(column))
|
515 |
col_idx = cumulative_df.columns.get_loc(column)
|
516 |
final_width = min(column_length + 2, 50)
|
517 |
worksheet.column_dimensions[chr(65 + col_idx)].width = final_width
|
|
|
|
|
518 |
|
519 |
+
logger.info(f"Cumulative Excel file created locally")
|
520 |
|
521 |
+
# Upload cumulative file to Google Drive
|
522 |
+
cumulative_result = upload_to_google_drive(cumulative_temp_path, is_excel=True, filename=cumulative_filename)
|
523 |
+
if cumulative_result:
|
524 |
+
logger.info(f"Cumulative file uploaded to Google Drive: {cumulative_result['webViewLink']}")
|
|
|
|
|
525 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
526 |
except Exception as e:
|
527 |
+
logger.error(f"Failed to create cumulative Excel file: {e}")
|
528 |
raise
|
529 |
|
530 |
+
# Note: Don't delete temp files here - Gradio needs them for download
|
531 |
+
# Gradio will handle cleanup automatically
|
532 |
+
|
533 |
# Create summary message
|
534 |
logger.info("Creating summary message")
|
535 |
num_batches = len(image_batches) if 'image_batches' in locals() else 1
|
|
|
538 |
summary += f"⚡ API calls made: {num_batches} (instead of {len(all_data)})\n"
|
539 |
|
540 |
if save_images:
|
541 |
+
num_uploaded = sum(1 for link in uploaded_image_links if link) if 'uploaded_image_links' in locals() else 0
|
542 |
+
summary += f"💾 Images uploaded to Google Drive: {num_uploaded} cards\n\n"
|
543 |
else:
|
544 |
+
summary += f"💾 Images uploaded to Google Drive: No (save option was disabled)\n\n"
|
545 |
|
546 |
+
summary += f"📁 Current run file: {current_filename} (uploaded to Google Drive)\n"
|
547 |
+
summary += f"📊 Total cumulative file: {cumulative_filename} (uploaded to Google Drive)\n"
|
548 |
summary += f"📊 Total cards in database: {len(cumulative_df)}\n\n"
|
549 |
|
550 |
+
# Add Google Drive links
|
551 |
+
summary += "🔗 Google Drive Links:\n"
|
552 |
+
if 'current_result' in locals() and current_result:
|
553 |
+
summary += f" 📄 Current Run: {current_result['webViewLink']}\n"
|
554 |
+
if 'cumulative_result' in locals() and cumulative_result:
|
555 |
+
summary += f" 📊 Total Database: {cumulative_result['webViewLink']}\n"
|
556 |
+
summary += f" 📁 Exports Folder: https://drive.google.com/drive/folders/1k5iP4egzLrGJwnHkMhxt9bAkaCiieojO\n"
|
557 |
+
summary += f" 🖼️ Images Folder: https://drive.google.com/drive/folders/1gd280IqcAzpAFTPeYsZjoBUOU9S7Zx3c\n\n"
|
558 |
|
559 |
if errors:
|
560 |
logger.warning(f"Encountered {len(errors)} errors during processing")
|
|
|
572 |
logger.info("Business card processing session completed successfully")
|
573 |
logger.info(f"Session summary - Cards: {len(all_data)}, Batches: {num_batches}, API calls: {num_batches}, Total DB size: {len(cumulative_df)}")
|
574 |
|
575 |
+
# Return the temporary file paths for download (Gradio will handle the download)
|
576 |
+
return current_temp_path, cumulative_temp_path, summary, preview_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
577 |
|
578 |
# Create Gradio interface
|
579 |
logger.info("Creating Gradio interface")
|
|
|
589 |
- 📁 **Current Run**: Contains only the cards you just processed
|
590 |
- 📊 **Total Database**: Contains ALL cards ever processed (cumulative)
|
591 |
|
592 |
+
**☁️ Google Drive Storage:**
|
593 |
+
- 📂 Excel files: Automatically uploaded to Google Drive exports folder
|
594 |
+
- 🖼️ Images: Uploaded to Google Drive images folder (if save option enabled)
|
595 |
+
- 🔗 **Direct Links**: Access files directly through provided Google Drive links
|
596 |
+
- 📁 **Organized Folders**: Separate folders for exports and images
|
597 |
|
598 |
**📌 File Access:**
|
599 |
+
- ⬇️ Download directly from interface buttons (temporary copies)
|
600 |
+
- 🔗 Access permanent files via Google Drive links in results
|
601 |
+
- 📁 **Exports Folder**: https://drive.google.com/drive/folders/1k5iP4egzLrGJwnHkMhxt9bAkaCiieojO
|
602 |
+
- 🖼️ **Images Folder**: https://drive.google.com/drive/folders/1gd280IqcAzpAFTPeYsZjoBUOU9S7Zx3c
|
603 |
|
604 |
+
**⚙️ Google Drive Integration:**
|
605 |
+
- Requires `GOOGLE_CLIENT_ID` and `GOOGLE_CLIENT_SECRET` environment variables
|
606 |
+
- Files are automatically uploaded and organized in predefined folders
|
607 |
"""
|
608 |
)
|
609 |
|
|
|
662 |
- 📄 **Data Extraction**: Names, emails, phone numbers, addresses, and more
|
663 |
- 📞 **Smart Combination**: Multiple emails/phones combined with commas
|
664 |
- 🏠 **Address Merging**: All phone types and address fields combined
|
665 |
+
- ☁️ **Google Drive Storage**: Automatic upload to organized Drive folders
|
666 |
+
- 🔗 **Direct Links**: Instant access to files via Google Drive URLs
|
667 |
- 📊 **Dual Output**: Current run + cumulative database files
|
668 |
+
- 📝 **Full Tracking**: Processing date, filename, Google Drive links, and AI model used
|
669 |
- 🎯 **One Row Per Card**: Each business card becomes one spreadsheet row
|
670 |
"""
|
671 |
)
|
env.example
CHANGED
@@ -3,14 +3,19 @@
|
|
3 |
|
4 |
# Google Gemini API Key (Required)
|
5 |
# Get your key from: https://aistudio.google.com/
|
6 |
-
# For
|
7 |
Gemini_API=your_gemini_api_key_here
|
8 |
|
9 |
-
#
|
10 |
-
# Get
|
11 |
-
#
|
12 |
-
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Examples:
|
15 |
# Gemini_API=AIzaSyBxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
16 |
-
#
|
|
|
|
3 |
|
4 |
# Google Gemini API Key (Required)
|
5 |
# Get your key from: https://aistudio.google.com/
|
6 |
+
# For deployment: Add this as an environment variable named "Gemini_API"
|
7 |
Gemini_API=your_gemini_api_key_here
|
8 |
|
9 |
+
# Google Drive API Credentials (Required - for file storage)
|
10 |
+
# Get these from Google Cloud Console:
|
11 |
+
# 1. Create a project at https://console.cloud.google.com/
|
12 |
+
# 2. Enable Google Drive API
|
13 |
+
# 3. Create OAuth 2.0 credentials (Desktop application)
|
14 |
+
# 4. Download the JSON and extract client_id and client_secret
|
15 |
+
GOOGLE_CLIENT_ID=your_google_client_id_here
|
16 |
+
GOOGLE_CLIENT_SECRET=your_google_client_secret_here
|
17 |
|
18 |
# Examples:
|
19 |
# Gemini_API=AIzaSyBxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
20 |
+
# GOOGLE_CLIENT_ID=1234567890-abcdefghijklmnopqrstuvwxyz.apps.googleusercontent.com
|
21 |
+
# GOOGLE_CLIENT_SECRET=GOCSPX-xxxxxxxxxxxxxxxxxxxxxxxx
|
google.py
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
from google.auth.transport.requests import Request
|
4 |
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
5 |
+
from googleapiclient.discovery import build
|
6 |
+
from googleapiclient.http import MediaFileUpload, MediaIoBaseUpload
|
7 |
+
import io
|
8 |
+
from pathlib import Path
|
9 |
+
import logging
|
10 |
+
|
11 |
+
logger = logging.getLogger(__name__)
|
12 |
+
|
13 |
+
# --- CONFIGURATION ---
|
14 |
+
# Get credentials from environment variables
|
15 |
+
CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
|
16 |
+
CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET")
|
17 |
+
|
18 |
+
# Google Drive folder IDs
|
19 |
+
EXPORTS_FOLDER_ID = "1k5iP4egzLrGJwnHkMhxt9bAkaCiieojO" # For Excel exports
|
20 |
+
IMAGES_FOLDER_ID = "1gd280IqcAzpAFTPeYsZjoBUOU9S7Zx3c" # For business card images
|
21 |
+
|
22 |
+
# Scopes define the level of access you are requesting.
|
23 |
+
SCOPES = ['https://www.googleapis.com/auth/drive.file']
|
24 |
+
TOKEN_PICKLE_FILE = 'token.pickle'
|
25 |
+
|
26 |
+
def get_drive_service():
|
27 |
+
"""Authenticates with Google and returns a Drive service object."""
|
28 |
+
creds = None
|
29 |
+
# The file token.pickle stores the user's access and refresh tokens.
|
30 |
+
if os.path.exists(TOKEN_PICKLE_FILE):
|
31 |
+
with open(TOKEN_PICKLE_FILE, 'rb') as token:
|
32 |
+
creds = pickle.load(token)
|
33 |
+
|
34 |
+
# If there are no (valid) credentials available, let the user log in.
|
35 |
+
if not creds or not creds.valid:
|
36 |
+
if creds and creds.expired and creds.refresh_token:
|
37 |
+
creds.refresh(Request())
|
38 |
+
else:
|
39 |
+
if not CLIENT_ID or not CLIENT_SECRET:
|
40 |
+
raise ValueError("GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET environment variables are required")
|
41 |
+
|
42 |
+
# Use client_config dictionary instead of a client_secret.json file
|
43 |
+
client_config = {
|
44 |
+
"installed": {
|
45 |
+
"client_id": CLIENT_ID,
|
46 |
+
"client_secret": CLIENT_SECRET,
|
47 |
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
48 |
+
"token_uri": "https://oauth2.googleapis.com/token",
|
49 |
+
"redirect_uris": ["http://localhost"]
|
50 |
+
}
|
51 |
+
}
|
52 |
+
flow = InstalledAppFlow.from_client_config(client_config, SCOPES)
|
53 |
+
creds = flow.run_local_server(port=0)
|
54 |
+
|
55 |
+
# Save the credentials for the next run
|
56 |
+
with open(TOKEN_PICKLE_FILE, 'wb') as token:
|
57 |
+
pickle.dump(creds, token)
|
58 |
+
|
59 |
+
return build('drive', 'v3', credentials=creds)
|
60 |
+
|
61 |
+
def upload_file_to_drive(service, file_path=None, file_data=None, filename=None, folder_id=None, mimetype='application/octet-stream'):
|
62 |
+
"""
|
63 |
+
Uploads a file to a specific folder in Google Drive.
|
64 |
+
|
65 |
+
Args:
|
66 |
+
service: Google Drive service object
|
67 |
+
file_path: Path to local file (for file uploads)
|
68 |
+
file_data: Bytes data (for in-memory uploads)
|
69 |
+
filename: Name for the file in Drive
|
70 |
+
folder_id: ID of the target folder
|
71 |
+
mimetype: MIME type of the file
|
72 |
+
|
73 |
+
Returns:
|
74 |
+
dict: File information (id, webViewLink) or None if failed
|
75 |
+
"""
|
76 |
+
try:
|
77 |
+
if file_path and os.path.exists(file_path):
|
78 |
+
# Upload from local file
|
79 |
+
if not filename:
|
80 |
+
filename = os.path.basename(file_path)
|
81 |
+
media = MediaFileUpload(file_path, mimetype=mimetype, resumable=True)
|
82 |
+
logger.info(f"Uploading file from path: {file_path}")
|
83 |
+
elif file_data and filename:
|
84 |
+
# Upload from bytes data
|
85 |
+
file_io = io.BytesIO(file_data)
|
86 |
+
media = MediaIoBaseUpload(file_io, mimetype=mimetype, resumable=True)
|
87 |
+
logger.info(f"Uploading file from memory: {filename}")
|
88 |
+
else:
|
89 |
+
logger.error("Either file_path or (file_data + filename) must be provided")
|
90 |
+
return None
|
91 |
+
|
92 |
+
# Define the file's metadata
|
93 |
+
file_metadata = {
|
94 |
+
'name': filename,
|
95 |
+
'parents': [folder_id] if folder_id else []
|
96 |
+
}
|
97 |
+
|
98 |
+
logger.info(f"Uploading '{filename}' to Google Drive folder {folder_id}")
|
99 |
+
|
100 |
+
# Execute the upload request
|
101 |
+
file = service.files().create(
|
102 |
+
body=file_metadata,
|
103 |
+
media_body=media,
|
104 |
+
fields='id, webViewLink, name'
|
105 |
+
).execute()
|
106 |
+
|
107 |
+
logger.info(f"✅ File uploaded successfully!")
|
108 |
+
logger.info(f" File ID: {file.get('id')}")
|
109 |
+
logger.info(f" File Name: {file.get('name')}")
|
110 |
+
logger.info(f" View Link: {file.get('webViewLink')}")
|
111 |
+
|
112 |
+
return {
|
113 |
+
'id': file.get('id'),
|
114 |
+
'name': file.get('name'),
|
115 |
+
'webViewLink': file.get('webViewLink')
|
116 |
+
}
|
117 |
+
|
118 |
+
except Exception as e:
|
119 |
+
logger.error(f"Failed to upload file to Google Drive: {e}")
|
120 |
+
return None
|
121 |
+
|
122 |
+
def upload_excel_to_exports_folder(service, file_path=None, file_data=None, filename=None):
|
123 |
+
"""Upload Excel file to the exports folder."""
|
124 |
+
return upload_file_to_drive(
|
125 |
+
service,
|
126 |
+
file_path=file_path,
|
127 |
+
file_data=file_data,
|
128 |
+
filename=filename,
|
129 |
+
folder_id=EXPORTS_FOLDER_ID,
|
130 |
+
mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
131 |
+
)
|
132 |
+
|
133 |
+
def upload_image_to_images_folder(service, file_path=None, file_data=None, filename=None, mimetype='image/png'):
|
134 |
+
"""Upload image file to the images folder."""
|
135 |
+
return upload_file_to_drive(
|
136 |
+
service,
|
137 |
+
file_path=file_path,
|
138 |
+
file_data=file_data,
|
139 |
+
filename=filename,
|
140 |
+
folder_id=IMAGES_FOLDER_ID,
|
141 |
+
mimetype=mimetype
|
142 |
+
)
|
143 |
+
|
144 |
+
def list_files_in_folder(service, folder_id, max_results=100):
|
145 |
+
"""List files in a specific Google Drive folder."""
|
146 |
+
try:
|
147 |
+
query = f"'{folder_id}' in parents"
|
148 |
+
results = service.files().list(
|
149 |
+
q=query,
|
150 |
+
maxResults=max_results,
|
151 |
+
fields="files(id, name, size, createdTime, webViewLink)"
|
152 |
+
).execute()
|
153 |
+
|
154 |
+
files = results.get('files', [])
|
155 |
+
logger.info(f"Found {len(files)} files in folder {folder_id}")
|
156 |
+
return files
|
157 |
+
except Exception as e:
|
158 |
+
logger.error(f"Failed to list files in folder {folder_id}: {e}")
|
159 |
+
return []
|
160 |
+
|
161 |
+
if __name__ == '__main__':
|
162 |
+
# Test the Google Drive connection
|
163 |
+
try:
|
164 |
+
drive_service = get_drive_service()
|
165 |
+
logger.info("Google Drive service initialized successfully")
|
166 |
+
|
167 |
+
# List files in both folders to verify access
|
168 |
+
exports_files = list_files_in_folder(drive_service, EXPORTS_FOLDER_ID)
|
169 |
+
images_files = list_files_in_folder(drive_service, IMAGES_FOLDER_ID)
|
170 |
+
|
171 |
+
print(f"Exports folder contains {len(exports_files)} files")
|
172 |
+
print(f"Images folder contains {len(images_files)} files")
|
173 |
+
|
174 |
+
except Exception as e:
|
175 |
+
logger.error(f"Failed to initialize Google Drive: {e}")
|
requirements.txt
CHANGED
@@ -4,4 +4,6 @@ google-generativeai==0.8.0
|
|
4 |
pandas==2.1.4
|
5 |
openpyxl==3.1.2
|
6 |
Pillow==10.2.0
|
7 |
-
|
|
|
|
|
|
4 |
pandas==2.1.4
|
5 |
openpyxl==3.1.2
|
6 |
Pillow==10.2.0
|
7 |
+
google-auth==2.23.4
|
8 |
+
google-auth-oauthlib==1.1.0
|
9 |
+
google-api-python-client==2.108.0
|