rongo1
committed on
Commit
·
87132f1
1
Parent(s):
c79be8a
feat: remove unnecessary stuff
Browse files- README.md +6 -6
- app.py +11 -10
- env.example +2 -2
- setup_hf_space.md +3 -3
README.md
CHANGED
@@ -11,12 +11,12 @@ pinned: false
|
|
11 |
|
12 |
# Business Card Data Extractor 💼
|
13 |
|
14 |
-
An AI-powered tool that extracts structured data from business card images using
|
15 |
|
16 |
## Features
|
17 |
|
18 |
- **Batch Processing**: Process multiple business cards at once (up to 5 per batch)
|
19 |
-
- **AI Model Selection**: Choose between
|
20 |
- **Google Drive Storage**: Automatic upload to organized Drive folders
|
21 |
- **Excel Export**: Get data in two formats:
|
22 |
- Current session results
|
@@ -28,7 +28,7 @@ An AI-powered tool that extracts structured data from business card images using
|
|
28 |
|
29 |
1. **Setup**: Complete the setup process below (one-time)
|
30 |
2. **Upload Images**: Select up to 5 business card images
|
31 |
-
3. **Choose Model**: Select
|
32 |
4. **Process**: Click "Extract Business Card Data"
|
33 |
5. **Access Files**: Download temporary copies or access permanent files via Google Drive links
|
34 |
|
@@ -45,7 +45,7 @@ An AI-powered tool that extracts structured data from business card images using
|
|
45 |
|
46 |
## Setup Instructions
|
47 |
|
48 |
-
### 1.
|
49 |
- Get your API key from: https://aistudio.google.com/
|
50 |
- Set as environment variable: `Gemini_API`
|
51 |
|
@@ -93,7 +93,7 @@ An AI-powered tool that extracts structured data from business card images using
|
|
93 |
|
94 |
2. **Set Environment Variables** in your deployment platform:
|
95 |
```bash
|
96 |
-
Gemini_API=
|
97 |
GOOGLE_CLIENT_ID=your_google_client_id
|
98 |
GOOGLE_CLIENT_SECRET=your_google_client_secret
|
99 |
GOOGLE_TOKEN_BASE64=your_base64_encoded_token
|
@@ -109,4 +109,4 @@ An AI-powered tool that extracts structured data from business card images using
|
|
109 |
- **Maximum File Size**: 10MB per image
|
110 |
- **Batch Processing**: Up to 5 cards per API call
|
111 |
- **Storage**: Automatic upload to Google Drive
|
112 |
-
- **Models**:
|
|
|
11 |
|
12 |
# Business Card Data Extractor 💼
|
13 |
|
14 |
+
An AI-powered tool that extracts structured data from business card images using advanced AI models. Upload business card images and get organized data exported to Excel files with automatic Google Drive storage.
|
15 |
|
16 |
## Features
|
17 |
|
18 |
- **Batch Processing**: Process multiple business cards at once (up to 5 per batch)
|
19 |
+
- **AI Model Selection**: Choose between Speed-Optimized AI (fast) or Accuracy-Optimized AI (high accuracy)
|
20 |
- **Google Drive Storage**: Automatic upload to organized Drive folders
|
21 |
- **Excel Export**: Get data in two formats:
|
22 |
- Current session results
|
|
|
28 |
|
29 |
1. **Setup**: Complete the setup process below (one-time)
|
30 |
2. **Upload Images**: Select up to 5 business card images
|
31 |
+
3. **Choose Model**: Select AI model (Speed-Optimized for fast processing, Accuracy-Optimized for best results)
|
32 |
4. **Process**: Click "Extract Business Card Data"
|
33 |
5. **Access Files**: Download temporary copies or access permanent files via Google Drive links
|
34 |
|
|
|
45 |
|
46 |
## Setup Instructions
|
47 |
|
48 |
+
### 1. AI API Key
|
49 |
- Get your API key from: https://aistudio.google.com/
|
50 |
- Set as environment variable: `Gemini_API`
|
51 |
|
|
|
93 |
|
94 |
2. **Set Environment Variables** in your deployment platform:
|
95 |
```bash
|
96 |
+
Gemini_API=your_ai_api_key
|
97 |
GOOGLE_CLIENT_ID=your_google_client_id
|
98 |
GOOGLE_CLIENT_SECRET=your_google_client_secret
|
99 |
GOOGLE_TOKEN_BASE64=your_base64_encoded_token
|
|
|
109 |
- **Maximum File Size**: 10MB per image
|
110 |
- **Batch Processing**: Up to 5 cards per API call
|
111 |
- **Storage**: Automatic upload to Google Drive
|
112 |
+
- **Models**: Speed-Optimized AI (fast processing) / Accuracy-Optimized AI (best accuracy)
|
app.py
CHANGED
@@ -34,16 +34,16 @@ logging.basicConfig(
|
|
34 |
)
|
35 |
logger = logging.getLogger(__name__)
|
36 |
|
37 |
-
# Configure
|
38 |
-
logger.info("Configuring
|
39 |
gemini_api_key = os.getenv("Gemini_API")
|
40 |
if not gemini_api_key:
|
41 |
logger.error("Gemini_API environment variable not found!")
|
42 |
-
logger.error("Please set the Gemini_API environment variable with your
|
43 |
raise ValueError("β Gemini_API environment variable is required. Please set it in your environment.")
|
44 |
|
45 |
genai.configure(api_key=gemini_api_key)
|
46 |
-
logger.info("
|
47 |
|
48 |
# Initialize Google Drive service
|
49 |
logger.info("Initializing Google Drive service")
|
@@ -119,7 +119,7 @@ def extract_business_card_data_batch(images, filenames, model_name="gemini-2.5-f
|
|
119 |
raise
|
120 |
|
121 |
# Configure model
|
122 |
-
logger.debug(f"Configuring
|
123 |
generation_config = {
|
124 |
"temperature": 0.1,
|
125 |
"response_mime_type": "application/json"
|
@@ -131,9 +131,9 @@ def extract_business_card_data_batch(images, filenames, model_name="gemini-2.5-f
|
|
131 |
generation_config=generation_config,
|
132 |
system_instruction=system_prompt
|
133 |
)
|
134 |
-
logger.debug("
|
135 |
except Exception as e:
|
136 |
-
logger.error(f"Failed to configure
|
137 |
raise
|
138 |
|
139 |
# Prepare multiple images for the model
|
@@ -578,7 +578,8 @@ def process_business_cards(images, model_name="gemini-2.5-flash", save_images=Tr
|
|
578 |
logger.info("Creating summary message")
|
579 |
num_batches = len(image_batches) if 'image_batches' in locals() else 1
|
580 |
summary = f"Successfully processed {len(all_data)} business card(s) in {num_batches} batch(es) of up to 5 cards.\n"
|
581 |
-
|
|
|
582 |
summary += f"β‘ API calls made: {num_batches} (instead of {len(all_data)})\n"
|
583 |
|
584 |
if save_images:
|
@@ -667,7 +668,7 @@ with gr.Blocks(title="Business Card Data Extractor") as demo:
|
|
667 |
)
|
668 |
|
669 |
model_selector = gr.Dropdown(
|
670 |
-
choices=["gemini-2.5-pro", "gemini-2.5-flash"],
|
671 |
value="gemini-2.5-pro",
|
672 |
label="AI Model Selection"
|
673 |
)
|
@@ -708,7 +709,7 @@ with gr.Blocks(title="Business Card Data Extractor") as demo:
|
|
708 |
gr.Markdown(
|
709 |
"""
|
710 |
## Features:
|
711 |
-
- π€ **Model Selection**: Choose between
|
712 |
- β‘ **Batch Processing**: Processes 5 cards per API call for efficiency
|
713 |
- π **Data Extraction**: Names, emails, phone numbers, addresses, and more
|
714 |
- π **Smart Combination**: Multiple emails/phones combined with commas
|
|
|
34 |
)
|
35 |
logger = logging.getLogger(__name__)
|
36 |
|
37 |
+
# Configure AI API
|
38 |
+
logger.info("Configuring AI API")
|
39 |
gemini_api_key = os.getenv("Gemini_API")
|
40 |
if not gemini_api_key:
|
41 |
logger.error("Gemini_API environment variable not found!")
|
42 |
+
logger.error("Please set the Gemini_API environment variable with your AI API key")
|
43 |
raise ValueError("β Gemini_API environment variable is required. Please set it in your environment.")
|
44 |
|
45 |
genai.configure(api_key=gemini_api_key)
|
46 |
+
logger.info("AI API configured successfully")
|
47 |
|
48 |
# Initialize Google Drive service
|
49 |
logger.info("Initializing Google Drive service")
|
|
|
119 |
raise
|
120 |
|
121 |
# Configure model
|
122 |
+
logger.debug(f"Configuring AI model: {model_name}")
|
123 |
generation_config = {
|
124 |
"temperature": 0.1,
|
125 |
"response_mime_type": "application/json"
|
|
|
131 |
generation_config=generation_config,
|
132 |
system_instruction=system_prompt
|
133 |
)
|
134 |
+
logger.debug("AI model configured successfully")
|
135 |
except Exception as e:
|
136 |
+
logger.error(f"Failed to configure AI model: {e}")
|
137 |
raise
|
138 |
|
139 |
# Prepare multiple images for the model
|
|
|
578 |
logger.info("Creating summary message")
|
579 |
num_batches = len(image_batches) if 'image_batches' in locals() else 1
|
580 |
summary = f"Successfully processed {len(all_data)} business card(s) in {num_batches} batch(es) of up to 5 cards.\n"
|
581 |
+
model_display = "Speed-Optimized AI" if "flash" in model_name else "Accuracy-Optimized AI"
|
582 |
+
summary += f"π€ AI Model used: {model_display}\n"
|
583 |
summary += f"β‘ API calls made: {num_batches} (instead of {len(all_data)})\n"
|
584 |
|
585 |
if save_images:
|
|
|
668 |
)
|
669 |
|
670 |
model_selector = gr.Dropdown(
|
671 |
+
choices=[("Accuracy-Optimized AI", "gemini-2.5-pro"), ("Speed-Optimized AI", "gemini-2.5-flash")],
|
672 |
value="gemini-2.5-pro",
|
673 |
label="AI Model Selection"
|
674 |
)
|
|
|
709 |
gr.Markdown(
|
710 |
"""
|
711 |
## Features:
|
712 |
+
- π€ **Model Selection**: Choose between Speed-Optimized AI (fast) or Accuracy-Optimized AI (accurate)
|
713 |
- β‘ **Batch Processing**: Processes 5 cards per API call for efficiency
|
714 |
- π **Data Extraction**: Names, emails, phone numbers, addresses, and more
|
715 |
- π **Smart Combination**: Multiple emails/phones combined with commas
|
env.example
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
# Environment Variables for Business Card Data Extractor
|
2 |
# Copy this file to .env and replace with your actual values
|
3 |
|
4 |
-
#
|
5 |
# Get your key from: https://aistudio.google.com/
|
6 |
# For deployment: Add this as an environment variable named "Gemini_API"
|
7 |
-
Gemini_API=
|
8 |
|
9 |
# Google Drive API Credentials (Required - for file storage)
|
10 |
# Get these from Google Cloud Console:
|
|
|
1 |
# Environment Variables for Business Card Data Extractor
|
2 |
# Copy this file to .env and replace with your actual values
|
3 |
|
4 |
+
# AI API Key (Required)
|
5 |
# Get your key from: https://aistudio.google.com/
|
6 |
# For deployment: Add this as an environment variable named "Gemini_API"
|
7 |
+
Gemini_API=your_ai_api_key_here
|
8 |
|
9 |
# Google Drive API Credentials (Required - for file storage)
|
10 |
# Get these from Google Cloud Console:
|
setup_hf_space.md
CHANGED
@@ -30,9 +30,9 @@ business_cards/.gitkeep
|
|
30 |
3. Click **Add a new secret**
|
31 |
4. Set:
|
32 |
- **Name**: `Gemini_API`
|
33 |
-
- **Value**: Your
|
34 |
|
35 |
-
### 4. Get Your
|
36 |
1. Go to [Google AI Studio](https://aistudio.google.com/)
|
37 |
2. Click "Get API key"
|
38 |
3. Create a new API key
|
@@ -51,7 +51,7 @@ business_cards/.gitkeep
|
|
51 |
✅ **Batch processing**: Efficient 5-cards-per-API-call processing
|
52 |
|
53 |
## Environment Variables Required
|
54 |
-
- `Gemini_API`: Your
|
55 |
|
56 |
## Notes
|
57 |
- The space will create necessary directories automatically
|
|
|
30 |
3. Click **Add a new secret**
|
31 |
4. Set:
|
32 |
- **Name**: `Gemini_API`
|
33 |
+
- **Value**: Your AI API key
|
34 |
|
35 |
+
### 4. Get Your AI API Key
|
36 |
1. Go to [Google AI Studio](https://aistudio.google.com/)
|
37 |
2. Click "Get API key"
|
38 |
3. Create a new API key
|
|
|
51 |
✅ **Batch processing**: Efficient 5-cards-per-API-call processing
|
52 |
|
53 |
## Environment Variables Required
|
54 |
+
- `Gemini_API`: Your AI API key (required)
|
55 |
|
56 |
## Notes
|
57 |
- The space will create necessary directories automatically
|