Spaces:
Running
Running
import json
import os
import sys
import time
cur_dir = os.getcwd() | |
parent_dir = os.path.realpath(os.path.join(os.path.dirname(cur_dir))) | |
if parent_dir not in sys.path: | |
sys.path.append(parent_dir) | |
sys.path.append(cur_dir) | |
sys.path.insert(1, ".") | |
from PIL import Image | |
import logging | |
from app.utils import * | |
# Set up logging | |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
logger = logging.getLogger(__name__) | |
## processing engine for PDF or Image | |
def process_document_file(file_path, model, max_retries=3, base_delay=15):
    """
    Extract structured fields from a document image using a generative model.

    The file is opened with PIL and sent, together with a fixed extraction
    prompt, to ``model.generate_content``. The reply is first passed through
    ``clean_json_response`` (defined outside this block); if that yields a
    truthy value it is returned as-is. Otherwise the stripped raw reply is
    validated with ``json.loads`` and returned when it parses.

    Retry behaviour:
      * A reply that is not valid JSON is retried immediately with the same
        prompt until ``max_retries`` attempts have been made.
      * An exception whose text contains "429" is retried after sleeping
        ``base_delay * 2 ** attempt`` seconds.
      * Any other exception ends processing with an error payload.

    NOTE(review): the file is decoded only through ``PIL.Image.open``. PDF
    files are expected to fail there and come back as an error payload;
    confirm whether PDF support is actually required.

    Args:
        file_path (str): Path to the document file.
        model: Object exposing ``generate_content(list)`` whose result has
            ``resolve()`` and ``text`` (presumably a Gemini GenerativeModel).
        max_retries (int): Total number of attempts; must be at least 1.
        base_delay (int): Base delay in seconds for exponential backoff on
            quota ("429") errors.

    Returns:
        str: The JSON produced by the model on success (whatever
        ``clean_json_response`` returns when it is truthy, otherwise the
        stripped raw reply), or a JSON object string with an ``"error"`` key
        on failure.
    """
    if max_retries < 1:
        # range(max_retries) would be empty, and the function would fall off
        # the end and return None instead of the JSON string callers expect.
        logger.error("max_retries must be at least 1")
        return json.dumps({"error": "max_retries must be at least 1"})

    # The prompt does not depend on the attempt, so it is built once.
    prompt = """
    Analyze the document provided in the image.
    First, identify the type of document: 'ID', 'driving_license', or 'librea' (car ownership document).
    Then, based on the document type, extract only the following specific information as key-value pairs.
    The document may contain text in both English and Amharic. Extract the values corresponding to the specified labels.
    If a field has an Amharic version distinct from the English one, include it with a suffix '_amharic' (e.g., full_name_amharic).
    If a field is not found, use "N/A" as the value.
    For 'ID':
    - full_name (Extract from "Full Name" or equivalent Amharic label)
    - date_of_birth (Extract from "Date of Birth" or equivalent)
    - expiration_date (Extract from "Expiration Date" or equivalent)
    - sex (Extract from "Sex" or equivalent)
    - country_of_citizenship (Extract from "Country of Citizenship" or equivalent)
    - fcn_number (Extract from "FCN Number" or equivalent)
    - phone_number (Extract from "Phone Number" or equivalent)
    - address (Extract from "Address" or equivalent)
    - city (Extract from "City" or equivalent)
    - woreda (Extract from "Woreda" or equivalent)
    For 'driving_license':
    - full_name (Extract from "Full Name" or "ስም")
    - region(Extract form "ክልል")
    - zone or city (Extract from "ዞን/ከተማ")
    - nationality (Extracted from "ዜግነት")
    - kebele (extracted from "ቀበሌ")
    - license_number (Extract from "License Number" or "የፍቃድ ቁጥር")
    - date_of_birth (Extract from "Date of Birth" or "የልደት ቀን")
    - issue_date (Extract from "Date of Issue" or "የተሰጠበት ቀን")
    - expiration_date (Extract from "Expiration Date" or "የሚያበቃበት ቀን")
    - phone_number (Extract from "Phone Number" or "ስልክ ቁጥር" or "ስልክ")
    - issuing_authority (Extract from "Issuing Authority" or "የኢትዮጵያ መንግስት የትራንስፖርት እና ትራፊክ ቁጥጥር ባለስልጣን")
    - gender (Extract from "Gender" or "ፆታ")
    - issuing_place (Extract from "Issuing Place" or "የተሰጠበት ቦታ")
    For 'librea':
    - owner_full_name (Extract from "Owner's Full Name" or "ስም")
    - owner_address (Combine and extract from "Owner's Address" or "ክልል", "ከተማ", "ክ/ከተማ", "ቀበሌ/ወረዳ", "የቤት ቁጥር")
    - gender (Extract from "Gender" or "ፆታ")
    - nationality (Extract from "Nationality" or "ዜግነት")
    - phone_number (Extract from "Phone Number" or "ስልክ")
    - car type (Extract from "የተሽከርካሪዉ አይነት")
    - registration_number (Extract from "Registration Number" or "የሠሌዳ ቁጥር")
    - car_make (Extract from "Make" or "የተሰራበት ሀገር")
    - car_model (Extract from "Model" or "የተሽ/ሞዴል")
    - car_year (Extract from "Year" or "የተሰራበት አመት")
    - engine_number (Extract from "Engine Number" or "የሞተር ቁጥር")
    - vin_number (Extract from "Chassis Number" or "የሻንሲ ቁጥር")
    - body_type (Extract from "Body Type" or "የአካሉ አይነት")
    - color (Extract from "ቀለም")
    - fuel type (Extract from " የነዳጅ አይነት")
    - engine horsepower (Extract from "የሞተር የፈረስ ጉልበት")
    - car horsepower (extract from " የተሽ/ጠቅ/ጉልበት")
    - single weight (extract from " ነጠላ ክብደት")
    - issue_date (Extract from "Issue Date" or "የተሰጠበት ቀን")
    - expiration_date (Extract from "Expiration Date" or "የሚያበቃበት ቀን")
    - motor capacity -cc (extracted from "የሞተር ችሎታ /ሲሲ/")
    - cilinder capacity (extractd form "የሲሊንደርብዛት")
    - Approved work (extract from " የተፈቀለት የስራ ፀባይ")
    Output *only* valid JSON, with no additional text, comments, or explanations. Use this format:
    {"document_type": "type_here", "extracted_data": {"key1": "value1", "key2": "value2", ...}}
    Example output for an ID:
    {"document_type": "ID", "extracted_data": {"full_name": "John Doe", "date_of_birth": "1990-01-01", "expiration_date": "2030-01-01", "sex": "Male", "country_of_citizenship": "Ethiopia", "fcn_number": "123456789", "phone_number": "+251912345678", "address": "123 Main St", "city": "Addis Ababa", "woreda": "Bole"}}
    Example output for a driving_license:
    {"document_type": "driving_license", "extracted_data": {"full_name": "Jane Smith", "full_name_amharic": "ጄን ስሚት", "license_number": "DL987654", "date_of_birth": "1985-03-15", "issue_date": "2020-05-10", "expiration_date": "2030-05-10", "class_type": "B", "restrictions": "None", "phone_number": "+251912345678", "blood_type": "O+", "address": "456 Oak St", "address_amharic": "456 ኦክ ስትሪት", "issuing_authority": "Ethiopian Transport Authority", "gender": "Female", "issuing_place": "Addis Ababa"}}
    Example output for a librea:
    {"document_type": "librea", "extracted_data": {"owner_full_name": "Alice Brown", "owner_full_name_amharic": "አሊስ ብራውን", "owner_address_amharic": "789 ፓይን ስትሪት, አዲስ አበባ", "phone_number": "+251987654321", "registration_number": "XYZ123", "car_make": "Toyota", "car_model": "Camry", "car_year": "2020", "engine_number": "ENG789", "vin_number": "1HGCM82633A123456", "body_type": "Sedan", "gender": "Female", "nationality": "Ethiopian", "passenger_capacity": "5", "loading_weight": "1500 kg"}}
    """

    for attempt in range(max_retries):
        attempts_remain = attempt < max_retries - 1
        try:
            # The context manager releases the underlying file handle even
            # when the model call raises.
            with Image.open(file_path) as pil_image:
                response = model.generate_content([prompt, pil_image])
                response.resolve()

            # Log raw response for debugging
            logger.debug(f"Raw model response: {response.text}")
            raw_text = response.text.strip()

            # Prefer the cleaned form when the helper can produce one.
            cleaned_response = clean_json_response(raw_text)
            if cleaned_response:
                logger.info(f"Successfully processed file: {file_path}")
                return cleaned_response

            # Fall back to validating the raw reply as JSON.
            try:
                json.loads(raw_text)
            except json.JSONDecodeError:
                logger.error("Invalid JSON response from model")
                if attempts_remain:
                    # NOTE(review): the same prompt is re-sent on retry; the
                    # "simplified prompt" wording is kept for log compatibility.
                    logger.warning(f"Retrying with simplified prompt... (Attempt {attempt + 1}/{max_retries})")
                    continue
                return json.dumps({"error": "Invalid JSON response from model after retries"})

            logger.info(f"Successfully processed file: {file_path}")
            return raw_text
        except Exception as e:
            # Quota errors are recognised by "429" in the exception text and
            # retried with exponential backoff; anything else is terminal.
            if "429" in str(e) and attempts_remain:
                delay = base_delay * (2 ** attempt)
                logger.warning(f"Quota exceeded, retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
                time.sleep(delay)
                continue
            logger.error(f"Error processing file: {str(e)}")
            return json.dumps({"error": f"An error occurred: {str(e)}"})
## Main function to process document | |
def process_document(file_path, api_key, model_name='gemini-1.5-flash'):
    """
    Validate a document path, connect to the model and run the extraction.

    The file extension is checked before any connection is made, so an
    unsupported file is rejected without calling ``create_connection`` and
    a connection failure cannot mask the "unsupported file type" error.

    NOTE(review): '.pdf' is accepted here for backward compatibility, but
    ``process_document_file`` opens the file with ``PIL.Image.open``, which
    is expected to reject PDFs; confirm whether PDF input is really supported.

    Args:
        file_path (str): Path to the document file.
        api_key (str): API key passed to ``create_connection``.
        model_name (str): Model name passed to ``create_connection``,
            default 'gemini-1.5-flash'.

    Returns:
        str: Whatever ``process_document_file`` returns for a supported
        file, or a JSON object string with an ``"error"`` key when the
        extension is unsupported or an exception is raised.
    """
    supported_extensions = {'.pdf', '.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff'}
    try:
        # Extension comparison is case-insensitive.
        ext = os.path.splitext(file_path)[1].lower()
        if ext not in supported_extensions:
            logger.error("Unsupported file type")
            return json.dumps({"error": "Unsupported file type. Please provide a PDF or image file."})

        model = create_connection(api_key, model_name)
        return process_document_file(file_path, model)
    except Exception as e:
        logger.error(f"Error in process_document: {str(e)}")
        return json.dumps({"error": f"An error occurred: {str(e)}"})