import json
import logging
import os
import sys
import time

# Make the parent directory and the working directory importable so that the
# `app` package resolves when this module is run from inside the project tree.
cur_dir = os.getcwd()
parent_dir = os.path.realpath(os.path.join(os.path.dirname(cur_dir)))
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
sys.path.append(cur_dir)
sys.path.insert(1, ".")

from PIL import Image

# Provides clean_json_response and create_connection. Kept as a star import
# (and kept after the stdlib imports) so any names it re-exports still win.
from app.utils import *

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# File extensions accepted by process_document.
_SUPPORTED_EXTENSIONS = frozenset(
    {'.pdf', '.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff'}
)

# Extraction prompt sent with every request. It never changes between
# attempts, so it is built once here instead of inside the retry loop.
_EXTRACTION_PROMPT = """
Analyze the document provided in the image. First, identify the type of document: 'ID', 'driving_license', or 'librea' (car ownership document).
Then, based on the document type, extract only the following specific information as key-value pairs.
The document may contain text in both English and Amharic. Extract the values corresponding to the specified labels.
If a field has an Amharic version distinct from the English one, include it with a suffix '_amharic' (e.g., full_name_amharic).
If a field is not found, use "N/A" as the value.

For 'ID':
- full_name (Extract from "Full Name" or equivalent Amharic label)
- date_of_birth (Extract from "Date of Birth" or equivalent)
- expiration_date (Extract from "Expiration Date" or equivalent)
- sex (Extract from "Sex" or equivalent)
- country_of_citizenship (Extract from "Country of Citizenship" or equivalent)
- fcn_number (Extract from "FCN Number" or equivalent)
- phone_number (Extract from "Phone Number" or equivalent)
- address (Extract from "Address" or equivalent)
- city (Extract from "City" or equivalent)
- woreda (Extract from "Woreda" or equivalent)

For 'driving_license':
- full_name (Extract from "Full Name" or "ስም")
- region(Extract form "ክልል")
- zone or city (Extract from "ዞን/ከተማ")
- nationality (Extracted from "ዜግነት")
- kebele (extracted from "ቀበሌ")
- license_number (Extract from "License Number" or "የፍቃድ ቁጥር")
- date_of_birth (Extract from "Date of Birth" or "የልደት ቀን")
- issue_date (Extract from "Date of Issue" or "የተሰጠበት ቀን")
- expiration_date (Extract from "Expiration Date" or "የሚያበቃበት ቀን")
- phone_number (Extract from "Phone Number" or "ስልክ ቁጥር" or "ስልክ")
- issuing_authority (Extract from "Issuing Authority" or "የኢትዮጵያ መንግስት የትራንስፖርት እና ትራፊክ ቁጥጥር ባለስልጣን")
- gender (Extract from "Gender" or "ፆታ")
- issuing_place (Extract from "Issuing Place" or "የተሰጠበት ቦታ")

For 'librea':
- owner_full_name (Extract from "Owner's Full Name" or "ስም")
- owner_address (Combine and extract from "Owner's Address" or "ክልል", "ከተማ", "ክ/ከተማ", "ቀበሌ/ወረዳ", "የቤት ቁጥር")
- gender (Extract from "Gender" or "ፆታ")
- nationality (Extract from "Nationality" or "ዜግነት")
- phone_number (Extract from "Phone Number" or "ስልክ")
- car type (Extract from "የተሽከርካሪዉ አይነት")
- registration_number (Extract from "Registration Number" or "የሠሌዳ ቁጥር")
- car_make (Extract from "Make" or "የተሰራበት ሀገር")
- car_model (Extract from "Model" or "የተሽ/ሞዴል")
- car_year (Extract from "Year" or "የተሰራበት አመት")
- engine_number (Extract from "Engine Number" or "የሞተር ቁጥር")
- vin_number (Extract from "Chassis Number" or "የሻንሲ ቁጥር")
- body_type (Extract from "Body Type" or "የአካሉ አይነት")
- color (Extract from "ቀለም")
- fuel type (Extract from " የነዳጅ አይነት")
- engine horsepower (Extract from "የሞተር የፈረስ ጉልበት")
- car horsepower (extract from " የተሽ/ጠቅ/ጉልበት")
- single weight (extract from " ነጠላ ክብደት")
- issue_date (Extract from "Issue Date" or "የተሰጠበት ቀን")
- expiration_date (Extract from "Expiration Date" or "የሚያበቃበት ቀን")
- motor capacity -cc (extracted from "የሞተር ችሎታ /ሲሲ/")
- cilinder capacity (extractd form "የሲሊንደርብዛት")
- Approved work (extract from " የተፈቀለት የስራ ፀባይ")

Output *only* valid JSON, with no additional text, comments, or explanations. Use this format:
{"document_type": "type_here", "extracted_data": {"key1": "value1", "key2": "value2", ...}}

Example output for an ID:
{"document_type": "ID", "extracted_data": {"full_name": "John Doe", "date_of_birth": "1990-01-01", "expiration_date": "2030-01-01", "sex": "Male", "country_of_citizenship": "Ethiopia", "fcn_number": "123456789", "phone_number": "+251912345678", "address": "123 Main St", "city": "Addis Ababa", "woreda": "Bole"}}

Example output for a driving_license:
{"document_type": "driving_license", "extracted_data": {"full_name": "Jane Smith", "full_name_amharic": "ጄን ስሚት", "license_number": "DL987654", "date_of_birth": "1985-03-15", "issue_date": "2020-05-10", "expiration_date": "2030-05-10", "class_type": "B", "restrictions": "None", "phone_number": "+251912345678", "blood_type": "O+", "address": "456 Oak St", "address_amharic": "456 ኦክ ስትሪት", "issuing_authority": "Ethiopian Transport Authority", "gender": "Female", "issuing_place": "Addis Ababa"}}

Example output for a librea:
{"document_type": "librea", "extracted_data": {"owner_full_name": "Alice Brown", "owner_full_name_amharic": "አሊስ ብራውን", "owner_address_amharic": "789 ፓይን ስትሪት, አዲስ አበባ", "phone_number": "+251987654321", "registration_number": "XYZ123", "car_make": "Toyota", "car_model": "Camry", "car_year": "2020", "engine_number": "ENG789", "vin_number": "1HGCM82633A123456", "body_type": "Sedan", "gender": "Female", "nationality": "Ethiopian", "passenger_capacity": "5", "loading_weight": "1500 kg"}}
"""


def _request_extraction(file_path, model):
    """Send the document and the extraction prompt to the model; return its raw text."""
    if os.path.splitext(file_path)[1].lower() == '.pdf':
        # PIL cannot decode PDFs, so the file is forwarded as an inline
        # application/pdf part instead of being opened as an image.
        # NOTE(review): assumes `model` is a google.generativeai
        # GenerativeModel, which accepts {"mime_type", "data"} parts - confirm.
        with open(file_path, 'rb') as pdf_file:
            document_part = {'mime_type': 'application/pdf', 'data': pdf_file.read()}
        response = model.generate_content([_EXTRACTION_PROMPT, document_part])
        response.resolve()
    else:
        # Keep the image open only for the duration of the request so the
        # underlying file handle is released afterwards.
        with Image.open(file_path) as pil_image:
            response = model.generate_content([_EXTRACTION_PROMPT, pil_image])
            response.resolve()
    return response.text


## processing engine for PDF or Image
def process_document_file(file_path: str, model, max_retries: int = 3, base_delay: int = 15) -> str:
    """
    Processes a PDF or image file using Gemini, with retry logic for quota errors.

    The model output is first passed through clean_json_response; if that
    yields nothing, the stripped raw text is returned provided it parses as
    JSON. Output that is not valid JSON triggers another attempt, and errors
    whose message contains "429" are retried after an exponentially growing
    delay (base_delay, 2 * base_delay, 4 * base_delay, ...).

    Args:
        file_path (str): Path to the document file (PDF or image).
        model: The GenerativeModel instance.
        max_retries (int): Maximum number of attempts.
        base_delay (int): Base delay for exponential backoff (seconds).

    Returns:
        str: Generated JSON content from the model, or a JSON object with a
        single "error" key when processing fails. Exceptions are not
        propagated to the caller.
    """
    for attempt in range(max_retries):
        is_last_attempt = attempt >= max_retries - 1
        try:
            raw_text = _request_extraction(file_path, model)

            # Log raw response for debugging
            logger.debug("Raw model response: %s", raw_text)
            response_text = raw_text.strip()

            # Try cleaning the response
            cleaned_response = clean_json_response(response_text)
            if cleaned_response:
                logger.info("Successfully processed file: %s", file_path)
                return cleaned_response

            # Cleaning produced nothing; accept the raw text only if it is valid JSON.
            try:
                json.loads(response_text)
            except json.JSONDecodeError:
                logger.error("Invalid JSON response from model")
                if not is_last_attempt:
                    # The same prompt is resent; only the attempt counter changes.
                    logger.warning(
                        "Retrying request... (Attempt %d/%d)", attempt + 1, max_retries
                    )
                    continue
                return json.dumps({"error": "Invalid JSON response from model after retries"})

            logger.info("Successfully processed file: %s", file_path)
            return response_text
        except Exception as e:
            # Quota errors are recognised by the HTTP status code in the message.
            if "429" in str(e) and not is_last_attempt:
                delay = base_delay * (2 ** attempt)
                logger.warning(
                    "Quota exceeded, retrying in %s seconds... (Attempt %d/%d)",
                    delay, attempt + 1, max_retries,
                )
                time.sleep(delay)
                continue
            logger.error("Error processing file: %s", e)
            return json.dumps({"error": f"An error occurred: {str(e)}"})

    # Only reachable when max_retries < 1, i.e. the loop body never ran.
    logger.error("No processing attempt was made (max_retries=%s)", max_retries)
    return json.dumps({"error": "An error occurred: max_retries must be at least 1"})


## Main function to process document
def process_document(file_path: str, api_key: str, model_name: str = 'gemini-1.5-flash') -> str:
    """
    Processes a PDF or image file and returns JSON output.

    The file extension is validated before the model connection is created,
    so unsupported files are rejected without contacting the API.

    Args:
        file_path (str): Path to the document file (PDF or image).
        api_key (str): API key for Gemini.
        model_name (str): Model name, default 'gemini-1.5-flash'.

    Returns:
        str: JSON string with document type and extracted data, or a JSON
        object with a single "error" key on failure.
    """
    try:
        ext = os.path.splitext(file_path)[1].lower()
        if ext not in _SUPPORTED_EXTENSIONS:
            logger.error("Unsupported file type")
            return json.dumps({"error": "Unsupported file type. Please provide a PDF or image file."})

        model = create_connection(api_key, model_name)
        return process_document_file(file_path, model)
    except Exception as e:
        logger.error("Error in process_document: %s", e)
        return json.dumps({"error": f"An error occurred: {str(e)}"})