Spaces:

mussie1212
/

melhiq_ocr

Running

App Files Files Community

melhiq_ocr / app /extraction.py

mussie1212

fix:first commit on the ocr

c53b292 about 1 month ago

raw

history blame

9.98 kB



	import json
	import os
	import sys
	import time

	# Extend sys.path at import time, presumably so that `from app.utils import *`
	# below resolves regardless of how the app is launched — TODO confirm.
	# NOTE: the base directory is the process's current working directory,
	# not the directory that contains this file.
	cur_dir = os.getcwd()
	# Resolved parent of the working directory. os.path.join() with a single
	# argument returns that argument unchanged, so it has no effect here.
	parent_dir = os.path.realpath(os.path.join(os.path.dirname(cur_dir)))
	# NOTE(review): indentation is missing in this copy of the file, so it is
	# unclear which of the following calls belong to the `if` body — confirm
	# against the original source before relying on this.
	if parent_dir not in sys.path:
	sys.path.append(parent_dir)
	sys.path.append(cur_dir)
	# Also put "." at index 1 of the module search path.
	sys.path.insert(1, ".")


	from PIL import Image
	import logging
	from app.utils import *

	# Configure the root logger (INFO level, timestamped records) and create
	# the logger used throughout this module.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)






	## Processing engine for a PDF or image document

	# Instructions sent to the model together with every document.
	# NOTE(review): the field lists and the example outputs below disagree
	# (e.g. the driving_license example shows class_type, restrictions and
	# blood_type, none of which are requested), and several requested field
	# names contain spaces ("car type", "fuel type"). The text is kept verbatim
	# because downstream consumers may depend on the keys the model currently
	# returns; tidy it up only together with those consumers.
	_EXTRACTION_PROMPT = """
	Analyze the document provided in the image.
	First, identify the type of document: 'ID', 'driving_license', or 'librea' (car ownership document).

	Then, based on the document type, extract only the following specific information as key-value pairs.
	The document may contain text in both English and Amharic. Extract the values corresponding to the specified labels.
	If a field has an Amharic version distinct from the English one, include it with a suffix '_amharic' (e.g., full_name_amharic).
	If a field is not found, use "N/A" as the value.

	For 'ID':
	- full_name (Extract from "Full Name" or equivalent Amharic label)
	- date_of_birth (Extract from "Date of Birth" or equivalent)
	- expiration_date (Extract from "Expiration Date" or equivalent)
	- sex (Extract from "Sex" or equivalent)
	- country_of_citizenship (Extract from "Country of Citizenship" or equivalent)
	- fcn_number (Extract from "FCN Number" or equivalent)
	- phone_number (Extract from "Phone Number" or equivalent)
	- address (Extract from "Address" or equivalent)
	- city (Extract from "City" or equivalent)
	- woreda (Extract from "Woreda" or equivalent)

	For 'driving_license':

	- full_name (Extract from "Full Name" or "ስም")
	- region(Extract form "ክልል")
	- zone or city (Extract from "ዞን/ከተማ")
	- nationality (Extracted from "ዜግነት")
	- kebele (extracted from "ቀበሌ")
	- license_number (Extract from "License Number" or "የፍቃድ ቁጥር")
	- date_of_birth (Extract from "Date of Birth" or "የልደት ቀን")
	- issue_date (Extract from "Date of Issue" or "የተሰጠበት ቀን")
	- expiration_date (Extract from "Expiration Date" or "የሚያበቃበት ቀን")
	- phone_number (Extract from "Phone Number" or "ስልክ ቁጥር" or "ስልክ")
	- issuing_authority (Extract from "Issuing Authority" or "የኢትዮጵያ መንግስት የትራንስፖርት እና ትራፊክ ቁጥጥር ባለስልጣን")
	- gender (Extract from "Gender" or "ፆታ")
	- issuing_place (Extract from "Issuing Place" or "የተሰጠበት ቦታ")


	For 'librea':

	- owner_full_name (Extract from "Owner's Full Name" or "ስም")
	- owner_address (Combine and extract from "Owner's Address" or "ክልል", "ከተማ", "ክ/ከተማ", "ቀበሌ/ወረዳ", "የቤት ቁጥር")
	- gender (Extract from "Gender" or "ፆታ")
	- nationality (Extract from "Nationality" or "ዜግነት")
	- phone_number (Extract from "Phone Number" or "ስልክ")

	- car type (Extract from "የተሽከርካሪዉ አይነት")
	- registration_number (Extract from "Registration Number" or "የሠሌዳ ቁጥር")
	- car_make (Extract from "Make" or "የተሰራበት ሀገር")
	- car_model (Extract from "Model" or "የተሽ/ሞዴል")
	- car_year (Extract from "Year" or "የተሰራበት አመት")
	- engine_number (Extract from "Engine Number" or "የሞተር ቁጥር")
	- vin_number (Extract from "Chassis Number" or "የሻንሲ ቁጥር")
	- body_type (Extract from "Body Type" or "የአካሉ አይነት")
	- color (Extract from "ቀለም")
	- fuel type (Extract from " የነዳጅ አይነት")
	- engine horsepower (Extract from "የሞተር የፈረስ ጉልበት")
	- car horsepower (extract from " የተሽ/ጠቅ/ጉልበት")
	- single weight (extract from " ነጠላ ክብደት")
	- issue_date (Extract from "Issue Date" or "የተሰጠበት ቀን")
	- expiration_date (Extract from "Expiration Date" or "የሚያበቃበት ቀን")
	- motor capacity -cc (extracted from "የሞተር ችሎታ /ሲሲ/")
	- cilinder capacity (extractd form "የሲሊንደርብዛት")
	- Approved work (extract from " የተፈቀለት የስራ ፀባይ")


	Output only valid JSON, with no additional text, comments, or explanations. Use this format:
	{"document_type": "type_here", "extracted_data": {"key1": "value1", "key2": "value2", ...}}

	Example output for an ID:
	{"document_type": "ID", "extracted_data": {"full_name": "John Doe", "date_of_birth": "1990-01-01", "expiration_date": "2030-01-01", "sex": "Male", "country_of_citizenship": "Ethiopia", "fcn_number": "123456789", "phone_number": "+251912345678", "address": "123 Main St", "city": "Addis Ababa", "woreda": "Bole"}}

	Example output for a driving_license:
	{"document_type": "driving_license", "extracted_data": {"full_name": "Jane Smith", "full_name_amharic": "ጄን ስሚት", "license_number": "DL987654", "date_of_birth": "1985-03-15", "issue_date": "2020-05-10", "expiration_date": "2030-05-10", "class_type": "B", "restrictions": "None", "phone_number": "+251912345678", "blood_type": "O+", "address": "456 Oak St", "address_amharic": "456 ኦክ ስትሪት", "issuing_authority": "Ethiopian Transport Authority", "gender": "Female", "issuing_place": "Addis Ababa"}}

	Example output for a librea:
	{"document_type": "librea", "extracted_data": {"owner_full_name": "Alice Brown", "owner_full_name_amharic": "አሊስ ብራውን", "owner_address_amharic": "789 ፓይን ስትሪት, አዲስ አበባ", "phone_number": "+251987654321", "registration_number": "XYZ123", "car_make": "Toyota", "car_model": "Camry", "car_year": "2020", "engine_number": "ENG789", "vin_number": "1HGCM82633A123456", "body_type": "Sedan", "gender": "Female", "nationality": "Ethiopian", "passenger_capacity": "5", "loading_weight": "1500 kg"}}

	"""


	def _request_extraction(file_path, model):
	    """Send the extraction prompt plus the document to the model; return the raw reply text."""
	    if os.path.splitext(file_path)[1].lower() == ".pdf":
	        # Pillow cannot read PDF files, so the PDF bytes are sent inline.
	        # NOTE(review): assumes `model` is a google-generativeai
	        # GenerativeModel, which accepts {"mime_type", "data"} blob parts —
	        # confirm against create_connection().
	        with open(file_path, "rb") as pdf_file:
	            document_part = {"mime_type": "application/pdf", "data": pdf_file.read()}
	        response = model.generate_content([_EXTRACTION_PROMPT, document_part])
	    else:
	        # The context manager closes the image file once the request has
	        # been made, instead of leaving the handle open.
	        with Image.open(file_path) as pil_image:
	            response = model.generate_content([_EXTRACTION_PROMPT, pil_image])
	    response.resolve()
	    return response.text


	def process_document_file(file_path, model, max_retries=3, base_delay=15):
	    """
	    Processes a PDF or image file using Gemini, with retry logic.

	    The request is retried when the model reply is not valid JSON
	    (immediately) and when the raised error text contains "429"
	    (after an exponentially growing delay). Any other error is
	    reported straight away as an error JSON string.

	    Args:
	        file_path (str): Path to the document file (PDF or image).
	        model: The GenerativeModel instance.
	        max_retries (int): Maximum number of attempts.
	        base_delay (int): Base delay for exponential backoff (seconds).

	    Returns:
	        str: Generated JSON content from the model, or a JSON object
	        with a single "error" key when processing failed.
	    """
	    for attempt in range(max_retries):
	        is_last_attempt = attempt >= max_retries - 1
	        try:
	            raw_text = _request_extraction(file_path, model)

	            # Log raw response for debugging
	            logger.debug(f"Raw model response: {raw_text}")
	            stripped_text = raw_text.strip()

	            # Prefer the cleaned response whenever the helper yields one.
	            cleaned_response = clean_json_response(stripped_text)
	            if cleaned_response:
	                logger.info(f"Successfully processed file: {file_path}")
	                return cleaned_response

	            # Otherwise accept the raw text only if it parses as JSON.
	            try:
	                json.loads(stripped_text)
	            except json.JSONDecodeError:
	                logger.error("Invalid JSON response from model")
	                if not is_last_attempt:
	                    logger.warning(f"Retrying request... (Attempt {attempt + 1}/{max_retries})")
	                    continue
	                return json.dumps({"error": "Invalid JSON response from model after retries"})

	            logger.info(f"Successfully processed file: {file_path}")
	            return stripped_text

	        except Exception as e:
	            # Quota errors are recognised by the "429" status code appearing
	            # in the exception text; back off before trying again.
	            if "429" in str(e) and not is_last_attempt:
	                delay = base_delay * (2 ** attempt)
	                logger.warning(f"Quota exceeded, retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
	                time.sleep(delay)
	                continue
	            logger.error(f"Error processing file: {str(e)}")
	            return json.dumps({"error": f"An error occurred: {str(e)}"})

	    # Only reachable when max_retries < 1: keep the "always returns a JSON
	    # string" contract instead of silently returning None.
	    logger.error("No processing attempt was made (max_retries must be at least 1)")
	    return json.dumps({"error": "No processing attempt was made: max_retries must be at least 1"})



	## Main function to process document

	# File extensions (lower-cased, including the dot) that are handed to
	# process_document_file; anything else is rejected up front.
	_SUPPORTED_EXTENSIONS = frozenset(
	    {'.pdf', '.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff'}
	)


	def process_document(file_path, api_key, model_name='gemini-1.5-flash'):
	    """
	    Processes a PDF or image file and returns JSON output.

	    The file extension is checked before the model connection is
	    created, so an unsupported file is rejected without touching the
	    API. Any exception raised along the way is logged and converted
	    into an error JSON string instead of propagating to the caller.

	    Args:
	        file_path (str): Path to the document file (PDF or image).
	        api_key (str): API key for Gemini.
	        model_name (str): Model name, default 'gemini-1.5-flash'.

	    Returns:
	        str: JSON string with document type and extracted data, or a
	        JSON object with a single "error" key on failure.
	    """
	    try:
	        ext = os.path.splitext(file_path)[1].lower()
	        if ext not in _SUPPORTED_EXTENSIONS:
	            logger.error("Unsupported file type")
	            return json.dumps({"error": "Unsupported file type. Please provide a PDF or image file."})

	        model = create_connection(api_key, model_name)
	        return process_document_file(file_path, model)
	    except Exception as e:
	        logger.error(f"Error in process_document: {str(e)}")
	        return json.dumps({"error": f"An error occurred: {str(e)}"})