Sandy2636 committed on
Commit
ca25c3d
·
1 Parent(s): 0b430af

Add application file

Browse files
Files changed (1) hide show
  1. app.py +30 -2
app.py CHANGED
@@ -12,7 +12,35 @@ def process_passport(image):
12
  encoded_image = base64.b64encode(f.read()).decode("utf-8")
13
  data_url = f"data:image/jpeg;base64,{encoded_image}"
14
 
15
- prompt = "Extract all visible information from the front page of the passport. Output in JSON format."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  payload = {
18
  "model": IMAGE_MODEL,
@@ -35,7 +63,7 @@ def process_passport(image):
35
  response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=payload)
36
  result = response.json()
37
 
38
- return json.dumps(result, indent=2)
39
 
40
  except Exception as e:
41
  return f"⚠️ Error: {str(e)}"
 
12
  encoded_image = base64.b64encode(f.read()).decode("utf-8")
13
  data_url = f"data:image/jpeg;base64,{encoded_image}"
14
 
15
+ prompt = f"""You are an advanced OCR and information extraction AI.
16
+ Your task is to meticulously analyze this image and extract all relevant information.
17
+
18
+ Output Format Instructions:
19
+ Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
20
+ The JSON object should have the following top-level keys:
21
+ - "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
22
+ - "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
23
+ - For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
24
+ - For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
25
+ - For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
26
+ - "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
27
+ - "raw_mrz_lines": (array of strings) Each line of the MRZ.
28
+ - "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
29
+ If no MRZ, this field should be null.
30
+ - "multilingual_info": (array of objects or null) For any text segments not in English:
31
+ - Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
32
+ If no non-English text, this field can be null or an empty array.
33
+ - "full_text_ocr": (string) Concatenation of all text found on the document.
34
+
35
+ Extraction Guidelines:
36
+ 1. Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
37
+ 2. Extract all visible text, including small print, stamps, and handwritten annotations if legible.
38
+ 3. For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
39
+ 4. If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
40
+ 5. If the document is multi-page and only one page is provided, note this if apparent.
41
+
42
+ Ensure the entire output strictly adheres to the JSON format.
43
+ """
44
 
45
  payload = {
46
  "model": IMAGE_MODEL,
 
63
  response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=payload)
64
  result = response.json()
65
 
66
+ return json.dumps(result["choices"][0]["message"]["content"], indent=2)
67
 
68
  except Exception as e:
69
  return f"⚠️ Error: {str(e)}"