Spaces:

SauravCh11
/

PassportOCR

Running

App Files Community

Sandy2636 committed on May 10

Commit

ca25c3d

1 Parent(s): 0b430af

Add application file

Browse files

Files changed (1) hide show

app.py +30 -2

app.py CHANGED Viewed

@@ -12,7 +12,35 @@ def process_passport(image):
             encoded_image = base64.b64encode(f.read()).decode("utf-8")
         data_url = f"data:image/jpeg;base64,{encoded_image}"
-        prompt = "Extract all visible information from the front page of the passport. Output in JSON format."
         payload = {
             "model": IMAGE_MODEL,
@@ -35,7 +63,7 @@ def process_passport(image):
         response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=payload)
         result = response.json()
-        return json.dumps(result, indent=2)
     except Exception as e:
         return f"⚠️ Error: {str(e)}"

             encoded_image = base64.b64encode(f.read()).decode("utf-8")
         data_url = f"data:image/jpeg;base64,{encoded_image}"
+        prompt = f"""You are an advanced OCR and information extraction AI.
+        Your task is to meticulously analyze this image and extract all relevant information.
+        Output Format Instructions:
+        Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
+        The JSON object should have the following top-level keys:
+        - "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
+        - "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
+            - For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
+            - For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
+            - For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
+        - "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
+            - "raw_mrz_lines": (array of strings) Each line of the MRZ.
+            - "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
+            If no MRZ, this field should be null.
+        - "multilingual_info": (array of objects or null) For any text segments not in English:
+            - Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
+            If no non-English text, this field can be null or an empty array.
+        - "full_text_ocr": (string) Concatenation of all text found on the document.
+        Extraction Guidelines:
+        1.  Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
+        2.  Extract all visible text, including small print, stamps, and handwritten annotations if legible.
+        3.  For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
+        4.  If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
+        5.  If the document is multi-page and only one page is provided, note this if apparent.
+        Ensure the entire output strictly adheres to the JSON format.
+        """
         payload = {
             "model": IMAGE_MODEL,
         response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=payload)
         result = response.json()
+        return json.dumps(result["choices"][0]["message"]["content"], indent=2)
     except Exception as e:
         return f"⚠️ Error: {str(e)}"