Spaces:

prthm11
/

Scratch_Vision_Game

Runtime error

App Files Files Community

prthm11 committed on Jul 17

Commit

0b4bd44

verified ·

1 Parent(s): 6754f55

Upload app_main.py

Browse files

Files changed (1) hide show

app_main.py +148 -0

app_main.py ADDED Viewed

	@@ -0,0 +1,148 @@

+from flask import Flask, render_template, Response, flash, redirect, url_for, request, jsonify
+import cv2
+import numpy as np
+from unstructured.partition.pdf import partition_pdf
+import json, base64, io, os
+from PIL import Image, ImageEnhance, ImageDraw
+from imutils.perspective import four_point_transform
+from dotenv import load_dotenv
+import pytesseract
+from transformers import AutoProcessor, AutoModelForImageTextToText
+from langchain_community.document_loaders.image_captions import ImageCaptionLoader
+app = Flask(__name__)
+pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+poppler_path=r"C:\poppler-23.11.0\Library\bin"
+count = 0
+PDF_GET = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\scratch_crab.pdf"
+OUTPUT_FOLDER = "OUTPUTS"
+DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER,"DETECTED_IMAGE")
+IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
+JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")
+for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, JSON_FOLDER_PATH]:
+    os.makedirs(path, exist_ok=True)
+# Model Initialization
+smolvlm256m_processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-256M-Instruct")
+smolvlm256m_model = AutoModelForImageTextToText.from_pretrained("HuggingFaceTB/SmolVLM-256M-Instruct").to("cpu")
+# SmolVLM Image Captioning functioning
+def get_smolvlm_caption(image: Image.Image, prompt: str = "") -> str:
+    # Ensure exactly one <image> token
+    if "<image>" not in prompt:
+        prompt = f"<image> {prompt.strip()}"
+    num_image_tokens = prompt.count("<image>")
+    if num_image_tokens != 1:
+        raise ValueError(f"Prompt must contain exactly 1 <image> token. Found {num_image_tokens}")
+    inputs = smolvlm256m_processor(images=[image], text=[prompt], return_tensors="pt").to("cpu")
+    output_ids = smolvlm256m_model.generate(**inputs, max_new_tokens=100)
+    return smolvlm256m_processor.decode(output_ids[0], skip_special_tokens=True)
+# --- FUNCTION: Extract images from saved PDF ---
+def extract_images_from_pdf(pdf_path, output_json_path):
+    ''' Extract images from PDF and generate structured sprite JSON '''
+    pdf_filename = os.path.splitext(os.path.basename(pdf_path))[0]  # e.g., "scratch_crab"
+    pdf_dir_path = os.path.dirname(pdf_path).replace("/", "\\")
+    # Create subfolders
+    extracted_image_subdir = os.path.join(DETECTED_IMAGE_FOLDER_PATH, pdf_filename)
+    json_subdir = os.path.join(JSON_FOLDER_PATH, pdf_filename)
+    os.makedirs(extracted_image_subdir, exist_ok=True)
+    os.makedirs(json_subdir, exist_ok=True)
+    # Output paths
+    output_json_path = os.path.join(json_subdir, "extracted.json")
+    final_json_path = os.path.join(json_subdir, "extracted_sprites.json")
+    elements = partition_pdf(
+        filename=pdf_path,
+        strategy="hi_res",
+        extract_image_block_types=["Image"],
+        extract_image_block_to_payload=True,  # Set to True to get base64 in output
+    )
+    with open(output_json_path, "w") as f:
+        json.dump([element.to_dict() for element in elements], f, indent=4)
+    # Display extracted images
+    with open(output_json_path, 'r') as file:
+        file_elements = json.load(file)
+    # extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
+    # os.makedirs(extracted_images_dir, exist_ok=True)
+     # Prepare manipulated sprite JSON structure
+    manipulated_json = {}
+    # Final manipulated file (for captions)
+    final_json_path = output_json_path.replace(".json", "_sprites.json")
+    # If JSON already exists, load it and find the next available Sprite number
+    if os.path.exists(final_json_path):
+        with open(final_json_path, "r") as existing_file:
+            manipulated = json.load(existing_file)
+        # Determine the next available index (e.g., Sprite 4 if 1–3 already exist)
+        existing_keys = [int(k.replace("Sprite ", "")) for k in manipulated.keys()]
+        start_count = max(existing_keys, default=0) + 1
+    else:
+        start_count = 1
+    sprite_count = start_count
+    for i,element in enumerate(file_elements):
+        if "image_base64" in element["metadata"]:
+            image_data = base64.b64decode(element["metadata"]["image_base64"])
+            image = Image.open(io.BytesIO(image_data)).convert("RGB")
+            image.show(title=f"Extracted Image {i+1}")
+            image_path = os.path.join(extracted_image_subdir, f"Sprite_{i+1}.png")
+            image.save(image_path)
+            description = get_smolvlm_caption(image, prompt="Give a brief Description")
+            name = get_smolvlm_caption(image, prompt="give a short name/title of this Image.")
+            manipulated_json[f"Sprite {sprite_count}"] = {
+                "name": name,
+                "base64": element["metadata"]["image_base64"],
+                "file-path": pdf_dir_path,
+                "description":description
+            }
+            sprite_count += 1
+# Save manipulated JSON
+    with open(final_json_path, "w") as sprite_file:
+        json.dump(manipulated_json, sprite_file, indent=4)
+    print(f"✅ Manipulated sprite JSON saved: {final_json_path}")
+    return final_json_path, manipulated_json
+# API endpoint
+@app.route('/process_static_pdf', methods=['POST'])
+def process_static_pdf():
+    # Option 1: Use hardcoded static PDF
+    pdf_path = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\scratch_crab.pdf"
+    # Optional: Allow override via JSON request body
+    if request.json and "pdf_path" in request.json:
+        pdf_path = request.json["pdf_path"]
+    if not os.path.isfile(pdf_path):
+        return jsonify({"error": f"File not found: {pdf_path}"}), 400
+    # json_path = os.path.join(JSON_FOLDER_PATH, "extracted.json")
+    json_path = None
+    output_path, result = extract_images_from_pdf(pdf_path, json_path)
+    return jsonify({
+        "message": "✅ PDF processed successfully",
+        "output_json": output_path,
+        "sprites": result
+    })
+if __name__ == '__main__':
+    app.run(debug=True)