pdf_gemini / app.py
Sebbe33's picture
Update app.py
91b6e57 verified
raw
history blame
3.37 kB
import ast
import io
import os
import re

import streamlit as st
from google import genai
from google.genai import types
from pdf2image import convert_from_bytes
from PIL import Image, ImageDraw
# Constants
# Prompt sent to the model together with each rendered page image. It asks for
# text-region bounding boxes as a list of [xmin, ymin, xmax, ymax] lists with
# coordinates normalized to the 0-1 range, which is the format the parsing and
# drawing helpers below work with. The backslash after the opening quotes
# keeps the string from starting with a newline.
DETECTION_PROMPT = """\
Identify ALL text regions in this document. Return bounding boxes as a Python list of lists
in format [[xmin, ymin, xmax, ymax]] where coordinates are normalized between 0-1.
Only return the list, nothing else. Example:
[[0.05, 0.12, 0.25, 0.18], [0.30, 0.40, 0.50, 0.55]]
"""
# One non-negative decimal number: "1", "1.", "0.25" or ".5".
_BOX_NUMBER = r'(\d+(?:\.\d*)?|\.\d+)'

# A single "[a, b, c, d]" group, tolerant of whitespace around the numbers.
_BOX_PATTERN = re.compile(
    r'\[\s*' + r'\s*,\s*'.join([_BOX_NUMBER] * 4) + r'\s*\]'
)


def _is_box_list(value):
    """Return True if *value* is a sequence of four-number sequences."""
    if not isinstance(value, (list, tuple)):
        return False
    return all(
        isinstance(box, (list, tuple))
        and len(box) == 4
        and all(
            isinstance(v, (int, float)) and not isinstance(v, bool)
            for v in box
        )
        for box in value
    )


def parse_list_boxes(text):
    """Extract bounding boxes from the model's textual reply.

    The reply is first parsed as a Python literal with ``ast.literal_eval``,
    which only accepts literals and therefore cannot execute code embedded in
    the reply (unlike ``eval``). If the reply is not a clean list of
    four-number boxes (for example it is wrapped in a Markdown code fence,
    surrounded by prose, or is a single flat box), every ``[a, b, c, d]``
    group found in the text is extracted with a regular expression instead.

    Args:
        text: Raw reply text; may be ``None`` or empty.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` lists of floats. Empty when
        no box could be recovered.
    """
    if not isinstance(text, str) or not text.strip():
        return []

    try:
        parsed = ast.literal_eval(text.strip())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a pure literal; fall through to the regex extraction below.
        parsed = None

    if _is_box_list(parsed):
        return [[float(v) for v in box] for box in parsed]

    return [[float(v) for v in match] for match in _BOX_PATTERN.findall(text)]
def draw_bounding_boxes(image, boxes):
    """Draw normalized bounding boxes onto *image* and return it.

    Each box is read as ``[xmin, ymin, xmax, ymax]`` with coordinates given
    as fractions of the image size. Every coordinate is clamped to the 0-1
    range and scaled to pixels. The two values on each axis are then ordered
    so that a box supplied with swapped corners is still drawn instead of
    being rejected by ``ImageDraw.rectangle``.

    Args:
        image: Image to annotate; it is drawn on in place. Must expose
            ``size`` as ``(width, height)`` and be accepted by
            ``ImageDraw.Draw``.
        boxes: Sequence of four-element boxes. When empty or ``None`` the
            image is returned untouched.

    Returns:
        The same ``image`` object that was passed in.
    """
    if not boxes:
        return image

    draw = ImageDraw.Draw(image)
    width, height = image.size

    def clamp(value):
        # Keep stray coordinates inside the image bounds.
        return max(0.0, min(1.0, value))

    for box in boxes:
        try:
            x0, y0, x1, y1 = (clamp(box[i]) for i in range(4))
            # rectangle() requires x1 >= x0 and y1 >= y0, so order each axis.
            left, right = sorted((x0 * width, x1 * width))
            top, bottom = sorted((y0 * height, y1 * height))
            draw.rectangle(
                [left, top, right, bottom], outline="#00FF00", width=3
            )
        except Exception as e:
            # Best effort: report the malformed box and keep drawing the rest.
            st.error(f"Error drawing box: {str(e)}")
    return image
# Streamlit UI
# Flow: upload a PDF, rasterize each page, ask the model for text-region
# boxes, then show the original and annotated page side by side.
st.title("PDF Text Detection")
uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

if uploaded_file and st.button("Analyze"):
    with st.spinner("Processing..."):
        try:
            # Render every page of the uploaded PDF at 300 dpi.
            images = convert_from_bytes(uploaded_file.read(), dpi=300)
            # The API key is read from the KEY environment variable.
            client = genai.Client(api_key=os.getenv("KEY"))

            for idx, image in enumerate(images):
                with st.expander(f"Page {idx+1}", expanded=True):
                    # Handle failures per page so one bad page does not
                    # abort the remaining ones.
                    try:
                        # Encode the page as PNG bytes for the request.
                        img_byte_arr = io.BytesIO()
                        image.save(img_byte_arr, format='PNG')

                        # Get bounding boxes
                        response = client.models.generate_content(
                            model="gemini-2.0-flash-exp",
                            contents=[
                                DETECTION_PROMPT,
                                types.Part.from_bytes(
                                    data=img_byte_arr.getvalue(),
                                    mime_type="image/png",
                                ),
                            ],
                        )
                        # Guard against a reply that carries no text.
                        raw_text = response.text or ""

                        # Debug output. Shown inline rather than in a second
                        # expander, because Streamlit releases that forbid
                        # nested expanders raise on an expander inside the
                        # page expander.
                        st.caption("Raw API Response")
                        st.code(raw_text)

                        # Parse and draw on a copy so the original page
                        # stays clean for the side-by-side view.
                        boxes = parse_list_boxes(raw_text)
                        annotated = draw_bounding_boxes(image.copy(), boxes)

                        # Display
                        cols = st.columns(2)
                        cols[0].image(
                            image,
                            caption="Original",
                            use_column_width=True,
                        )
                        cols[1].image(
                            annotated,
                            caption=f"Detected {len(boxes)} text regions",
                            use_column_width=True,
                        )
                    except Exception as e:
                        st.error(f"Error on page {idx+1}: {str(e)}")
        except Exception as e:
            # Failures before any page is processed (PDF conversion, client
            # construction) end up here.
            st.error(f"Error: {str(e)}")