pdf_gemini

Sleeping

App Files Files Community

pdf_gemini / app.py

Sebbe33

Update app.py

91b6e57 verified 10 months ago

raw

history blame

3.37 kB

	import os
	import re
	import io
	import streamlit as st
	from PIL import Image, ImageDraw
	from google import genai
	from google.genai import types
	from pdf2image import convert_from_bytes

	# Constants
	# Prompt sent to the model together with each rendered page image. It asks for
	# a bare Python-style list of [xmin, ymin, xmax, ymax] boxes whose coordinates
	# are normalized to the 0-1 range; parse_list_boxes and draw_bounding_boxes
	# consume the reply in that format.
	DETECTION_PROMPT = """\
	Identify ALL text regions in this document. Return bounding boxes as a Python list of lists
	in format [[xmin, ymin, xmax, ymax]] where coordinates are normalized between 0-1.
	Only return the list, nothing else. Example:
	[[0.05, 0.12, 0.25, 0.18], [0.30, 0.40, 0.50, 0.55]]
	"""

	def parse_list_boxes(text):
	    """Extract bounding boxes from the model's text reply.

	    The reply is untrusted text, so it is never executed. The outermost
	    bracketed span is parsed as a Python literal first; if that fails or
	    yields no usable box, a tolerant regex scan picks up every
	    ``[a, b, c, d]`` group found anywhere in the text (for example inside
	    Markdown code fences or surrounding prose).

	    Args:
	        text: Raw reply text. ``None`` and empty strings are accepted.

	    Returns:
	        A list of ``[xmin, ymin, xmax, ymax]`` lists of floats, in the order
	        they appear in the reply. Empty when no box could be recovered.
	    """
	    # Local import keeps this function self-contained; ast is stdlib.
	    import ast

	    if not text:
	        return []

	    def as_box(candidate):
	        """Return candidate as four floats, or None if it is not a box."""
	        if not isinstance(candidate, (list, tuple)) or len(candidate) != 4:
	            return None
	        # bool is a subclass of int; True/False are not coordinates.
	        if any(
	            isinstance(v, bool) or not isinstance(v, (int, float))
	            for v in candidate
	        ):
	            return None
	        try:
	            return [float(v) for v in candidate]
	        except OverflowError:
	            return None

	    # SECURITY: this used to call eval() on the model output, which lets a
	    # crafted reply run arbitrary code. ast.literal_eval only accepts
	    # literals (numbers, lists, tuples, dicts, ...), so nothing is executed.
	    start, end = text.find("["), text.rfind("]")
	    if start != -1 and end > start:
	        try:
	            parsed = ast.literal_eval(text[start:end + 1])
	        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	            parsed = None

	        if isinstance(parsed, (list, tuple)):
	            # A reply holding one flat box is wrapped so callers always
	            # receive a list of boxes.
	            single = as_box(parsed)
	            if single is not None:
	                return [single]
	            boxes = [box for box in map(as_box, parsed) if box is not None]
	            # An explicitly empty list is a valid "nothing found" answer.
	            if boxes or not parsed:
	                return boxes

	    # Fallback: scan for four comma-separated numbers in brackets, with any
	    # amount of whitespace. The number pattern allows a single decimal point
	    # only, so float() below cannot fail on input such as "1.2.3".
	    number = r"(-?(?:\d+\.?\d*|\.\d+))"
	    pattern = (
	        r"\[\s*" + number + r"\s*,\s*" + number + r"\s*,\s*"
	        + number + r"\s*,\s*" + number + r"\s*\]"
	    )
	    return [[float(v) for v in match] for match in re.findall(pattern, text)]

	def draw_bounding_boxes(image, boxes):
	    """Outline each normalized box on ``image`` in green and return the image.

	    Drawing happens in place on the image that is passed in; callers that
	    need the original untouched should pass a copy.

	    Args:
	        image: Image to draw on; it must expose ``size`` as (width, height)
	            and be accepted by ``ImageDraw.Draw``.
	        boxes: Iterable of ``[xmin, ymin, xmax, ymax]`` with coordinates
	            normalized to 0-1. ``None`` or an empty list draws nothing.

	    Returns:
	        The same ``image`` object that was passed in.
	    """
	    if not boxes:
	        return image

	    draw = ImageDraw.Draw(image)
	    width, height = image.size

	    def clamp(value):
	        """Limit a normalized coordinate to the 0-1 range."""
	        return max(0.0, min(1.0, value))

	    for box in boxes:
	        try:
	            # Only the first four entries are used; extras are ignored.
	            x0, y0, x1, y1 = (clamp(v) for v in box[:4])

	            # The model may return corners in swapped order. Sort each axis
	            # so the rectangle is always (left, top, right, bottom); recent
	            # Pillow releases raise ValueError when x1 < x0 or y1 < y0.
	            left, right = sorted((x0 * width, x1 * width))
	            top, bottom = sorted((y0 * height, y1 * height))

	            draw.rectangle([left, top, right, bottom], outline="#00FF00", width=3)
	        except Exception as e:
	            # Best effort: report the bad box in the UI and keep drawing the
	            # remaining ones.
	            st.error(f"Error drawing box: {str(e)}")
	    return image

	# Streamlit UI
	# Flow: upload a PDF, render every page to an image, ask the model for text
	# bounding boxes, then show the original and the annotated page side by side.
	st.title("PDF Text Detection")
	uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

	if uploaded_file and st.button("Analyze"):
	    with st.spinner("Processing..."):
	        try:
	            images = convert_from_bytes(uploaded_file.read(), dpi=300)  # Increased DPI
	            client = genai.Client(api_key=os.getenv("KEY"))  # Verify env var name

	            for idx, image in enumerate(images):
	                with st.expander(f"Page {idx+1}", expanded=True):
	                    # Errors are handled per page so that one failing page
	                    # does not discard the results of the others.
	                    try:
	                        # Encode the page as PNG; the buffer is released as
	                        # soon as the bytes have been copied out.
	                        with io.BytesIO() as png_buffer:
	                            image.save(png_buffer, format='PNG')
	                            png_bytes = png_buffer.getvalue()

	                        # Get bounding boxes
	                        response = client.models.generate_content(
	                            model="gemini-2.0-flash-exp",
	                            contents=[
	                                DETECTION_PROMPT,
	                                types.Part.from_bytes(
	                                    data=png_bytes,
	                                    mime_type="image/png"
	                                )
	                            ]
	                        )

	                        # The reply may carry no text at all; normalize to an
	                        # empty string so display and parsing never see None.
	                        raw_text = response.text or ""

	                        # Debug output
	                        # NOTE(review): some Streamlit releases reject an
	                        # expander nested inside another expander - confirm
	                        # against the deployed Streamlit version.
	                        with st.expander("Raw API Response"):
	                            st.code(raw_text)

	                        # Parse and draw (on a copy, so the original page
	                        # stays clean for the side-by-side view)
	                        boxes = parse_list_boxes(raw_text)
	                        annotated = draw_bounding_boxes(image.copy(), boxes)

	                        # Display
	                        cols = st.columns(2)
	                        cols[0].image(image, caption="Original", use_column_width=True)
	                        cols[1].image(annotated,
	                                      caption=f"Detected {len(boxes)} text regions",
	                                      use_column_width=True)
	                    except Exception as e:
	                        st.error(f"Error on page {idx+1}: {str(e)}")

	        except Exception as e:
	            # Failures before the page loop (PDF conversion, client setup).
	            st.error(f"Error: {str(e)}")