Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import io | |
| import streamlit as st | |
| from PIL import Image, ImageDraw | |
| from google import genai | |
| from google.genai import types | |
| from pdf2image import convert_from_bytes | |
# Constants

# Instruction text sent to the model together with each rendered page image.
# Built from adjacent literals, one per prompt line; each piece ends with the
# newline that terminates that line of the prompt.
DETECTION_PROMPT = (
    "Identify ALL text regions in this document. Return bounding boxes as a Python list of lists\n"
    "in format [[xmin, ymin, xmax, ymax]] where coordinates are normalized between 0-1.\n"
    "Only return the list, nothing else. Example:\n"
    "[[0.05, 0.12, 0.25, 0.18], [0.30, 0.40, 0.50, 0.55]]\n"
)
def parse_list_boxes(text):
    """Parse a model reply into a list of ``[xmin, ymin, xmax, ymax]`` boxes.

    The reply is first read as a Python literal. If that fails, or the
    literal is not a sequence of four-number sequences, every
    ``[a, b, c, d]`` group of non-negative decimals found anywhere in the
    text is extracted instead (this covers replies wrapped in Markdown
    fences or surrounded by prose).

    Args:
        text: Raw reply text. ``None``, non-string and blank values yield
            an empty list.

    Returns:
        A list of four-element lists of floats; empty when no box can be
        recovered.
    """
    import ast  # local import so the function does not rely on file-level imports

    if not isinstance(text, str) or not text.strip():
        return []

    def _is_number(value):
        # bool is a subclass of int but is never a coordinate.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    # The text comes from a model and must be treated as untrusted:
    # literal_eval only builds literals and never executes code, unlike eval.
    try:
        parsed = ast.literal_eval(text.strip())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        parsed = None

    if isinstance(parsed, (list, tuple)) and all(
        isinstance(box, (list, tuple))
        and len(box) == 4
        and all(_is_number(v) for v in box)
        for box in parsed
    ):
        return [[float(v) for v in box] for box in parsed]

    # Fallback: pull out every bracketed group of four numbers.
    boxes = []
    pattern = r'\[([\d\.]+),\s*([\d\.]+),\s*([\d\.]+),\s*([\d\.]+)\]'
    for match in re.findall(pattern, text):
        try:
            boxes.append([float(v) for v in match])
        except ValueError:
            # The character class also matches tokens such as "1.2.3";
            # skip those groups instead of failing the whole parse.
            continue
    return boxes
def draw_bounding_boxes(image, boxes):
    """Draw normalized bounding boxes onto ``image`` and return it.

    Args:
        image: Image to annotate. It is drawn on in place through
            ``ImageDraw.Draw`` and must expose ``size`` as
            ``(width, height)``.
        boxes: Sequence of ``[xmin, ymin, xmax, ymax]`` entries expressed
            as fractions of the image width/height. Each value is clamped
            to the 0-1 range before scaling. A falsy value (``None`` or
            empty) leaves the image untouched.

    Returns:
        The same ``image`` object that was passed in.
    """
    if not boxes:
        return image

    draw = ImageDraw.Draw(image)
    width, height = image.size
    for box in boxes:
        try:
            # Index rather than unpack so entries with extra trailing
            # values are still accepted, as before.
            left, top, right, bottom = (
                max(0.0, min(1.0, box[i])) for i in range(4)
            )
            # ImageDraw.rectangle requires x1 >= x0 and y1 >= y0; reorder
            # reversed corners instead of losing the box to an error.
            left, right = sorted((left, right))
            top, bottom = sorted((top, bottom))
            draw.rectangle(
                [left * width, top * height, right * width, bottom * height],
                outline="#00FF00",
                width=3,
            )
        except (TypeError, ValueError, LookupError) as e:
            # Best effort: report the malformed box and keep drawing the rest.
            st.error(f"Error drawing box: {str(e)}")
    return image
# Streamlit UI
# Flow: upload a PDF, render each page to an image, ask the model for text
# bounding boxes, then show the original and annotated page side by side.
st.title("PDF Text Detection")
uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

if uploaded_file and st.button("Analyze"):
    with st.spinner("Processing..."):
        try:
            images = convert_from_bytes(uploaded_file.read(), dpi=300)  # Increased DPI
            client = genai.Client(api_key=os.getenv("KEY"))  # Verify env var name

            for idx, image in enumerate(images):
                # Pages are introduced with a subheader rather than an
                # expander: the "Raw API Response" expander below would
                # otherwise be nested inside it, which the st.expander
                # documentation disallows.
                st.subheader(f"Page {idx+1}")

                # Serialize the page to PNG bytes for the API request.
                png_buffer = io.BytesIO()
                image.save(png_buffer, format='PNG')

                # Get bounding boxes
                response = client.models.generate_content(
                    model="gemini-2.0-flash-exp",
                    contents=[
                        DETECTION_PROMPT,
                        types.Part.from_bytes(
                            data=png_buffer.getvalue(),
                            mime_type="image/png",
                        ),
                    ],
                )

                # The response may carry no text; fall back to an empty
                # string so display and parsing below do not fail on None.
                raw_text = response.text or ""

                # Debug output
                with st.expander("Raw API Response"):
                    st.code(raw_text)

                # Parse and draw (on a copy, so the original stays clean)
                boxes = parse_list_boxes(raw_text)
                annotated = draw_bounding_boxes(image.copy(), boxes)

                # Display
                # NOTE(review): `use_column_width` is reported as deprecated in
                # newer Streamlit releases - confirm against the pinned
                # version before switching to its replacement.
                cols = st.columns(2)
                cols[0].image(image, caption="Original", use_column_width=True)
                cols[1].image(
                    annotated,
                    caption=f"Detected {len(boxes)} text regions",
                    use_column_width=True,
                )
        except Exception as e:
            # Top-level boundary: surface any failure in the UI.
            st.error(f"Error: {str(e)}")