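# Source: Hugging Face Space adil9858/DOCSUM (status when captured: Sleeping)
# File: DOCSUM / app.py (8.99 kB)
# Last commit: 80e430b "Update app.py" by adil9858 (verified, 3 months ago)
# (Page navigation chrome removed: App / Files / Community / raw / history / blame)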

	import gradio as gr
	from openai import OpenAI
	import base64
	from PIL import Image
	import io
	import fitz # PyMuPDF
	import tempfile
	import os

	# --- HELPER FUNCTIONS ---
	def convert_pdf_to_images(pdf_file):
	    """Render every page of a PDF into a PIL image.

	    Args:
	        pdf_file: Raw bytes of the PDF document.

	    Returns:
	        A list with one RGB ``PIL.Image.Image`` per page, in page order.

	    Raises:
	        gr.Error: If the document cannot be opened or a page cannot be
	            rendered.
	    """
	    images = []
	    pdf_document = None
	    try:
	        # Open straight from memory: no temporary file is created, so there
	        # is nothing left on disk if rendering fails part-way through.
	        pdf_document = fitz.open(stream=pdf_file, filetype="pdf")

	        for page in pdf_document:
	            # get_pixmap() is called with its defaults, as before; the
	            # samples are interpreted as packed RGB below.
	            pix = page.get_pixmap()
	            images.append(
	                Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
	            )
	    except Exception as e:
	        raise gr.Error(f"Error converting PDF: {e}") from e
	    finally:
	        # Release the document on success and on failure alike.
	        if pdf_document is not None:
	            pdf_document.close()
	    return images

	def image_to_base64(image):
	    """Encode a PIL image as a base64 PNG string.

	    Args:
	        image: The ``PIL.Image.Image`` to encode.

	    Returns:
	        The PNG-encoded image as a base64 ``str`` (without any ``data:``
	        URL prefix).
	    """
	    # The PNG writer only accepts a subset of Pillow modes; anything else
	    # (for example CMYK, which some JPEG uploads use) is converted to RGB
	    # first instead of failing inside save().
	    png_modes = {"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"}
	    if image.mode not in png_modes:
	        image = image.convert("RGB")

	    with io.BytesIO() as buffer:
	        image.save(buffer, format="PNG")
	        return base64.b64encode(buffer.getvalue()).decode("utf-8")

	def generate_summary(extracted_texts, api_key, model="opengvlab/internvl3-14b:free"):
	    """Ask the model for one summary covering all extracted page texts.

	    Args:
	        extracted_texts: The per-page extraction results, already joined
	            into a single string.
	        api_key: OpenRouter API key used to authenticate the request.
	        model: OpenRouter model identifier. Defaults to the model the app
	            has always used, so existing callers are unaffected.

	    Returns:
	        The summary text produced by the model.

	    Raises:
	        gr.Error: If the request fails or the model returns no content.
	    """
	    summary_prompt = f"""
	You are an expert document analyst. Below are the extracted contents from multiple pages of a document.
	Please provide a comprehensive, detailed summary that:
	1. Organizes all key information logically
	2. Identifies relationships between data points
	3. Highlights important figures, dates, names
	4. Presents the information in a clear, structured format

	Extracted contents from pages:
	{extracted_texts}

	Comprehensive Summary:
	"""

	    try:
	        client = OpenAI(
	            base_url="https://openrouter.ai/api/v1",
	            api_key=api_key,
	        )
	        response = client.chat.completions.create(
	            model=model,
	            messages=[
	                {"role": "system", "content": "You are Dalton, an expert in analyzing and summarizing document contents."},
	                {"role": "user", "content": summary_prompt},
	            ],
	            max_tokens=2048,
	        )
	    except Exception as e:
	        raise gr.Error(f"Error generating summary: {e}") from e

	    # A reply can come back without choices or with empty content; report
	    # that explicitly instead of returning None (rendered as "None") or
	    # failing with an unhelpful subscript error.
	    choices = getattr(response, "choices", None)
	    summary = choices[0].message.content if choices else None
	    if not summary:
	        raise gr.Error("Error generating summary: the model returned an empty response")
	    return summary

	def analyze_document(api_key, user_prompt, uploaded_file):
	    """Analyze an uploaded PDF or image page by page and build a Markdown report.

	    Each page (or the single image) is sent to the vision model together
	    with ``user_prompt``. When more than one page was analyzed, an overall
	    summary is requested through ``generate_summary`` and appended.

	    Args:
	        api_key: OpenRouter API key entered by the user.
	        user_prompt: Instruction sent to the model alongside every page image.
	        uploaded_file: The upload handed over by the file component; either
	            an object exposing the file path as ``.name`` or the path itself.

	    Returns:
	        The Markdown report as a single string.

	    Raises:
	        gr.Error: If the key or file is missing, the file cannot be read,
	            the document has no pages, or a model request fails.
	    """
	    api_key = (api_key or "").strip()
	    if not api_key:
	        raise gr.Error("Please enter your OpenRouter API key")

	    if uploaded_file is None:
	        raise gr.Error("Please upload a document")

	    # Accept both a file wrapper with a `.name` path and a plain path string.
	    file_path = getattr(uploaded_file, "name", uploaded_file)
	    file_ext = os.path.splitext(file_path)[1].lower()

	    # Handle PDF or image
	    if file_ext == ".pdf":
	        with open(file_path, "rb") as f:
	            pdf_data = f.read()
	        images_to_analyze = convert_pdf_to_images(pdf_data)  # all pages
	    else:
	        try:
	            images_to_analyze = [Image.open(file_path)]
	        except (OSError, ValueError) as e:
	            raise gr.Error(f"Error opening image: {e}") from e

	    if not images_to_analyze:
	        raise gr.Error("The uploaded document contains no pages to analyze")

	    # The client does not depend on the page, so build it once.
	    client = OpenAI(
	        base_url="https://openrouter.ai/api/v1",
	        api_key=api_key,
	    )

	    # Process each image
	    all_results = []
	    extracted_texts = []

	    for idx, image in enumerate(images_to_analyze, 1):
	        try:
	            image_base64 = image_to_base64(image)
	            response = client.chat.completions.create(
	                model="opengvlab/internvl3-14b:free",
	                messages=[
	                    {"role": "system", "content": "You are Dalton, an expert in understanding images that can analyze images and provide detailed descriptions."},
	                    {"role": "user", "content": [
	                        {"type": "text", "text": user_prompt},
	                        {"type": "image_url", "image_url": {
	                            "url": f"data:image/png;base64,{image_base64}"
	                        }},
	                    ]},
	                ],
	                max_tokens=1024,
	            )
	            result = response.choices[0].message.content
	        except Exception as e:
	            raise gr.Error(f"Error analyzing page {idx}: {e}") from e

	        # Keep the report readable when the model sends back no content.
	        if not result:
	            result = "_No content returned by the model for this page._"

	        extracted_texts.append(f"### Page {idx}\n{result}\n")
	        all_results.append(f"## 📄 Page {idx} Results\n{result}\n---\n")

	    markdown_output = "\n".join(all_results)

	    # Generate summary if multiple pages
	    if len(extracted_texts) > 1:
	        summary = generate_summary("\n".join(extracted_texts), api_key)
	        markdown_output += f"\n## 📝 Comprehensive Summary\n{summary}\n"

	    # Add structured data section
	    # NOTE(review): these bullets are static placeholder text, not values
	    # extracted from the document — confirm whether they should be filled
	    # from the model output or dropped.
	    markdown_output += "".join([
	        "\n## 🔍 Key Data Extracted\n",
	        "- Important Figures: [Extracted values]\n",
	        "- Critical Dates: [Extracted dates]\n",
	        "- Main Entities: [Identified names/companies]\n",
	        "- Action Items: [Key tasks identified]\n",
	    ])

	    # Add document metadata; show only the file name so the server's
	    # upload directory is not exposed in the report.
	    markdown_output += f"\n---\nDocument processed: {os.path.basename(file_path)}"

	    return markdown_output

	# Custom CSS for dark theme with green text.
	# The stylesheet is passed to gr.Blocks(css=...) below. It declares colour
	# variables on :root, forces a black page background with green text, styles
	# elements carrying the "markdown-output" class (a bordered, scrollable panel
	# capped at 600px, with rules for headings, links, code and lists) and
	# restyles every <button>, including its hover state.
	custom_css = """
	:root {
	--primary: #00ff00;
	--primary-50: #00ff0033;
	--primary-100: #00ff0066;
	--primary-200: #00ff0099;
	--primary-300: #00ff00cc;
	--secondary: #00cc00;
	--secondary-50: #00cc0033;
	--secondary-100: #00cc0066;
	--secondary-200: #00cc0099;
	--secondary-300: #00cc00cc;
	--color-background-primary: #000000;
	--color-background-secondary: #111111;
	--color-background-tertiary: #222222;
	--text-color: #00ff00;
	--block-background-fill: #111111;
	--block-border-color: #00aa00;
	--block-label-text-color: #00ff00;
	--block-title-text-color: #00ff00;
	--input-background-fill: #111111;
	--input-border-color: #00aa00;
	--input-text-color: #00ff00;
	}

	body {
	background-color: var(--color-background-primary) !important;
	color: var(--text-color) !important;
	}

	.markdown-output {
	padding: 20px;
	border-radius: 8px;
	background: var(--color-background-secondary);
	border: 1px solid var(--block-border-color);
	max-height: 600px;
	overflow-y: auto;
	color: var(--text-color) !important;
	}

	.markdown-output h1,
	.markdown-output h2,
	.markdown-output h3 {
	color: var(--primary) !important;
	border-bottom: 1px solid var(--primary-300);
	}

	.markdown-output a {
	color: var(--secondary) !important;
	}

	.markdown-output code {
	background-color: var(--color-background-tertiary);
	color: var(--secondary);
	}

	.markdown-output pre {
	background-color: var(--color-background-tertiary) !important;
	border: 1px solid var(--block-border-color);
	}

	.markdown-output ul,
	.markdown-output ol {
	color: var(--text-color);
	}

	button {
	background: var(--primary) !important;
	color: black !important;
	font-weight: bold !important;
	}

	button:hover {
	background: var(--primary-300) !important;
	}
	"""

	# Dark theme: start from Gradio's default theme with every hue set to green,
	# then override the individual colours that make it black-on-green.
	dark_green_theme = gr.themes.Default(
	    primary_hue="green",
	    secondary_hue="green",
	    neutral_hue="green",
	)
	dark_green_theme = dark_green_theme.set(
	    # Surfaces
	    background_fill_primary="#000000",
	    background_fill_secondary="#111111",
	    block_background_fill="#111111",
	    # Borders
	    border_color_accent="#00aa00",
	    # Text
	    body_text_color="#00ff00",
	    block_label_text_color="#00ff00",
	    button_primary_text_color="#000000",
	)

	# --- GRADIO INTERFACE ---
	# Builds the Blocks app `demo`: a title and credit line, a password-style
	# API-key field and a prompt field (pre-filled with a default instruction),
	# a file upload restricted to .pdf/.jpg/.jpeg/.png, an analyze button, and a
	# Markdown panel that displays whatever analyze_document returns.
	# NOTE(review): this copy of the file has lost its original indentation, so
	# the nesting under `with gr.Blocks(...)` and `with gr.Row():` cannot be read
	# from here — presumably only the two textboxes share the row; confirm
	# against the upstream file before relying on the layout.
	with gr.Blocks(
	title="DocSum - Document Summarizer",
	theme=dark_green_theme,
	css=custom_css
	) as demo:
	gr.Markdown("# 🧾 DocSum")
	gr.Markdown("Document Summarizer Powered by VLM • Developed by [Koshur AI](https://koshurai.com)")

	with gr.Row():
	# type="password" is set on the key field.
	api_key = gr.Textbox(
	label="🔑 OpenRouter API Key",
	type="password",
	placeholder="Enter your OpenRouter API key"
	)
	user_prompt = gr.Textbox(
	label="📝 Enter Your Prompt",
	value="Extract all content structurally",
	placeholder="What would you like to extract?"
	)

	uploaded_file = gr.File(
	label="Upload Document (PDF/Image)",
	file_types=[".pdf", ".jpg", ".jpeg", ".png"]
	)

	submit_btn = gr.Button("🔍 Analyze Document", variant="primary")

	# Markdown output with custom class; "markdown-output" is the class the
	# rules in custom_css target.
	output = gr.Markdown(
	label="Analysis Results",
	elem_classes=["markdown-output"]
	)

	# The input order here matches analyze_document's parameter order
	# (api_key, user_prompt, uploaded_file).
	submit_btn.click(
	fn=analyze_document,
	inputs=[api_key, user_prompt, uploaded_file],
	outputs=output
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()