Spaces:

oberbics
/

HistorySpace

Running on Zero

App Files Files Community

HistorySpace / app.py

oberbics

Update app.py

cede142 verified 5 months ago

raw

history blame

5.71 kB

	import html
	import json
	import time
	from functools import lru_cache

	import gradio as gr
	import torch
	from transformers import pipeline

	# 1. Model Loading with Health Checks
	@lru_cache(maxsize=1)
	def _load_model_cached():
	    """Build the NuExtract-1.5 pipeline once; raise if construction fails.

	    ``lru_cache`` does not memoize raised exceptions, so a failed attempt
	    leaves the cache empty and the next call retries.
	    """
	    print("⚙️ Initializing NuExtract-1.5 model...")
	    start_time = time.time()

	    use_cuda = torch.cuda.is_available()
	    model = pipeline(
	        # The NuExtract-1.5 model card loads the checkpoint as a causal LM
	        # (AutoModelForCausalLM), which corresponds to the "text-generation"
	        # task rather than the seq2seq-only "text2text-generation".
	        "text-generation",
	        model="numind/NuExtract-1.5",
	        device="cuda" if use_cuda else "cpu",
	        # Half precision only on GPU; on CPU keep the library default.
	        torch_dtype=torch.float16 if use_cuda else None,
	    )

	    load_time = round(time.time() - start_time, 2)
	    print(f"✅ Model loaded successfully in {load_time}s")
	    return model


	def load_model():
	    """Return the shared NuExtract-1.5 pipeline, or ``None`` if it cannot load.

	    A successfully built pipeline is cached and reused by later calls.
	    A failure is reported on stdout and is NOT cached, so a transient
	    error (network hiccup, busy GPU) does not disable the model for the
	    rest of the process lifetime.

	    Returns:
	        The ``transformers`` pipeline object, or ``None`` on failure.
	    """
	    try:
	        return _load_model_cached()
	    except Exception as e:
	        print(f"❌ Model loading failed: {str(e)}")
	        return None


	# Keep the lru_cache helpers reachable under the public name, as they were
	# when ``load_model`` itself carried the decorator.
	load_model.cache_info = _load_model_cached.cache_info
	load_model.cache_clear = _load_model_cached.cache_clear

	# 2. Warm Start Mechanism
	def keep_model_warm():
	    """Send one minimal inference request to the loaded model, if any.

	    Intended to be called periodically so the model stays resident
	    (whether the hosting platform actually honours this is not verifiable
	    from this file). The ping is best-effort: failures never propagate.

	    Returns:
	        ``True`` if the ping completed, ``False`` if no model is loaded or
	        the ping raised.
	    """
	    # ``extractor`` is only assigned when the file runs as a script, so look
	    # it up defensively instead of risking a NameError.
	    model = globals().get("extractor")
	    if model is None:
	        return False
	    try:
	        # max_new_tokens bounds only the generated part; max_length may also
	        # count the prompt, which would make a limit of 1 unsatisfiable.
	        model("ping", max_new_tokens=1)
	    except Exception:
	        # Deliberately swallowed (keep-alive must not crash its caller), but
	        # narrowed so KeyboardInterrupt/SystemExit still propagate.
	        return False
	    return True

	# 3. Processing Function with Streamed Output
	def _build_nuextract_prompt(template_data, text):
	    """Render the prompt layout shown on the NuExtract-1.5 model card."""
	    template_json = json.dumps(template_data, indent=4)
	    return f"<|input|>\n### Template:\n{template_json}\n### Text:\n{text}\n\n<|output|>"


	def _extract_answer(generated_text):
	    """Isolate the model's answer from text that may still echo the prompt."""
	    # Works whether or not the pipeline returns the prompt along with the
	    # completion: keep what follows the last output marker and drop an
	    # optional end marker.
	    answer = generated_text.split("<|output|>")[-1]
	    return answer.split("<|end-output|>")[0].strip()


	def extract_structure(template, text):
	    """Extract structured data from ``text`` according to a JSON template.

	    Generator used as a Gradio event handler. Every item yielded is a
	    3-tuple ``(status, json_value, html_view)`` matching the status textbox,
	    the JSON component and the HTML component. ``json_value`` is ``None``
	    whenever there is no result, because an empty string is not valid JSON.

	    Args:
	        template: JSON object describing the fields to extract; blank or
	            ``None`` is treated as an empty object.
	        text: Document text to analyse.

	    Yields:
	        A single error tuple for invalid input or an unavailable model;
	        otherwise a progress tuple followed by either the result tuple or a
	        processing-error tuple. Errors are reported, never raised.
	    """
	    # Input validation
	    if not text or not text.strip():
	        yield "❌ Error: Empty input text", None, "<p style='color:red'>Please enter text to analyze</p>"
	        return

	    try:
	        template_data = json.loads(template) if template and template.strip() else {}
	    except json.JSONDecodeError:
	        yield "❌ Error: Invalid JSON template", None, "<p style='color:red'>Malformed JSON template</p>"
	        return
	    if not isinstance(template_data, dict):
	        yield "❌ Error: Invalid JSON template", None, "<p style='color:red'>Template must be a JSON object</p>"
	        return

	    # ``extractor`` is assigned only when the app runs as a script and is
	    # None when loading failed; report that instead of failing obscurely.
	    model = globals().get("extractor")
	    if model is None:
	        yield "❌ Error: Model not loaded", None, "<p style='color:red'>The extraction model is not available</p>"
	        return

	    yield "⏳ Running extraction...", None, ""

	    try:
	        # The template is part of the prompt. It must never be splatted into
	        # the call as keyword arguments: that would let user input set
	        # arbitrary generation options.
	        generated = model(
	            _build_nuextract_prompt(template_data, text),
	            max_new_tokens=512,  # bounds the answer only, not prompt + answer
	            num_return_sequences=1,
	            do_sample=False,  # deterministic decoding for extraction
	        )[0]["generated_text"]

	        parsed = json.loads(_extract_answer(generated))
	        formatted_json = json.dumps(parsed, indent=2, ensure_ascii=False)
	    except Exception as e:
	        error_msg = f"❌ Processing error: {str(e)}"
	        # Escaped: exception text can contain fragments of user input.
	        yield error_msg, None, f"<p style='color:red'>{html.escape(error_msg)}</p>"
	        return

	    # Escaped: extracted values come from the user-supplied document.
	    html_view = (
	        "<div style='"
	        "padding: 15px; "
	        "background: #f8f9fa; "
	        "border-radius: 8px; "
	        "border-left: 4px solid #4CAF50; "
	        "margin-top: 10px;"
	        "'>"
	        "<h3 style='margin-top:0'>Extracted Data</h3>"
	        f"<pre style='white-space: pre-wrap'>{html.escape(formatted_json)}</pre>"
	        "</div>"
	    )

	    yield "✅ Extraction complete", formatted_json, html_view

	# 4. Gradio Interface
	# Declarative UI: a two-column layout (inputs left, results right) plus the
	# buttons that drive `extract_structure`.
	# NOTE(review): the nesting below was reconstructed from syntax; confirm the
	# buttons belong at Blocks level rather than inside the results column.
	with gr.Blocks(theme=gr.themes.Soft(), title="NuExtract 1.5") as demo:
	    # Header. Kept unindented inside the string so Markdown cannot treat the
	    # HTML as an indented code block.
	    gr.Markdown(
	        "\n"
	        "<div style='text-align:center'>\n"
	        "<h1>🧠 NuExtract-1.5</h1>\n"
	        "<p>Advanced Information Extraction System</p>\n"
	        "</div>\n"
	    )

	    # Main layout
	    with gr.Row():
	        # Input Column
	        with gr.Column(scale=1, min_width=400):
	            gr.Markdown("### 📥 Input")
	            template_input = gr.Textbox(
	                label="Extraction Template (JSON)",
	                value='{"fields": ["name", "email", "phone"]}',
	                lines=5,
	            )
	            text_input = gr.TextArea(
	                label="Document Text",
	                # Real-looking address restored: the previous text was an
	                # e-mail obfuscation placeholder.
	                placeholder="John Smith (john.smith@example.com) called regarding order #12345...",
	                lines=12,
	            )
	            gr.Examples(
	                examples=[
	                    [
	                        '{"fields": ["name", "email"]}',
	                        "Please contact Dr. Sarah Johnson at sarah.johnson@example.com",
	                    ],
	                    [
	                        '{"fields": ["product", "price"]}',
	                        "The new MacBook Pro costs $1,299 at our store",
	                    ],
	                ],
	                inputs=[template_input, text_input],
	                label="Try Examples:",
	            )

	        # Output Column
	        with gr.Column(scale=1, min_width=500):
	            gr.Markdown("### 📤 Results")
	            status = gr.Textbox(
	                label="Status",
	                value="🟢 System Ready",
	                interactive=False,
	            )
	            # gr.JSON is display-only; no `interactive` flag is passed
	            # because recent Gradio versions appear not to accept it here.
	            json_output = gr.JSON(label="Structured Output")
	            html_output = gr.HTML(
	                label="Formatted View",
	                value="<div style='min-height:200px'></div>",
	            )

	    # Controls
	    submit_btn = gr.Button("Extract Information", variant="primary")
	    clear_btn = gr.Button("Clear")

	    # Event handlers
	    submit_btn.click(
	        fn=extract_structure,
	        inputs=[template_input, text_input],
	        outputs=[status, json_output, html_output],
	    )

	    clear_btn.click(
	        # None (not "") clears the JSON component: an empty string is not
	        # valid JSON. The status is reset too so no stale message remains.
	        fn=lambda: ("", "", "🟢 System Ready", None, "<div></div>"),
	        inputs=[],
	        outputs=[template_input, text_input, status, json_output, html_output],
	    )

	# 5. Launch Configuration
	if __name__ == "__main__":
	    import threading

	    # Seconds to wait between keep-alive pings.
	    KEEP_ALIVE_INTERVAL_SECONDS = 300

	    def _keep_alive_forever():
	        """Ping the model at a fixed interval for the life of the process."""
	        # A plain loop with a sleep: the previous endless list comprehension
	        # pinged back-to-back with no pause and accumulated one list element
	        # per ping, growing memory without bound.
	        while True:
	            time.sleep(KEEP_ALIVE_INTERVAL_SECONDS)
	            keep_model_warm()

	    # Initialize model
	    extractor = load_model()

	    # Start keep-alive thread (pointless when there is no model to ping).
	    # Daemon thread, so it never blocks interpreter shutdown.
	    if extractor is not None:
	        threading.Thread(
	            target=_keep_alive_forever,
	            name="model-keep-alive",
	            daemon=True,
	        ).start()

	    # Launch app. No favicon_path: that option expects a local file path,
	    # not a URL, so the default icon is used instead.
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        show_error=True,
	        share=False,
	    )