# Spaces: Running on Zero  (status banner captured with the page; not part of the program)
import html
import json
import time
from functools import lru_cache

import gradio as gr
import torch
from transformers import pipeline
# 1. Model Loading with Health Checks | |
def load_model(): | |
try: | |
print("βοΈ Initializing NuExtract-1.5 model...") | |
start_time = time.time() | |
model = pipeline( | |
"text2text-generation", | |
model="numind/NuExtract-1.5", | |
device="cuda" if torch.cuda.is_available() else "cpu", | |
torch_dtype=torch.float16 if torch.cuda.is_available() else None | |
) | |
load_time = round(time.time() - start_time, 2) | |
print(f"β Model loaded successfully in {load_time}s") | |
return model | |
except Exception as e: | |
print(f"β Model loading failed: {str(e)}") | |
return None | |
# 2. Warm Start Mechanism | |
def keep_model_warm(): | |
"""Periodic ping to prevent Hugging Face from unloading the model""" | |
if extractor: | |
try: | |
extractor("ping", max_length=1) | |
except: | |
pass | |
# 3. Processing Function with Streamed Output | |
def extract_structure(template, text): | |
# Input validation | |
if not text.strip(): | |
yield "β Error: Empty input text", "", "<p style='color:red'>Please enter text to analyze</p>" | |
return | |
try: | |
template_data = json.loads(template) if template.strip() else {} | |
except json.JSONDecodeError: | |
yield "β Error: Invalid JSON template", "", "<p style='color:red'>Malformed JSON template</p>" | |
return | |
# Processing stages | |
stages = [ | |
("π Initializing model...", 0.5), | |
("π Parsing document structure...", 1.2), | |
("π Matching template fields...", 0.8), | |
("β¨ Finalizing extraction...", 0.3) | |
] | |
for msg, delay in stages: | |
yield msg, "", "" | |
time.sleep(delay) | |
try: | |
# Actual inference | |
result = extractor( | |
text, | |
**template_data, | |
max_length=512, | |
num_return_sequences=1, | |
temperature=0.7 | |
)[0]['generated_text'] | |
# Format output | |
formatted_json = json.dumps(json.loads(result), indent=2) | |
html_output = f""" | |
<div style=' | |
padding: 15px; | |
background: #f8f9fa; | |
border-radius: 8px; | |
border-left: 4px solid #4CAF50; | |
margin-top: 10px; | |
'> | |
<h3 style='margin-top:0'>Extracted Data</h3> | |
<pre style='white-space: pre-wrap'>{formatted_json}</pre> | |
</div> | |
""" | |
yield "β Extraction complete", formatted_json, html_output | |
except Exception as e: | |
error_msg = f"β Processing error: {str(e)}" | |
yield error_msg, "", f"<p style='color:red'>{error_msg}</p>" | |
# 4. Gradio Interface | |
with gr.Blocks(theme=gr.themes.Soft(), title="NuExtract 1.5") as demo: | |
# Header | |
gr.Markdown(""" | |
<div style='text-align:center'> | |
<h1>π§ NuExtract-1.5</h1> | |
<p>Advanced Information Extraction System</p> | |
</div> | |
""") | |
# Main layout | |
with gr.Row(): | |
# Input Column | |
with gr.Column(scale=1, min_width=400): | |
gr.Markdown("### π₯ Input") | |
template_input = gr.Textbox( | |
label="Extraction Template (JSON)", | |
value='{"fields": ["name", "email", "phone"]}', | |
lines=5 | |
) | |
text_input = gr.TextArea( | |
label="Document Text", | |
placeholder="John Smith ([email protected]) called regarding order #12345...", | |
lines=12 | |
) | |
gr.Examples( | |
examples=[ | |
[ | |
'{"fields": ["name", "email"]}', | |
"Please contact Dr. Sarah Johnson at [email protected]" | |
], | |
[ | |
'{"fields": ["product", "price"]}', | |
"The new MacBook Pro costs $1,299 at our store" | |
] | |
], | |
inputs=[template_input, text_input], | |
label="Try Examples:" | |
) | |
# Output Column | |
with gr.Column(scale=1, min_width=500): | |
gr.Markdown("### π€ Results") | |
status = gr.Textbox( | |
label="Status", | |
value="π’ System Ready", | |
interactive=False | |
) | |
json_output = gr.JSON( | |
label="Structured Output", | |
interactive=False | |
) | |
html_output = gr.HTML( | |
label="Formatted View", | |
value="<div style='min-height:200px'></div>" | |
) | |
# Controls | |
submit_btn = gr.Button("Extract Information", variant="primary") | |
clear_btn = gr.Button("Clear") | |
# Event handlers | |
submit_btn.click( | |
fn=extract_structure, | |
inputs=[template_input, text_input], | |
outputs=[status, json_output, html_output] | |
) | |
clear_btn.click( | |
fn=lambda: ["", "", "", "<div></div>"], | |
inputs=[], | |
outputs=[template_input, text_input, json_output, html_output] | |
) | |
# 5. Launch Configuration | |
if __name__ == "__main__": | |
# Initialize model | |
extractor = load_model() | |
# Start keep-alive thread | |
import threading | |
threading.Thread( | |
target=lambda: [keep_model_warm() for _ in iter(int, 1)], | |
daemon=True | |
).start() | |
# Launch app | |
demo.launch( | |
server_name="0.0.0.0", | |
server_port=7860, | |
show_error=True, | |
share=False, | |
favicon_path="https://huggingface.co/favicon.ico" | |
) |