import gradio as gr
from transformers import pipeline
import torch
import json
import time
from functools import lru_cache


# 1. Model Loading with Health Checks
@lru_cache(maxsize=1)
def load_model():
    """Load the NuExtract-1.5 text2text pipeline once and cache it.

    Returns:
        The Hugging Face pipeline on success, or ``None`` on failure.

    On failure the lru_cache entry is cleared so that a later call can
    retry the load instead of permanently caching the ``None`` result.
    """
    try:
        print("⚙️ Initializing NuExtract-1.5 model...")
        start_time = time.time()
        model = pipeline(
            "text2text-generation",
            model="numind/NuExtract-1.5",
            device="cuda" if torch.cuda.is_available() else "cpu",
            # fp16 only makes sense on GPU; None keeps the default dtype on CPU.
            torch_dtype=torch.float16 if torch.cuda.is_available() else None,
        )
        load_time = round(time.time() - start_time, 2)
        print(f"✅ Model loaded successfully in {load_time}s")
        return model
    except Exception as e:
        print(f"❌ Model loading failed: {str(e)}")
        # BUGFIX: without this, @lru_cache(maxsize=1) would cache the None
        # failure forever and the app could never recover by retrying.
        load_model.cache_clear()
        return None


# 2. Warm Start Mechanism
def keep_model_warm():
    """Periodic ping to prevent Hugging Face from unloading the model."""
    # NOTE(review): `extractor` is a module-level global assumed to be
    # assigned elsewhere in this file (presumably `extractor = load_model()`)
    # — confirm against the rest of the script.
    if extractor:
        try:
            extractor("ping", max_length=1)
        # BUGFIX: the original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt. A failed ping is still deliberately ignored —
        # this is a best-effort keep-alive, not a critical path.
        except Exception:
            pass


# 3. Processing Function with Streamed Output
def extract_structure(template, text):
    """Generator that streams ``(status, json_text, html)`` tuples to Gradio.

    Args:
        template: JSON string describing the extraction template (may be empty).
        text: the document text to extract structured data from.

    Yields:
        3-tuples of (status message, formatted JSON string, HTML fragment).

    NOTE(review): the HTML fragments in the yielded strings were garbled in
    this copy of the file (markup stripped during extraction); the markup
    below is a minimal reconstruction — verify against the intended styling.
    """
    # Input validation
    if not text.strip():
        yield "❌ Error: Empty input text", "", "Please enter text to analyze"
        return

    # An empty template means "no template constraints"; anything else must
    # parse as JSON.
    try:
        template_data = json.loads(template) if template.strip() else {}
    except json.JSONDecodeError:
        yield "❌ Error: Invalid JSON template", "", "Malformed JSON template"
        return

    # Processing stages: purely cosmetic progress messages streamed to the
    # UI before the (slow) real inference below.
    stages = [
        ("🔍 Initializing model...", 0.5),
        ("📖 Parsing document structure...", 1.2),
        ("🔄 Matching template fields...", 0.8),
        ("✨ Finalizing extraction...", 0.3),
    ]
    for msg, delay in stages:
        yield msg, "", ""
        time.sleep(delay)

    try:
        # Actual inference. NOTE(review): spreading the parsed template as
        # keyword arguments into the pipeline call is unusual — confirm this
        # is how the template is meant to reach the model.
        result = extractor(
            text,
            **template_data,
            max_length=512,
            num_return_sequences=1,
            temperature=0.7,
        )[0]['generated_text']

        # Format output: round-trip through json to pretty-print the model's
        # raw JSON string.
        formatted_json = json.dumps(json.loads(result), indent=2)
        html_output = f"<pre>{formatted_json}</pre>"
        yield "✅ Extraction complete", formatted_json, html_output
    except Exception as e:
        # NOTE(review): this handler is reconstructed — only the
        # `{error_msg}` fragment of it survived in the garbled source.
        error_msg = f"❌ Extraction failed: {e}"
        yield error_msg, "", f"<div>{error_msg}</div>"
" # 4. Gradio Interface with gr.Blocks(theme=gr.themes.Soft(), title="NuExtract 1.5") as demo: # Header gr.Markdown("""Advanced Information Extraction System