# NuExtract-1.5 structured-data extraction demo (Hugging Face Spaces app)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import json

# Load the tokenizer and model once at startup. If anything fails (no network,
# missing weights, OOM), keep the app alive and surface the error in the UI
# via the MODEL_LOADED flag instead of crashing the Space.
try:
    tokenizer = AutoTokenizer.from_pretrained("numind/NuExtract-1.5")
    model = AutoModelForCausalLM.from_pretrained(
        "numind/NuExtract-1.5",
        device_map="auto",          # place weights on GPU automatically when available
        torch_dtype=torch.float16,  # half precision to fit the model in less memory
    )
    MODEL_LOADED = True
except Exception as e:  # broad by design: any init failure degrades gracefully
    MODEL_LOADED = False
    print(f"Model loading failed: {e}")
def extract_structure(template, text):
    """Run NuExtract over *text* using the JSON *template*.

    Returns a 3-tuple matching the Gradio outputs:
    (status message, parsed dict, HTML rendering of the result).
    Never raises: failures are reported through the status/HTML outputs.
    """
    if not MODEL_LOADED:
        return "β Model not loaded", {}, "<p style='color:red'>Model failed to initialize</p>"
    prompt = f"""Extract from text:
Template: {template}
Text: {text}
JSON Output:"""
    try:
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():  # inference only — no autograd graph needed
            outputs = model.generate(**inputs, max_new_tokens=512)
        # Decode ONLY the newly generated tokens. Decoding outputs[0] in full
        # would include the echoed prompt, and result.find("{") would then
        # match the "{" inside the template text itself rather than the answer.
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        result = tokenizer.decode(generated, skip_special_tokens=True)
        json_start = result.find("{")
        json_end = result.rfind("}") + 1
        if json_start == -1 or json_end == 0:
            # No JSON object in the output — fail with a clear message instead
            # of handing json.loads a garbage slice.
            raise ValueError(f"No JSON object found in model output: {result!r}")
        extracted = json.loads(result[json_start:json_end])
        return "β Success", extracted, f"<pre>{json.dumps(extracted, indent=2)}</pre>"
    except Exception as e:
        return f"β Error: {str(e)}", {}, f"<p style='color:red'>{str(e)}</p>"
# Gradio interface: template + text inputs on the left, status / parsed JSON /
# pretty-printed HTML outputs on the right.
with gr.Blocks() as demo:
    gr.Markdown("# NuExtract-1.5 Structured Data Extractor")
    with gr.Row():
        with gr.Column():
            template = gr.Textbox(label="Template (JSON)", value='{"fields": ["name", "email"]}')
            text = gr.TextArea(label="Input Text")
            btn = gr.Button("Extract")
        with gr.Column():
            status = gr.Textbox(label="Status")
            json_out = gr.JSON(label="Output")
            html_out = gr.HTML()
    # Wire the button: inputs map positionally to extract_structure's params,
    # outputs to its returned 3-tuple.
    btn.click(extract_structure, [template, text], [status, json_out, html_out])

if __name__ == "__main__":
    demo.launch()