# Hugging Face Spaces - status: Running on Zero
import html
import json
import time

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Model Loading
# Single checkpoint id shared by the tokenizer and the model weights.
MODEL_ID = "numind/NuExtract-1.5"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,  # load the weights in half precision
    device_map="auto",  # device placement is delegated to the library
)
def _parse_json_object(generated):
    """Parse the outermost ``{...}`` span found in ``generated``.

    Raises:
        ValueError: if no brace-delimited span exists, or (as
            ``json.JSONDecodeError``) if the span is not valid JSON.
    """
    start = generated.find("{")
    end = generated.rfind("}")
    if start == -1 or end < start:
        raise ValueError("No JSON object found in model output")
    return json.loads(generated[start:end + 1])


def extract_structure(template, text):
    """Prompt the model with ``template`` and ``text`` and parse its JSON reply.

    Args:
        template: Extraction template; interpolated verbatim into the prompt.
        text: Input text to extract from; interpolated verbatim into the prompt.

    Returns:
        A ``(status, data, html)`` tuple: a status message, the parsed JSON
        value (``{}`` on failure), and an HTML fragment containing either the
        pretty-printed JSON or the error message. Errors are reported through
        the return value rather than raised, so the UI always gets a result.
    """
    # NOTE(review): the leading "β" in the status strings below looks like a
    # mis-encoded symbol; kept as-is until the intended character is confirmed.
    prompt = (
        "Extract the following fields from the text:\n"
        f"Template: {template}\n"
        f"Text: {text}\n"
        "Extracted JSON:"
    )
    try:
        # Put the inputs wherever the model actually lives instead of assuming
        # CUDA: the model is loaded with device_map="auto".
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=512)
        # The generated sequence starts with the prompt tokens. Decode only the
        # continuation, otherwise the braces of the user's template (echoed in
        # the prompt) would be mistaken for the start of the answer.
        prompt_len = inputs["input_ids"].shape[1]
        completion = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        )
        extracted = _parse_json_object(completion)
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing.
        message = str(e)
        return (
            f"β Error: {message}",
            {},
            # Escape: the message can contain user- or model-provided text.
            f"<p style='color:red'>{html.escape(message)}</p>",
        )
    # Escape: extracted values come from model output and may contain markup.
    pretty = html.escape(json.dumps(extracted, indent=2))
    return "β Success", extracted, f"<pre>{pretty}</pre>"
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# NuExtract-1.5 Structured Data Extractor")
    with gr.Row():
        # Input side: extraction template, source text and the trigger button.
        with gr.Column():
            template = gr.Textbox(
                label="Template (JSON)",
                value='{"fields": ["name", "email"]}',
            )
            text = gr.TextArea(label="Input Text")
            btn = gr.Button("Extract")
        # Output side: status line, parsed JSON and an HTML rendering.
        with gr.Column():
            status = gr.Textbox(label="Status")
            json_out = gr.JSON(label="Output")
            html_out = gr.HTML()
    # The three return values of extract_structure map onto the three outputs.
    btn.click(
        fn=extract_structure,
        inputs=[template, text],
        outputs=[status, json_out, html_out],
    )

demo.launch()