# Hugging Face Spaces page banner captured with this file: "Spaces: Running on Zero".
import json | |
import torch | |
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
# Simplified extraction function
def extract_structure(template, text, progress=None):
    """Extract the fields described by ``template`` from ``text``.

    Args:
        template: JSON document (as a string) naming the fields to extract.
        text: Free-form text to run the extraction on.
        progress: Unused; kept so callers that pass it keep working.

    Returns:
        A ``(status, result, html)`` tuple: a status message, the text
        generated by the model (``"{}"`` on failure) and a short HTML
        summary. Failures are reported through ``status`` rather than
        raised, so the UI callback always receives three values.
    """
    failed_html = "<p>Processing failed</p>"

    # Nothing to extract from: skip the (expensive) generation entirely.
    if not text or not text.strip():
        return "Error: input text is empty", "{}", failed_html

    # Parse and re-serialise the template so malformed JSON is reported
    # clearly up front. The indented layout follows the usage example
    # published with the model (see the model card); ensure_ascii=False
    # keeps non-ASCII keys exactly as the user typed them.
    try:
        template = json.dumps(json.loads(template), indent=4, ensure_ascii=False)
    except (TypeError, ValueError) as e:
        return f"Error: template is not valid JSON ({e})", "{}", failed_html

    try:
        if not model_loaded:
            raise RuntimeError("model is not loaded")

        # Format the input
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

        # Generate prediction
        encoded = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
        generated = model.generate(**encoded, max_new_tokens=2000)

        # Decode only the tokens produced after the prompt. Splitting the
        # full decoded string on "<|output|>" fails when truncation drops
        # the marker and picks the wrong span when the input text itself
        # contains the marker.
        prompt_length = encoded["input_ids"].shape[-1]
        result = tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True)

        # Highlight found items in text (simplified)
        highlighted = f"<p>Processed text of length {len(text)} characters</p>"
        return "Processing complete", result, highlighted
    except Exception as e:
        # UI boundary: surface any failure as a status message.
        return f"Error: {str(e)}", "{}", failed_html
# Load model
model_name = "numind/NuExtract-1.5"

# Bind both names before attempting the load so that later code can always
# refer to them, even when loading fails.
model = None
tokenizer = None
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # Using float16 instead of bfloat16 for better compatibility
        # NOTE(security): this executes Python code shipped in the model
        # repository; only keep it enabled for repositories you trust.
        trust_remote_code=True,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model_loaded = True
except Exception as e:
    print(f"Model loading error: {e}")
    # Drop anything that was only partially loaded (e.g. the model loaded
    # but the tokenizer did not) so the state is consistently "not loaded".
    model = None
    tokenizer = None
    model_loaded = False
# Create interface
# Default template and text, shared by the input boxes and the example row
# so the two cannot drift apart. The address uses the reserved example.com
# domain so the "email" field has something to extract.
_EXAMPLE_TEMPLATE = '{"name": "", "email": ""}'
_EXAMPLE_TEXT = "Contact: John Smith (john.smith@example.com)"

with gr.Blocks() as demo:
    gr.Markdown("# NuExtract-1.5 Demo")

    # Tell the user up front when extraction cannot work.
    if not model_loaded:
        gr.Markdown("## ⚠️ Model failed to load. Extraction is unavailable.")

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            template_input = gr.Textbox(
                label="Template (JSON)",
                value=_EXAMPLE_TEMPLATE,
                lines=5,
            )
            text_input = gr.Textbox(
                label="Input Text",
                value=_EXAMPLE_TEXT,
                lines=10,
            )
            submit_btn = gr.Button("Extract Information")

        # Right column: the three values returned by extract_structure.
        with gr.Column():
            progress_output = gr.Textbox(label="Progress")
            result_output = gr.Textbox(label="Extracted Information")
            html_output = gr.HTML(label="Highlighted Text")

    submit_btn.click(
        fn=extract_structure,
        inputs=[template_input, text_input],
        outputs=[progress_output, result_output, html_output],
    )

    # Simple example
    gr.Examples(
        examples=[[_EXAMPLE_TEMPLATE, _EXAMPLE_TEXT]],
        inputs=[template_input, text_input],
    )

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()