Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,724 Bytes
be196c4 f764538 be196c4 f764538 be196c4 f764538 be196c4 f764538 6248a95 f764538 be196c4 f764538 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import json
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Simplified extraction function
def _normalize_template(template):
    """Return ``template`` re-serialized as 4-space-indented JSON when possible.

    Input that cannot be parsed as JSON is returned unchanged so the caller
    can still place it in the prompt as-is.
    """
    try:
        # ensure_ascii=False keeps non-ASCII field names readable instead of
        # turning them into \uXXXX escapes; only the whitespace changes.
        return json.dumps(json.loads(template), indent=4, ensure_ascii=False)
    except (TypeError, ValueError):
        return template


def extract_structure(template, text, progress=None):
    """Extract structured data from ``text`` following a JSON ``template``.

    Builds the NuExtract prompt, runs generation with the module-level
    ``model`` and ``tokenizer``, and returns the part of the decoded output
    that follows the ``<|output|>`` marker.

    Args:
        template: JSON template describing the fields to extract. Valid JSON
            is re-serialized with a 4-space indent before it is placed in the
            prompt; anything else is passed through unchanged.
        text: The document to extract information from.
        progress: Unused; accepted so existing callers keep working.

    Returns:
        A ``(status, result, html)`` tuple of strings. On success ``status``
        is ``"Processing complete"``, ``result`` is the model output and
        ``html`` is a short summary paragraph. On failure ``status`` starts
        with ``"Error: "``, ``result`` is ``"{}"`` and ``html`` is
        ``"<p>Processing failed</p>"``. No ``Exception`` propagates to the
        caller.
    """
    output_marker = "<|output|>"
    failure = ("{}", "<p>Processing failed</p>")

    # ``model_loaded`` is set by the module-level loading code. It is read
    # through globals() so that a missing flag counts as "not loaded" rather
    # than surfacing as a cryptic NameError message in the UI.
    if not globals().get("model_loaded", False):
        return ("Error: model is not loaded", *failure)

    try:
        # Format the input
        prompt = (
            f"<|input|>\n### Template:\n{_normalize_template(template)}\n"
            f"### Text:\n{text}\n\n{output_marker}"
        )

        # Generate prediction
        encoded = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
        generated = model.generate(**encoded, max_new_tokens=2000)
        output = tokenizer.decode(generated[0], skip_special_tokens=True)

        # Everything after the first marker (and before a second one, if any)
        # is taken as the model's answer.
        segments = output.split(output_marker)
        if len(segments) < 2:
            return ("Error: model output did not contain the <|output|> marker", *failure)
        result = segments[1]

        # Highlight found items in text (simplified)
        highlighted = f"<p>Processed text of length {len(text)} characters</p>"
        return "Processing complete", result, highlighted
    except Exception as e:
        # UI boundary: report the problem in the status box instead of raising.
        return (f"Error: {str(e)}", *failure)
# Load model
model_name = "numind/NuExtract-1.5"

# Load the weights and the tokenizer once at import time. Any failure is
# printed and recorded in ``model_loaded`` instead of aborting the app.
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        trust_remote_code=True,
        # float16 instead of bfloat16 for better compatibility
        torch_dtype=torch.float16,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
except Exception as load_error:
    print(f"Model loading error: {load_error}")
    model_loaded = False
else:
    # Reached only when both the model and the tokenizer loaded.
    model_loaded = True
# Create interface
# Sample inputs shared by the widgets' initial values and the clickable
# example, so the two cannot drift apart.
_EXAMPLE_TEMPLATE = '{"name": "", "email": ""}'
# The template asks for an email, so the sample text must contain an address
# for the demo to have something to extract.
_EXAMPLE_TEXT = "Contact: John Smith (john.smith@example.com)"

with gr.Blocks() as demo:
    gr.Markdown("# NuExtract-1.5 Demo")

    if not model_loaded:
        # There is no fallback implementation; every request will report an
        # error, so the banner says so plainly.
        gr.Markdown("## ⚠️ Model failed to load. Extraction is unavailable.")

    with gr.Row():
        # Left column: user inputs and the submit button.
        with gr.Column():
            template_input = gr.Textbox(
                label="Template (JSON)",
                value=_EXAMPLE_TEMPLATE,
                lines=5,
            )
            text_input = gr.Textbox(
                label="Input Text",
                value=_EXAMPLE_TEXT,
                lines=10,
            )
            submit_btn = gr.Button("Extract Information")

        # Right column: one widget per element of extract_structure's
        # (status, result, html) return tuple.
        with gr.Column():
            progress_output = gr.Textbox(label="Progress")
            result_output = gr.Textbox(label="Extracted Information")
            html_output = gr.HTML(label="Highlighted Text")

    submit_btn.click(
        fn=extract_structure,
        inputs=[template_input, text_input],
        outputs=[progress_output, result_output, html_output],
    )

    # Simple example
    gr.Examples(
        examples=[[_EXAMPLE_TEMPLATE, _EXAMPLE_TEXT]],
        inputs=[template_input, text_input],
    )
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script, not
    # when it is imported.
    demo.launch()