File size: 2,724 Bytes
be196c4
 
 
f764538
be196c4
f764538
 
 
 
 
be196c4
f764538
 
 
be196c4
f764538
 
6248a95
f764538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be196c4
f764538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import json
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Simplified extraction function
def extract_structure(template, text, progress=None):
    """Run the NuExtract model on *text* using a JSON *template*.

    Parameters
    ----------
    template : str
        JSON template describing the fields to extract.
    text : str
        Free text to extract information from.
    progress : optional
        Unused; kept for interface compatibility with Gradio callers.

    Returns
    -------
    tuple[str, str, str]
        (status message, extracted JSON string, HTML summary). On any
        failure the status starts with "Error" and the JSON is "{}".
    """
    try:
        # NuExtract expects exactly this prompt layout (template/text markers).
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

        # Tokenize and generate; inference only, so skip gradient tracking.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
        with torch.no_grad():
            generated = model.generate(**inputs, max_new_tokens=2000)
        output = tokenizer.decode(generated[0], skip_special_tokens=True)

        # Everything after the LAST "<|output|>" marker is the model's answer.
        # rsplit guards against the marker occurring inside the user-supplied
        # text, and the length check guards against the marker having been
        # stripped as a special token during decoding (would IndexError before).
        parts = output.rsplit("<|output|>", 1)
        if len(parts) < 2:
            return "Error: model output missing <|output|> marker", "{}", "<p>Processing failed</p>"
        result = parts[1].strip()

        # Highlight found items in text (simplified placeholder).
        highlighted = f"<p>Processed text of length {len(text)} characters</p>"

        return "Processing complete", result, highlighted
    except Exception as e:
        return f"Error: {str(e)}", "{}", "<p>Processing failed</p>"

# Load model and tokenizer once at startup. Failures (no network, missing
# weights, incompatible hardware) are caught and recorded in `model_loaded`
# so the UI can surface them instead of crashing at import time.
model_name = "numind/NuExtract-1.5"
try:
    load_kwargs = {
        "torch_dtype": torch.float16,  # Using float16 instead of bfloat16 for better compatibility
        "trust_remote_code": True,
        "device_map": "auto",
    }
    model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
except Exception as e:
    print(f"Model loading error: {e}")
    model_loaded = False
else:
    # Only reached when both the model and the tokenizer loaded cleanly.
    model_loaded = True

# Build the Gradio UI: inputs on the left column, results on the right.
with gr.Blocks() as demo:
    gr.Markdown("# NuExtract-1.5 Demo")

    # Surface load failures prominently instead of erroring at submit time.
    if not model_loaded:
        gr.Markdown("## ⚠️ Model failed to load. Using dummy mode.")

    with gr.Row():
        with gr.Column():
            tmpl_box = gr.Textbox(label="Template (JSON)", value='{"name": "", "email": ""}', lines=5)
            text_box = gr.Textbox(label="Input Text", value="Contact: John Smith ([email protected])", lines=10)
            run_btn = gr.Button("Extract Information")

        with gr.Column():
            status_box = gr.Textbox(label="Progress")
            result_box = gr.Textbox(label="Extracted Information")
            html_box = gr.HTML(label="Highlighted Text")

    # Wire the button to the extraction function; outputs map positionally
    # onto the tuple returned by extract_structure.
    run_btn.click(
        fn=extract_structure,
        inputs=[tmpl_box, text_box],
        outputs=[status_box, result_box, html_box],
    )

    # One canned example so first-time users can try the demo immediately.
    gr.Examples(
        [['{"name": "", "email": ""}', 'Contact: John Smith ([email protected])']],
        [tmpl_box, text_box],
    )

if __name__ == "__main__":
    demo.launch()