File size: 3,430 Bytes
be096d1
 
90d7012
 
be096d1
90d7012
 
 
432d5fe
39ee1aa
432d5fe
39ee1aa
be096d1
39ee1aa
cede142
39ee1aa
be096d1
432d5fe
be096d1
90d7012
 
 
 
 
 
 
 
 
 
 
39ee1aa
90d7012
 
 
39ee1aa
90d7012
 
 
 
 
 
 
 
cede142
90d7012
 
 
a89d538
90d7012
cede142
39ee1aa
 
432d5fe
 
 
 
 
 
 
39ee1aa
cede142
432d5fe
39ee1aa
 
432d5fe
be096d1
39ee1aa
be096d1
 
 
 
39ee1aa
be096d1
 
 
39ee1aa
 
be096d1
39ee1aa
be096d1
39ee1aa
 
 
 
be096d1
 
 
39ee1aa
be096d1
432d5fe
39ee1aa
 
 
 
be096d1
 
39ee1aa
 
 
 
be096d1
 
39ee1aa
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import gradio as gr
import json
import requests
import os

# Use the Hugging Face Inference API instead of loading the model locally.
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"

# Only send an Authorization header when a token is actually configured:
# an empty "Bearer " value is a malformed credential, whereas omitting the
# header lets the request go out unauthenticated.
_HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
headers = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}

def test_function(template, text):
    print(f"Test function called with template: {template[:30]} and text: {text[:30]}")
    return "Button clicked successfully", "Function was called"

def extract_info(template, text):
    """Extract structured data from *text* with NuExtract-1.5.

    Builds the NuExtract prompt from the template and the text, posts it to
    the Hugging Face Inference API at ``API_URL`` and pretty-prints the JSON
    found after the ``<|output|>`` marker of the generated text.

    Args:
        template: JSON template describing the fields to extract.
        text: Free text to extract the fields from.

    Returns:
        A ``(status, output)`` tuple of strings for the two result textboxes.
        On success ``output`` is indented JSON; on a non-200 API response it
        is the raw response body; when the model output is not valid JSON it
        is that raw output; on any other failure it is ``"{}"``.
    """
    # Reject blank inputs up front instead of spending an API call on them.
    if not (template or "").strip() or not (text or "").strip():
        return "❌ Error: template and text must not be empty", "{}"

    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the UI callback forever.
    request_timeout_seconds = 60

    try:
        # Format prompt according to NuExtract-1.5 requirements
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
        print(f"Processing with prompt: {prompt[:100]}...")

        # Call API instead of using local model; greedy decoding keeps the
        # extraction deterministic.
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 1000,
                "do_sample": False,
            },
        }

        print("Calling API...")
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=request_timeout_seconds,
        )

        if response.status_code != 200:
            print(f"API error: {response.status_code}, {response.text}")
            return f"❌ API Error: {response.status_code}", response.text

        result = response.json()

        # Expected shape is a non-empty list whose first item is a dict with
        # "generated_text"; anything else is shown verbatim.
        first_item = result[0] if isinstance(result, list) and result else None
        if isinstance(first_item, dict):
            result_text = str(first_item.get("generated_text") or "")
        else:
            result_text = str(result)

        # The API may echo the prompt; keep only what follows the output marker.
        if "<|output|>" in result_text:
            json_text = result_text.split("<|output|>")[1].strip()
        else:
            json_text = result_text

        print("Parsing JSON...")
        try:
            extracted = json.loads(json_text)
        except json.JSONDecodeError:
            print(f"JSON parsing failed. Raw output: {json_text[:100]}...")
            return "❌ JSON parsing error", json_text

        # ensure_ascii=False keeps non-ASCII values (e.g. accented names)
        # readable instead of rendering them as \uXXXX escapes.
        formatted = json.dumps(extracted, indent=2, ensure_ascii=False)
        return "✅ Success", formatted
    except Exception as e:
        # UI callback boundary: report any failure (network error, timeout,
        # malformed response body) in the status box instead of raising.
        print(f"Error in extraction: {str(e)}")
        return f"❌ Error: {str(e)}", "{}"

# Build a simple two-column interface: inputs and buttons on the left,
# status and extraction output on the right.
with gr.Blocks() as demo:
    gr.Markdown("# NuExtract-1.5 Extraction Tool")

    with gr.Row():
        with gr.Column():
            template = gr.Textbox(
                label="JSON Template",
                value='{"name": "", "email": ""}',
                lines=5,
            )
            text = gr.Textbox(
                label="Text to Extract From",
                # The sample needs an actual address so the default template's
                # "email" field has something to extract.
                value="Contact: John Smith (john.smith@example.com)",
                lines=8,
            )

            # Two buttons: one to verify event wiring, one for the real call.
            test_btn = gr.Button("Test Click")
            extract_btn = gr.Button("Extract Information", variant="primary")

        with gr.Column():
            status = gr.Textbox(label="Status")
            output = gr.Textbox(label="Output", lines=10)

    # Both callbacks take (template, text) and fill (status, output).
    test_btn.click(
        fn=test_function,
        inputs=[template, text],
        outputs=[status, output],
    )

    extract_btn.click(
        fn=extract_info,
        inputs=[template, text],
        outputs=[status, output],
    )

if __name__ == "__main__":
    demo.launch()