oberbics committed on
Commit
3953725
·
verified ·
1 Parent(s): 19377ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -146
app.py CHANGED
@@ -1,165 +1,53 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import torch
4
  import json
5
  import time
6
- from functools import lru_cache
7
 
8
- # 1. Model Loading with Health Checks
9
- @lru_cache(maxsize=1)
10
- def load_model():
11
- try:
12
- print("⚙️ Initializing NuExtract-1.5 model...")
13
- start_time = time.time()
14
-
15
- model = pipeline(
16
- "text2text-generation",
17
- model="numind/NuExtract-1.5",
18
- device="cuda" if torch.cuda.is_available() else "cpu",
19
- torch_dtype=torch.float16 if torch.cuda.is_available() else None
20
- )
21
-
22
- load_time = round(time.time() - start_time, 2)
23
- print(f"✅ Model loaded successfully in {load_time}s")
24
- return model
25
- except Exception as e:
26
- print(f"❌ Model loading failed: {str(e)}")
27
- return None
28
 
29
- # 2. Processing Function with Streamed Output
30
  def extract_structure(template, text):
31
- # Input validation
32
- if not text.strip():
33
- yield "❌ Error: Empty input text", "", "<p style='color:red'>Please enter text to analyze</p>"
34
- return
35
 
36
  try:
37
- template_data = json.loads(template) if template.strip() else {}
38
- except json.JSONDecodeError:
39
- yield "❌ Error: Invalid JSON template", "", "<p style='color:red'>Malformed JSON template</p>"
40
- return
41
-
42
- # Processing stages
43
- stages = [
44
- ("🔍 Initializing model...", 0.5),
45
- ("📖 Parsing document structure...", 1.2),
46
- ("🔄 Matching template fields...", 0.8),
47
- ("✨ Finalizing extraction...", 0.3)
48
- ]
49
-
50
- for msg, delay in stages:
51
- yield msg, "", ""
52
- time.sleep(delay)
53
-
54
- try:
55
- # Actual inference
56
- result = extractor(
57
- text,
58
- **template_data,
59
- max_length=512,
60
- num_return_sequences=1,
61
- temperature=0.7
62
- )[0]['generated_text']
63
 
64
- # Format output
65
- formatted_json = json.loads(result) # Parse to validate JSON
66
- html_output = f"""
67
- <div style='
68
- padding: 15px;
69
- background: #f8f9fa;
70
- border-radius: 8px;
71
- border-left: 4px solid #4CAF50;
72
- margin-top: 10px;
73
- '>
74
- <h3 style='margin-top:0'>Extracted Data</h3>
75
- <pre style='white-space: pre-wrap'>{json.dumps(formatted_json, indent=2)}</pre>
76
- </div>
77
- """
78
 
79
- yield "✅ Extraction complete", formatted_json, html_output
80
 
81
  except Exception as e:
82
- error_msg = f"❌ Processing error: {str(e)}"
83
- yield error_msg, "", f"<p style='color:red'>{error_msg}</p>"
84
 
85
- # 3. Gradio Interface
86
- with gr.Blocks(theme=gr.themes.Soft(), title="NuExtract 1.5") as demo:
87
- # Header
88
- gr.Markdown("""
89
- <div style='text-align:center'>
90
- <h1>🧠 NuExtract-1.5</h1>
91
- <p>Advanced Information Extraction System</p>
92
- </div>
93
- """)
94
 
95
- # Main layout
96
  with gr.Row():
97
- # Input Column
98
- with gr.Column(scale=1, min_width=400):
99
- gr.Markdown("### 📥 Input")
100
- template_input = gr.Textbox(
101
- label="Extraction Template (JSON)",
102
- value='{"fields": ["name", "email", "phone"]}',
103
- lines=5
104
- )
105
- text_input = gr.TextArea(
106
- label="Document Text",
107
- placeholder="John Smith ([email protected]) called regarding order #12345...",
108
- lines=12
109
- )
110
- gr.Examples(
111
- examples=[
112
- [
113
- '{"fields": ["name", "email"]}',
114
- "Please contact Dr. Sarah Johnson at [email protected]"
115
- ],
116
- [
117
- '{"fields": ["product", "price"]}',
118
- "The new MacBook Pro costs $1,299 at our store"
119
- ]
120
- ],
121
- inputs=[template_input, text_input],
122
- label="Try Examples:"
123
- )
124
 
125
- # Output Column
126
- with gr.Column(scale=1, min_width=500):
127
- gr.Markdown("### 📤 Results")
128
- status = gr.Textbox(
129
- label="Status",
130
- value="🟢 System Ready"
131
- )
132
- json_output = gr.JSON(label="Structured Output") # Removed interactive parameter
133
- html_output = gr.HTML(
134
- label="Formatted View",
135
- value="<div style='min-height:200px'></div>"
136
- )
137
-
138
- # Controls
139
- submit_btn = gr.Button("Extract Information", variant="primary")
140
- clear_btn = gr.Button("Clear")
141
 
142
- # Event handlers
143
- submit_btn.click(
144
- fn=extract_structure,
145
- inputs=[template_input, text_input],
146
- outputs=[status, json_output, html_output]
147
- )
148
-
149
- clear_btn.click(
150
- fn=lambda: ["", "", {}, "<div></div>"],
151
- inputs=[],
152
- outputs=[template_input, text_input, json_output, html_output]
153
- )
154
 
155
- # 4. Launch Configuration
156
- if __name__ == "__main__":
157
- # Initialize model
158
- extractor = load_model()
159
-
160
- # Launch app
161
- demo.launch(
162
- server_name="0.0.0.0",
163
- server_port=7860,
164
- show_error=True
165
- )
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
  import json
5
  import time
 
6
 
7
+ # Model Loading
8
+ tokenizer = AutoTokenizer.from_pretrained("numind/NuExtract-1.5")
9
+ model = AutoModelForCausalLM.from_pretrained(
10
+ "numind/NuExtract-1.5",
11
+ device_map="auto",
12
+ torch_dtype=torch.float16
13
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
 
15
  def extract_structure(template, text):
16
+ prompt = f"""Extract the following fields from the text:
17
+ Template: {template}
18
+ Text: {text}
19
+ Extracted JSON:"""
20
 
21
  try:
22
+ inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
23
+ outputs = model.generate(**inputs, max_new_tokens=512)
24
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ # Extract JSON portion
27
+ json_start = result.find("{")
28
+ json_end = result.rfind("}") + 1
29
+ extracted = json.loads(result[json_start:json_end])
 
 
 
 
 
 
 
 
 
 
30
 
31
+ return "✅ Success", extracted, f"<pre>{json.dumps(extracted, indent=2)}</pre>"
32
 
33
  except Exception as e:
34
+ return f"❌ Error: {str(e)}", {}, f"<p style='color:red'>{str(e)}</p>"
 
35
 
36
+ # Gradio Interface
37
+ with gr.Blocks() as demo:
38
+ gr.Markdown("# NuExtract-1.5 Structured Data Extractor")
 
 
 
 
 
 
39
 
 
40
  with gr.Row():
41
+ with gr.Column():
42
+ template = gr.Textbox(label="Template (JSON)", value='{"fields": ["name", "email"]}')
43
+ text = gr.TextArea(label="Input Text")
44
+ btn = gr.Button("Extract")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ with gr.Column():
47
+ status = gr.Textbox(label="Status")
48
+ json_out = gr.JSON(label="Output")
49
+ html_out = gr.HTML()
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ btn.click(extract_structure, [template, text], [status, json_out, html_out])
 
 
 
 
 
 
 
 
 
 
 
52
 
53
+ demo.launch()