oberbics committed on
Commit
cede142
Β·
verified Β·
1 Parent(s): 7e99353

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -16
app.py CHANGED
@@ -1,28 +1,187 @@
1
  import gradio as gr
 
 
 
 
 
2
 
3
- def extract_structure(template, text):
4
- return "βœ… Test worked", '{"name": "John Smith", "email": "john@example.com"}', "<p>Success</p>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- with gr.Blocks() as demo:
7
- gr.Markdown("# 🧠 NuExtract-1.5 Information Extractor")
 
 
 
 
 
 
8
 
9
- with gr.Row():
10
- with gr.Column():
11
- template_input = gr.Textbox(label="Template (JSON)", lines=5)
12
- text_input = gr.Textbox(label="Input Text", lines=10)
13
- submit_btn = gr.Button("Extract Information")
 
 
 
 
 
 
 
14
 
15
- with gr.Column():
16
- progress_output = gr.Textbox(label="Progress")
17
- result_output = gr.Textbox(label="Extracted Information")
18
- html_output = gr.HTML(label="Info")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  submit_btn.click(
21
  fn=extract_structure,
22
  inputs=[template_input, text_input],
23
- outputs=[progress_output, result_output, html_output]
 
 
 
 
 
 
24
  )
25
- print("βœ… Button click event bound!")
26
 
 
27
  if __name__ == "__main__":
28
- demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
+ import torch
4
+ import json
5
+ import time
6
+ from functools import lru_cache
7
 
8
# 1. Model Loading with Health Checks
@lru_cache(maxsize=1)
def load_model():
    """Load the NuExtract-1.5 pipeline once and cache it for the process.

    Returns:
        The transformers pipeline on success, or None when loading fails
        (callers must handle the None case — this function never raises).
    """
    try:
        print("βš™οΈ Initializing NuExtract-1.5 model...")
        start_time = time.time()

        # NuExtract-1.5 is a decoder-only (causal) model, so the pipeline
        # task is "text-generation"; the original "text2text-generation"
        # task is for encoder-decoder models and cannot load this checkpoint.
        model = pipeline(
            "text-generation",
            model="numind/NuExtract-1.5",
            device="cuda" if torch.cuda.is_available() else "cpu",
            # fp16 only on GPU; None keeps the framework default on CPU.
            torch_dtype=torch.float16 if torch.cuda.is_available() else None,
        )

        load_time = round(time.time() - start_time, 2)
        print(f"βœ… Model loaded successfully in {load_time}s")
        return model
    except Exception as e:
        # Deliberate best-effort: the UI reports the failure; don't crash import.
        print(f"❌ Model loading failed: {str(e)}")
        return None
28
 
29
# 2. Warm Start Mechanism
def keep_model_warm():
    """Periodic ping to prevent Hugging Face from unloading the model.

    Safe to call before the model exists: the original read the global
    `extractor` directly and raised NameError whenever the module was
    imported without running the __main__ block (the `if` test was outside
    the try). Always returns None.
    """
    model = globals().get("extractor")
    if not model:
        return
    try:
        # Tiny generation request just to keep the weights resident.
        model("ping", max_length=1)
    except Exception:
        # Best-effort keep-alive: ignore transient inference errors, but
        # no longer swallow KeyboardInterrupt/SystemExit via bare `except:`.
        pass
37
 
38
# 3. Processing Function with Streamed Output
def extract_structure(template, text):
    """Generator streaming (status, json_text, html) tuples to the UI.

    Args:
        template: JSON extraction template typed by the user; may be empty.
        text: Document text to extract information from.

    Yields:
        (progress/status message, formatted JSON result, HTML view) tuples;
        intermediate yields carry empty result fields.
    """
    # Input validation
    if not text.strip():
        yield "❌ Error: Empty input text", "", "<p style='color:red'>Please enter text to analyze</p>"
        return

    try:
        template_data = json.loads(template) if template.strip() else {}
    except json.JSONDecodeError:
        yield "❌ Error: Invalid JSON template", "", "<p style='color:red'>Malformed JSON template</p>"
        return

    # Fetch the (cached) model up front so we fail fast with a readable
    # message instead of raising NameError on the undefined global
    # `extractor` that the original referenced.
    model = load_model()
    if model is None:
        msg = "❌ Error: Model unavailable"
        yield msg, "", f"<p style='color:red'>{msg}</p>"
        return

    # Cosmetic progress stages (the sleeps are intentional UX pacing).
    stages = [
        ("πŸ” Initializing model...", 0.5),
        ("πŸ“– Parsing document structure...", 1.2),
        ("πŸ”„ Matching template fields...", 0.8),
        ("✨ Finalizing extraction...", 0.3)
    ]

    for msg, delay in stages:
        yield msg, "", ""
        time.sleep(delay)

    try:
        # Actual inference. The original spread the template into the call
        # (**template_data), which passed template keys as unsupported
        # pipeline kwargs; NuExtract expects the template embedded in the
        # prompt (see the model card). `temperature=0.7` was dropped: it
        # is ignored without do_sample and extraction should be greedy.
        prompt = (
            "<|input|>\n### Template:\n"
            f"{json.dumps(template_data, indent=4)}\n"
            f"### Text:\n{text}\n<|output|>"
        )
        result = model(
            prompt,
            max_length=512,
            num_return_sequences=1,
        )[0]['generated_text']
        # Keep only the completion after the output marker, if echoed back.
        answer = result.split("<|output|>")[-1].strip()

        # Format output (raises if the model produced non-JSON; caught below).
        formatted_json = json.dumps(json.loads(answer), indent=2)
        html_output = f"""
        <div style='
            padding: 15px;
            background: #f8f9fa;
            border-radius: 8px;
            border-left: 4px solid #4CAF50;
            margin-top: 10px;
        '>
            <h3 style='margin-top:0'>Extracted Data</h3>
            <pre style='white-space: pre-wrap'>{formatted_json}</pre>
        </div>
        """

        yield "βœ… Extraction complete", formatted_json, html_output

    except Exception as e:
        error_msg = f"❌ Processing error: {str(e)}"
        yield error_msg, "", f"<p style='color:red'>{error_msg}</p>"
93
 
94
# 4. Gradio Interface
# Declarative two-column Blocks layout wired to the extract_structure
# generator; `demo` is launched from the __main__ block below.
with gr.Blocks(theme=gr.themes.Soft(), title="NuExtract 1.5") as demo:
    # Header banner (raw HTML rendered through the Markdown component).
    gr.Markdown("""
    <div style='text-align:center'>
    <h1>🧠 NuExtract-1.5</h1>
    <p>Advanced Information Extraction System</p>
    </div>
    """)

    # Main layout
    with gr.Row():
        # Input Column
        with gr.Column(scale=1, min_width=400):
            gr.Markdown("### πŸ“₯ Input")
            # JSON template describing which fields to extract.
            template_input = gr.Textbox(
                label="Extraction Template (JSON)",
                value='{"fields": ["name", "email", "phone"]}',
                lines=5
            )
            # Free-form document text the extraction runs against.
            text_input = gr.TextArea(
                label="Document Text",
                placeholder="John Smith ([email protected]) called regarding order #12345...",
                lines=12
            )
            # Clickable examples that pre-fill both inputs above.
            gr.Examples(
                examples=[
                    [
                        '{"fields": ["name", "email"]}',
                        "Please contact Dr. Sarah Johnson at [email protected]"
                    ],
                    [
                        '{"fields": ["product", "price"]}',
                        "The new MacBook Pro costs $1,299 at our store"
                    ]
                ],
                inputs=[template_input, text_input],
                label="Try Examples:"
            )

        # Output Column
        with gr.Column(scale=1, min_width=500):
            gr.Markdown("### πŸ“€ Results")
            # Streaming status line fed by extract_structure's yields.
            status = gr.Textbox(
                label="Status",
                value="🟒 System Ready",
                interactive=False
            )
            # Structured result rendered as interactive JSON tree.
            json_output = gr.JSON(
                label="Structured Output",
                interactive=False
            )
            # Styled HTML card view of the same result.
            html_output = gr.HTML(
                label="Formatted View",
                value="<div style='min-height:200px'></div>"
            )

    # Controls
    submit_btn = gr.Button("Extract Information", variant="primary")
    clear_btn = gr.Button("Clear")

    # Event handlers
    # extract_structure is a generator, so status/json/html update as it yields.
    submit_btn.click(
        fn=extract_structure,
        inputs=[template_input, text_input],
        outputs=[status, json_output, html_output]
    )

    # Reset both inputs and both result views to their blank state.
    clear_btn.click(
        fn=lambda: ["", "", "", "<div></div>"],
        inputs=[],
        outputs=[template_input, text_input, json_output, html_output]
    )
 
167
 
168
# 5. Launch Configuration
if __name__ == "__main__":
    # Initialize model once; keep_model_warm reads this module-level global.
    extractor = load_model()

    # Start keep-alive thread. The original ran
    # `[keep_model_warm() for _ in iter(int, 1)]` — an infinite list
    # comprehension that pinged the model in a busy loop with no pause AND
    # accumulated every None result in an ever-growing list (memory leak).
    import threading

    def _keep_alive():
        # Ping every 5 minutes; daemon thread exits with the process.
        while True:
            keep_model_warm()
            time.sleep(300)

    threading.Thread(target=_keep_alive, daemon=True).start()

    # Launch app
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        share=False,
        # NOTE(review): the original passed a remote URL as favicon_path,
        # but Gradio expects a local file path there — dropped to avoid a
        # startup failure; re-add with a locally bundled icon if desired.
    )