Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,28 +1,187 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
submit_btn.click(
|
21 |
fn=extract_structure,
|
22 |
inputs=[template_input, text_input],
|
23 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
)
|
25 |
-
print("β
Button click event bound!")
|
26 |
|
|
|
27 |
if __name__ == "__main__":
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
import torch
|
4 |
+
import json
|
5 |
+
import time
|
6 |
+
from functools import lru_cache
|
7 |
|
# 1. Model Loading with Health Checks
@lru_cache(maxsize=1)  # no-arg function: cache guarantees the pipeline is built at most once
def load_model():
    """Load the NuExtract-1.5 text2text-generation pipeline, or return None.

    Selects CUDA with float16 when a GPU is available, otherwise CPU with
    the library-default dtype.  Failures are printed rather than raised so
    the app can still start (degraded) when the model cannot be fetched —
    callers must handle a None return.
    """
    try:
        print("βοΈ Initializing NuExtract-1.5 model...")
        start_time = time.time()

        model = pipeline(
            "text2text-generation",
            model="numind/NuExtract-1.5",
            device="cuda" if torch.cuda.is_available() else "cpu",
            # float16 only makes sense on GPU; None keeps the default on CPU
            torch_dtype=torch.float16 if torch.cuda.is_available() else None
        )

        load_time = round(time.time() - start_time, 2)
        print(f"β Model loaded successfully in {load_time}s")
        return model
    except Exception as e:
        # Broad catch is deliberate: any load failure degrades to None
        print(f"β Model loading failed: {str(e)}")
        return None
|
# 2. Warm Start Mechanism
def keep_model_warm():
    """Periodic ping to prevent Hugging Face from unloading the model.

    Best-effort: runs a trivial one-token inference against the module-level
    ``extractor`` (set in the ``__main__`` guard; may be None).  Any
    inference error is swallowed because a failed ping must never kill the
    keep-alive thread.
    """
    if extractor:
        try:
            extractor("ping", max_length=1)
        except Exception:
            # Fixed: was a bare `except:`, which also traps SystemExit and
            # KeyboardInterrupt and would block interpreter shutdown signals
            # reaching this thread.  Exception is the correct boundary for
            # "ignore any inference failure".
            pass
37 |
|
# 3. Processing Function with Streamed Output
def extract_structure(template, text):
    """Generator yielding (status_message, json_string, html) triples.

    Streams cosmetic progress messages first, then runs the model and
    yields either the final extraction or an error triple.

    Args:
        template: JSON string describing the fields to extract; empty/blank
            is allowed and treated as ``{}``.
        text: the source document to extract from.
    """
    # Input validation — empty document short-circuits with an error triple
    if not text.strip():
        yield "β Error: Empty input text", "", "<p style='color:red'>Please enter text to analyze</p>"
        return

    try:
        template_data = json.loads(template) if template.strip() else {}
    except json.JSONDecodeError:
        yield "β Error: Invalid JSON template", "", "<p style='color:red'>Malformed JSON template</p>"
        return

    # Processing stages: (message, artificial delay in seconds).  Purely
    # cosmetic feedback — no real work happens until the inference below,
    # and the sleeps add ~2.8s of fixed latency to every request.
    stages = [
        ("π Initializing model...", 0.5),
        ("π Parsing document structure...", 1.2),
        ("π Matching template fields...", 0.8),
        ("β¨ Finalizing extraction...", 0.3)
    ]

    for msg, delay in stages:
        yield msg, "", ""
        time.sleep(delay)

    try:
        # Actual inference.
        # NOTE(review): **template_data spreads the parsed template's keys
        # (e.g. "fields") directly as pipeline keyword arguments — confirm
        # the pipeline accepts these; unexpected kwargs would raise here and
        # surface via the except branch below.  Also note `extractor` may be
        # None (load failure), which raises TypeError and is caught below.
        result = extractor(
            text,
            **template_data,
            max_length=512,
            num_return_sequences=1,
            temperature=0.7
        )[0]['generated_text']

        # Format output — the model's output must itself be valid JSON,
        # otherwise json.loads raises and we fall through to the error yield
        formatted_json = json.dumps(json.loads(result), indent=2)
        html_output = f"""
        <div style='
            padding: 15px;
            background: #f8f9fa;
            border-radius: 8px;
            border-left: 4px solid #4CAF50;
            margin-top: 10px;
        '>
        <h3 style='margin-top:0'>Extracted Data</h3>
        <pre style='white-space: pre-wrap'>{formatted_json}</pre>
        </div>
        """

        yield "β Extraction complete", formatted_json, html_output

    except Exception as e:
        error_msg = f"β Processing error: {str(e)}"
        yield error_msg, "", f"<p style='color:red'>{error_msg}</p>"
|
# 4. Gradio Interface
with gr.Blocks(theme=gr.themes.Soft(), title="NuExtract 1.5") as demo:
    # Header
    gr.Markdown("""
    <div style='text-align:center'>
    <h1>π§ NuExtract-1.5</h1>
    <p>Advanced Information Extraction System</p>
    </div>
    """)

    # Main layout: two side-by-side columns (inputs left, results right)
    with gr.Row():
        # Input Column
        with gr.Column(scale=1, min_width=400):
            gr.Markdown("### π₯ Input")
            # JSON template naming the fields to extract
            template_input = gr.Textbox(
                label="Extraction Template (JSON)",
                value='{"fields": ["name", "email", "phone"]}',
                lines=5
            )
            # Free-form source document
            text_input = gr.TextArea(
                label="Document Text",
                placeholder="John Smith ([email protected]) called regarding order #12345...",
                lines=12
            )
            gr.Examples(
                examples=[
                    [
                        '{"fields": ["name", "email"]}',
                        "Please contact Dr. Sarah Johnson at [email protected]"
                    ],
                    [
                        '{"fields": ["product", "price"]}',
                        "The new MacBook Pro costs $1,299 at our store"
                    ]
                ],
                inputs=[template_input, text_input],
                label="Try Examples:"
            )

        # Output Column
        with gr.Column(scale=1, min_width=500):
            gr.Markdown("### π€ Results")
            status = gr.Textbox(
                label="Status",
                value="π’ System Ready",
                interactive=False
            )
            json_output = gr.JSON(
                label="Structured Output",
                interactive=False
            )
            html_output = gr.HTML(
                label="Formatted View",
                value="<div style='min-height:200px'></div>"
            )

    # Controls
    # NOTE(review): exact placement (inside the results column vs. at
    # Blocks level) could not be recovered from the diff render — confirm.
    submit_btn = gr.Button("Extract Information", variant="primary")
    clear_btn = gr.Button("Clear")

    # Event handlers
    # extract_structure is a generator, so the three outputs stream
    # progressive updates during the click.
    submit_btn.click(
        fn=extract_structure,
        inputs=[template_input, text_input],
        outputs=[status, json_output, html_output]
    )

    # Clear resets both inputs plus the two result panes (status untouched)
    clear_btn.click(
        fn=lambda: ["", "", "", "<div></div>"],
        inputs=[],
        outputs=[template_input, text_input, json_output, html_output]
    )
167 |
|
# 5. Launch Configuration
if __name__ == "__main__":
    # Initialize model (may be None on load failure; downstream code copes)
    extractor = load_model()

    # Start keep-alive thread.
    # Fixed: the original used `[keep_model_warm() for _ in iter(int, 1)]`,
    # an infinite list comprehension — it busy-loops with no pause
    # (hammering the model with back-to-back pings) and appends None to a
    # list forever, growing memory without bound.  A plain loop that sleeps
    # between pings achieves the intended "stay warm" effect.
    import threading

    def _keep_warm_forever(interval_seconds=300):
        """Ping the model periodically so the Space is not put to sleep."""
        while True:
            keep_model_warm()
            time.sleep(interval_seconds)

    threading.Thread(target=_keep_warm_forever, daemon=True).start()

    # Launch app
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        share=False,
        # NOTE(review): Gradio documents favicon_path as a local file path —
        # confirm a remote URL is actually honored here.
        favicon_path="https://huggingface.co/favicon.ico"
    )