Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -2,93 +2,108 @@ import json
|
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
import os
|
|
|
5 |
|
6 |
# Hugging Face API details
|
7 |
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
|
8 |
api_token = os.environ.get("HF_TOKEN", "") # Get token from environment variable
|
9 |
-
|
10 |
headers = {"Authorization": f"Bearer {api_token}"}
|
11 |
|
|
|
12 |
def query_api(payload):
|
13 |
response = requests.post(API_URL, headers=headers, json=payload)
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def extract_structure(template, text):
|
17 |
try:
|
18 |
-
# Format the input
|
19 |
prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
|
20 |
-
|
21 |
-
# Call the API
|
22 |
payload = {
|
23 |
"inputs": prompt,
|
24 |
"parameters": {
|
25 |
"max_new_tokens": 2000,
|
26 |
-
"temperature": 0.01,
|
27 |
"return_full_text": True
|
28 |
}
|
29 |
}
|
30 |
-
|
31 |
response = query_api(payload)
|
32 |
-
|
33 |
-
# Check for
|
34 |
if isinstance(response, dict) and "error" in response:
|
35 |
return f"API Error: {response['error']}", "{}", f"<p>Error occurred: {response['error']}</p>"
|
36 |
-
|
37 |
-
#
|
38 |
if isinstance(response, list) and len(response) > 0:
|
39 |
output = response[0].get("generated_text", "")
|
40 |
-
|
41 |
-
|
42 |
-
# Try to
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
try:
|
44 |
parsed = json.loads(result)
|
45 |
result = json.dumps(parsed, indent=2)
|
46 |
-
except:
|
47 |
pass
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
return "Processing complete", result, highlighted
|
53 |
else:
|
54 |
-
return "Unexpected API
|
55 |
-
|
56 |
except Exception as e:
|
57 |
-
return f"Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"
|
58 |
|
59 |
-
|
|
|
60 |
with gr.Blocks() as demo:
|
61 |
-
gr.Markdown("# NuExtract-1.5
|
62 |
-
|
63 |
if not api_token:
|
64 |
-
gr.Markdown("## ⚠️ No API token found. Set HF_TOKEN in Space secrets.")
|
65 |
-
|
66 |
with gr.Row():
|
67 |
with gr.Column():
|
68 |
template_input = gr.Textbox(
|
69 |
-
label="Template (JSON)",
|
70 |
value='{"name": "", "email": ""}',
|
71 |
lines=5
|
72 |
)
|
73 |
text_input = gr.Textbox(
|
74 |
-
label="Input Text",
|
75 |
value="Contact: John Smith ([email protected])",
|
76 |
lines=10
|
77 |
)
|
78 |
submit_btn = gr.Button("Extract Information")
|
79 |
-
|
80 |
with gr.Column():
|
81 |
progress_output = gr.Textbox(label="Progress")
|
82 |
result_output = gr.Textbox(label="Extracted Information")
|
83 |
-
html_output = gr.HTML(label="
|
84 |
-
|
85 |
submit_btn.click(
|
86 |
fn=extract_structure,
|
87 |
inputs=[template_input, text_input],
|
88 |
outputs=[progress_output, result_output, html_output]
|
89 |
)
|
90 |
|
91 |
-
# Examples
|
92 |
gr.Examples(
|
93 |
[
|
94 |
[
|
@@ -114,4 +129,4 @@ with gr.Blocks() as demo:
|
|
114 |
)
|
115 |
|
116 |
if __name__ == "__main__":
|
117 |
-
demo.launch()
|
|
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
import os
|
5 |
+
import re
|
6 |
|
7 |
# Hugging Face API details
|
8 |
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
|
9 |
api_token = os.environ.get("HF_TOKEN", "") # Get token from environment variable
|
|
|
10 |
headers = {"Authorization": f"Bearer {api_token}"}
|
11 |
|
12 |
+
|
13 |
def query_api(payload, timeout=120):
    """Post *payload* to the inference endpoint and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body sent to ``API_URL``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever, which would hang the UI
            callback if the endpoint stalls.

    Returns:
        The decoded JSON body on success. If the request cannot be completed
        or the body is not valid JSON, a ``{"error": "<message>"}`` dict is
        returned instead, so callers can handle every failure the same way.
    """
    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
    except requests.RequestException as e:
        # Connection errors and timeouts are reported in the same shape as
        # the JSON-decoding failure below.
        return {"error": f"Request failed: {str(e)}"}

    # Debug logs
    print("API STATUS CODE:", response.status_code)
    print("RAW RESPONSE:", response.text)

    try:
        return response.json()
    except ValueError as e:
        # JSON decoding errors raised by ``Response.json`` are ValueErrors.
        return {"error": f"Could not decode JSON: {str(e)}"}
|
24 |
+
|
25 |
|
26 |
def _extract_json_text(output):
    """Return the answer portion of *output*, pretty-printed when it is JSON.

    The text after the last ``<|output|>`` marker is used when the marker is
    present; otherwise the widest ``{...}`` span is used, falling back to the
    whole stripped text. Text that does not parse as JSON is returned as-is.
    """
    marker = "<|output|>"
    if marker in output:
        result = output.split(marker)[-1].strip()
    else:
        # Try to extract a JSON-like structure using regex
        json_match = re.search(r'({[\s\S]+})', output)
        result = json_match.group(1) if json_match else output.strip()

    try:
        return json.dumps(json.loads(result), indent=2)
    except ValueError:
        # Best effort only: not valid JSON, so show the raw text unchanged.
        return result


def extract_structure(template, text):
    """Run the extraction model on *text* using *template* and format the reply.

    Args:
        template: JSON template string describing the fields to extract.
        text: Free text to extract the information from.

    Returns:
        A ``(status, result, info_html)`` tuple of strings: a short status
        message, the extracted data (``"{}"`` on failure) and an HTML snippet.
        Error text placed in the HTML snippet is escaped because it comes
        from the API response or from exception messages.
    """
    import html  # local import: only needed to escape text shown as HTML

    try:
        # Format the input for NuExtract
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 2000,
                "temperature": 0.01,
                "return_full_text": True
            }
        }

        response = query_api(payload)

        # Check for API error
        if isinstance(response, dict) and "error" in response:
            error = response["error"]
            return (
                f"API Error: {error}",
                "{}",
                f"<p>Error occurred: {html.escape(str(error))}</p>",
            )

        # Get generated text
        if isinstance(response, list) and len(response) > 0:
            output = response[0].get("generated_text", "")
            print("Generated Text:", output)  # Optional debugging

            result = _extract_json_text(output)

            highlighted = f"<p>✅ Successfully processed input of length {len(text)} characters.</p>"
            return "✅ Extraction Complete", result, highlighted

        return "⚠️ Unexpected API Response", json.dumps(response, indent=2), "<p>Please check the API response format.</p>"

    except Exception as e:
        # UI boundary: report any failure to the user instead of crashing.
        return f"❌ Error: {str(e)}", "{}", f"<p>Processing failed: {html.escape(str(e))}</p>"
|
73 |
|
74 |
+
|
75 |
+
# Gradio Interface
|
76 |
with gr.Blocks() as demo:
|
77 |
+
gr.Markdown("# 🧠 NuExtract-1.5 Information Extractor")
|
78 |
+
|
79 |
if not api_token:
|
80 |
+
gr.Markdown("## ⚠️ No API token found. Set `HF_TOKEN` in the Space secrets.")
|
81 |
+
|
82 |
with gr.Row():
|
83 |
with gr.Column():
|
84 |
template_input = gr.Textbox(
|
85 |
+
label="Template (JSON)",
|
86 |
value='{"name": "", "email": ""}',
|
87 |
lines=5
|
88 |
)
|
89 |
text_input = gr.Textbox(
|
90 |
+
label="Input Text",
|
91 |
value="Contact: John Smith ([email protected])",
|
92 |
lines=10
|
93 |
)
|
94 |
submit_btn = gr.Button("Extract Information")
|
95 |
+
|
96 |
with gr.Column():
|
97 |
progress_output = gr.Textbox(label="Progress")
|
98 |
result_output = gr.Textbox(label="Extracted Information")
|
99 |
+
html_output = gr.HTML(label="Info")
|
100 |
+
|
101 |
submit_btn.click(
|
102 |
fn=extract_structure,
|
103 |
inputs=[template_input, text_input],
|
104 |
outputs=[progress_output, result_output, html_output]
|
105 |
)
|
106 |
|
|
|
107 |
gr.Examples(
|
108 |
[
|
109 |
[
|
|
|
129 |
)
|
130 |
|
131 |
if __name__ == "__main__":
|
132 |
+
demo.launch()
|