Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
|
|
1 |
import json
|
|
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
-
import os
|
5 |
-
import re
|
6 |
|
7 |
# Hugging Face API details
|
8 |
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
|
@@ -10,39 +10,18 @@ api_token = os.environ.get("HF_TOKEN", "") # Get token from environment variabl
|
|
10 |
headers = {"Authorization": f"Bearer {api_token}"}
|
11 |
|
12 |
|
13 |
-
# Test API Connection
|
14 |
-
def test_api_connection():
|
15 |
-
try:
|
16 |
-
# Test with a simple GET request
|
17 |
-
response = requests.get("https://api-inference.huggingface.co/models/numind/NuExtract-1.5")
|
18 |
-
|
19 |
-
# Check if the connection was successful
|
20 |
-
if response.status_code == 200:
|
21 |
-
print("β
Connection to Hugging Face API successful!")
|
22 |
-
else:
|
23 |
-
print(f"β οΈ API returned status code {response.status_code}: {response.text}")
|
24 |
-
except requests.exceptions.RequestException as e:
|
25 |
-
print(f"β Connection failed: {str(e)}")
|
26 |
-
|
27 |
-
|
28 |
-
# Make the API request
|
29 |
def query_api(payload):
|
30 |
try:
|
31 |
response = requests.post(API_URL, headers=headers, json=payload)
|
32 |
-
# Debug logs
|
33 |
print("API STATUS CODE:", response.status_code)
|
34 |
print("RAW RESPONSE:", response.text)
|
35 |
-
|
36 |
return response.json()
|
37 |
except Exception as e:
|
38 |
-
|
39 |
-
return {"error": f"Could not decode JSON: {str(e)}"}
|
40 |
|
41 |
|
42 |
-
# Extract structure from the template and text
|
43 |
def extract_structure(template, text):
|
44 |
try:
|
45 |
-
# Format the input for NuExtract
|
46 |
prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
|
47 |
|
48 |
payload = {
|
@@ -56,24 +35,19 @@ def extract_structure(template, text):
|
|
56 |
|
57 |
response = query_api(payload)
|
58 |
|
59 |
-
# Check for API error
|
60 |
if isinstance(response, dict) and "error" in response:
|
61 |
-
return f"API Error: {response['error']}", "{}", "<p>Error
|
62 |
|
63 |
-
# Get generated text
|
64 |
if isinstance(response, list) and len(response) > 0:
|
65 |
output = response[0].get("generated_text", "")
|
66 |
-
print("Generated Text:", output)
|
67 |
|
68 |
-
# Try to extract after <|output|>
|
69 |
if "<|output|>" in output:
|
70 |
result = output.split("<|output|>")[-1].strip()
|
71 |
else:
|
72 |
-
|
73 |
-
|
74 |
-
result = json_match.group(1) if json_match else output.strip()
|
75 |
|
76 |
-
# Attempt to format JSON nicely
|
77 |
try:
|
78 |
parsed = json.loads(result)
|
79 |
result = json.dumps(parsed, indent=2)
|
@@ -82,22 +56,19 @@ def extract_structure(template, text):
|
|
82 |
|
83 |
highlighted = f"<p>β
Successfully processed input of length {len(text)} characters.</p>"
|
84 |
return "β
Extraction Complete", result, highlighted
|
85 |
-
|
86 |
-
|
87 |
|
88 |
except Exception as e:
|
89 |
return f"β Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"
|
90 |
|
91 |
|
92 |
-
# Gradio
|
93 |
with gr.Blocks() as demo:
|
94 |
gr.Markdown("# π§ NuExtract-1.5 Information Extractor")
|
95 |
|
96 |
if not api_token:
|
97 |
-
gr.Markdown("## β οΈ No API token found.
|
98 |
-
|
99 |
-
# Call test connection before launching the Gradio interface
|
100 |
-
test_api_connection()
|
101 |
|
102 |
with gr.Row():
|
103 |
with gr.Column():
|
@@ -148,6 +119,26 @@ with gr.Blocks() as demo:
|
|
148 |
[template_input, text_input]
|
149 |
)
|
150 |
|
151 |
-
if __name__ == "__main__":
|
152 |
-
demo.launch()
|
153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
import json
|
3 |
+
import re
|
4 |
import gradio as gr
|
5 |
import requests
|
|
|
|
|
6 |
|
7 |
# Hugging Face API details
|
8 |
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
|
|
|
10 |
headers = {"Authorization": f"Bearer {api_token}"}
|
11 |
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def query_api(payload, timeout=120):
    """POST ``payload`` to the Hugging Face inference endpoint and return its JSON.

    Args:
        payload: JSON-serialisable request body sent to ``API_URL``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever, which would hang the caller
            if the endpoint stalls.

    Returns:
        The decoded JSON body of the response. If the request or the JSON
        decoding raises, a dict of the form
        ``{"error": "Request failed: <reason>"}`` is returned instead, so
        callers can check for an ``"error"`` key rather than catch exceptions.
    """
    try:
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        # Debug output: status code and raw body are echoed to stdout.
        print("API STATUS CODE:", response.status_code)
        print("RAW RESPONSE:", response.text)
        return response.json()
    except Exception as e:
        # Deliberately broad: every failure (network error, timeout, non-JSON
        # body) is reported to the caller as an error dict, never raised.
        return {"error": f"Request failed: {str(e)}"}
|
|
|
21 |
|
22 |
|
|
|
23 |
def extract_structure(template, text):
|
24 |
try:
|
|
|
25 |
prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
|
26 |
|
27 |
payload = {
|
|
|
35 |
|
36 |
response = query_api(payload)
|
37 |
|
|
|
38 |
if isinstance(response, dict) and "error" in response:
|
39 |
+
return f"API Error: {response['error']}", "{}", f"<p>Error: {response['error']}</p>"
|
40 |
|
|
|
41 |
if isinstance(response, list) and len(response) > 0:
|
42 |
output = response[0].get("generated_text", "")
|
43 |
+
print("Generated Text:", output)
|
44 |
|
|
|
45 |
if "<|output|>" in output:
|
46 |
result = output.split("<|output|>")[-1].strip()
|
47 |
else:
|
48 |
+
match = re.search(r'({[\s\S]+})', output)
|
49 |
+
result = match.group(1) if match else output.strip()
|
|
|
50 |
|
|
|
51 |
try:
|
52 |
parsed = json.loads(result)
|
53 |
result = json.dumps(parsed, indent=2)
|
|
|
56 |
|
57 |
highlighted = f"<p>β
Successfully processed input of length {len(text)} characters.</p>"
|
58 |
return "β
Extraction Complete", result, highlighted
|
59 |
+
|
60 |
+
return "β οΈ Unexpected API Response", json.dumps(response, indent=2), "<p>Unexpected format.</p>"
|
61 |
|
62 |
except Exception as e:
|
63 |
return f"β Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"
|
64 |
|
65 |
|
66 |
+
# Gradio App
|
67 |
with gr.Blocks() as demo:
|
68 |
gr.Markdown("# π§ NuExtract-1.5 Information Extractor")
|
69 |
|
70 |
if not api_token:
|
71 |
+
gr.Markdown("## β οΈ No API token found. Please set `HF_TOKEN` in environment variables.")
|
|
|
|
|
|
|
72 |
|
73 |
with gr.Row():
|
74 |
with gr.Column():
|
|
|
119 |
[template_input, text_input]
|
120 |
)
|
121 |
|
|
|
|
|
122 |
|
123 |
+
def test_api_connection():
    """Print a startup banner and smoke-test the inference API once.

    If ``api_token`` is empty, only a warning is printed and no request is
    made. Otherwise a minimal NuExtract-style prompt is sent through
    ``query_api``; a list response is reported as success, anything else is
    printed as a possibly unexpected format. Nothing is returned: the outcome
    is reported on stdout only.
    """
    print("===== Application Startup =====")

    if not api_token:
        print("❌ HF_TOKEN not set. Please set your API token.")
        return

    # Tiny template/text pair: enough to exercise the endpoint end to end.
    test_payload = {
        "inputs": "<|input|>\n### Template:\n{\"test\": \"\"}\n### Text:\nHello world\n\n<|output|>",
        "parameters": {
            "max_new_tokens": 100,
            "temperature": 0.01,
        },
    }
    response = query_api(test_payload)

    # query_api returns a dict with an "error" key on failure, so a list is
    # treated here as the sign of a successful generation call.
    if isinstance(response, list):
        print("✅ Connection to Hugging Face API successful!")
    else:
        print("⚠️ API may not be returning expected format:", response)
|
140 |
+
|
141 |
+
|
142 |
+
if __name__ == "__main__":
    # Run the API smoke test first, then start the Gradio app.
    test_api_connection()
    demo.launch(debug=True)  # You can add share=True or server_name/port if needed
|