Update app.py
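This update replaces the in-process model load (transformers + torch) with calls to the hosted Hugging Face Inference API: app.py now posts the NuExtract prompt to the numind/NuExtract-1.5 endpoint via requests, authenticates with an HF_TOKEN read from the Space's secrets, warns in the UI when the token is missing, and adds a second, richer gr.Examples entry built around the Mistral 7B abstract.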
app.py
CHANGED
@@ -1,49 +1,67 @@
 import json
-import torch
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
+import requests
+import os
 
-# ...
-def extract_structure(template, text):
+# Hugging Face API details
+API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
+api_token = os.environ.get("HF_TOKEN", "")  # Get token from environment variable
+
+headers = {"Authorization": f"Bearer {api_token}"}
+
+def query_api(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+def extract_structure(template, text):
     try:
-        # Format the input
+        # Format the input following NuExtract's format
         prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
 
-        # ...
+        # Call the API
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "max_new_tokens": 2000,
+                "temperature": 0.01,  # Nearly deterministic as recommended
+                "return_full_text": True
+            }
+        }
 
-        highlighted = f"<p>Processed text of length {len(text)} characters</p>"
+        response = query_api(payload)
 
+        # Check for errors
+        if isinstance(response, dict) and "error" in response:
+            return f"API Error: {response['error']}", "{}", f"<p>Error occurred: {response['error']}</p>"
+
+        # Extract result - the API returns the full text so we need to split it
+        if isinstance(response, list) and len(response) > 0:
+            output = response[0].get("generated_text", "")
+            result = output.split("<|output|>")[1] if "<|output|>" in output else output
+
+            # Try to parse as JSON to format it nicely
+            try:
+                parsed = json.loads(result)
+                result = json.dumps(parsed, indent=2)
+            except:
+                pass
+
+            # Create a simple highlight
+            highlighted = f"<p>Successfully processed text of length {len(text)} characters</p>"
+
+            return "Processing complete", result, highlighted
+        else:
+            return "Unexpected API response", str(response), "<p>Please check API token and try again</p>"
+
     except Exception as e:
-        return f"Error: {str(e)}", "{}", "<p>Processing failed</p>"
-
-# Load model
-model_name = "numind/NuExtract-1.5"
-try:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,  # Using float16 instead of bfloat16 for better compatibility
-        trust_remote_code=True,
-        device_map="auto"
-    )
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model_loaded = True
-except Exception as e:
-    print(f"Model loading error: {e}")
-    model_loaded = False
+        return f"Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"
 
 # Create interface
 with gr.Blocks() as demo:
     gr.Markdown("# NuExtract-1.5 Demo")
 
-    if not model_loaded:
-        gr.Markdown("## ⚠️ ...")
+    if not api_token:
+        gr.Markdown("## ⚠️ No API token found. Set HF_TOKEN in Space secrets.")
 
     with gr.Row():
         with gr.Column():
@@ -70,12 +88,26 @@ with gr.Blocks() as demo:
             outputs=[progress_output, result_output, html_output]
         )
 
-    # ...
+    # Examples
     gr.Examples(
         [
             [
                 '{"name": "", "email": ""}',
                 'Contact: John Smith ([email protected])'
+            ],
+            [
+                '''{
+    "Model": {
+        "Name": "",
+        "Number of parameters": "",
+        "Architecture": []
+    },
+    "Usage": {
+        "Use case": [],
+        "License": ""
+    }
+}''',
+                '''We introduce Mistral 7B, a 7-billion-parameter language model engineered for superior performance and efficiency. Mistral 7B outperforms the best open 13B model (Llama 2) across all evaluated benchmarks, and the best released 34B model (Llama 1) in reasoning, mathematics, and code generation. Our model is released under the Apache 2.0 license.'''
             ]
         ],
         [template_input, text_input]
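For reviewers who want to exercise the new code path outside the Gradio UI, here is a minimal sketch that sends the same payload shape to the same endpoint. The URL, prompt format, and response handling mirror the diff above; the sample template, text, and email address are hypothetical, and HF_TOKEN is assumed to be exported in the shell.

import json
import os
import requests

# Same endpoint and auth scheme as app.py; HF_TOKEN is assumed to be set.
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}

# Hypothetical sample inputs mirroring the first gr.Examples entry.
template = '{"name": "", "email": ""}'
text = "Contact: John Smith (john.smith@example.com)"  # hypothetical address

prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
payload = {
    "inputs": prompt,
    "parameters": {"max_new_tokens": 2000, "temperature": 0.01, "return_full_text": True},
}

response = requests.post(API_URL, headers=headers, json=payload).json()
if isinstance(response, list) and response:
    output = response[0].get("generated_text", "")
    result = output.split("<|output|>")[1] if "<|output|>" in output else output
    try:
        print(json.dumps(json.loads(result), indent=2))  # pretty-print if valid JSON
    except json.JSONDecodeError:
        print(result)
else:
    print("API error:", response)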
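One operational detail the new error branch will surface as "API Error": the serverless Inference API answers with an error dict while the model is cold-loading, often with an estimated wait hint. A hedged sketch of a retry wrapper that could sit in front of query_api; the "estimated_time" field name and the wait strategy are assumptions about the API's loading response, not part of this commit.

import time
import requests

def query_api_with_retry(payload, url, headers, attempts=3, default_wait=10.0):
    """Retry while the serverless endpoint reports it is still loading."""
    data = None
    for _ in range(attempts):
        data = requests.post(url, headers=headers, json=payload, timeout=120).json()
        # Assumption: a loading endpoint answers with {"error": ..., "estimated_time": ...}.
        if isinstance(data, dict) and "error" in data:
            time.sleep(float(data.get("estimated_time", default_wait)))
            continue
        return data
    return data  # last error payload after exhausting retries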
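The hunks reference widgets (template_input, text_input, progress_output, result_output, html_output) whose definitions fall in the unchanged lines the diff does not show. Purely for orientation, a hypothetical minimal wiring consistent with those identifiers; the actual layout in app.py may differ.

import gradio as gr

def extract_structure(template, text):
    # Stand-in for the real function defined in app.py above.
    return "Processing complete", "{}", "<p>demo</p>"

with gr.Blocks() as demo:
    gr.Markdown("# NuExtract-1.5 Demo")
    with gr.Row():
        with gr.Column():
            template_input = gr.Textbox(label="Template (JSON)", lines=8)
            text_input = gr.Textbox(label="Text", lines=8)
            submit_btn = gr.Button("Extract")
        with gr.Column():
            progress_output = gr.Textbox(label="Status")
            result_output = gr.Textbox(label="Extracted JSON", lines=12)
            html_output = gr.HTML()

    submit_btn.click(
        fn=extract_structure,
        inputs=[template_input, text_input],
        outputs=[progress_output, result_output, html_output],
    )

if __name__ == "__main__":
    demo.launch()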