Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,201 Bytes
be196c4 095dbb9 26a1605 be196c4 095dbb9 26a1605 d5698f0 095dbb9 26a1605 f683120 26a1605 f683120 26a1605 095dbb9 d5698f0 095dbb9 f764538 26a1605 f764538 26a1605 095dbb9 26a1605 095dbb9 26a1605 095dbb9 26a1605 095dbb9 f683120 26a1605 095dbb9 26a1605 095dbb9 26a1605 095dbb9 26a1605 095dbb9 26a1605 f764538 26a1605 f764538 26a1605 f764538 26a1605 095dbb9 26a1605 d5698f0 f764538 26a1605 f764538 26a1605 f764538 26a1605 f764538 26a1605 f764538 095dbb9 f764538 26a1605 d5698f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import html
import json
import os
import re

import gradio as gr
import requests
# Hugging Face API details
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
api_token = os.environ.get("HF_TOKEN", "") # Get token from environment variable
headers = {"Authorization": f"Bearer {api_token}"}
# Test API Connection
def test_api_connection():
try:
# Test with a simple GET request
response = requests.get("https://api-inference.huggingface.co/models/numind/NuExtract-1.5")
# Check if the connection was successful
if response.status_code == 200:
print("β
Connection to Hugging Face API successful!")
else:
print(f"β οΈ API returned status code {response.status_code}: {response.text}")
except requests.exceptions.RequestException as e:
print(f"β Connection failed: {str(e)}")
# Make the API request
def query_api(payload):
try:
response = requests.post(API_URL, headers=headers, json=payload)
# Debug logs
print("API STATUS CODE:", response.status_code)
print("RAW RESPONSE:", response.text)
return response.json()
except Exception as e:
print("Error during API call:", e)
return {"error": f"Could not decode JSON: {str(e)}"}
# Extract structure from the template and text
def extract_structure(template, text):
try:
# Format the input for NuExtract
prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
payload = {
"inputs": prompt,
"parameters": {
"max_new_tokens": 2000,
"temperature": 0.01,
"return_full_text": True
}
}
response = query_api(payload)
# Check for API error
if isinstance(response, dict) and "error" in response:
return f"API Error: {response['error']}", "{}", "<p>Error occurred: {response['error']}</p>"
# Get generated text
if isinstance(response, list) and len(response) > 0:
output = response[0].get("generated_text", "")
print("Generated Text:", output) # Optional debugging
# Try to extract after <|output|>
if "<|output|>" in output:
result = output.split("<|output|>")[-1].strip()
else:
# Try to extract JSON-like structure using regex
json_match = re.search(r'({[\s\S]+})', output)
result = json_match.group(1) if json_match else output.strip()
# Attempt to format JSON nicely
try:
parsed = json.loads(result)
result = json.dumps(parsed, indent=2)
except Exception:
pass
highlighted = f"<p>β
Successfully processed input of length {len(text)} characters.</p>"
return "β
Extraction Complete", result, highlighted
else:
return "β οΈ Unexpected API Response", json.dumps(response, indent=2), "<p>Please check the API response format.</p>"
except Exception as e:
return f"β Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"
# Gradio Interface
with gr.Blocks() as demo:
gr.Markdown("# π§ NuExtract-1.5 Information Extractor")
if not api_token:
gr.Markdown("## β οΈ No API token found. Set `HF_TOKEN` in the Space secrets.")
# Call test connection before launching the Gradio interface
test_api_connection()
with gr.Row():
with gr.Column():
template_input = gr.Textbox(
label="Template (JSON)",
value='{"name": "", "email": ""}',
lines=5
)
text_input = gr.Textbox(
label="Input Text",
value="Contact: John Smith ([email protected])",
lines=10
)
submit_btn = gr.Button("Extract Information")
with gr.Column():
progress_output = gr.Textbox(label="Progress")
result_output = gr.Textbox(label="Extracted Information")
html_output = gr.HTML(label="Info")
submit_btn.click(
fn=extract_structure,
inputs=[template_input, text_input],
outputs=[progress_output, result_output, html_output]
)
gr.Examples(
[
[
'{"name": "", "email": ""}',
'Contact: John Smith ([email protected])'
],
[
'''{
"Model": {
"Name": "",
"Number of parameters": "",
"Architecture": []
},
"Usage": {
"Use case": [],
"License": ""
}
}''',
'''We introduce Mistral 7B, a 7-billion-parameter language model engineered for superior performance and efficiency. Mistral 7B outperforms the best open 13B model (Llama 2) across all evaluated benchmarks, and the best released 34B model (Llama 1) in reasoning, mathematics, and code generation. Our model is released under the Apache 2.0 license.'''
]
],
[template_input, text_input]
)
if __name__ == "__main__":
demo.launch()
|