Spaces:
Running
on
Zero
Running
on
Zero
import json | |
import gradio as gr | |
import requests | |
import os | |
import re | |
# Hugging Face Inference API configuration.
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"

# The token is read from the HF_TOKEN environment variable. Surrounding
# whitespace is stripped because a stray newline pasted into a secret would
# produce an invalid HTTP header value.
api_token = os.environ.get("HF_TOKEN", "").strip()

# Only send an Authorization header when a token is actually configured;
# otherwise every request would carry a malformed "Bearer " credential.
headers = {"Authorization": f"Bearer {api_token}"} if api_token else {}
# Test API connection
def test_api_connection(timeout=10):
    """Probe the model endpoint and print whether it is reachable.

    Sends a plain GET (without the auth header) to ``API_URL`` and reports
    the outcome on stdout. Network failures are reported, not raised.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.
    """
    try:
        response = requests.get(API_URL, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"❌ Connection failed: {e}")
        return

    if response.status_code == 200:
        print("✅ Connection to Hugging Face API successful!")
    else:
        print(f"⚠️ API returned status code {response.status_code}: {response.text}")
# Make the API request
def query_api(payload, timeout=120):
    """POST *payload* to the inference endpoint and return the decoded JSON.

    Args:
        payload: JSON-serialisable request body sent to ``API_URL``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The parsed JSON response body, or a ``{"error": "..."}`` dict when
        the request fails or the body is not valid JSON. Transport errors
        and decode errors get distinct messages so a failed connection is
        not misreported as a JSON problem.
    """
    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
    except requests.exceptions.RequestException as e:
        print("Error during API call:", e)
        return {"error": f"Request failed: {e}"}

    # Debug logs
    print("API STATUS CODE:", response.status_code)
    print("RAW RESPONSE:", response.text)

    try:
        return response.json()
    except ValueError as e:
        # requests' JSON decode errors are ValueError subclasses.
        print("Error during API call:", e)
        return {
            "error": f"Could not decode JSON (HTTP {response.status_code}): {e}"
        }
# Extract structure from the template and text
def _extract_result(output):
    """Return the model's answer from *output*, pretty-printed when it is JSON."""
    if "<|output|>" in output:
        # The full prompt is echoed back; the answer follows the last marker.
        result = output.split("<|output|>")[-1].strip()
    else:
        # No marker: fall back to the widest {...} span in the text.
        json_match = re.search(r'({[\s\S]+})', output)
        result = json_match.group(1) if json_match else output.strip()

    try:
        return json.dumps(json.loads(result), indent=2, ensure_ascii=False)
    except ValueError:
        # Not valid JSON: show the raw text rather than failing.
        return result


def extract_structure(template, text):
    """Run NuExtract on *text* using *template* and format the outcome for the UI.

    Args:
        template: JSON template string describing the fields to extract.
        text: Free text to extract the fields from.

    Returns:
        A ``(status, result, info_html)`` tuple: a short status message, the
        extracted data (pretty-printed JSON when parseable, otherwise the raw
        model text, or ``"{}"`` on error), and an HTML snippet with details.
        Failures are reported through the tuple; nothing is raised.
    """
    # Function-scope import keeps this block self-contained.
    import html

    try:
        # Format the input for NuExtract
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 2000,
                "temperature": 0.01,
                "return_full_text": True,
            },
        }
        response = query_api(payload)

        # Check for API error
        if isinstance(response, dict) and "error" in response:
            error = response["error"]
            return (
                f"API Error: {error}",
                "{}",
                # Escaped because the error text comes from the remote API
                # and is rendered as HTML.
                f"<p>Error occurred: {html.escape(str(error))}</p>",
            )

        if not (isinstance(response, list) and response):
            return (
                "⚠️ Unexpected API Response",
                json.dumps(response, indent=2, ensure_ascii=False),
                "<p>Please check the API response format.</p>",
            )

        # Get generated text
        output = response[0].get("generated_text", "")
        print("Generated Text:", output)  # Optional debugging

        result = _extract_result(output)
        highlighted = (
            f"<p>✅ Successfully processed input of length {len(text)} characters.</p>"
        )
        return "✅ Extraction Complete", result, highlighted
    except Exception as e:
        # UI boundary: report any failure to the user instead of crashing
        # the Gradio callback.
        return (
            f"❌ Error: {e}",
            "{}",
            f"<p>Processing failed: {html.escape(str(e))}</p>",
        )
# Gradio Interface

# Sample inputs shared by the textbox defaults and the first example row.
# NOTE(review): the address is a placeholder; the source carried an
# obfuscated "[email protected]" string here, so the real value is unknown.
_CONTACT_TEMPLATE = '{"name": "", "email": ""}'
_CONTACT_TEXT = "Contact: John Smith (john.smith@example.com)"

# Nested template for the second example, serialised so it is valid JSON.
_MODEL_TEMPLATE = json.dumps(
    {
        "Model": {
            "Name": "",
            "Number of parameters": "",
            "Architecture": [],
        },
        "Usage": {
            "Use case": [],
            "License": "",
        },
    },
    indent=4,
)
_MODEL_TEXT = (
    "We introduce Mistral 7B, a 7-billion-parameter language model engineered "
    "for superior performance and efficiency. Mistral 7B outperforms the best "
    "open 13B model (Llama 2) across all evaluated benchmarks, and the best "
    "released 34B model (Llama 1) in reasoning, mathematics, and code "
    "generation. Our model is released under the Apache 2.0 license."
)

with gr.Blocks() as demo:
    gr.Markdown("# 🧠 NuExtract-1.5 Information Extractor")

    if not api_token:
        gr.Markdown("## ⚠️ No API token found. Set `HF_TOKEN` in the Space secrets.")

    # Call test connection before launching the Gradio interface
    test_api_connection()

    with gr.Row():
        with gr.Column():
            template_input = gr.Textbox(
                label="Template (JSON)",
                value=_CONTACT_TEMPLATE,
                lines=5,
            )
            text_input = gr.Textbox(
                label="Input Text",
                value=_CONTACT_TEXT,
                lines=10,
            )
            submit_btn = gr.Button("Extract Information")

        with gr.Column():
            progress_output = gr.Textbox(label="Progress")
            result_output = gr.Textbox(label="Extracted Information")
            html_output = gr.HTML(label="Info")

    # The three outputs match the (status, result, info_html) tuple returned
    # by extract_structure.
    submit_btn.click(
        fn=extract_structure,
        inputs=[template_input, text_input],
        outputs=[progress_output, result_output, html_output],
    )

    gr.Examples(
        examples=[
            [_CONTACT_TEMPLATE, _CONTACT_TEXT],
            [_MODEL_TEMPLATE, _MODEL_TEXT],
        ],
        inputs=[template_input, text_input],
    )

if __name__ == "__main__":
    demo.launch()