Spaces: Running on Zero
File size: 4,067 Bytes
import json
import os

import gradio as gr
import requests

# Hugging Face Inference API details
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
api_token = os.environ.get("HF_TOKEN", "")  # Get token from environment variable / Space secret
headers = {"Authorization": f"Bearer {api_token}"}


def query_api(payload):
    # POST the payload to the hosted Inference API and decode the JSON response
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
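
# Optional sketch, not part of the original app: while a cold model is still
# loading, the hosted Inference API returns a body like
# {"error": "...", "estimated_time": ...}. A thin wrapper around query_api can
# wait out that window; the name and parameters below are illustrative.
import time

def query_api_with_retry(payload, retries=3, wait=10.0):
    result = query_api(payload)
    for _ in range(retries):
        # Retry only on the transient "model is loading" style of error
        if not (isinstance(result, dict) and "error" in result and "estimated_time" in result):
            break
        time.sleep(wait)  # give the model time to finish loading
        result = query_api(payload)
    return result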


def extract_structure(template, text):
    try:
        # Format the input following NuExtract's prompt format
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
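        # For the default inputs, the rendered prompt looks like:
        #   <|input|>
        #   ### Template:
        #   {"name": "", "email": ""}
        #   ### Text:
        #   Contact: John Smith (<address>)
        #
        #   <|output|>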

        # Call the API
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 2000,
                "temperature": 0.01,  # Nearly deterministic, as recommended
                "return_full_text": True,
            },
        }
        response = query_api(payload)

        # Surface API-level errors (bad token, model still loading, etc.)
        if isinstance(response, dict) and "error" in response:
            return f"API Error: {response['error']}", "{}", f"<p>Error occurred: {response['error']}</p>"

        # The API returns the full text, so keep only what follows the <|output|> marker
        if isinstance(response, list) and len(response) > 0:
            output = response[0].get("generated_text", "")
            result = output.split("<|output|>")[1] if "<|output|>" in output else output

            # Try to parse as JSON so the result can be pretty-printed
            try:
                parsed = json.loads(result)
                result = json.dumps(parsed, indent=2)
            except json.JSONDecodeError:
                pass  # Fall back to the raw model output

            # Create a simple highlight
            highlighted = f"<p>Successfully processed text of length {len(text)} characters</p>"
            return "Processing complete", result, highlighted
        else:
            return "Unexpected API response", str(response), "<p>Please check the API token and try again</p>"
    except Exception as e:
        return f"Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"


# Create interface
with gr.Blocks() as demo:
    gr.Markdown("# NuExtract-1.5 Demo")
    if not api_token:
        gr.Markdown("## ⚠️ No API token found. Set HF_TOKEN in Space secrets.")

    with gr.Row():
        with gr.Column():
            template_input = gr.Textbox(
                label="Template (JSON)",
                value='{"name": "", "email": ""}',
                lines=5,
            )
            text_input = gr.Textbox(
                label="Input Text",
                # Placeholder address: the original value was redacted in the page capture
                value="Contact: John Smith (john.smith@example.com)",
                lines=10,
            )
            submit_btn = gr.Button("Extract Information")
        with gr.Column():
            progress_output = gr.Textbox(label="Progress")
            result_output = gr.Textbox(label="Extracted Information")
            html_output = gr.HTML(label="Highlighted Text")

    submit_btn.click(
        fn=extract_structure,
        inputs=[template_input, text_input],
        outputs=[progress_output, result_output, html_output],
    )
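
    # The three values returned by extract_structure fill these outputs in order:
    # status message -> Progress, JSON string -> Extracted Information, HTML -> Highlighted Text.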

    # Examples
    gr.Examples(
        [
            [
                '{"name": "", "email": ""}',
                "Contact: John Smith (john.smith@example.com)",  # same placeholder address as above
            ],
            [
                '''{
    "Model": {
        "Name": "",
        "Number of parameters": "",
        "Architecture": []
    },
    "Usage": {
        "Use case": [],
        "License": ""
    }
}''',
                '''We introduce Mistral 7B, a 7-billion-parameter language model engineered for superior performance and efficiency. Mistral 7B outperforms the best open 13B model (Llama 2) across all evaluated benchmarks, and the best released 34B model (Llama 1) in reasoning, mathematics, and code generation. Our model is released under the Apache 2.0 license.''',
            ],
        ],
        [template_input, text_input],
    )
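
    # Clicking an example row only pre-fills the two inputs; gr.Examples is not
    # given fn= here, so "Extract Information" still has to be pressed to run it.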

if __name__ == "__main__":
    demo.launch()
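
# Local run sketch (assumptions: file saved as app.py; the only third-party deps
# are gradio and requests; an HF token with Inference API access is available):
#   pip install gradio requests
#   export HF_TOKEN=hf_xxx
#   python app.py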