import gradio as gr
import json
import requests
import os
# Use the Hugging Face Inference API instead of loading the model
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}
def test_function(template, text):
print(f"Test function called with template: {template[:30]} and text: {text[:30]}")
return "Button clicked successfully", "Function was called"
def extract_info(template, text):
    try:
        # Format prompt according to NuExtract-1.5 requirements
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
        print(f"Processing with prompt: {prompt[:100]}...")

        # Call API instead of using local model
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 1000,
                "do_sample": False
            }
        }

        print("Calling API...")
        response = requests.post(API_URL, headers=headers, json=payload)

        if response.status_code != 200:
            print(f"API error: {response.status_code}, {response.text}")
            return f"❌ API Error: {response.status_code}", response.text

        # Process result
        result = response.json()

        # Handle different response formats
        if isinstance(result, list) and len(result) > 0:
            result_text = result[0].get("generated_text", "")
        else:
            result_text = str(result)

        # Split at output marker if present
        if "<|output|>" in result_text:
            json_text = result_text.split("<|output|>")[1].strip()
        else:
            json_text = result_text

        # Try to parse as JSON
        print("Parsing JSON...")
        try:
            extracted = json.loads(json_text)
            formatted = json.dumps(extracted, indent=2)
        except json.JSONDecodeError:
            print(f"JSON parsing failed. Raw output: {json_text[:100]}...")
            return "❌ JSON parsing error", json_text

        return "✅ Success", formatted

    except Exception as e:
        print(f"Error in extraction: {str(e)}")
        return f"❌ Error: {str(e)}", "{}"
# Create a simple interface
with gr.Blocks() as demo:
gr.Markdown("# NuExtract-1.5 Extraction Tool")
with gr.Row():
with gr.Column():
template = gr.Textbox(
label="JSON Template",
value='{"name": "", "email": ""}',
lines=5
)
text = gr.Textbox(
label="Text to Extract From",
value="Contact: John Smith ([email protected])",
lines=8
)
# Two buttons for testing
test_btn = gr.Button("Test Click")
extract_btn = gr.Button("Extract Information", variant="primary")
with gr.Column():
status = gr.Textbox(label="Status")
output = gr.Textbox(label="Output", lines=10)
# Connect both buttons
test_btn.click(
fn=test_function,
inputs=[template, text],
outputs=[status, output]
)
extract_btn.click(
fn=extract_info,
inputs=[template, text],
outputs=[status, output]
)
if __name__ == "__main__":
    demo.launch()
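
# To run locally (assumption: a valid Hugging Face token in the environment):
#   HF_TOKEN=<your token> python app.py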