import json
import gradio as gr
import requests
import os

# Hugging Face API details
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
api_token = os.environ.get("HF_TOKEN", "")  # Get token from environment variable
headers = {"Authorization": f"Bearer {api_token}"}
def query_api(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
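# Note: the serverless Inference API usually returns a list such as
# [{"generated_text": "..."}] on success, or a dict such as {"error": "..."}
# on failure; extract_structure() below handles both shapes.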
def extract_structure(template, text):
    try:
        # Format the input following NuExtract's prompt format
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

        # Call the API
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 2000,
                "temperature": 0.01,  # Nearly deterministic, as recommended
                "return_full_text": True
            }
        }
        response = query_api(payload)

        # Check for errors
        if isinstance(response, dict) and "error" in response:
            return f"API Error: {response['error']}", "{}", f"<p>Error occurred: {response['error']}</p>"

        # Extract the result - the API returns the full text, so split on the output marker
        if isinstance(response, list) and len(response) > 0:
            output = response[0].get("generated_text", "")
            result = output.split("<|output|>")[1].strip() if "<|output|>" in output else output

            # Try to parse as JSON to format it nicely
            try:
                parsed = json.loads(result)
                result = json.dumps(parsed, indent=2)
            except json.JSONDecodeError:
                pass

            # Create a simple highlight
            highlighted = f"<p>Successfully processed text of length {len(text)} characters</p>"
            return "Processing complete", result, highlighted
        else:
            return "Unexpected API response", str(response), "<p>Please check the API token and try again</p>"
    except Exception as e:
        return f"Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"
# Create the interface
with gr.Blocks() as demo:
    gr.Markdown("# NuExtract-1.5 Demo")

    if not api_token:
        gr.Markdown("## ⚠️ No API token found. Set HF_TOKEN in Space secrets.")

    with gr.Row():
        with gr.Column():
            template_input = gr.Textbox(
                label="Template (JSON)",
                value='{"name": "", "email": ""}',
                lines=5
            )
            text_input = gr.Textbox(
                label="Input Text",
                value="Contact: John Smith ([email protected])",
                lines=10
            )
            submit_btn = gr.Button("Extract Information")
        with gr.Column():
            progress_output = gr.Textbox(label="Progress")
            result_output = gr.Textbox(label="Extracted Information")
            html_output = gr.HTML(label="Highlighted Text")

    submit_btn.click(
        fn=extract_structure,
        inputs=[template_input, text_input],
        outputs=[progress_output, result_output, html_output]
    )
    # Examples
    gr.Examples(
        [
            [
                '{"name": "", "email": ""}',
                'Contact: John Smith ([email protected])'
            ],
            [
                '''{
    "Model": {
        "Name": "",
        "Number of parameters": "",
        "Architecture": []
    },
    "Usage": {
        "Use case": [],
        "License": ""
    }
}''',
                '''We introduce Mistral 7B, a 7-billion-parameter language model engineered for superior performance and efficiency. Mistral 7B outperforms the best open 13B model (Llama 2) across all evaluated benchmarks, and the best released 34B model (Llama 1) in reasoning, mathematics, and code generation. Our model is released under the Apache 2.0 license.'''
            ]
        ],
        [template_input, text_input]
    )

if __name__ == "__main__":
    demo.launch()
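# Optional: a quick smoke test without launching the UI (illustrative sketch;
# assumes HF_TOKEN is set and the hosted model is available). Run this instead
# of demo.launch() if you only want to verify the API call:
#
#     status, result, html = extract_structure(
#         '{"name": "", "email": ""}',
#         "Contact: John Smith ([email protected])",
#     )
#     print(status)
#     print(result)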