Spaces:

oberbics
/

HistorySpace

Running on Zero

App Files Files Community

HistorySpace / app.py

oberbics

Update app.py

d5698f0 verified 4 months ago

raw

history blame

5.2 kB

	import html
	import json
	import os
	import re

	import gradio as gr
	import requests

	# Hugging Face Inference API configuration.
	API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
	# The token is read from the HF_TOKEN environment variable (a Space secret).
	# Surrounding whitespace is stripped so that a secret pasted with a trailing
	# newline does not end up inside the HTTP header value.
	api_token = os.environ.get("HF_TOKEN", "").strip()
	# Only send an Authorization header when a token is actually configured;
	# an empty "Bearer " credential is malformed rather than anonymous.
	headers = {"Authorization": f"Bearer {api_token}"} if api_token else {}


	# Test API Connection
	def test_api_connection(timeout=10):
	    """Probe the model endpoint with a GET request and print the outcome.

	    This is a diagnostic only: it never raises for network problems and
	    returns nothing; the result is reported on stdout.

	    Args:
	        timeout: Seconds to wait for the server before giving up. Without a
	            timeout, ``requests`` waits indefinitely, which would block
	            application start-up when the endpoint is unreachable.
	    """
	    try:
	        # Reuse API_URL so the probe always targets the endpoint that
	        # query_api() will actually call.
	        response = requests.get(API_URL, timeout=timeout)
	    except requests.exceptions.RequestException as e:
	        print(f"❌ Connection failed: {str(e)}")
	        return

	    if response.status_code == 200:
	        print("✅ Connection to Hugging Face API successful!")
	    else:
	        print(f"⚠️ API returned status code {response.status_code}: {response.text}")


	# Make the API request
	def query_api(payload, timeout=120):
	    """POST *payload* to the inference endpoint and return the decoded JSON.

	    Args:
	        payload: JSON-serialisable request body.
	        timeout: Seconds to wait for the server. Without a timeout,
	            ``requests`` would wait indefinitely on a stalled connection.

	    Returns:
	        The decoded JSON body on success. On a transport failure or a
	        non-JSON body, a dict of the form ``{"error": "<message>"}`` so
	        callers can treat every failure uniformly.
	    """
	    try:
	        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	    except requests.exceptions.RequestException as e:
	        # Network-level problem (DNS, connection reset, timeout, ...): there
	        # is no response to decode, so report it as a request failure.
	        print("Error during API call:", e)
	        return {"error": f"Request failed: {str(e)}"}

	    # Debug logs
	    print("API STATUS CODE:", response.status_code)
	    print("RAW RESPONSE:", response.text)

	    try:
	        return response.json()
	    except ValueError as e:
	        # The body was not valid JSON (e.g. an HTML error page).
	        print("Error during API call:", e)
	        return {"error": f"Could not decode JSON (HTTP {response.status_code}): {str(e)}"}


	# Extract structure from the template and text
	def _build_prompt(template, text):
	    """Return the NuExtract prompt that wraps *template* and *text*."""
	    # The markers must be exactly "<|input|>" / "<|output|>"; escaping the
	    # pipes with backslashes would put literal backslashes into the prompt.
	    return f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"


	def _extract_result(generated):
	    """Isolate the model's answer in *generated*; pretty-print it if it is JSON."""
	    marker = "<|output|>"
	    if marker in generated:
	        # With return_full_text the prompt is echoed back; the answer is
	        # whatever follows the last output marker.
	        result = generated.split(marker)[-1].strip()
	    else:
	        # Fall back to the widest {...} span, or the raw text if none exists.
	        json_match = re.search(r"({[\s\S]+})", generated)
	        result = json_match.group(1) if json_match else generated.strip()

	    try:
	        # ensure_ascii=False keeps non-ASCII characters readable in the UI
	        # instead of turning them into \uXXXX escapes.
	        return json.dumps(json.loads(result), indent=2, ensure_ascii=False)
	    except ValueError:
	        # Not valid JSON: show the model output unchanged (best effort).
	        return result


	def extract_structure(template, text):
	    """Run NuExtract on *text* using *template* and format the outcome.

	    Args:
	        template: JSON template describing the fields to extract.
	        text: Source text to extract information from.

	    Returns:
	        A 3-tuple ``(status, result, info_html)``: a short status message,
	        the extracted data (pretty-printed JSON when parseable, ``"{}"`` on
	        errors) and an HTML snippet with additional information. The
	        function never raises; failures are reported through the tuple.
	    """
	    try:
	        payload = {
	            "inputs": _build_prompt(template, text),
	            "parameters": {
	                "max_new_tokens": 2000,
	                "temperature": 0.01,
	                "return_full_text": True,
	            },
	        }

	        response = query_api(payload)

	        # Check for API error
	        if isinstance(response, dict) and "error" in response:
	            error = response["error"]
	            # Escape before embedding in HTML: the message comes from a
	            # remote service and may contain markup characters.
	            return (
	                f"API Error: {error}",
	                "{}",
	                f"<p>Error occurred: {html.escape(str(error))}</p>",
	            )

	        if not (isinstance(response, list) and len(response) > 0):
	            return (
	                "⚠️ Unexpected API Response",
	                json.dumps(response, indent=2),
	                "<p>Please check the API response format.</p>",
	            )

	        # Get generated text
	        output = response[0].get("generated_text", "")
	        print("Generated Text:", output)  # Optional debugging

	        result = _extract_result(output)
	        highlighted = f"<p>✅ Successfully processed input of length {len(text)} characters.</p>"
	        return "✅ Extraction Complete", result, highlighted

	    except Exception as e:
	        # UI boundary: surface any unexpected failure to the user instead of
	        # letting the event handler crash.
	        return f"❌ Error: {str(e)}", "{}", f"<p>Processing failed: {html.escape(str(e))}</p>"


	# Gradio Interface

	# Example inputs shared by the default textbox values and gr.Examples.
	# example.com is a reserved documentation domain, so the address is safe to show.
	CONTACT_TEMPLATE = '{"name": "", "email": ""}'
	CONTACT_TEXT = "Contact: John Smith (john.smith@example.com)"

	# Built from a dict so the nested template is always valid, consistently
	# indented JSON.
	MODEL_TEMPLATE = json.dumps(
	    {
	        "Model": {
	            "Name": "",
	            "Number of parameters": "",
	            "Architecture": [],
	        },
	        "Usage": {
	            "Use case": [],
	            "License": "",
	        },
	    },
	    indent=4,
	)
	MODEL_TEXT = (
	    "We introduce Mistral 7B, a 7-billion-parameter language model engineered "
	    "for superior performance and efficiency. Mistral 7B outperforms the best "
	    "open 13B model (Llama 2) across all evaluated benchmarks, and the best "
	    "released 34B model (Llama 1) in reasoning, mathematics, and code "
	    "generation. Our model is released under the Apache 2.0 license."
	)

	with gr.Blocks() as demo:
	    gr.Markdown("# 🧠 NuExtract-1.5 Information Extractor")

	    # Warn in the UI when no token is configured.
	    if not api_token:
	        gr.Markdown("## ⚠️ No API token found. Set `HF_TOKEN` in the Space secrets.")

	    with gr.Row():
	        # Left column: inputs and the trigger button.
	        with gr.Column():
	            template_input = gr.Textbox(
	                label="Template (JSON)",
	                value=CONTACT_TEMPLATE,
	                lines=5,
	            )
	            text_input = gr.Textbox(
	                label="Input Text",
	                value=CONTACT_TEXT,
	                lines=10,
	            )
	            submit_btn = gr.Button("Extract Information")

	        # Right column: the three values returned by extract_structure().
	        with gr.Column():
	            progress_output = gr.Textbox(label="Progress")
	            result_output = gr.Textbox(label="Extracted Information")
	            html_output = gr.HTML(label="Info")

	    submit_btn.click(
	        fn=extract_structure,
	        inputs=[template_input, text_input],
	        outputs=[progress_output, result_output, html_output],
	    )

	    gr.Examples(
	        examples=[
	            [CONTACT_TEMPLATE, CONTACT_TEXT],
	            [MODEL_TEMPLATE, MODEL_TEXT],
	        ],
	        inputs=[template_input, text_input],
	    )

	if __name__ == "__main__":
	    # Probe the API only when run as a script, so that merely importing this
	    # module performs no network request.
	    test_api_connection()
	    demo.launch()