File size: 4,410 Bytes
be196c4
 
095dbb9
 
26a1605
be196c4
095dbb9
 
 
 
 
26a1605
095dbb9
 
26a1605
 
 
 
 
 
 
 
 
 
095dbb9
 
f764538
26a1605
f764538
26a1605
095dbb9
 
 
 
26a1605
095dbb9
 
 
26a1605
095dbb9
26a1605
 
095dbb9
 
26a1605
 
095dbb9
 
26a1605
 
 
 
 
 
 
 
 
 
 
095dbb9
 
 
26a1605
095dbb9
26a1605
 
 
095dbb9
26a1605
 
f764538
26a1605
f764538
26a1605
 
f764538
26a1605
 
095dbb9
26a1605
 
f764538
 
 
26a1605
f764538
 
 
 
26a1605
f764538
 
 
 
26a1605
f764538
 
 
26a1605
 
f764538
 
 
 
 
 
 
 
 
 
 
095dbb9
 
 
 
 
 
 
 
 
 
 
 
 
 
f764538
 
 
 
 
 
26a1605
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import json
import gradio as gr
import requests
import os
import re

# Hugging Face API details
# Hosted Inference API endpoint for the NuExtract-1.5 structured-extraction model.
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
api_token = os.environ.get("HF_TOKEN", "")  # Get token from environment variable
# NOTE: with an empty token this sends "Bearer " — requests go out unauthenticated.
headers = {"Authorization": f"Bearer {api_token}"}


def query_api(payload):
    """POST *payload* to the NuExtract Inference API and return the decoded JSON.

    Returns the parsed JSON response (a list of generations on success, or a
    dict such as ``{"error": ...}`` on failure).  Network failures and
    undecodable bodies are converted into an ``{"error": ...}`` dict so that
    callers can handle every outcome uniformly instead of catching exceptions.
    """
    try:
        # A timeout is essential: without one, requests can block forever if
        # the inference endpoint stalls (e.g. while the model is cold-loading).
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    except requests.RequestException as e:
        # Keep the same error-dict contract the JSON-decode path already uses.
        return {"error": f"Request failed: {str(e)}"}

    # Debug logs
    print("API STATUS CODE:", response.status_code)
    print("RAW RESPONSE:", response.text)

    try:
        return response.json()
    except ValueError as e:  # requests raises a ValueError subclass on bad JSON
        return {"error": f"Could not decode JSON: {str(e)}"}


def extract_structure(template, text):
    """Run NuExtract on *text* using the JSON *template*.

    Returns a ``(status, extracted_json, html_info)`` triple matching the
    three Gradio output components wired to the submit button.
    """
    try:
        # NuExtract's prompt format: template and text between special markers.
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

        api_response = query_api({
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 2000,
                "temperature": 0.01,
                "return_full_text": True
            }
        })

        # An error dict (from the API or from query_api itself) short-circuits.
        if isinstance(api_response, dict) and "error" in api_response:
            err = api_response["error"]
            return f"API Error: {err}", "{}", f"<p>Error occurred: {err}</p>"

        # Anything other than a non-empty list is an unexpected payload shape.
        if not (isinstance(api_response, list) and len(api_response) > 0):
            return "⚠️ Unexpected API Response", json.dumps(api_response, indent=2), "<p>Please check the API response format.</p>"

        generated = api_response[0].get("generated_text", "")
        print("Generated Text:", generated)  # Optional debugging

        # Prefer everything after the <|output|> marker; otherwise fall back
        # to grabbing the first brace-delimited span via regex.
        if "<|output|>" in generated:
            extracted = generated.split("<|output|>")[-1].strip()
        else:
            brace_match = re.search(r'({[\s\S]+})', generated)
            extracted = brace_match.group(1) if brace_match else generated.strip()

        # Pretty-print when the result parses as JSON; leave it untouched otherwise.
        try:
            extracted = json.dumps(json.loads(extracted), indent=2)
        except Exception:
            pass

        note = f"<p>✅ Successfully processed input of length {len(text)} characters.</p>"
        return "✅ Extraction Complete", extracted, note

    except Exception as e:
        return f"❌ Error: {str(e)}", "{}", f"<p>Processing failed: {str(e)}</p>"


# Gradio Interface: two-column layout — inputs (template + text) on the left,
# extraction status/result/info on the right.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 NuExtract-1.5 Information Extractor")

    # Warn visibly when the HF_TOKEN secret is missing: every API call would
    # then go out unauthenticated and is likely to be rejected or rate-limited.
    if not api_token:
        gr.Markdown("## ⚠️ No API token found. Set `HF_TOKEN` in the Space secrets.")

    with gr.Row():
        with gr.Column():
            # JSON skeleton describing the fields the model should fill in.
            template_input = gr.Textbox(
                label="Template (JSON)",
                value='{"name": "", "email": ""}',
                lines=5
            )
            # Free-form source text the fields are extracted from.
            text_input = gr.Textbox(
                label="Input Text",
                value="Contact: John Smith ([email protected])",
                lines=10
            )
            submit_btn = gr.Button("Extract Information")

        with gr.Column():
            # These three components mirror extract_structure's
            # (status, result, html) return triple, in order.
            progress_output = gr.Textbox(label="Progress")
            result_output = gr.Textbox(label="Extracted Information")
            html_output = gr.HTML(label="Info")

    submit_btn.click(
        fn=extract_structure,
        inputs=[template_input, text_input],
        outputs=[progress_output, result_output, html_output]
    )

    # Clickable example pairs: a simple contact card, and a nested template
    # applied to a model-announcement abstract.
    gr.Examples(
        [
            [
                '{"name": "", "email": ""}',
                'Contact: John Smith ([email protected])'
            ],
            [
                '''{
    "Model": {
        "Name": "",
        "Number of parameters": "",
        "Architecture": []
    },
    "Usage": {
        "Use case": [],
        "License": ""
    }
}''',
                '''We introduce Mistral 7B, a 7-billion-parameter language model engineered for superior performance and efficiency. Mistral 7B outperforms the best open 13B model (Llama 2) across all evaluated benchmarks, and the best released 34B model (Llama 1) in reasoning, mathematics, and code generation. Our model is released under the Apache 2.0 license.'''
            ]
        ],
        [template_input, text_input]
    )

# Launch the app only when run as a script (Spaces runtime executes this file).
if __name__ == "__main__":
    demo.launch()