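"""Multi-model Q&A app: queries several hosted LLMs in parallel and has a
judge model (Mixtral-8x7B) pick the best answer, served through a Gradio UI."""
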
import asyncio
import json
import os
import re

import aiohttp
import gradio as gr
import requests

# Hugging Face API token, read from the environment (e.g. `export HF_API_TOKEN=hf_...`)
HF_API_TOKEN = os.getenv("HF_API_TOKEN")

# Models setup
models = {
    "Mistral-7B-Instruct": "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
    "DeepSeek-7B-Instruct": "https://api-inference.huggingface.co/models/deepseek-ai/deepseek-llm-7b-instruct",
    "Qwen-7B-Chat": "https://api-inference.huggingface.co/models/Qwen/Qwen-7B-Chat"
}
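# Any text-generation model hosted on the Hugging Face Inference API can be
# swapped in above; the endpoint pattern is
# https://api-inference.huggingface.co/models/<repo_id>.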

# Judge model (Mixtral-8x7B)
judge_model_url = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"

# Authorization header for all Inference API requests
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Async function to call a single candidate model
async def query_model(session, model_name, question):
    # `return_full_text: False` asks the Inference API to return only the
    # completion rather than echoing the prompt back in front of it.
    payload = {
        "inputs": question,
        "parameters": {"max_new_tokens": 300, "return_full_text": False},
    }
    try:
        async with session.post(
            models[model_name],
            headers=HEADERS,
            json=payload,
            timeout=aiohttp.ClientTimeout(total=60),
        ) as response:
            result = await response.json()
            # The API returns either a list of generations or a bare dict
            if isinstance(result, list) and len(result) > 0:
                return model_name, result[0]["generated_text"]
            elif isinstance(result, dict) and "generated_text" in result:
                return model_name, result["generated_text"]
            else:
                return model_name, str(result)
    except Exception as e:
        return model_name, f"Error: {e}"

# Async function to call all models
async def gather_model_answers(question):
    async with aiohttp.ClientSession() as session:
        tasks = [query_model(session, model_name, question) for model_name in models]
        results = await asyncio.gather(*tasks)
        return dict(results)
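
# Illustrative usage (hypothetical question, not executed at import time):
#   answers = asyncio.run(gather_model_answers("What causes tides?"))
#   answers -> {"Mistral-7B-Instruct": "...", "DeepSeek-7B-Instruct": "...", "Qwen-7B-Chat": "..."}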

# Function to ask the judge
def judge_best_answer(question, answers):
    # Format the prompt for the Judge
    judge_prompt = f"""
You are a wise AI Judge. A user asked the following question:

Question:
{question}

Here are the answers provided by different models:

Answer 1 (Mistral-7B-Instruct):
{answers['Mistral-7B-Instruct']}

Answer 2 (DeepSeek-7B-Instruct):
{answers['DeepSeek-7B-Instruct']}

Answer 3 (Qwen-7B-Chat):
{answers['Qwen-7B-Chat']}

Please carefully read all three answers. Your job:
- Pick the best answer (Answer 1, Answer 2, or Answer 3).
- Explain briefly why you chose that answer.

Respond in this JSON format:
{{"best_answer": "Answer X", "reason": "Your reasoning here"}}
    """.strip()

    payload = {"inputs": judge_prompt, "parameters": {"max_new_tokens": 300}}
    response = requests.post(judge_model_url, headers=HEADERS, json=payload)

    if response.status_code == 200:
        result = response.json()
        # Try to extract JSON from response
        import json
        import re

        # Attempt to extract JSON block
        match = re.search(r"\{.*\}", str(result))
        if match:
            try:
                judge_decision = json.loads(match.group(0))
                return judge_decision
            except json.JSONDecodeError:
                return {"best_answer": "Unknown", "reason": "Failed to parse judge output."}
        else:
            return {"best_answer": "Unknown", "reason": "No JSON found in judge output."}
    else:
        return {"best_answer": "Unknown", "reason": f"Judge API error: {response.status_code}"}

# Final app logic
def multi_model_qa(question):
    answers = asyncio.run(gather_model_answers(question))
    judge_decision = judge_best_answer(question, answers)

    # Find the selected best answer
    best_answer_key = judge_decision.get("best_answer", "")
    best_answer_text = ""
    if "1" in best_answer_key:
        best_answer_text = answers["Mistral-7B-Instruct"]
    elif "2" in best_answer_key:
        best_answer_text = answers["DeepSeek-7B-Instruct"]
    elif "3" in best_answer_key:
        best_answer_text = answers["Qwen-7B-Chat"]
    else:
        best_answer_text = "Could not determine best answer."

    return (
        answers["Mistral-7B-Instruct"],
        answers["DeepSeek-7B-Instruct"],
        answers["Qwen-7B-Chat"],
        best_answer_text,
        judge_decision.get("reason", "No reasoning provided.")
    )

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Multi-Model Answer Aggregator")
    gr.Markdown("Ask any question. The system queries multiple models and the AI Judge selects the best answer.")

    question_input = gr.Textbox(label="Enter your question", placeholder="Ask me anything...", lines=2)
    submit_btn = gr.Button("Get Best Answer")

    mistral_output = gr.Textbox(label="Mistral-7B-Instruct Answer")
    deepseek_output = gr.Textbox(label="DeepSeek-7B-Instruct Answer")
    qwen_output = gr.Textbox(label="Qwen-7B-Chat Answer")
    best_answer_output = gr.Textbox(label="🏆 Best Answer Selected")
    judge_reasoning_output = gr.Textbox(label="⚖️ Judge's Reasoning")

    submit_btn.click(
        multi_model_qa,
        inputs=[question_input],
        outputs=[mistral_output, deepseek_output, qwen_output, best_answer_output, judge_reasoning_output]
    )

if __name__ == "__main__":
    demo.launch()
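
# To run locally (assuming this file is saved as app.py):
#   HF_API_TOKEN=hf_... python app.py
# then open the URL Gradio prints (http://127.0.0.1:7860 by default).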