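"""Multi-model Q&A app: queries several hosted LLMs in parallel and has a
judge model (Mixtral-8x7B) pick the best answer, served through a Gradio UI."""
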
import asyncio
import json
import os
import re

import aiohttp
import gradio as gr
import requests

# Hugging Face API token, read from the environment (e.g. `export HF_API_TOKEN=hf_...`)
HF_API_TOKEN = os.getenv("HF_API_TOKEN")

# Models setup
models = {
    "Mistral-7B-Instruct": "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
    "DeepSeek-7B-Instruct": "https://api-inference.huggingface.co/models/deepseek-ai/deepseek-llm-7b-instruct",
    "Qwen-7B-Chat": "https://api-inference.huggingface.co/models/Qwen/Qwen-7B-Chat"
}
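# Any text-generation model hosted on the Hugging Face Inference API can be
# swapped in above; the endpoint pattern is
# https://api-inference.huggingface.co/models/<repo_id>.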

# Judge model (Mixtral-8x7B)
judge_model_url = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"

# Authorization header for all Inference API requests
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Async function to call a single candidate model
async def query_model(session, model_name, question):
    # `return_full_text: False` asks the Inference API to return only the
    # completion rather than echoing the prompt back in front of it.
    payload = {
        "inputs": question,
        "parameters": {"max_new_tokens": 300, "return_full_text": False},
    }
    try:
        async with session.post(
            models[model_name],
            headers=HEADERS,
            json=payload,
            timeout=aiohttp.ClientTimeout(total=60),
        ) as response:
            result = await response.json()
            # The API returns either a list of generations or a bare dict
            if isinstance(result, list) and len(result) > 0:
                return model_name, result[0]["generated_text"]
            elif isinstance(result, dict) and "generated_text" in result:
                return model_name, result["generated_text"]
            else:
                return model_name, str(result)
    except Exception as e:
        return model_name, f"Error: {e}"

# Async function to call all models
async def gather_model_answers(question):
    async with aiohttp.ClientSession() as session:
        tasks = [query_model(session, model_name, question) for model_name in models]
        results = await asyncio.gather(*tasks)
        return dict(results)
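
# Illustrative usage (hypothetical question, not executed at import time):
#   answers = asyncio.run(gather_model_answers("What causes tides?"))
#   answers -> {"Mistral-7B-Instruct": "...", "DeepSeek-7B-Instruct": "...", "Qwen-7B-Chat": "..."}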

# Function to ask the judge
def judge_best_answer(question, answers):
    # Format the prompt for the Judge
    judge_prompt = f"""
You are a wise AI Judge. A user asked the following question:

Question:
{question}

Here are the answers provided by different models:

Answer 1 (Mistral-7B-Instruct):
{answers['Mistral-7B-Instruct']}

Answer 2 (DeepSeek-7B-Instruct):
{answers['DeepSeek-7B-Instruct']}

Answer 3 (Qwen-7B-Chat):
{answers['Qwen-7B-Chat']}

Please carefully read all three answers. Your job:
- Pick the best answer (Answer 1, Answer 2, or Answer 3).
- Explain briefly why you chose that answer.

Respond in this JSON format:
{{"best_answer": "Answer X", "reason": "Your reasoning here"}}
    """.strip()

    payload = {"inputs": judge_prompt, "parameters": {"max_new_tokens": 300}}
    response = requests.post(judge_model_url, headers=HEADERS, json=payload)

    if response.status_code == 200:
        result = response.json()
        # Try to extract JSON from response
        import json
        import re

        # Attempt to extract JSON block
        match = re.search(r"\{.*\}", str(result))
        if match:
            try:
                judge_decision = json.loads(match.group(0))
                return judge_decision
            except json.JSONDecodeError:
                return {"best_answer": "Unknown", "reason": "Failed to parse judge output."}
        else:
            return {"best_answer": "Unknown", "reason": "No JSON found in judge output."}
    else:
        return {"best_answer": "Unknown", "reason": f"Judge API error: {response.status_code}"}

# Final app logic
def multi_model_qa(question):
    answers = asyncio.run(gather_model_answers(question))
    judge_decision = judge_best_answer(question, answers)

    # Find the selected best answer
    best_answer_key = judge_decision.get("best_answer", "")
    best_answer_text = ""
    if "1" in best_answer_key:
        best_answer_text = answers["Mistral-7B-Instruct"]
    elif "2" in best_answer_key:
        best_answer_text = answers["DeepSeek-7B-Instruct"]
    elif "3" in best_answer_key:
        best_answer_text = answers["Qwen-7B-Chat"]
    else:
        best_answer_text = "Could not determine best answer."

    return (
        answers["Mistral-7B-Instruct"],
        answers["DeepSeek-7B-Instruct"],
        answers["Qwen-7B-Chat"],
        best_answer_text,
        judge_decision.get("reason", "No reasoning provided.")
    )

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Multi-Model Answer Aggregator")
    gr.Markdown("Ask any question. The system queries multiple models and the AI Judge selects the best answer.")

    question_input = gr.Textbox(label="Enter your question", placeholder="Ask me anything...", lines=2)
    submit_btn = gr.Button("Get Best Answer")

    mistral_output = gr.Textbox(label="Mistral-7B-Instruct Answer")
    deepseek_output = gr.Textbox(label="DeepSeek-7B-Instruct Answer")
    qwen_output = gr.Textbox(label="Qwen-7B-Chat Answer")
    best_answer_output = gr.Textbox(label="🏆 Best Answer Selected")
    judge_reasoning_output = gr.Textbox(label="⚖️ Judge's Reasoning")

    submit_btn.click(
        multi_model_qa,
        inputs=[question_input],
        outputs=[mistral_output, deepseek_output, qwen_output, best_answer_output, judge_reasoning_output]
    )

if __name__ == "__main__":
    demo.launch()
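
# To run locally (assuming this file is saved as app.py):
#   HF_API_TOKEN=hf_... python app.py
# then open the URL Gradio prints (http://127.0.0.1:7860 by default).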