Mina Parham committed on
Commit 38366a7 · 1 Parent(s): 55aae7a
Initial commit

Files changed:
- app.py +136 -63
- requirements.txt +54 -1
app.py
CHANGED
@@ -1,64 +1,137 @@
 import gradio as gr
-"""
(the other 62 removed lines of the old file were blank)
+import requests
+import asyncio
+import aiohttp
+
+# Models setup
+models = {
+    "Mistral-7B-Instruct": "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
+    "DeepSeek-7B-Instruct": "https://api-inference.huggingface.co/models/deepseek-ai/deepseek-llm-7b-instruct",
+    "Qwen-7B-Chat": "https://api-inference.huggingface.co/models/Qwen/Qwen-7B-Chat"
+}
+
+# Judge model (Mixtral-8x7B)
+judge_model_url = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"
+
+# Your Hugging Face API Token
+API_TOKEN = "YOUR_HUGGINGFACE_API_TOKEN"
+HEADERS = {"Authorization": f"Bearer {API_TOKEN}"}
+
+# Async function to call a model
+async def query_model(session, model_name, question):
+    payload = {"inputs": question, "parameters": {"max_new_tokens": 300}}
+    try:
+        async with session.post(models[model_name], headers=HEADERS, json=payload, timeout=60) as response:
+            result = await response.json()
+            if isinstance(result, list) and len(result) > 0:
+                return model_name, result[0]["generated_text"]
+            elif isinstance(result, dict) and "generated_text" in result:
+                return model_name, result["generated_text"]
+            else:
+                return model_name, str(result)
+    except Exception as e:
+        return model_name, f"Error: {str(e)}"
+
+# Async function to call all models
+async def gather_model_answers(question):
+    async with aiohttp.ClientSession() as session:
+        tasks = [query_model(session, model_name, question) for model_name in models]
+        results = await asyncio.gather(*tasks)
+        return dict(results)
+
+# Function to ask the judge
+def judge_best_answer(question, answers):
+    # Format the prompt for the Judge
+    judge_prompt = f"""
+You are a wise AI Judge. A user asked the following question:
+
+Question:
+{question}
+
+Here are the answers provided by different models:
+
+Answer 1 (Mistral-7B-Instruct):
+{answers['Mistral-7B-Instruct']}
+
+Answer 2 (DeepSeek-7B-Instruct):
+{answers['DeepSeek-7B-Instruct']}
+
+Answer 3 (Qwen-7B-Chat):
+{answers['Qwen-7B-Chat']}
+
+Please carefully read all three answers. Your job:
+- Pick the best answer (Answer 1, Answer 2, or Answer 3).
+- Explain briefly why you chose that answer.
+
+Respond in this JSON format:
+{{"best_answer": "Answer X", "reason": "Your reasoning here"}}
+""".strip()
+
+    payload = {"inputs": judge_prompt, "parameters": {"max_new_tokens": 300}}
+    response = requests.post(judge_model_url, headers=HEADERS, json=payload)
+
+    if response.status_code == 200:
+        result = response.json()
+        # Try to extract JSON from response
+        import json
+        import re
+
+        # Attempt to extract JSON block
+        match = re.search(r"\{.*\}", str(result))
+        if match:
+            try:
+                judge_decision = json.loads(match.group(0))
+                return judge_decision
+            except json.JSONDecodeError:
+                return {"best_answer": "Unknown", "reason": "Failed to parse judge output."}
+        else:
+            return {"best_answer": "Unknown", "reason": "No JSON found in judge output."}
+    else:
+        return {"best_answer": "Unknown", "reason": f"Judge API error: {response.status_code}"}
+
+# Final app logic
+def multi_model_qa(question):
+    answers = asyncio.run(gather_model_answers(question))
+    judge_decision = judge_best_answer(question, answers)
+
+    # Find the selected best answer
+    best_answer_key = judge_decision.get("best_answer", "")
+    best_answer_text = ""
+    if "1" in best_answer_key:
+        best_answer_text = answers["Mistral-7B-Instruct"]
+    elif "2" in best_answer_key:
+        best_answer_text = answers["DeepSeek-7B-Instruct"]
+    elif "3" in best_answer_key:
+        best_answer_text = answers["Qwen-7B-Chat"]
+    else:
+        best_answer_text = "Could not determine best answer."
+
+    return (
+        answers["Mistral-7B-Instruct"],
+        answers["DeepSeek-7B-Instruct"],
+        answers["Qwen-7B-Chat"],
+        best_answer_text,
+        judge_decision.get("reason", "No reasoning provided.")
+    )
+
+# Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("# Multi-Model Answer Aggregator")
+    gr.Markdown("Ask any question. The system queries multiple models and the AI Judge selects the best answer.")
+
+    question_input = gr.Textbox(label="Enter your question", placeholder="Ask me anything...", lines=2)
+    submit_btn = gr.Button("Get Best Answer")
+
+    mistral_output = gr.Textbox(label="Mistral-7B-Instruct Answer")
+    deepseek_output = gr.Textbox(label="DeepSeek-7B-Instruct Answer")
+    qwen_output = gr.Textbox(label="Qwen-7B-Chat Answer")
+    best_answer_output = gr.Textbox(label="Best Answer Selected")
+    judge_reasoning_output = gr.Textbox(label="Judge's Reasoning")
+
+    submit_btn.click(
+        multi_model_qa,
+        inputs=[question_input],
+        outputs=[mistral_output, deepseek_output, qwen_output, best_answer_output, judge_reasoning_output]
+    )
+
+demo.launch()
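Two hardening notes on the committed app.py, with a minimal sketch below. First, the API token is hard-coded; on Hugging Face Spaces a token is normally stored as a Space secret and read from the environment. Second, re.search(r"\{.*\}", ...) runs without re.DOTALL, so a judge reply whose JSON spans multiple lines would not match. The sketch assumes an environment variable named HF_API_TOKEN; that name, and the helper extract_judge_json, are illustrative and not part of this commit.

import os
import json
import re

# Read the token from a Space secret / environment variable instead of
# hard-coding it (HF_API_TOKEN is an assumed name, not from the commit).
API_TOKEN = os.environ.get("HF_API_TOKEN", "")
HEADERS = {"Authorization": f"Bearer {API_TOKEN}"}

def extract_judge_json(raw: str) -> dict:
    # re.DOTALL lets "." match newlines, so a pretty-printed JSON object
    # in the judge's reply is still captured as a single block.
    match = re.search(r"\{.*\}", raw, re.DOTALL)
    if not match:
        return {"best_answer": "Unknown", "reason": "No JSON found in judge output."}
    try:
        return json.loads(match.group(0))
    except json.JSONDecodeError:
        return {"best_answer": "Unknown", "reason": "Failed to parse judge output."}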
requirements.txt
CHANGED
@@ -1 +1,54 @@
-
(the old file's single removed line was blank)
+aiofiles==24.1.0
+annotated-types==0.7.0
+anyio==4.9.0
+audioop-lts==0.2.1
+certifi==2025.4.26
+charset-normalizer==3.4.1
+click==8.1.8
+fastapi==0.115.12
+ffmpy==0.5.0
+filelock==3.18.0
+fsspec==2025.3.2
+gradio==5.27.0
+gradio_client==1.9.0
+groovy==0.1.2
+h11==0.16.0
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.30.2
+idna==3.10
+Jinja2==3.1.6
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+mdurl==0.1.2
+numpy==2.2.5
+orjson==3.10.16
+packaging==25.0
+pandas==2.2.3
+pillow==11.2.1
+pydantic==2.11.3
+pydantic_core==2.33.1
+pydub==0.25.1
+Pygments==2.19.1
+python-dateutil==2.9.0.post0
+python-multipart==0.0.20
+pytz==2025.2
+PyYAML==6.0.2
+requests==2.32.3
+rich==14.0.0
+ruff==0.11.7
+safehttpx==0.1.6
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+starlette==0.46.2
+tomlkit==0.13.2
+tqdm==4.67.1
+typer==0.15.2
+typing-inspection==0.4.0
+typing_extensions==4.13.2
+tzdata==2025.2
+urllib3==2.4.0
+uvicorn==0.34.2
+websockets==15.0.1
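One gap worth flagging: app.py imports aiohttp, but no aiohttp pin appears in the list above, so installing these requirements into a fresh environment would leave the app failing at import time. A pin along the lines below would close the gap; the exact version is illustrative, not from the commit.

aiohttp==3.9.5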