import os
import time

import gradio as gr
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

# Load API keys from .env file
load_dotenv()
HF_API_KEY = os.getenv("HF_API_KEY")  # Hugging Face API for Gemma & Llama
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")  # Together AI API for DeepSeek-V3
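
# A minimal .env file next to this script (key names match the os.getenv
# calls above; the values below are placeholders, not real tokens):
#
#   HF_API_KEY=hf_xxxxxxxxxxxxxxxxxxxx
#   TOGETHER_API_KEY=xxxxxxxxxxxxxxxxxxxx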

# Initialize inference clients (hf-inference for Gemma & Llama, Together AI for DeepSeek-V3)
hf_client = InferenceClient(provider="hf-inference", api_key=HF_API_KEY)
together_client = InferenceClient(provider="together", api_key=TOGETHER_API_KEY)

# Query Hugging Face Models (Gemma & Llama)
def query_huggingface_model(user_input, model_name):
    try:
        messages = [{"role": "user", "content": user_input}]
        completion = hf_client.chat.completions.create(
            model=model_name, 
            messages=messages, 
            max_tokens=500
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error querying {model_name}: {str(e)}"

# Query DeepSeek-V3 (Together AI via Hugging Face)
def query_deepseek_v3(user_input):
    try:
        messages = [{"role": "user", "content": user_input}]
        completion = together_client.chat.completions.create(
            model="deepseek-ai/DeepSeek-V3", 
            messages=messages, 
            max_tokens=500
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error querying DeepSeek-V3: {str(e)}"

# Function to refine responses using DeepSeek-V3
def refine_response(user_input):
    try:
        # Get responses from all three models
        gemma_response = query_huggingface_model(user_input, "google/gemma-2-27b-it")
        llama_response = query_huggingface_model(user_input, "meta-llama/Llama-3.3-70B-Instruct")
        deepseek_response = query_deepseek_v3(user_input)

        # Drop empty responses; if fewer than two remain, skip refinement and
        # return whatever is available (error strings from above still count as text here)
        responses = {
            "Gemma": gemma_response.strip(),
            "Llama": llama_response.strip(),
            "DeepSeek-V3": deepseek_response.strip()
        }
        valid_responses = {k: v for k, v in responses.items() if v}

        if len(valid_responses) < 2:
            return "\n\n".join(f"{k} Response: {v}" for k, v in valid_responses.items())

        # Prepare refinement prompt
        improvement_prompt = f"""
        Here are three AI-generated responses:

        Response 1 (Gemma): {gemma_response}
        Response 2 (Llama 3.3): {llama_response}
        Response 3 (DeepSeek-V3): {deepseek_response}

        Please combine the best elements of all three, improve clarity, and provide a final refined answer.
        """

        # Retry loop for DeepSeek-V3 refinement
        max_retries = 3
        for attempt in range(max_retries):
            try:
                messages = [{"role": "user", "content": improvement_prompt}]
                refined_completion = together_client.chat.completions.create(
                    model="deepseek-ai/DeepSeek-V3",
                    messages=messages,
                    max_tokens=500
                )

                refined_content = refined_completion.choices[0].message.content

                if refined_content.strip():
                    return refined_content
                else:
                    print("Received empty response from DeepSeek-V3, retrying...")
                    time.sleep(2)

            except Exception as e:
                print(f"Error on attempt {attempt + 1}: {str(e)}")
                time.sleep(2)

        return f"Refinement failed. Here’s the best available response:\n\n{max(valid_responses.values(), key=len)}"

    except Exception as e:
        return f"Error refining response: {str(e)}"

# Create Gradio interface
iface = gr.Interface(
    fn=refine_response, 
    inputs=gr.Textbox(lines=2, placeholder="Ask me anything..."), 
    outputs="text",
    title="Multi-Model AI Enhancer",
    description="Get responses from Gemma, Llama 3.3, and DeepSeek-V3. Then receive an improved final answer."
)

# Launch app
iface.launch(debug=True)
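
# launch(debug=True) serves the app locally with verbose logging; adding
# share=True (a standard Gradio launch option) would also create a
# temporary public URL.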