import os
import time
import httpx
import gradio as gr
from huggingface_hub import InferenceClient
from openai import OpenAI
from dotenv import load_dotenv

# Load API keys from .env file
load_dotenv()
HF_API_KEY = os.getenv("HF_API_KEY")
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
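
# The .env file is expected to look like this (placeholder values shown;
# substitute real credentials):
#   HF_API_KEY=hf_xxxxxxxxxxxxxxxx
#   OPENROUTER_API_KEY=sk-or-xxxxxxxxxxxxxxxx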

# Initialize the Hugging Face Inference client
hf_client = InferenceClient(provider="hf-inference", api_key=HF_API_KEY)

# Initialize the OpenRouter client (OpenAI-compatible API) used for DeepSeek-R1
openrouter_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY
)

# Query Hugging Face Models
def query_huggingface_model(user_input, model_name):
    try:
        messages = [{"role": "user", "content": user_input}]
        completion = hf_client.chat.completions.create(
            model=model_name, 
            messages=messages, 
            max_tokens=500
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error querying {model_name}: {str(e)}"

# Query DeepSeek-R1 (OpenRouter)
def query_deepseek(user_input):
    try:
        completion = openrouter_client.chat.completions.create(
            model="deepseek/deepseek-r1:free",
            messages=[{"role": "user", "content": user_input}]
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error querying DeepSeek: {str(e)}"

# Query all three models, then ask DeepSeek-R1 to merge and refine their answers
def refine_response(user_input):
    try:
        # Get responses from all three models
        gemma_response = query_huggingface_model(user_input, "google/gemma-2-27b-it")
        llama_response = query_huggingface_model(user_input, "meta-llama/Llama-3.3-70B-Instruct")
        deepseek_response = query_deepseek(user_input)

        # Collect the responses, dropping any that are empty or failed; the
        # helpers above return strings starting with "Error" on failure
        responses = {
            "Gemma": gemma_response.strip(),
            "Llama": llama_response.strip(),
            "DeepSeek": deepseek_response.strip()
        }
        valid_responses = {
            k: v for k, v in responses.items()
            if v and not v.startswith("Error")
        }

        if not valid_responses:
            return "All model queries failed, so there is nothing to refine."
        if len(valid_responses) < 2:
            # Only one model answered; refinement needs at least two responses
            return "\n\n".join(f"{k} Response: {v}" for k, v in valid_responses.items())

        # Prepare the refinement prompt from the responses that succeeded
        response_list = "\n\n".join(
            f"Response ({name}): {text}" for name, text in valid_responses.items()
        )
        improvement_prompt = (
            "Here are several AI-generated responses:\n\n"
            f"{response_list}\n\n"
            "Please combine the best elements of all of them, improve clarity, "
            "and provide a final refined answer."
        )

        # Retry loop for OpenRouter API
        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = httpx.post(
                    "https://openrouter.ai/api/v1/chat/completions",
                    headers={
                        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": "deepseek/deepseek-r1:free",
                        "messages": [{"role": "user", "content": improvement_prompt}]
                    },
                    timeout=30
                )

                print(f"Attempt {attempt + 1}: OpenRouter Response:", response.text)
                response_json = response.json()
                refined_content = response_json["choices"][0]["message"]["content"]

                if refined_content.strip():
                    return refined_content
                else:
                    print("Received empty response from DeepSeek, retrying...")
                    time.sleep(2)

            except Exception as e:
                print(f"Error on attempt {attempt + 1}: {str(e)}")
                time.sleep(2)

        return f"Refinement failed. Here’s the best available response:\n\n{max(valid_responses.values(), key=len)}"

    except Exception as e:
        return f"Error refining response: {str(e)}"

# Create Gradio interface
iface = gr.Interface(
    fn=refine_response, 
    inputs=gr.Textbox(lines=2, placeholder="Ask me anything..."), 
    outputs="text",
    title="Multi-Model AI Enhancer",
    description="Get responses from Gemma, Llama 3.3, and DeepSeek. Then receive an improved final answer."
)
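
# launch(share=True) would additionally create a temporary public URL, which
# can help when testing from another machine; plain launch() is enough locally.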

# Launch app (the guard lets the module be imported without starting the UI)
if __name__ == "__main__":
    iface.launch()