import os
import time

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load API keys from .env file
load_dotenv()
HF_API_KEY = os.getenv("HF_API_KEY")              # Hugging Face API key for Gemma & Llama
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")  # Together AI API key for DeepSeek-V3
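
# Expected .env layout (placeholder values, not real keys):
#   HF_API_KEY=hf_xxxxxxxxxxxx
#   TOGETHER_API_KEY=xxxxxxxxxxxx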

# Initialize inference clients (hf-inference serves Gemma & Llama; Together AI serves DeepSeek-V3)
hf_client = InferenceClient(provider="hf-inference", api_key=HF_API_KEY)
together_client = InferenceClient(provider="together", api_key=TOGETHER_API_KEY)

# Query a Hugging Face-hosted chat model (Gemma & Llama)
def query_huggingface_model(user_input, model_name):
    try:
        messages = [{"role": "user", "content": user_input}]
        completion = hf_client.chat.completions.create(
            model=model_name,
            messages=messages,
            max_tokens=500
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error querying {model_name}: {str(e)}"

# Query DeepSeek-V3 (Together AI via Hugging Face)
def query_deepseek_v3(user_input):
    try:
        messages = [{"role": "user", "content": user_input}]
        completion = together_client.chat.completions.create(
            model="deepseek-ai/DeepSeek-V3",
            messages=messages,
            max_tokens=500
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error querying DeepSeek-V3: {str(e)}"

# Refine the three model responses into a single answer using DeepSeek-V3
def refine_response(user_input):
    try:
        # Get responses from all three models
        gemma_response = query_huggingface_model(user_input, "google/gemma-2-27b-it")
        llama_response = query_huggingface_model(user_input, "meta-llama/Llama-3.3-70B-Instruct")
        deepseek_response = query_deepseek_v3(user_input)

        # Keep only non-empty responses; with fewer than two there is
        # nothing to combine, so return whatever is available as-is
        responses = {
            "Gemma": gemma_response.strip(),
            "Llama": llama_response.strip(),
            "DeepSeek-V3": deepseek_response.strip()
        }
        valid_responses = {k: v for k, v in responses.items() if v}
        if len(valid_responses) < 2:
            return "\n\n".join(f"{k} Response: {v}" for k, v in valid_responses.items())

        # Prepare refinement prompt
        improvement_prompt = f"""
        Here are three AI-generated responses:
        Response 1 (Gemma): {gemma_response}
        Response 2 (Llama 3.3): {llama_response}
        Response 3 (DeepSeek-V3): {deepseek_response}
        Please combine the best elements of all three, improve clarity, and provide a final refined answer.
        """

        # Retry loop for DeepSeek-V3 refinement: the provider can return an
        # empty completion, so retry a few times before giving up
        max_retries = 3
        for attempt in range(max_retries):
            try:
                messages = [{"role": "user", "content": improvement_prompt}]
                refined_completion = together_client.chat.completions.create(
                    model="deepseek-ai/DeepSeek-V3",
                    messages=messages,
                    max_tokens=500
                )
                refined_content = refined_completion.choices[0].message.content
                if refined_content and refined_content.strip():
                    return refined_content
                print("Received empty response from DeepSeek-V3, retrying...")
                time.sleep(2)
            except Exception as e:
                print(f"Error on attempt {attempt + 1}: {str(e)}")
                time.sleep(2)

        # All retries failed: fall back to the longest individual response
        return f"Refinement failed. Here’s the best available response:\n\n{max(valid_responses.values(), key=len)}"
    except Exception as e:
        return f"Error refining response: {str(e)}"

# Create Gradio interface
iface = gr.Interface(
    fn=refine_response,
    inputs=gr.Textbox(lines=2, placeholder="Ask me anything..."),
    outputs="text",
    title="Multi-Model AI Enhancer",
    description="Get responses from Gemma, Llama 3.3, and DeepSeek-V3. Then receive an improved final answer."
)

# Launch app
iface.launch()
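
# Tip: when running outside Hugging Face Spaces, iface.launch(share=True)
# serves the demo through a temporary public URL.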