import os
import threading

import gradio as gr
from openai import OpenAI
from dotenv import load_dotenv

# Load API keys from .env file
load_dotenv()
API_KEY_LLAMA = os.getenv("OPENROUTER_API_KEY1")      # Llama API key
API_KEY_GEMMA = os.getenv("OPENROUTER_API_KEY2")      # Gemma API key
API_KEY_DEEPSEEK1 = os.getenv("OPENROUTER_API_KEY3")  # DeepSeek first query
API_KEY_DEEPSEEK2 = os.getenv("OPENROUTER_API_KEY4")  # DeepSeek final refinement

# Create an OpenAI client per model, all pointed at the OpenRouter endpoint
llama_client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY_LLAMA)
gemma_client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY_GEMMA)
deepseek_client1 = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY_DEEPSEEK1)
deepseek_client2 = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY_DEEPSEEK2)


# Query the Llama model and store its answer in the shared results dict
def query_llama(user_input, results):
    try:
        completion = llama_client.chat.completions.create(
            model="meta-llama/llama-3.2-3b-instruct:free",
            messages=[{"role": "user", "content": user_input}],
        )
        results["Llama"] = completion.choices[0].message.content
    except Exception as e:
        results["Llama"] = f"Error: {str(e)}"


# Query the Gemma model
def query_gemma(user_input, results):
    try:
        completion = gemma_client.chat.completions.create(
            model="google/gemma-2-9b-it:free",
            messages=[{"role": "user", "content": user_input}],
        )
        results["Gemma"] = completion.choices[0].message.content
    except Exception as e:
        results["Gemma"] = f"Error: {str(e)}"


# Query DeepSeek for additional context
def query_deepseek_1(user_input, results):
    try:
        completion = deepseek_client1.chat.completions.create(
            model="deepseek/deepseek-r1:free",
            messages=[{"role": "user", "content": user_input}],
        )
        results["DeepSeek1"] = completion.choices[0].message.content
    except Exception as e:
        results["DeepSeek1"] = f"Error: {str(e)}"


# Fan out to all three models in parallel, then refine with DeepSeek-R1 (final API key)
def refine_response(user_input):
    try:
        results = {}

        # One thread per model so the three API calls run concurrently
        threads = [
            threading.Thread(target=query_llama, args=(user_input, results)),
            threading.Thread(target=query_gemma, args=(user_input, results)),
            threading.Thread(target=query_deepseek_1, args=(user_input, results)),
        ]

        # Start all threads
        for thread in threads:
            thread.start()

        # Wait for all threads to complete
        for thread in threads:
            thread.join()

        # Keep only responses that came back without an error
        # (error strings are created above as "Error: ...", so check the prefix
        # rather than searching for "Error" anywhere in the text)
        valid_responses = {
            k: v.strip() for k, v in results.items() if v and not v.startswith("Error:")
        }
        if not valid_responses:
            return "All model queries failed; please try again."
        if len(valid_responses) < 2:
            # Not enough material to refine -- return what we have as-is
            return "\n\n".join(f"{k} Response: {v}" for k, v in valid_responses.items())

        # Prepare the refinement prompt from the three raw answers
        improvement_prompt = f"""
Here are three AI-generated responses:

Response 1 (Llama):
{results.get("Llama", "N/A")}

Response 2 (Gemma):
{results.get("Gemma", "N/A")}

Response 3 (DeepSeek1):
{results.get("DeepSeek1", "N/A")}

Please combine the best elements of all three, improve clarity, and provide a final refined answer.
"""

        # Query DeepSeek-R1 for refinement using API key 4
        try:
            refined_completion = deepseek_client2.chat.completions.create(
                model="deepseek/deepseek-r1:free",
                messages=[{"role": "user", "content": improvement_prompt}],
            )
            refined_content = refined_completion.choices[0].message.content
            if refined_content and refined_content.strip():
                return refined_content
            # Refinement returned nothing -- fall back to the raw responses
            return "\n\n".join(f"{k} Response: {v}" for k, v in valid_responses.items())
        except Exception as e:
            return f"Error refining response: {str(e)}"

    except Exception as e:
        return f"Unexpected error: {str(e)}"


# Create the Gradio interface
iface = gr.Interface(
    fn=refine_response,
    inputs=gr.Textbox(lines=2, placeholder="Ask me anything..."),
    outputs="text",
    title="Multi-Model AI Enhancer (4 API Keys)",
    description="Llama (API 1) + Gemma (API 2) + DeepSeek (API 3) → Final Refinement with DeepSeek (API 4)",
)

# Launch the app
iface.launch()
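
# --- Illustrative usage note (a sketch, not part of the app itself) ---
# The four os.getenv() calls above expect a .env file alongside this script.
# The key names below match the code; the values are placeholders, not real keys:
#
#   OPENROUTER_API_KEY1=<your-openrouter-key-for-llama>
#   OPENROUTER_API_KEY2=<your-openrouter-key-for-gemma>
#   OPENROUTER_API_KEY3=<your-openrouter-key-for-deepseek-pass-1>
#   OPENROUTER_API_KEY4=<your-openrouter-key-for-deepseek-refinement>
#
# With the .env in place, run the script directly (e.g. `python app.py`, assuming
# that filename) and open the local URL that Gradio prints to the console.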