Spaces:

EduuGomes
/

CachoeiraBot

Running

App Files Files Community

EduuGomes committed on May 31

Commit

48a4d7a

verified ·

1 Parent(s): 6ccd719

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -151

app.py CHANGED Viewed

@@ -1,168 +1,79 @@
 import os
 import gradio as gr
 from openai import OpenAI
-from optillm.cot_reflection import cot_reflection
-from optillm.rto import round_trip_optimization
-from optillm.z3_solver import Z3SymPySolverSystem
-from optillm.self_consistency import advanced_self_consistency_approach
-from optillm.plansearch import plansearch
-from optillm.leap import leap
-from optillm.reread import re2_approach
 API_KEY = os.environ.get("OPENROUTER_API_KEY")
-def compare_responses(message, model1, approach1, model2, approach2, system_message, max_tokens, temperature, top_p):
-    response1 = respond(message, [], model1, approach1, system_message, max_tokens, temperature, top_p)
-    response2 = respond(message, [], model2, approach2, system_message, max_tokens, temperature, top_p)
-    return response1, response2
-def parse_conversation(messages):
-    system_prompt = ""
-    conversation = []
-    for message in messages:
-        role = message['role']
-        content = message['content']
-        if role == 'system':
-            system_prompt = content
-        elif role in ['user', 'assistant']:
-            conversation.append(f"{role.capitalize()}: {content}")
-    initial_query = "\n".join(conversation)
-    return system_prompt, initial_query
-def respond(message, history, model, approach, system_message, max_tokens, temperature, top_p):
     try:
-        client = OpenAI(api_key=API_KEY, base_url="https://openrouter.ai/v1")
-        messages = [{"role": "system", "content": system_message}]
-        for val in history:
-            if val[0]:
-                messages.append({"role": "user", "content": val[0]})
-            if val[1]:
-                messages.append({"role": "assistant", "content": val[1]})
-        messages.append({"role": "user", "content": message})
-        if approach == "none":
-            response = client.chat.completions.create(
-                extra_headers={
-                    "HTTP-Referer": "https://github.com/codelion/optillm",
-                    "X-Title": "optillm"
-                },
-                model=model,
-                messages=messages,
-                max_tokens=max_tokens,
-                temperature=temperature,
-                top_p=top_p,
-            )
-            return response.choices[0].message.content
-        else:
-            system_prompt, initial_query = parse_conversation(messages)
-            if approach == 'rto':
-                final_response, _ = round_trip_optimization(system_prompt, initial_query, client, model)
-            elif approach == 'z3':
-                z3_solver = Z3SymPySolverSystem(system_prompt, client, model)
-                final_response, _ = z3_solver.process_query(initial_query)
-            elif approach == "self_consistency":
-                final_response, _ = advanced_self_consistency_approach(system_prompt, initial_query, client, model)
-            elif approach == "cot_reflection":
-                final_response, _ = cot_reflection(system_prompt, initial_query, client, model)
-            elif approach == 'plansearch':
-                response, _ = plansearch(system_prompt, initial_query, client, model)
-                final_response = response[0]
-            elif approach == 'leap':
-                final_response, _ = leap(system_prompt, initial_query, client, model)
-            elif approach == 're2':
-                final_response, _ = re2_approach(system_prompt, initial_query, client, model)
-            return final_response
     except Exception as e:
-        error_message = f"Error in respond function: {str(e)}\nType: {type(e).__name__}"
-        print(error_message)
-def create_model_dropdown():
-    return gr.Dropdown(
-        [ "meta-llama/llama-3.1-8b-instruct:free", "nousresearch/hermes-3-llama-3.1-405b:free","meta-llama/llama-3.2-1b-instruct:free",
-         "mistralai/mistral-7b-instruct:free","mistralai/pixtral-12b:free","meta-llama/llama-3.1-70b-instruct:free",
-         "qwen/qwen-2-7b-instruct:free", "qwen/qwen-2-vl-7b-instruct:free", "google/gemma-2-9b-it:free", "liquid/lfm-40b:free", "meta-llama/llama-3.1-405b-instruct:free",
-         "openchat/openchat-7b:free", "meta-llama/llama-3.2-90b-vision-instruct:free", "meta-llama/llama-3.2-11b-vision-instruct:free",
-         "meta-llama/llama-3-8b-instruct:free", "meta-llama/llama-3.2-3b-instruct:free", "microsoft/phi-3-medium-128k-instruct:free",
-         "microsoft/phi-3-mini-128k-instruct:free", "huggingfaceh4/zephyr-7b-beta:free"],
-        value="meta-llama/llama-3.2-1b-instruct:free", label="Model"
-    )
-def create_approach_dropdown():
-    return gr.Dropdown(
-        ["none", "leap", "plansearch", "cot_reflection", "rto", "self_consistency", "z3", "re2"],
-        value="none", label="Approach"
-    )
-html = """<iframe src="https://ghbtns.com/github-btn.html?user=codelion&repo=optillm&type=star&count=true&size=large" frameborder="0" scrolling="0" width="170" height="30" title="GitHub"></iframe>
 """
 with gr.Blocks() as demo:
-    gr.Markdown("# optillm - Optimizing LLM Inference")
-    gr.HTML(html)
-    with gr.Row():
-        system_message = gr.Textbox(value="", label="System message")
-        max_tokens = gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max new tokens")
-        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
-    with gr.Tabs():
-        with gr.TabItem("Chat"):
-            model = create_model_dropdown()
-            approach = create_approach_dropdown()
-            chatbot = gr.Chatbot()
-            msg = gr.Textbox()
-            with gr.Row():
-                submit = gr.Button("Submit")
-                clear = gr.Button("Clear")
-            def user(user_message, history):
-                return "", history + [[user_message, None]]
-            def bot(history, model, approach, system_message, max_tokens, temperature, top_p):
-                user_message = history[-1][0]
-                bot_message = respond(user_message, history[:-1], model, approach, system_message, max_tokens, temperature, top_p)
-                history[-1][1] = bot_message
-                return history
-            msg.submit(user, [msg, chatbot], [msg, chatbot]).then(
-                bot, [chatbot, model, approach, system_message, max_tokens, temperature, top_p], chatbot
-            )
-            submit.click(user, [msg, chatbot], [msg, chatbot]).then(
-                bot, [chatbot, model, approach, system_message, max_tokens, temperature, top_p], chatbot
-            )
-            clear.click(lambda: None, None, chatbot, queue=False)
-        with gr.TabItem("Compare"):
-            with gr.Row():
-                model1 = create_model_dropdown()
-                approach1 = create_approach_dropdown()
-                model2 = create_model_dropdown()
-                approach2 = create_approach_dropdown()
-            compare_input = gr.Textbox(label="Enter your message for comparison")
-            compare_button = gr.Button("Compare")
-            with gr.Row():
-                output1 = gr.Textbox(label="Response 1")
-                output2 = gr.Textbox(label="Response 2")
-            compare_button.click(
-                compare_responses,
-                inputs=[compare_input, model1, approach1, model2, approach2, system_message, max_tokens, temperature, top_p],
-                outputs=[output1, output2]
-            )
 if __name__ == "__main__":
     demo.launch()

 import os
 import gradio as gr
 from openai import OpenAI
+# Configuração da API e cliente OpenRouter correta
 API_KEY = os.environ.get("OPENROUTER_API_KEY")
+if not API_KEY:
+    raise ValueError("Defina a variável de ambiente OPENROUTER_API_KEY com sua chave válida.")
+client = OpenAI(api_key=API_KEY, base_url="https://openrouter.ai/api/v1")
+# Modelos selecionados (3 modelos diferentes)
+MODEL_1 = "meta-llama/llama-3.2-1b-instruct:free"
+MODEL_2 = "mistralai/mistral-7b-instruct:free"
+MODEL_3 = "google/gemma-2-9b-it:free"  # modelo para julgamento
+# Função para chamada simplificada à API OpenRouter
+def call_model(model_name, messages, max_tokens=512, temperature=0.7, top_p=0.95):
     try:
+        response = client.chat.completions.create(
+            model=model_name,
+            messages=messages,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p
+        )
+        return response.choices[0].message.content.strip()
     except Exception as e:
+        return f"Erro na chamada ao modelo {model_name}: {str(e)}"
+# Função que gera as duas respostas e depois chama a LLM julgadora
+def generate_and_judge(user_input):
+    system_prompt = "Você é um assistente útil e objetivo."
+    # Mensagens para LLM1 e LLM2 — simples prompt para responder a pergunta
+    messages = [{"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_input}]
+    # Gera respostas
+    response1 = call_model(MODEL_1, messages)
+    response2 = call_model(MODEL_2, messages)
+    # Prepara prompt para julgamento da terceira LLM
+    judge_prompt = f"""
+Você é um avaliador imparcial. Dadas duas respostas para a mesma pergunta, escolha a melhor.
+Pergunta: {user_input}
+Resposta 1: {response1}
+Resposta 2: {response2}
+Indique qual resposta é melhor (Resposta 1 ou Resposta 2) e explique brevemente sua escolha.
+Resposta:
 """
+    judge_messages = [{"role": "system", "content": "Você é um avaliador que escolhe a melhor resposta."},
+                      {"role": "user", "content": judge_prompt}]
+    judgment = call_model(MODEL_3, judge_messages)
+    # Retorna todas as informações para mostrar na interface
+    return response1, response2, judgment
+# Interface Gradio minimalista
 with gr.Blocks() as demo:
+    gr.Markdown("# Cascata de 3 LLMs - Resposta + Julgamento")
+    user_input = gr.Textbox(label="Digite sua pergunta aqui")
+    generate_button = gr.Button("Gerar respostas")
+    response1_out = gr.Textbox(label="Resposta da LLM 1", interactive=False)
+    response2_out = gr.Textbox(label="Resposta da LLM 2", interactive=False)
+    judgment_out = gr.Textbox(label="Julgamento da LLM 3", interactive=False)
+    generate_button.click(generate_and_judge, inputs=user_input,
+                          outputs=[response1_out, response2_out, judgment_out])
 if __name__ == "__main__":
     demo.launch()