EduuGomes committed on
Commit
48a4d7a
·
verified ·
1 Parent(s): 6ccd719

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -151
app.py CHANGED
@@ -1,168 +1,79 @@
1
  import os
2
  import gradio as gr
3
-
4
  from openai import OpenAI
5
 
6
- from optillm.cot_reflection import cot_reflection
7
- from optillm.rto import round_trip_optimization
8
- from optillm.z3_solver import Z3SymPySolverSystem
9
- from optillm.self_consistency import advanced_self_consistency_approach
10
- from optillm.plansearch import plansearch
11
- from optillm.leap import leap
12
- from optillm.reread import re2_approach
13
-
14
-
15
  API_KEY = os.environ.get("OPENROUTER_API_KEY")
 
 
16
 
17
def compare_responses(message, model1, approach1, model2, approach2, system_message, max_tokens, temperature, top_p):
    """Answer the same message with two model/approach pairs.

    Both calls go through ``respond`` with an empty history and the same
    system message and sampling settings, so only the model and approach
    differ between the two answers.

    Args:
        message: The user message sent to both configurations.
        model1, approach1: Model id and approach name for the first answer.
        model2, approach2: Model id and approach name for the second answer.
        system_message: System prompt shared by both calls.
        max_tokens, temperature, top_p: Sampling settings shared by both calls.

    Returns:
        A ``(response1, response2)`` tuple with whatever ``respond`` returned
        for each configuration.
    """
    shared_settings = (system_message, max_tokens, temperature, top_p)
    response1 = respond(message, [], model1, approach1, *shared_settings)
    response2 = respond(message, [], model2, approach2, *shared_settings)
    return response1, response2
21
 
22
def parse_conversation(messages):
    """Split a chat message list into a system prompt and a flat transcript.

    Args:
        messages: Iterable of dicts, each with ``'role'`` and ``'content'`` keys.

    Returns:
        A ``(system_prompt, initial_query)`` tuple. ``system_prompt`` is the
        content of the last ``'system'`` message seen (``""`` if there is
        none). ``initial_query`` joins every ``'user'`` and ``'assistant'``
        message, in order, as ``"User: ..."`` / ``"Assistant: ..."`` lines
        separated by newlines. Messages with any other role are ignored.

    Raises:
        KeyError: If a message lacks the ``'role'`` or ``'content'`` key.
    """
    system_prompt = ""
    conversation = []

    for message in messages:
        role = message['role']
        content = message['content']

        if role == 'system':
            # A later system message replaces an earlier one.
            system_prompt = content
        elif role in ('user', 'assistant'):
            conversation.append(f"{role.capitalize()}: {content}")

    initial_query = "\n".join(conversation)
    return system_prompt, initial_query
37
 
38
def respond(message, history, model, approach, system_message, max_tokens, temperature, top_p):
    """Produce one assistant reply through OpenRouter, optionally via an optillm approach.

    Args:
        message: The new user message.
        history: Sequence of ``[user_text, assistant_text]`` pairs from earlier
            turns; falsy entries are skipped.
        model: Model id passed to the API / optillm helper.
        approach: ``"none"`` for a plain chat completion, otherwise one of
            ``"rto"``, ``"z3"``, ``"self_consistency"``, ``"cot_reflection"``,
            ``"plansearch"``, ``"leap"``, ``"re2"``.
        system_message: System prompt placed first in the message list.
        max_tokens, temperature, top_p: Sampling settings; only used when
            ``approach`` is ``"none"``.

    Returns:
        The reply text. On any failure the error description is printed and
        returned instead, so the UI shows what went wrong rather than an
        empty value.
    """
    try:
        # OpenRouter's OpenAI-compatible endpoint lives under /api/v1.
        client = OpenAI(api_key=API_KEY, base_url="https://openrouter.ai/api/v1")

        messages = [{"role": "system", "content": system_message}]
        for val in history:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})
        messages.append({"role": "user", "content": message})

        if approach == "none":
            response = client.chat.completions.create(
                extra_headers={
                    "HTTP-Referer": "https://github.com/codelion/optillm",
                    "X-Title": "optillm"
                },
                model=model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
            )
            return response.choices[0].message.content

        # The optillm helpers take a system prompt plus a flattened transcript.
        system_prompt, initial_query = parse_conversation(messages)

        if approach == 'rto':
            final_response, _ = round_trip_optimization(system_prompt, initial_query, client, model)
        elif approach == 'z3':
            z3_solver = Z3SymPySolverSystem(system_prompt, client, model)
            final_response, _ = z3_solver.process_query(initial_query)
        elif approach == "self_consistency":
            final_response, _ = advanced_self_consistency_approach(system_prompt, initial_query, client, model)
        elif approach == "cot_reflection":
            final_response, _ = cot_reflection(system_prompt, initial_query, client, model)
        elif approach == 'plansearch':
            response, _ = plansearch(system_prompt, initial_query, client, model)
            final_response = response[0]
        elif approach == 'leap':
            final_response, _ = leap(system_prompt, initial_query, client, model)
        elif approach == 're2':
            final_response, _ = re2_approach(system_prompt, initial_query, client, model)
        else:
            # Previously fell through and hit an unbound ``final_response``.
            raise ValueError(f"Unknown approach: {approach!r}")

        return final_response

    except Exception as e:
        error_message = f"Error in respond function: {str(e)}\nType: {type(e).__name__}"
        print(error_message)
        # Return the message so callers do not receive an implicit None.
        return error_message
89
-
90
def create_model_dropdown():
    """Build the "Model" dropdown listing the selectable model ids.

    Returns:
        A new ``gr.Dropdown`` whose initial value is
        ``"meta-llama/llama-3.2-1b-instruct:free"``.
    """
    model_ids = [
        "meta-llama/llama-3.1-8b-instruct:free",
        "nousresearch/hermes-3-llama-3.1-405b:free",
        "meta-llama/llama-3.2-1b-instruct:free",
        "mistralai/mistral-7b-instruct:free",
        "mistralai/pixtral-12b:free",
        "meta-llama/llama-3.1-70b-instruct:free",
        "qwen/qwen-2-7b-instruct:free",
        "qwen/qwen-2-vl-7b-instruct:free",
        "google/gemma-2-9b-it:free",
        "liquid/lfm-40b:free",
        "meta-llama/llama-3.1-405b-instruct:free",
        "openchat/openchat-7b:free",
        "meta-llama/llama-3.2-90b-vision-instruct:free",
        "meta-llama/llama-3.2-11b-vision-instruct:free",
        "meta-llama/llama-3-8b-instruct:free",
        "meta-llama/llama-3.2-3b-instruct:free",
        "microsoft/phi-3-medium-128k-instruct:free",
        "microsoft/phi-3-mini-128k-instruct:free",
        "huggingfaceh4/zephyr-7b-beta:free",
    ]
    return gr.Dropdown(
        model_ids,
        value="meta-llama/llama-3.2-1b-instruct:free", label="Model"
    )
100
-
101
def create_approach_dropdown():
    """Build the "Approach" dropdown listing the selectable approach names.

    Returns:
        A new ``gr.Dropdown`` whose initial value is ``"none"``.
    """
    approaches = ["none", "leap", "plansearch", "cot_reflection", "rto", "self_consistency", "z3", "re2"]
    return gr.Dropdown(
        approaches,
        value="none", label="Approach"
    )
106
-
107
# GitHub star button embedded at the top of the page.
html = """<iframe src="https://ghbtns.com/github-btn.html?user=codelion&repo=optillm&type=star&count=true&size=large" frameborder="0" scrolling="0" width="170" height="30" title="GitHub"></iframe>
"""

with gr.Blocks() as demo:
    gr.Markdown("# optillm - Optimizing LLM Inference")
    gr.HTML(html)

    # Settings shared by both tabs.
    with gr.Row():
        system_message = gr.Textbox(value="", label="System message")
        max_tokens = gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max new tokens")
        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

    with gr.Tabs():
        with gr.TabItem("Chat"):
            model = create_model_dropdown()
            approach = create_approach_dropdown()
            chatbot = gr.Chatbot()
            msg = gr.Textbox()
            with gr.Row():
                submit = gr.Button("Submit")
                clear = gr.Button("Clear")

            def user(user_message, history):
                """Clear the input box and append the new user turn with no reply yet."""
                return "", history + [[user_message, None]]

            def bot(history, model, approach, system_message, max_tokens, temperature, top_p):
                """Fill in the reply for the last turn of ``history`` via ``respond``."""
                user_message = history[-1][0]
                bot_message = respond(user_message, history[:-1], model, approach, system_message, max_tokens, temperature, top_p)
                history[-1][1] = bot_message
                return history

            # Pressing Enter in the textbox and clicking Submit run the same chain.
            bot_inputs = [chatbot, model, approach, system_message, max_tokens, temperature, top_p]
            for trigger in (msg.submit, submit.click):
                trigger(user, [msg, chatbot], [msg, chatbot]).then(
                    bot, bot_inputs, chatbot
                )
            clear.click(lambda: None, None, chatbot, queue=False)

        with gr.TabItem("Compare"):
            with gr.Row():
                model1 = create_model_dropdown()
                approach1 = create_approach_dropdown()
                model2 = create_model_dropdown()
                approach2 = create_approach_dropdown()

            compare_input = gr.Textbox(label="Enter your message for comparison")
            compare_button = gr.Button("Compare")

            with gr.Row():
                output1 = gr.Textbox(label="Response 1")
                output2 = gr.Textbox(label="Response 2")

            compare_button.click(
                compare_responses,
                inputs=[compare_input, model1, approach1, model2, approach2, system_message, max_tokens, temperature, top_p],
                outputs=[output1, output2]
            )

if __name__ == "__main__":
    demo.launch()
 
1
  import os
2
  import gradio as gr
 
3
  from openai import OpenAI
4
 
5
+ # Configuração da API e cliente OpenRouter correta
 
 
 
 
 
 
 
 
6
  API_KEY = os.environ.get("OPENROUTER_API_KEY")
7
# Fail at startup when the key is missing instead of on the first request.
if not API_KEY:
    raise ValueError("Defina a variável de ambiente OPENROUTER_API_KEY com sua chave válida.")

# Single shared client pointed at OpenRouter's OpenAI-compatible endpoint.
client = OpenAI(api_key=API_KEY, base_url="https://openrouter.ai/api/v1")

# Selected models (three different ones).
MODEL_1 = "meta-llama/llama-3.2-1b-instruct:free"
MODEL_2 = "mistralai/mistral-7b-instruct:free"
MODEL_3 = "google/gemma-2-9b-it:free"  # judge model
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # Função para chamada simplificada à API OpenRouter
18
def call_model(model_name, messages, max_tokens=512, temperature=0.7, top_p=0.95):
    """Send one chat-completion request through the shared ``client``.

    Args:
        model_name: Model id to query.
        messages: Chat messages as a list of ``{"role", "content"}`` dicts.
        max_tokens: Completion length limit forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.

    Returns:
        The first choice's text with surrounding whitespace removed, or an
        empty string when the message carries no text. If the call raises,
        an error description string is returned instead of propagating the
        exception.
    """
    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p
        )
        # ``content`` can be None; calling .strip() on it used to surface as a
        # confusing AttributeError text through the handler below.
        content = response.choices[0].message.content
        return (content or "").strip()
    except Exception as e:
        return f"Erro na chamada ao modelo {model_name}: {str(e)}"
30
+
31
+ # Função que gera as duas respostas e depois chama a LLM julgadora
32
def generate_and_judge(user_input):
    """Answer a question with two models, then ask a third to pick the better answer.

    Args:
        user_input: The question text typed by the user.

    Returns:
        A ``(response1, response2, judgment)`` tuple of strings as returned by
        ``call_model`` for ``MODEL_1``, ``MODEL_2`` and ``MODEL_3``. For a
        missing or whitespace-only question, three empty strings are returned
        without calling any model.
    """
    # Nothing to answer: skip the three API calls.
    if not user_input or not user_input.strip():
        return "", "", ""

    system_prompt = "Você é um assistente útil e objetivo."

    # Both answering models receive the same plain question prompt.
    messages = [{"role": "system", "content": system_prompt},
                {"role": "user", "content": user_input}]

    # Generate the two candidate answers.
    response1 = call_model(MODEL_1, messages)
    response2 = call_model(MODEL_2, messages)

    # Build the prompt for the judging model.
    judge_prompt = f"""
Você é um avaliador imparcial. Dadas duas respostas para a mesma pergunta, escolha a melhor.

Pergunta: {user_input}

Resposta 1: {response1}

Resposta 2: {response2}

Indique qual resposta é melhor (Resposta 1 ou Resposta 2) e explique brevemente sua escolha.
Resposta:
"""

    judge_messages = [{"role": "system", "content": "Você é um avaliador que escolhe a melhor resposta."},
                      {"role": "user", "content": judge_prompt}]

    judgment = call_model(MODEL_3, judge_messages)

    # Everything the interface displays.
    return response1, response2, judgment
64
+
65
# Minimal Gradio interface: one question box, one button, three read-only outputs.
with gr.Blocks() as demo:
    gr.Markdown("# Cascata de 3 LLMs - Resposta + Julgamento")
    user_input = gr.Textbox(label="Digite sua pergunta aqui")
    generate_button = gr.Button("Gerar respostas")

    response1_out = gr.Textbox(label="Resposta da LLM 1", interactive=False)
    response2_out = gr.Textbox(label="Resposta da LLM 2", interactive=False)
    judgment_out = gr.Textbox(label="Julgamento da LLM 3", interactive=False)

    # The three return values of generate_and_judge map onto the three outputs in order.
    generate_button.click(generate_and_judge, inputs=user_input,
                          outputs=[response1_out, response2_out, judgment_out])

if __name__ == "__main__":
    demo.launch()