# artificialguybr's picture
# Refactor call_api function to call_nvidia_api
# 62cbda2
# raw
# history blame
# 4.2 kB
import gradio as gr
import requests
import json
import os
# API key is read from the environment (or replace with your actual API key)
API_KEY = os.getenv('API_KEY')
# NVCF "pexec" function endpoint for the hosted chat model
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/df2bee43-fb69-42b9-9ee5-f4eabbeaf3a8"
# Shared headers for every request; "text/event-stream" asks for a streamed (SSE) response
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "accept": "text/event-stream",
    "content-type": "application/json",
}
# Default system message shown in the UI (NOTE(review): it is collected by the
# interface but not currently forwarded to the API payload)
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
def clear_chat(chat_history_state, chat_message):
    """Start a new topic: return an empty history and a blank message box."""
    print("Clearing chat...")
    # The incoming values are ignored; both outputs are reset.
    return [], ''
def user(message, history):
    """Record one user turn as a role/content dict and return the history.

    A falsy history (None or empty) is replaced by a fresh list; a non-empty
    history is mutated in place.
    """
    print(f"User message: {message}")
    if not history:
        history = []
    history.append({"role": "user", "content": message})
    return history
def call_nvidia_api(history, max_tokens, temperature, top_p, seed=42):
    """Send the chat history to the NVIDIA NVCF endpoint and return the full streamed reply.

    Args:
        history: chat turns, either role/content dicts (as produced by user()/chat())
            or plain strings (legacy format: alternating user/assistant turns).
        max_tokens / temperature / top_p / seed: sampling parameters forwarded verbatim.

    Returns:
        The concatenated assistant text from the SSE stream (empty string on no data).
    """
    messages = []
    for i, turn in enumerate(history or []):
        if isinstance(turn, dict):
            # Histories built by user()/chat() already carry role/content dicts;
            # forward them as-is instead of embedding the dict as the content string.
            messages.append({"role": turn.get("role", "user"),
                             "content": turn.get("content", "")})
        else:
            # Legacy plain-string history: even indices are user turns.
            messages.append({"role": "user" if i % 2 == 0 else "assistant",
                             "content": turn})
    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "seed": seed,
        "stream": True
    }
    response = requests.post(INVOKE_URL, headers=headers, json=payload, stream=True)
    full_response = ""
    for line in response.iter_lines():
        if not line:
            continue
        decoded_line = line.decode("utf-8").strip()
        if not decoded_line.startswith("data:"):
            continue
        data = decoded_line[5:].strip()
        if data == "[DONE]":
            # SSE end-of-stream sentinel is not JSON; previously it hit the
            # JSONDecodeError branch on every request.
            break
        try:
            json_data = json.loads(data)
        except json.JSONDecodeError:
            print(f"Invalid JSON: {data}")
            continue
        # OpenAI-style streaming chunks carry text at choices[0].delta.content;
        # fall back to a top-level "content" key (the original assumption).
        choices = json_data.get("choices") or []
        if choices:
            delta = choices[0].get("delta") or {}
            full_response += delta.get("content") or ""
        else:
            full_response += json_data.get("content", "")
    return full_response
def chat(history, system_message, max_tokens, temperature, top_p, top_k, repetition_penalty):
print("Starting chat...")
# Chamar a API da NVIDIA aqui com o histórico formatado
assistant_response = call_nvidia_api(history, max_tokens, temperature, top_p)
# Atualizar o histórico com a resposta do assistente
if history:
history[-1][1] += assistant_response
else:
history.append(["", assistant_response])
return history, history, ""
# Gradio interface setup
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Your Chatbot Interface")
            chatbot = gr.Chatbot()
            message = gr.Textbox(label="What do you want to chat about?", placeholder="Ask me anything.", lines=3)
            submit = gr.Button(value="Send message")
            clear = gr.Button(value="New topic")
            system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
            max_tokens = gr.Slider(20, 512, label="Max Tokens", step=20, value=500)
            temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.7)
            top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.95)
    # Raw conversation state: list of {"role", "content"} dicts.
    chat_history_state = gr.State([])

    def update_chatbot(message, chat_history):
        """Handle one send: record the user turn, query the API, format for display.

        Returns (display_pairs, raw_history, "") — pairs for gr.Chatbot, the
        dict history for the State, and an empty string to clear the textbox.
        """
        print("Updating chatbot...")
        chat_history = chat_history or []
        # Avoid adding the user message twice. The previous check indexed the
        # last dict entry with [0], which raised KeyError on the second turn.
        last = chat_history[-1] if chat_history else None
        if not (isinstance(last, dict) and last.get("role") == "user"):
            chat_history = user(message, chat_history)
        chat_history, _, _ = chat(chat_history, system_msg.value, max_tokens.value,
                                  temperature.value, top_p.value, 40, 1.1)
        # gr.Chatbot expects (user, assistant) pairs; the old (role, content)
        # tuples displayed the role string as the user message.
        pairs = []
        for msg in chat_history:
            role = msg.get("role") if isinstance(msg, dict) else None
            content = msg.get("content", "") if isinstance(msg, dict) else msg
            if role == "user":
                pairs.append((content, None))
            elif pairs and pairs[-1][1] is None:
                pairs[-1] = (pairs[-1][0], content)
            else:
                pairs.append((None, content))
        return pairs, chat_history, ""

    submit.click(
        fn=update_chatbot,
        inputs=[message, chat_history_state],
        outputs=[chatbot, chat_history_state, message]
    )
    clear.click(
        fn=clear_chat,
        inputs=[chat_history_state, message],
        outputs=[chat_history_state, message]
    )
demo.launch()