import json
import os
import random

import gradio as gr
import requests

# Largest valid 64-bit unsigned seed (2**64 - 1). The previous constant,
# 18446744073709552000, is 2**64 rounded through a float and exceeds the
# uint64 range by 384, so randomly generated seeds could be out of range.
MAX_SEED = 2**64 - 1


def _extract_content(event):
    """Return the text delta carried by one SSE JSON event, or "".

    Tries a proper JSON parse first — robust against escaped quotes and
    backslash escapes inside the content, which the plain string-split
    heuristic mangles. Falls back to that original heuristic if the event
    is not valid JSON or has an unexpected shape, so behavior never gets
    worse than before.
    """
    try:
        chunk = json.loads(event)
        # NOTE(review): assumes an OpenAI-style streaming schema
        # (choices[0].delta.content) — confirm against the NVCF API docs.
        return chunk["choices"][0]["delta"].get("content") or ""
    except (ValueError, KeyError, IndexError, TypeError):
        pass
    # Fallback: the original split-based extraction.
    if '"content":"' in event:
        try:
            return event.split('"content":"')[1].split('"')[0]
        except IndexError:
            return ""
    return ""


def query_model(context, question, temperature, top_p, max_tokens, seed=""):
    """
    Queries the Nemotron-3-8B-QA API with the provided question and context,
    allowing customization of temperature, top_p, max_tokens, and seed.
    If no seed is provided, generates a random seed within the valid
    64-bit unsigned range.

    Parameters
    ----------
    context : str
        Background text the model should answer from.
    question : str
        The user's question.
    temperature : float-like
        Sampling temperature (coerced with ``float()``).
    top_p : float-like
        Nucleus-sampling probability mass (coerced with ``float()``).
    max_tokens : int-like
        Maximum number of tokens to generate (coerced with ``int()``).
    seed : str or int, optional
        RNG seed. When falsy (e.g. the empty string coming from the UI
        textbox), a random seed in [0, 2**64 - 1] is drawn.

    Returns
    -------
    str
        The concatenated streamed answer text.

    Raises
    ------
    ValueError
        If the NEMO_API_KEY environment variable is not set.
    requests.HTTPError
        If the API responds with a non-2xx status code.
    """
    invoke_url = (
        "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/"
        "0c60f14d-46cb-465e-b994-227e1c3d5047"
    )
    api_key = os.environ.get("NEMO_API_KEY")
    if not api_key:
        raise ValueError("Please set the NEMO_API_KEY environment variable.")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "text/event-stream",
        "content-type": "application/json",
    }

    # Generate a random seed if the user did not specify one.
    if not seed:
        seed = random.randint(0, MAX_SEED)

    payload = {
        "messages": [
            {"content": context, "role": "context"},
            {"content": question, "role": "user"},
        ],
        "temperature": float(temperature),
        "top_p": float(top_p),
        "max_tokens": int(max_tokens),
        "stream": True,
        "seed": int(seed),  # user-supplied seed or the randomly generated one
    }

    # timeout guards against an indefinitely hanging connection; the stream
    # itself is read incrementally below.
    response = requests.post(
        invoke_url, headers=headers, json=payload, stream=True, timeout=60
    )
    # Fail loudly on HTTP errors instead of silently parsing an error body
    # into an empty answer.
    response.raise_for_status()

    answer = ""
    for raw_line in response.iter_lines():
        if not raw_line:
            continue
        data = raw_line.decode("utf-8")
        if not data.startswith("data:"):
            continue
        # Strip the SSE "data:" prefix; tolerates both "data:[DONE]" and
        # "data: [DONE]" termination markers.
        event = data[len("data:"):].strip()
        if event == "[DONE]":
            break
        answer += _extract_content(event)
    return answer


# Gradio UI exposing the sampling knobs and an optional seed textbox.
iface = gr.Interface(
    fn=query_model,
    inputs=[
        gr.Textbox(label="Context", lines=5),
        gr.Textbox(label="Question"),
        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.2),
        gr.Slider(label="Top P", minimum=0.1, maximum=1.0, step=0.1, value=0.7),
        gr.Slider(label="Max Tokens", minimum=1, maximum=1024, step=63, value=1024),
        gr.Textbox(label="Seed (optional, 0 to 18446744073709552000)"),
    ],
    outputs="text",
    title="Nemotron-3-8B-QA",
    description="""
Dive into the realm of advanced AI with the Nemotron-3-8B-QA, a state-of-the-art language model fine-tuned by NVIDIA for unparalleled question answering performance. Built upon the robust Nemotron-3-8B architecture, this 8 billion parameter model is designed to understand and follow instructions with remarkable accuracy, providing you with precise answers to your queries.
How to Use:
This cutting-edge question answering service is powered by NVIDIA NGC, ensuring top-notch performance and reliability, and is completely free to use.
Created by: @artificialguybr (Twitter)
Discover more: artificialguy.com
""",
)

if __name__ == "__main__":
    iface.launch()