"""Gradio web UI for NVIDIA's Nemotron-3-8B-QA question-answering API.

Requires the NEMO_API_KEY environment variable to hold a valid NVIDIA
NGC API key.
"""

import json
import os
import random

import gradio as gr
import requests

# NVIDIA Cloud Functions endpoint for the Nemotron-3-8B-QA model.
INVOKE_URL = (
    "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/"
    "0c60f14d-46cb-465e-b994-227e1c3d5047"
)

# Largest seed the API accepts: the unsigned 64-bit maximum.
# (The original hard-coded 18446744073709552000, which is > 2**64 - 1.)
MAX_SEED = 2**64 - 1


def query_model(context, question, temperature, top_p, max_tokens, seed=""):
    """Query the Nemotron-3-8B-QA API and return the streamed answer.

    Parameters
    ----------
    context : str
        Background text the model should ground its answer in.
    question : str
        The user's question.
    temperature : float
        Sampling temperature forwarded to the API.
    top_p : float
        Nucleus-sampling parameter forwarded to the API.
    max_tokens : int
        Upper bound on the number of generated tokens.
    seed : str, optional
        Sampling seed. When empty, a random seed in [0, MAX_SEED] is
        generated.

    Returns
    -------
    str
        The concatenated answer text streamed back by the API.

    Raises
    ------
    ValueError
        If the NEMO_API_KEY environment variable is not set.
    requests.HTTPError
        If the API responds with an error status code.
    """
    api_key = os.environ.get("NEMO_API_KEY")
    if not api_key:
        raise ValueError("Please set the NEMO_API_KEY environment variable.")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "text/event-stream",
        "content-type": "application/json",
    }

    # Generate a random seed if the user did not specify one.
    if not seed:
        seed = random.randint(0, MAX_SEED)

    payload = {
        "messages": [
            {"content": context, "role": "context"},
            {"content": question, "role": "user"},
        ],
        "temperature": float(temperature),
        "top_p": float(top_p),
        "max_tokens": int(max_tokens),
        "stream": True,
        "seed": int(seed),  # user-supplied seed or the randomly generated one
    }

    response = requests.post(INVOKE_URL, headers=headers, json=payload, stream=True)
    # Surface API errors instead of silently returning an empty answer.
    response.raise_for_status()

    answer = ""
    for raw_line in response.iter_lines():
        if not raw_line:
            continue
        data = raw_line.decode("utf-8")
        # Server-sent events prefix each payload line with "data:".
        if data.startswith("data:"):
            data = data[len("data:"):].strip()
        if data == "[DONE]":
            break
        # Parse the JSON event instead of splitting on '"content":"',
        # which breaks on content containing escaped quotes or \uXXXX
        # escapes.
        try:
            event = json.loads(data)
        except json.JSONDecodeError:
            continue
        # NOTE(review): assumes the OpenAI-style streaming chunk schema
        # ({"choices": [{"delta": {"content": ...}}]}) used by NVIDIA's
        # NGC endpoints — confirm against the live API.
        for choice in event.get("choices", []):
            answer += choice.get("delta", {}).get("content", "")
    return answer


# Gradio interface, including an optional seed input bounded to the API's
# valid range.
iface = gr.Interface(
    fn=query_model,
    inputs=[
        gr.Textbox(label="Context", lines=5),
        gr.Textbox(label="Question"),
        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.2),
        gr.Slider(label="Top P", minimum=0.1, maximum=1.0, step=0.1, value=0.7),
        gr.Slider(label="Max Tokens", minimum=1, maximum=1024, step=63, value=1024),
        gr.Textbox(label="Seed (optional, 0 to 18446744073709551615)"),
    ],
    outputs="text",
    title="Nemotron-3-8B-QA",
    description="""
Unlock the Power of AI with Nemotron-3-8B-QA for Precision Question Answering

Dive into the realm of advanced AI with the Nemotron-3-8B-QA, a state-of-the-art language model fine-tuned by NVIDIA for unparalleled question answering performance. Built upon the robust Nemotron-3-8B architecture, this 8 billion parameter model is designed to understand and follow instructions with remarkable accuracy, providing you with precise answers to your queries.

How to Use:

  1. Provide a context in the designated box, offering the model relevant information or background on your question.
  2. Enter your question in the next box, making it as clear and specific as possible.
  3. Adjust Temperature, Top P, Max Tokens, and Seed (optional) as needed to customize the response.
  4. Click Submit to receive a detailed and accurate answer based on the provided context and the model's extensive knowledge base.

This cutting-edge question answering service is powered by NVIDIA NGC, ensuring top-notch performance and reliability, and is completely free to use.

Created by: @artificialguybr (Twitter)

Discover more: artificialguy.com

""",
)

# Only start the web server when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()