File size: 4,145 Bytes
acac17b
 
 
092abcd
acac17b
092abcd
 
392f157
092abcd
 
392f157
 
 
 
 
 
 
 
 
 
092abcd
 
 
 
 
392f157
 
 
 
 
092abcd
 
 
392f157
092abcd
392f157
092abcd
392f157
 
 
 
 
 
 
 
 
 
 
 
 
 
acac17b
092abcd
392f157
 
 
 
 
58b8c81
 
 
5957372
acac17b
392f157
acac17b
092abcd
2c0ed9f
1a85890
 
 
 
 
 
 
 
 
 
092abcd
1a85890
 
 
 
 
 
 
 
 
 
 
8327923
acac17b
 
092abcd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import gradio as gr
import requests
import random

# Função atualizada para incluir a geração de uma seed aleatória
def query_model(context, question, temperature, top_p, max_tokens, seed=""):
    """
    Query the Nemotron-3-8B-QA API with the provided question and context.

    Parameters:
        context (str): Background text the model should answer from.
        question (str): The user's question.
        temperature (float|str): Sampling temperature; coerced with float().
        top_p (float|str): Nucleus-sampling probability mass; coerced with float().
        max_tokens (int|str): Maximum tokens to generate; coerced with int().
        seed (str|int, optional): RNG seed. If blank, a random 64-bit seed
            is generated.

    Returns:
        str: The answer text accumulated from the streamed response.

    Raises:
        ValueError: If NEMO_API_KEY is unset, or if a non-numeric seed
            string is supplied.
        requests.HTTPError: If the API responds with an error status.
    """
    invoke_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0c60f14d-46cb-465e-b994-227e1c3d5047"
    api_key = os.environ.get("NEMO_API_KEY")
    if not api_key:
        raise ValueError("Please set the NEMO_API_KEY environment variable.")
    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "text/event-stream",
        "content-type": "application/json",
    }

    # Generate a random seed when the user left the field blank.
    # BUG FIX: the previous upper bound (18446744073709552000) exceeds the
    # maximum unsigned 64-bit value; clamp to 2**64 - 1.
    seed = str(seed).strip()
    if not seed:
        seed = random.randint(0, 2**64 - 1)

    payload = {
        "messages": [
            {"content": context, "role": "context"},
            {"content": question, "role": "user"},
        ],
        "temperature": float(temperature),
        "top_p": float(top_p),
        "max_tokens": int(max_tokens),
        "stream": True,
        "seed": int(seed),  # user-supplied seed, or the random one above
    }

    answer = ""
    # Stream server-sent events and accumulate the generated text; the
    # context manager guarantees the connection is released.
    with requests.post(invoke_url, headers=headers, json=payload, stream=True) as response:
        response.raise_for_status()  # surface HTTP errors instead of silently returning ""
        for line in response.iter_lines():
            if not line:
                continue
            data = line.decode("utf-8")
            # NOTE(review): naive extraction — would break if the content
            # itself contains an escaped quote; kept as-is for compatibility
            # with the observed event format. TODO: parse the JSON payload.
            if '"content":"' in data:
                try:
                    answer += data.split('"content":"')[1].split('"')[0]
                except IndexError:
                    continue
            if data == "data:[DONE]":
                break
    return answer

# Interface do Gradio atualizada para incluir a entrada da seed com limitação de valor
# Landing-page copy for the Gradio app (rendered as HTML by gr.Interface).
_DESCRIPTION_HTML = """<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
<strong>Unlock the Power of AI with Nemotron-3-8B-QA for Precision Question Answering</strong>
</div>
<p>
    Dive into the realm of advanced AI with the Nemotron-3-8B-QA, a state-of-the-art language model fine-tuned by NVIDIA for unparalleled question answering performance. Built upon the robust Nemotron-3-8B architecture, this 8 billion parameter model is designed to understand and follow instructions with remarkable accuracy, providing you with precise answers to your queries.
</p>
<p>
    <strong>How to Use:</strong>
</p>
<ol>
    <li>Provide a <strong>context</strong> in the designated box, offering the model relevant information or background on your question.</li>
    <li>Enter your <strong>question</strong> in the next box, making it as clear and specific as possible.</li>
    <li>Adjust <strong>Temperature</strong>, <strong>Top P</strong>, <strong>Max Tokens</strong>, and <strong>Seed</strong> (optional) as needed to customize the response.</li>
    <li>Click <strong>Submit</strong> to receive a detailed and accurate answer based on the provided context and the model's extensive knowledge base.</li>
</ol>
<p>
    <strong>This cutting-edge question answering service is powered by NVIDIA NGC, ensuring top-notch performance and reliability, and is completely free to use.</strong>
</p>
<p>
    <strong>Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)
</p>
<p>
    <strong>Discover more:</strong> <a href="https://artificialguy.com">artificialguy.com</a>
</p>
"""

# Input widgets, in the positional order expected by query_model:
# context, question, temperature, top_p, max_tokens, seed.
_INPUT_COMPONENTS = [
    gr.Textbox(label="Context", lines=5),
    gr.Textbox(label="Question"),
    gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.2),
    gr.Slider(label="Top P", minimum=0.1, maximum=1.0, step=0.1, value=0.7),
    gr.Slider(label="Max Tokens", minimum=1, maximum=1024, step=63, value=1024),
    gr.Textbox(label="Seed (optional, 0 to 18446744073709552000)"),
]

# Wire the model-query function into a Gradio interface and start the app.
iface = gr.Interface(
    fn=query_model,
    inputs=_INPUT_COMPONENTS,
    outputs="text",
    title="Nemotron-3-8B-QA",
    description=_DESCRIPTION_HTML,
)

iface.launch()