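# Gradio demo for jsbeaudry/makandal-v2 with a Qwen3-style thinking/answer
# split. Assumed dependencies (not pinned here): torch, transformers, gradio.
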
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer once at startup
model_name = "jsbeaudry/makandal-v2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
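
# Note: torch.float16 assumes GPU inference; on a CPU-only machine,
# torch_dtype="auto" (or omitting the argument) is a safer default.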

# Qwen3-style models emit a "</think>" token separating the hidden reasoning
# trace from the final answer; its id is used below to split the output.
think_token_id = tokenizer.convert_tokens_to_ids("</think>")

def generate_response(prompt):
    # Format input for chat template
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    
    # Tokenize
    model_inputs = tokenizer([text], return_tensors="pt")
    model_inputs = {k: v.to(model.device) for k, v in model_inputs.items()}

    # Generate
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=2048,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    output_ids = generated_ids[0][len(model_inputs["input_ids"][0]):].tolist()

    # Locate the last "</think>" token by searching the reversed output;
    # `index` then points just past that token in forward order.
    try:
        index = len(output_ids) - output_ids[::-1].index(think_token_id)
    except ValueError:
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    return thinking_content, content

# Gradio Interface
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Your Prompt", placeholder="Ask something..."),
    outputs=[
        gr.Textbox(label="Thinking Content"),
        gr.Textbox(label="Final Response")
    ],
    title="Qwen3 Thinking Chatbot",
    description="Ask a question and get both the thinking trace and final answer from Qwen3-0.6B."
)

if __name__ == "__main__":
    demo.launch()
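
# Example programmatic use, bypassing the Gradio UI (illustrative prompt):
#
#   thinking, answer = generate_response("What is the capital of Haiti?")
#   print("THINKING:", thinking)
#   print("ANSWER:", answer)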





# import gradio as gr
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch

# # Load model and tokenizer
# tokenizer = AutoTokenizer.from_pretrained("jsbeaudry/makandal-v2")
# model = AutoModelForCausalLM.from_pretrained("jsbeaudry/makandal-v2")

# # Set device
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model.to(device)

# # Generation function
# def generate_text(prompt):
#     inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(device)
#     output = model.generate(
#         **inputs,
#         max_new_tokens=30,
#         do_sample=True,
#         repetition_penalty=1.2,
#         no_repeat_ngram_size=3,
#         temperature=0.9,
#         top_k=40,
#         top_p=0.85,
#         pad_token_id=tokenizer.pad_token_id,
#         eos_token_id=tokenizer.eos_token_id
#     )
#     return tokenizer.decode(output[0], skip_special_tokens=True)

# # Gradio interface
# iface = gr.Interface(
#     fn=generate_text,
#     inputs=gr.Textbox(lines=2, placeholder="Write a topic or a phrase..."),
#     outputs="text",
#     title="Makandal Text Generator",
#     description="Ekri yon fraz oswa mo kle pou jenere tèks ak modèl Makandal la. Modèl sa fèt espesyalman pou kontèks Ayiti."
# )

# if __name__ == "__main__":
#     iface.launch()