jsbeaudry committed (verified)
Commit 93c520c · Parent: 0da1971

Update app.py

Files changed (1)
  1. app.py +100 -30
app.py CHANGED
@@ -1,40 +1,110 @@
+import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch

-# Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("jsbeaudry/makandal-v2")
-model = AutoModelForCausalLM.from_pretrained("jsbeaudry/makandal-v2")
+# Load model and tokenizer once at startup
+model_name = "jsbeaudry/makandal-v2"
+
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)

-# Set device
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
+# Token id that closes the model's thinking trace
+think_token_id = tokenizer.convert_tokens_to_ids("</think>")

-# Generation function
-def generate_text(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(device)
-    output = model.generate(
-        **inputs,
-        max_new_tokens=30,
-        do_sample=True,
-        repetition_penalty=1.2,
-        no_repeat_ngram_size=3,
-        temperature=0.9,
-        top_k=40,
-        top_p=0.85,
-        pad_token_id=tokenizer.pad_token_id,
-        eos_token_id=tokenizer.eos_token_id
-    )
-    return tokenizer.decode(output[0], skip_special_tokens=True)
+def generate_response(prompt):
+    # Format the prompt with the model's chat template
+    messages = [{"role": "user", "content": prompt}]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+
+    # Tokenize and move tensors to the model's device
+    model_inputs = tokenizer([text], return_tensors="pt")
+    model_inputs = {k: v.to(model.device) for k, v in model_inputs.items()}
+
+    # Generate
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=2048,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9
+    )
+
+    # Keep only the newly generated tokens (drop the prompt)
+    output_ids = generated_ids[0][len(model_inputs["input_ids"][0]):].tolist()
+
+    # Split at the last "</think>" token; if none was generated,
+    # treat the whole output as the final answer
+    try:
+        index = len(output_ids) - output_ids[::-1].index(think_token_id)
+    except ValueError:
+        index = 0
+
+    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
+    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
+
+    return thinking_content, content

 # Gradio interface
-iface = gr.Interface(
-    fn=generate_text,
-    inputs=gr.Textbox(lines=2, placeholder="Ekri yon sijè oswa yon fraz..."),
-    outputs="text",
-    title="Makandal Text Generator",
-    description="Ekri yon fraz oswa mo kle pou jenere tèks ak modèl Makandal la. Modèl sa fèt espesyalman pou kontèks Ayiti."
+demo = gr.Interface(
+    fn=generate_response,
+    inputs=gr.Textbox(label="Your Prompt", placeholder="Ask something..."),
+    outputs=[
+        gr.Textbox(label="Thinking Content"),
+        gr.Textbox(label="Final Response")
+    ],
+    title="Qwen3 Thinking Chatbot",
+    description="Ask a question and get both the thinking trace and final answer from Qwen3-0.6B."
 )

 if __name__ == "__main__":
-    iface.launch()
+    demo.launch()
+
+# Previous version, kept for reference:
+# import gradio as gr
+# from transformers import AutoTokenizer, AutoModelForCausalLM
+# import torch
+
+# # Load model and tokenizer
+# tokenizer = AutoTokenizer.from_pretrained("jsbeaudry/makandal-v2")
+# model = AutoModelForCausalLM.from_pretrained("jsbeaudry/makandal-v2")
+
+# # Set device
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+# model.to(device)
+
+# # Generation function
+# def generate_text(prompt):
+#     inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(device)
+#     output = model.generate(
+#         **inputs,
+#         max_new_tokens=30,
+#         do_sample=True,
+#         repetition_penalty=1.2,
+#         no_repeat_ngram_size=3,
+#         temperature=0.9,
+#         top_k=40,
+#         top_p=0.85,
+#         pad_token_id=tokenizer.pad_token_id,
+#         eos_token_id=tokenizer.eos_token_id
+#     )
+#     return tokenizer.decode(output[0], skip_special_tokens=True)
+
+# # Gradio interface
+# iface = gr.Interface(
+#     fn=generate_text,
+#     inputs=gr.Textbox(lines=2, placeholder="Ekri yon sijè oswa yon fraz..."),
+#     outputs="text",
+#     title="Makandal Text Generator",
+#     description="Ekri yon fraz oswa mo kle pou jenere tèks ak modèl Makandal la. Modèl sa fèt espesyalman pou kontèks Ayiti."
+# )
+
+# if __name__ == "__main__":
+#     iface.launch()
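
The one non-obvious step in the new handler is how it splits the generated ids into a thinking trace and a final answer: calling .index on the reversed list finds the last occurrence of the "</think>" token. A minimal sketch of that logic with made-up token ids (151668 is only a hypothetical stand-in for whatever id the tokenizer actually assigns to "</think>"):

# Minimal sketch of the "</think>" split, using hypothetical token ids
think_token_id = 151668                  # stand-in value, not the real id
output_ids = [11, 22, 151668, 33, 44]    # pretend generated ids

try:
    # .index on the reversed list finds the LAST occurrence; subtracting
    # from the length gives the position just past that token
    index = len(output_ids) - output_ids[::-1].index(think_token_id)
except ValueError:
    index = 0  # no "</think>" emitted: treat the whole output as the answer

assert output_ids[:index] == [11, 22, 151668]  # thinking trace (incl. tag)
assert output_ids[index:] == [33, 44]          # final answer

If the model never emits "</think>" (as with a base model that lacks the token), convert_tokens_to_ids returns an id that never appears in the output, the ValueError branch fires, and the entire generation lands in the "Final Response" box.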