what2up committed on
Commit 59b0e70 · 1 Parent(s): ec376c9
Files changed (1)
  1. app.py +46 -3
app.py CHANGED
@@ -1,7 +1,50 @@
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM
 
-def greet(name):
-    return "Hello " + name + "!!"
+
+def greet(input):
+
+    model_name = "Qwen/Qwen3-8B"
+
+    # load the tokenizer and the model
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(model_name)
+
+    tokenizer.save_pretrained("./qwen3")
+    model.save_pretrained("./qwen3")
+
+    # prepare the model input
+    prompt = "Give me a short introduction to large language model."
+    prompt = input
+    messages = [{"role": "user", "content": prompt}]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+        enable_thinking=True,  # Switches between thinking and non-thinking modes. Default is True.
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+    # conduct text completion
+    generated_ids = model.generate(**model_inputs, max_new_tokens=32768)
+    output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :].tolist()
+
+    # parsing thinking content
+    try:
+        # rindex finding 151668 (</think>)
+        index = len(output_ids) - output_ids[::-1].index(151668)
+    except ValueError:
+        index = 0
+
+    thinking_content = tokenizer.decode(
+        output_ids[:index], skip_special_tokens=True
+    ).strip("\n")
+    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
+
+    # print("thinking content:", thinking_content)
+    # print("content:", content)
+
+    return "thinking content:" + thinking_content + "\n" + "content:" + content
 
 demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()
+demo.launch()