Staticaliza committed on
Commit
fa4e434
·
verified ·
1 Parent(s): f5f1e3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -26
app.py CHANGED
@@ -1,16 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import spaces
3
- import threading
4
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
5
 
6
  model_id = "microsoft/bitnet-b1.58-2B-4T"
7
- tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
8
- model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
9
 
10
- @spaces.GPU(duration=15)
11
- def gpu():
12
- print("[GPU] | GPU maintained.")
 
 
 
 
13
 
 
14
  def respond(
15
  message: str,
16
  history: list[tuple[str, str]],
@@ -30,22 +45,19 @@ def respond(
30
  prompt = tokenizer.apply_chat_template(
31
  messages, tokenize=False, add_generation_prompt=True
32
  )
33
- inputs = tokenizer(prompt, return_tensors="pt")
34
- input_ids = inputs.input_ids
35
- attention_mask = inputs.attention_mask
36
 
37
  streamer = TextIteratorStreamer(
38
  tokenizer, skip_prompt=True, skip_special_tokens=True
39
  )
40
- generate_kwargs = {
41
- "input_ids": input_ids,
42
- "attention_mask": attention_mask,
43
- "streamer": streamer,
44
- "max_new_tokens": max_tokens,
45
- "temperature": temperature,
46
- "top_p": top_p,
47
- "do_sample": True,
48
- }
49
  thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
50
  thread.start()
51
 
@@ -57,16 +69,49 @@ def respond(
57
  demo = gr.ChatInterface(
58
  fn=respond,
59
  title="Bitnet-b1.58-2B-4T Chatbot",
60
- description="powered by microsoft bitnet-b1.58-2b-4t (cpu only)",
61
  examples=[
62
- ["hello how are you", "you are a helpful ai assistant", 512, 0.7, 0.95],
63
- ["can you code a snake game in python", "you are a helpful ai assistant for coding", 2048, 0.7, 0.95],
 
 
 
 
 
 
 
 
 
 
 
 
64
  ],
65
  additional_inputs=[
66
- gr.Textbox(value="you are a helpful ai assistant", label="system message"),
67
- gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="max new tokens"),
68
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="temperature"),
69
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="top-p"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  ],
71
  )
72
 
 
1
+ import os
2
+
3
+ os.system("pip install git+https://github.com/shumingma/transformers.git")
4
+
5
+ import threading
6
+ import torch
7
+ import torch._dynamo
8
+ torch._dynamo.config.suppress_errors = True
9
+
10
+ from transformers import (
11
+ AutoModelForCausalLM,
12
+ AutoTokenizer,
13
+ TextIteratorStreamer,
14
+ )
15
  import gradio as gr
16
  import spaces
 
 
17
 
18
  model_id = "microsoft/bitnet-b1.58-2B-4T"
 
 
19
 
20
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
21
+ model = AutoModelForCausalLM.from_pretrained(
22
+ model_id,
23
+ torch_dtype=torch.bfloat16,
24
+ device_map="auto"
25
+ )
26
+ print(model.device)
27
 
28
+ @spaces.GPU
29
  def respond(
30
  message: str,
31
  history: list[tuple[str, str]],
 
45
  prompt = tokenizer.apply_chat_template(
46
  messages, tokenize=False, add_generation_prompt=True
47
  )
48
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
 
49
 
50
  streamer = TextIteratorStreamer(
51
  tokenizer, skip_prompt=True, skip_special_tokens=True
52
  )
53
+ generate_kwargs = dict(
54
+ **inputs,
55
+ streamer=streamer,
56
+ max_new_tokens=max_tokens,
57
+ temperature=temperature,
58
+ top_p=top_p,
59
+ do_sample=True,
60
+ )
 
61
  thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
62
  thread.start()
63
 
 
69
  demo = gr.ChatInterface(
70
  fn=respond,
71
  title="Bitnet-b1.58-2B-4T Chatbot",
72
+ description="This chat application is powered by Microsoft's SOTA Bitnet-b1.58-2B-4T and designed for natural and fast conversations.",
73
  examples=[
74
+ [
75
+ "Hello! How are you?",
76
+ "You are a helpful AI assistant for everyday tasks.",
77
+ 512,
78
+ 0.7,
79
+ 0.95,
80
+ ],
81
+ [
82
+ "Can you code a snake game in Python?",
83
+ "You are a helpful AI assistant for coding.",
84
+ 2048,
85
+ 0.7,
86
+ 0.95,
87
+ ],
88
  ],
89
  additional_inputs=[
90
+ gr.Textbox(
91
+ value="You are a helpful AI assistant.",
92
+ label="System message"
93
+ ),
94
+ gr.Slider(
95
+ minimum=1,
96
+ maximum=8192,
97
+ value=2048,
98
+ step=1,
99
+ label="Max new tokens"
100
+ ),
101
+ gr.Slider(
102
+ minimum=0.1,
103
+ maximum=4.0,
104
+ value=0.7,
105
+ step=0.1,
106
+ label="Temperature"
107
+ ),
108
+ gr.Slider(
109
+ minimum=0.1,
110
+ maximum=1.0,
111
+ value=0.95,
112
+ step=0.05,
113
+ label="Top-p (nucleus sampling)"
114
+ ),
115
  ],
116
  )
117