Reality123b committed on
Commit
691f69e
·
verified ·
1 Parent(s): 83e20b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -3,18 +3,20 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
  import time
5
 
 
 
 
6
  # Initialize model and tokenizer
7
- model_name = "Qwen/Qwen2.5-3B-Instruct"
8
  print("Loading model and tokenizer...")
9
  model = AutoModelForCausalLM.from_pretrained(
10
- model_name,
11
  torch_dtype="auto",
12
  device_map="auto"
13
  )
14
- tokenizer = AutoTokenizer.from_pretrained(model_name)
15
  print("Model and tokenizer loaded!")
16
 
17
- def simulate_typing(text, min_chars_per_sec=20, max_chars_per_sec=60):
18
  """Simulate typing animation with variable speed."""
19
  full_text = ""
20
  words = text.split()
@@ -22,7 +24,6 @@ def simulate_typing(text, min_chars_per_sec=20, max_chars_per_sec=60):
22
  full_text += word
23
  if i < len(words) - 1:
24
  full_text += " "
25
- # Vary typing speed between min and max chars per second
26
  delay = 1 / (min_chars_per_sec + (max_chars_per_sec - min_chars_per_sec) * torch.rand(1).item())
27
  time.sleep(delay)
28
  yield full_text
@@ -31,9 +32,9 @@ def generate_response(
31
  message,
32
  history: list[tuple[str, str]],
33
  system_message,
34
- max_tokens,
35
- temperature,
36
- top_p,
37
  ):
38
  # Prepare conversation history
39
  messages = [{"role": "system", "content": system_message}]
@@ -52,7 +53,7 @@ def generate_response(
52
  add_generation_prompt=True
53
  )
54
 
55
- # Prepare model inputs and generate in one go
56
  with torch.inference_mode():
57
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
58
  generated_ids = model.generate(
@@ -86,7 +87,8 @@ body, .gradio-container {
86
  """
87
 
88
  # System message
89
- system_message = """You are Qwen, created by Alibaba Cloud. You are a helpful assistant."""
 
90
 
91
  # Gradio chat interface
92
  demo = gr.ChatInterface(
@@ -118,10 +120,12 @@ demo = gr.ChatInterface(
118
  label="Top-p (nucleus sampling)"
119
  ),
120
  ],
121
- css=custom_css
 
 
122
  )
123
 
124
  # Launch the demo
125
  if __name__ == "__main__":
126
- demo.queue() # Enable queuing for better handling of multiple requests
127
- demo.launch()
 
3
  import torch
4
  import time
5
 
6
+ # Model configuration
7
+ MODEL_NAME = "Qwen/Qwen2.5-14B-Instruct"
8
+
9
  # Initialize model and tokenizer
 
10
  print("Loading model and tokenizer...")
11
  model = AutoModelForCausalLM.from_pretrained(
12
+ MODEL_NAME,
13
  torch_dtype="auto",
14
  device_map="auto"
15
  )
16
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
17
  print("Model and tokenizer loaded!")
18
 
19
+ def simulate_typing(text, min_chars_per_sec=15, max_chars_per_sec=40):
20
  """Simulate typing animation with variable speed."""
21
  full_text = ""
22
  words = text.split()
 
24
  full_text += word
25
  if i < len(words) - 1:
26
  full_text += " "
 
27
  delay = 1 / (min_chars_per_sec + (max_chars_per_sec - min_chars_per_sec) * torch.rand(1).item())
28
  time.sleep(delay)
29
  yield full_text
 
32
  message,
33
  history: list[tuple[str, str]],
34
  system_message,
35
+ max_tokens=512,
36
+ temperature=0.7,
37
+ top_p=0.95
38
  ):
39
  # Prepare conversation history
40
  messages = [{"role": "system", "content": system_message}]
 
53
  add_generation_prompt=True
54
  )
55
 
56
+ # Generate response
57
  with torch.inference_mode():
58
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
59
  generated_ids = model.generate(
 
87
  """
88
 
89
  # System message
90
+ system_message = """You are Qwen 2.5 14B, an advanced AI assistant created by Alibaba Cloud.
91
+ You are knowledgeable, helpful, and strive to provide accurate and comprehensive responses."""
92
 
93
  # Gradio chat interface
94
  demo = gr.ChatInterface(
 
120
  label="Top-p (nucleus sampling)"
121
  ),
122
  ],
123
+ css=custom_css,
124
+ title="Qwen 2.5 14B Chat",
125
+ description="An advanced AI assistant powered by Qwen 2.5 14B"
126
  )
127
 
128
  # Launch the demo
129
  if __name__ == "__main__":
130
+ demo.queue(max_size=40)
131
+ demo.launch(max_threads=40)