Spaces:
Sleeping
Update app.py
app.py
CHANGED
@@ -3,18 +3,20 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import time
 
+# Model configuration
+MODEL_NAME = "Qwen/Qwen2-14B-Instruct"
+
 # Initialize model and tokenizer
-model_name = "Qwen/Qwen2.5-3B-Instruct"
 print("Loading model and tokenizer...")
 model = AutoModelForCausalLM.from_pretrained(
-    model_name,
+    MODEL_NAME,
     torch_dtype="auto",
     device_map="auto"
 )
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 print("Model and tokenizer loaded!")
 
-def simulate_typing(text, min_chars_per_sec=20, max_chars_per_sec=60):
+def simulate_typing(text, min_chars_per_sec=15, max_chars_per_sec=40):
     """Simulate typing animation with variable speed."""
     full_text = ""
     words = text.split()
@@ -22,7 +24,6 @@ def simulate_typing(text, min_chars_per_sec=20, max_chars_per_sec=60):
         full_text += word
         if i < len(words) - 1:
             full_text += " "
-        # Vary typing speed between min and max chars per second
         delay = 1 / (min_chars_per_sec + (max_chars_per_sec - min_chars_per_sec) * torch.rand(1).item())
         time.sleep(delay)
         yield full_text
@@ -31,9 +32,9 @@ def generate_response(
     message,
     history: list[tuple[str, str]],
     system_message,
-    max_tokens,
-    temperature,
-    top_p
+    max_tokens=512,
+    temperature=0.7,
+    top_p=0.95
 ):
     # Prepare conversation history
     messages = [{"role": "system", "content": system_message}]
@@ -52,7 +53,7 @@ def generate_response(
         add_generation_prompt=True
     )
 
-    #
+    # Generate response
     with torch.inference_mode():
         model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
         generated_ids = model.generate(
@@ -86,7 +87,8 @@ body, .gradio-container {
 """
 
 # System message
-system_message = """You are Qwen, created by Alibaba Cloud.
+system_message = """You are Qwen 2.5 14B, an advanced AI assistant created by Alibaba Cloud.
+You are knowledgeable, helpful, and strive to provide accurate and comprehensive responses."""
 
 # Gradio chat interface
 demo = gr.ChatInterface(
@@ -118,10 +120,12 @@ demo = gr.ChatInterface(
             label="Top-p (nucleus sampling)"
         ),
     ],
-    css=custom_css
+    css=custom_css,
+    title="Qwen 2.5 14B Chat",
+    description="An advanced AI assistant powered by Qwen 2.5 14B"
 )
 
 # Launch the demo
 if __name__ == "__main__":
-    demo.queue()
-    demo.launch()
+    demo.queue(max_size=40)
+    demo.launch(max_threads=40)
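
Note (not part of the commit): a minimal, self-contained sketch of how the updated simulate_typing defaults behave. Each yielded prefix is followed by a delay drawn between 1/max_chars_per_sec and 1/min_chars_per_sec seconds, so the new 15-40 range streams more slowly than the old 20-60 range. The enumerate loop header is assumed, since that line sits outside the diff context shown above.

import time
import torch

def simulate_typing(text, min_chars_per_sec=15, max_chars_per_sec=40):
    """Yield progressively longer prefixes of `text`, one word at a time."""
    full_text = ""
    words = text.split()
    for i, word in enumerate(words):  # loop header assumed; not visible in the diff context
        full_text += word
        if i < len(words) - 1:
            full_text += " "
        # Random per-word delay between 1/max_chars_per_sec and 1/min_chars_per_sec seconds
        delay = 1 / (min_chars_per_sec + (max_chars_per_sec - min_chars_per_sec) * torch.rand(1).item())
        time.sleep(delay)
        yield full_text

# Example: consume the generator the way a streaming Gradio handler would
for partial in simulate_typing("Hello from the Qwen demo"):
    print(partial)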
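The diff context cuts off inside the generation block at model.generate(. For orientation only, here is a sketch of how that block typically continues under the standard Transformers chat-generation pattern, using the new keyword defaults; this continuation is an assumption and is not taken from this commit.

    # Sketch only -- the remainder of generate_response is not shown in this diff.
    with torch.inference_mode():
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,   # defaults to 512 after this change
            temperature=temperature,     # defaults to 0.7
            top_p=top_p,                 # defaults to 0.95
            do_sample=True,
        )
        # Keep only the newly generated tokens, then decode them
        generated_ids = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]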