TEST

Sleeping

App Files Community

Reality123b committed on Nov 18, 2024

Commit

691f69e

verified ·

1 Parent(s): 83e20b0

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -13

app.py CHANGED Viewed

@@ -3,18 +3,20 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import time
 # Initialize model and tokenizer
-model_name = "Qwen/Qwen2.5-3B-Instruct"
 print("Loading model and tokenizer...")
 model = AutoModelForCausalLM.from_pretrained(
-    model_name,
     torch_dtype="auto",
     device_map="auto"
 )
-tokenizer = AutoTokenizer.from_pretrained(model_name)
 print("Model and tokenizer loaded!")
-def simulate_typing(text, min_chars_per_sec=20, max_chars_per_sec=60):
     """Simulate typing animation with variable speed."""
     full_text = ""
     words = text.split()
@@ -22,7 +24,6 @@ def simulate_typing(text, min_chars_per_sec=20, max_chars_per_sec=60):
         full_text += word
         if i < len(words) - 1:
             full_text += " "
-        # Vary typing speed between min and max chars per second
         delay = 1 / (min_chars_per_sec + (max_chars_per_sec - min_chars_per_sec) * torch.rand(1).item())
         time.sleep(delay)
         yield full_text
@@ -31,9 +32,9 @@ def generate_response(
     message,
     history: list[tuple[str, str]],
     system_message,
-    max_tokens,
-    temperature,
-    top_p,
 ):
     # Prepare conversation history
     messages = [{"role": "system", "content": system_message}]
@@ -52,7 +53,7 @@ def generate_response(
         add_generation_prompt=True
     )
-    # Prepare model inputs and generate in one go
     with torch.inference_mode():
         model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
         generated_ids = model.generate(
@@ -86,7 +87,8 @@ body, .gradio-container {
 """
 # System message
-system_message = """You are Qwen, created by Alibaba Cloud. You are a helpful assistant."""
 # Gradio chat interface
 demo = gr.ChatInterface(
@@ -118,10 +120,12 @@ demo = gr.ChatInterface(
             label="Top-p (nucleus sampling)"
         ),
     ],
-    css=custom_css
 )
 # Launch the demo
 if __name__ == "__main__":
-    demo.queue()  # Enable queuing for better handling of multiple requests
-    demo.launch()

 import torch
 import time
+# Model configuration
+MODEL_NAME = "Qwen/Qwen2-14B-Instruct"
 # Initialize model and tokenizer
 print("Loading model and tokenizer...")
 model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
     torch_dtype="auto",
     device_map="auto"
 )
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 print("Model and tokenizer loaded!")
+def simulate_typing(text, min_chars_per_sec=15, max_chars_per_sec=40):
     """Simulate typing animation with variable speed."""
     full_text = ""
     words = text.split()
         full_text += word
         if i < len(words) - 1:
             full_text += " "
         delay = 1 / (min_chars_per_sec + (max_chars_per_sec - min_chars_per_sec) * torch.rand(1).item())
         time.sleep(delay)
         yield full_text
     message,
     history: list[tuple[str, str]],
     system_message,
+    max_tokens=512,
+    temperature=0.7,
+    top_p=0.95
 ):
     # Prepare conversation history
     messages = [{"role": "system", "content": system_message}]
         add_generation_prompt=True
     )
+    # Generate response
     with torch.inference_mode():
         model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
         generated_ids = model.generate(
 """
 # System message
+system_message = """You are Qwen 2.5 14B, an advanced AI assistant created by Alibaba Cloud.
+You are knowledgeable, helpful, and strive to provide accurate and comprehensive responses."""
 # Gradio chat interface
 demo = gr.ChatInterface(
             label="Top-p (nucleus sampling)"
         ),
     ],
+    css=custom_css,
+    title="Qwen 2.5 14B Chat",
+    description="An advanced AI assistant powered by Qwen 2.5 14B"
 )
 # Launch the demo
 if __name__ == "__main__":
+    demo.queue(max_size=40)
+    demo.launch(max_threads=40)