Reality123b committed
Commit f69c6af · verified · 1 Parent(s): 691f69e

Update app.py

Files changed (1)
  1. app.py +60 -117
app.py CHANGED
@@ -1,131 +1,74 @@
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
- import time
-
- # Model configuration
- MODEL_NAME = "Qwen/Qwen2-14B-Instruct"
-
- # Initialize model and tokenizer
- print("Loading model and tokenizer...")
  model = AutoModelForCausalLM.from_pretrained(
-     MODEL_NAME,
      torch_dtype="auto",
-     device_map="auto"
  )
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
- print("Model and tokenizer loaded!")
-
- def simulate_typing(text, min_chars_per_sec=15, max_chars_per_sec=40):
-     """Simulate typing animation with variable speed."""
-     full_text = ""
-     words = text.split()
-     for i, word in enumerate(words):
-         full_text += word
-         if i < len(words) - 1:
-             full_text += " "
-         delay = 1 / (min_chars_per_sec + (max_chars_per_sec - min_chars_per_sec) * torch.rand(1).item())
-         time.sleep(delay)
-         yield full_text
-
- def generate_response(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens=512,
-     temperature=0.7,
-     top_p=0.95
- ):
-     # Prepare conversation history
-     messages = [{"role": "system", "content": system_message}]
-     for user_msg, assistant_msg in history:
-         if user_msg:
-             messages.append({"role": "user", "content": user_msg})
-         if assistant_msg:
-             messages.append({"role": "assistant", "content": assistant_msg})
-
-     messages.append({"role": "user", "content": message})
-
-     # Convert messages to model input format
-     text = tokenizer.apply_chat_template(
-         messages,
-         tokenize=False,
-         add_generation_prompt=True
-     )
-
-     # Generate response
-     with torch.inference_mode():
-         model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-         generated_ids = model.generate(
-             **model_inputs,
-             max_new_tokens=max_tokens,
-             temperature=temperature,
-             top_p=top_p,
-             do_sample=True,
-             pad_token_id=tokenizer.eos_token_id
-         )
-         generated_ids = generated_ids[0, len(model_inputs.input_ids[0]):]
-         response = tokenizer.decode(generated_ids, skip_special_tokens=True)
-
-     # Return response with typing animation
-     for partial_response in simulate_typing(response):
-         yield partial_response
-
- # Custom CSS with typing cursor animation
- custom_css = """
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600&display=swap');
- body, .gradio-container {
-     font-family: 'Inter', sans-serif;
- }
- .typing-cursor::after {
-     content: '|';
-     animation: blink 1s step-start infinite;
- }
- @keyframes blink {
-     50% { opacity: 0; }
- }
- """
-
- # System message
- system_message = """You are Qwen 2.5 14B, an advanced AI assistant created by Alibaba Cloud.
- You are knowledgeable, helpful, and strive to provide accurate and comprehensive responses."""
-
- # Gradio chat interface
- demo = gr.ChatInterface(
-     generate_response,
-     additional_inputs=[
-         gr.Textbox(
-             value=system_message,
-             visible=False,
-         ),
-         gr.Slider(
-             minimum=1,
-             maximum=2048,
-             value=512,
-             step=1,
-             label="Max new tokens"
-         ),
-         gr.Slider(
-             minimum=0.1,
-             maximum=2.0,
-             value=0.7,
-             step=0.1,
-             label="Temperature"
-         ),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)"
-         ),
-     ],
-     css=custom_css,
-     title="Qwen 2.5 14B Chat",
-     description="An advanced AI assistant powered by Qwen 2.5 14B"
- )
-
- # Launch the demo
- if __name__ == "__main__":
-     demo.queue(max_size=40)
-     demo.launch(max_threads=40)
  import gradio as gr
  import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+ # Set seed for reproducibility
+ torch.random.manual_seed(0)
+
+ # Load the model and tokenizer
  model = AutoModelForCausalLM.from_pretrained(
+     "microsoft/Phi-3.5-mini-instruct",
+     device_map="cuda",
      torch_dtype="auto",
+     trust_remote_code=True,
  )
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")
+
+ # Define the pipeline
+ pipe = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+ )
+
+ # System message (invisible to the user)
+ SYSTEM_MESSAGE = {"role": "system", "content": "You are a helpful AI assistant."}
+
+ # Function to process the user input and generate output
+ def chatbot_response(conversation_history):
+     # Build message sequence
+     messages = [SYSTEM_MESSAGE] + [
+         {"role": "user", "content": message["user_input"]} for message in conversation_history
+     ]
+     # Pass messages to the model
+     generation_args = {
+         "max_new_tokens": 500,
+         "return_full_text": False,
+         "temperature": 0.0,
+         "do_sample": False,
+     }
+     output = pipe(messages, **generation_args)
+     assistant_reply = output[0]["generated_text"]
+     # Append assistant's response to history
+     conversation_history[-1]["assistant_reply"] = assistant_reply
+     return conversation_history
+
+ # Define Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# AI Chatbot with System Message")
+
+     with gr.Row():
+         with gr.Column():
+             chatbox = gr.Chatbot()
+             input_box = gr.Textbox(label="Your Message")
+             submit_btn = gr.Button("Submit")
+
+     conversation_state = gr.State([])  # Maintain conversation history
+
+     def update_conversation(user_input, history):
+         if user_input.strip():
+             history.append({"user_input": user_input})
+             updated_history = chatbot_response(history)
+             return updated_history, ""
+         return history, ""
+
+     submit_btn.click(
+         update_conversation,
+         inputs=[input_box, conversation_state],
+         outputs=[conversation_state, input_box],
+     )
+
+     chatbox.update(chatbot_response(conversation_state))
+
+ # Launch the interface
+ demo.launch()
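
Note: as committed, the new Blocks UI never routes the model's reply back to the gr.Chatbot. update_conversation does not list chatbox among its outputs, the dict-based history is not the list-of-pairs format gr.Chatbot renders, and the trailing chatbox.update(chatbot_response(conversation_state)) runs once at build time against the gr.State wrapper itself rather than its value. The committed chatbot_response also forwards only user turns to the model, dropping prior assistant replies. Below is a minimal sketch of how the handler inside the `with gr.Blocks() as demo:` block could be rewired; it reuses the commit's pipe, SYSTEM_MESSAGE, submit_btn, chatbox, input_box, and conversation_state, while history_to_messages and history_to_pairs are hypothetical helpers added for illustration:

    # Minimal sketch -- not part of this commit. Intended to replace
    # update_conversation and the click wiring inside `with gr.Blocks() as demo:`.

    def history_to_messages(history):
        # Interleave user and assistant turns so the model also sees its own
        # prior replies (the committed chatbot_response forwards user turns only).
        messages = [SYSTEM_MESSAGE]
        for turn in history:
            messages.append({"role": "user", "content": turn["user_input"]})
            if "assistant_reply" in turn:
                messages.append({"role": "assistant", "content": turn["assistant_reply"]})
        return messages

    def history_to_pairs(history):
        # gr.Chatbot renders a list of (user, assistant) pairs.
        return [(turn["user_input"], turn.get("assistant_reply", "")) for turn in history]

    def update_conversation(user_input, history):
        if user_input.strip():
            history.append({"user_input": user_input})
            output = pipe(
                history_to_messages(history),
                max_new_tokens=500,
                return_full_text=False,
                do_sample=False,
            )
            history[-1]["assistant_reply"] = output[0]["generated_text"]
        # Rendered pairs refresh the Chatbot, the raw history updates the State,
        # and "" clears the input box.
        return history_to_pairs(history), history, ""

    submit_btn.click(
        update_conversation,
        inputs=[input_box, conversation_state],
        outputs=[chatbox, conversation_state, input_box],
    )

With chatbox added to the click outputs, the build-time chatbox.update(...) call becomes unnecessary and can be dropped.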