import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import datetime
# Set Streamlit page configuration
st.set_page_config(
page_title="Qwen2.5-Coder Chat",
    page_icon="💬",
layout="wide",
)
# Title of the app
st.title("💬 Qwen2.5-Coder Chat Interface")
# Initialize session state for messages (store conversation history)
st.session_state.setdefault('messages', [])
# Load the model and tokenizer
@st.cache_resource
def load_model():
    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Hugging Face model id; swap in a smaller Qwen2.5-Coder checkpoint if memory is limited
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16,
device_map="auto",
        load_in_8bit=True  # Optional: requires the bitsandbytes package; reduces memory usage
)
return tokenizer, model
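
# Optional alternative loader: newer transformers releases prefer an explicit
# BitsAndBytesConfig over the load_in_8bit kwarg. This is a minimal sketch
# (it assumes bitsandbytes is installed) and is not called anywhere; swap it
# in for load_model() above if preferred.
from transformers import BitsAndBytesConfig

@st.cache_resource
def load_model_8bit(model_name="Qwen/Qwen2.5-Coder-32B-Instruct"):
    quant_config = BitsAndBytesConfig(load_in_8bit=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quant_config,
        device_map="auto",
    )
    return tokenizer, model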
# Load tokenizer and model with error handling
try:
with st.spinner("Loading model... This may take a while..."):
tokenizer, model = load_model()
except Exception as e:
st.error(f"Error loading model: {e}")
st.stop()
# Function to generate model response
def generate_response(messages, tokenizer, model, max_tokens=150, temperature=0.7, top_p=0.9):
"""
Generates a response from the model based on the conversation history.
Args:
messages (list): List of message dictionaries containing 'role' and 'content'.
tokenizer: The tokenizer instance.
model: The language model instance.
        max_tokens (int): Maximum number of new tokens to generate for the response.
temperature (float): Sampling temperature.
top_p (float): Nucleus sampling probability.
Returns:
str: The generated response text.
"""
    # Build a plain-text prompt from the full conversation history
    conversation = ""
    for message in messages:
        role = "You" if message['role'] == 'user' else "Qwen2.5-Coder"
        conversation += f"{role}: {message['content']}\n"
    # Cue the model to reply as the assistant
    conversation += "Qwen2.5-Coder:"
    # Tokenize the conversation (produces input_ids and attention_mask)
    inputs = tokenizer(conversation, return_tensors="pt").to(model.device)
    # Generate a response
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id
        )
# Decode the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# Extract the generated response after the conversation
generated_response = response.split("Qwen2.5-Coder:")[-1].strip()
return generated_response
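
# Optional alternative: Qwen2.5 instruct checkpoints ship a chat template, so
# the history can be formatted with tokenizer.apply_chat_template instead of
# manual string concatenation. A hedged sketch (not called anywhere); swap it
# in for generate_response() if preferred.
def generate_response_with_template(messages, tokenizer, model, max_tokens=150,
                                    temperature=0.7, top_p=0.9):
    chat = [{"role": m["role"], "content": m["content"]} for m in messages]
    prompt = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens after the prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()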
# Sidebar: generation settings (defined before the chat form so their values
# already exist when a message is submitted on the first run)
st.sidebar.header("Settings")
max_tokens = st.sidebar.slider(
    "Maximum Tokens",
    min_value=50,
    max_value=4096,
    value=512,
    step=256,
    help="Set the maximum number of tokens for the model's response."
)
temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.1,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Controls the randomness of the model's output."
)
top_p = st.sidebar.slider(
    "Top-p (Nucleus Sampling)",
    min_value=0.1,
    max_value=1.0,
    value=0.9,
    step=0.1,
    help="Controls the diversity of the model's output."
)
if st.sidebar.button("Clear Chat"):
    st.session_state['messages'] = []
    st.sidebar.success("Chat history cleared.")
# Main chat area (the settings live in the sidebar, so no extra column is needed)
chat_col = st.container()
with chat_col:
st.markdown("### Chat")
chat_container = st.container()
with chat_container:
for message in st.session_state['messages']:
time = message.get('timestamp', '')
if message['role'] == 'user':
st.markdown(f"**You:** {message['content']} _({time})_")
else:
st.markdown(f"**Qwen2.5-Coder:** {message['content']} _({time})_")
# Input area for user message
with st.form(key='chat_form', clear_on_submit=True):
user_input = st.text_area("You:", height=100)
submit_button = st.form_submit_button(label='Send')
if submit_button and user_input.strip():
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Append the user's message to the chat history
st.session_state['messages'].append({'role': 'user', 'content': user_input, 'timestamp': timestamp})
# Generate and append the model's response
try:
with st.spinner("Qwen2.5-Coder is typing..."):
response = generate_response(
st.session_state['messages'],
tokenizer,
model,
max_tokens=max_tokens,
temperature=temperature,
top_p=top_p
)
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            st.session_state['messages'].append({'role': 'assistant', 'content': response, 'timestamp': timestamp})
        except Exception as e:
            st.error(f"Error generating response: {e}")
        else:
            # Rerun so the freshly appended messages are rendered in the chat history above
            st.rerun()
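# To launch locally (assuming the file is saved as app.py and streamlit,
# transformers, torch, accelerate, and bitsandbytes are installed):
#   streamlit run app.py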