import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import datetime
# Page configuration
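# (st.set_page_config must be the first Streamlit command executed in the script)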
st.set_page_config(
    page_title="Qwen2.5-Coder Chat",
    page_icon="💬",
    layout="wide"
)
# Initialize session state for conversation history
if 'messages' not in st.session_state:
    st.session_state.messages = []
# Cache the model loading
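# (@st.cache_resource loads the weights once per server process and reuses them across reruns)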
@st.cache_resource
def load_model_and_tokenizer():
    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"

    # Configure 4-bit NF4 quantization (the bnb_4bit_* options only take effect
    # when load_in_4bit=True; they are ignored in 8-bit mode)
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=False,
    )

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    return tokenizer, model
# Main title
st.title("💬 Qwen2.5-Coder Chat")
# Sidebar settings
with st.sidebar:
    st.header("Settings")

    max_length = st.slider(
        "Maximum Length",
        min_value=64,
        max_value=4096,
        value=512,
        step=64,
        help="Maximum number of tokens to generate"
    )

    temperature = st.slider(
        "Temperature",
        min_value=0.1,
        max_value=2.0,
        value=0.7,
        step=0.1,
        help="Higher values make output more random, lower values more deterministic"
    )

    top_p = st.slider(
        "Top P",
        min_value=0.1,
        max_value=1.0,
        value=0.9,
        step=0.1,
        help="Nucleus sampling: higher values consider more tokens, lower values are more focused"
    )

    if st.button("Clear Conversation"):
        st.session_state.messages = []
        st.rerun()
# Load model with error handling
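# (note: the first run also downloads the checkpoint, which for a 32B model is
# tens of gigabytes, so the spinner below can stay up for quite a while)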
try:
    with st.spinner("Loading model... Please wait..."):
        tokenizer, model = load_model_and_tokenizer()
except Exception as e:
    st.error(f"Error loading model: {str(e)}")
    st.stop()
def generate_response(prompt, max_new_tokens=512, temperature=0.7, top_p=0.9):
    """Generate a response from the model."""
    try:
        # Tokenize input
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens; slicing the decoded string by
        # len(prompt) is unreliable because skip_special_tokens changes its length
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        return response
    except Exception as e:
        st.error(f"Error generating response: {str(e)}")
        return None
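# Optional: Qwen2.5 Instruct checkpoints ship with a chat template, so a prompt
# built with tokenizer.apply_chat_template will usually follow instructions
# better than the plain "Human:/Assistant:" transcript assembled below. A
# minimal sketch (build_chat_prompt is a hypothetical helper, not part of the
# original app):
#
# def build_chat_prompt(messages):
#     chat = [{"role": m["role"], "content": m["content"]} for m in messages]
#     return tokenizer.apply_chat_template(
#         chat, tokenize=False, add_generation_prompt=True
#     )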
# Display chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(f"{message['content']}\n\n_{message['timestamp']}_")
# Chat input
if prompt := st.chat_input("Ask me anything about coding..."):
    # Add user message to chat
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.session_state.messages.append({
        "role": "user",
        "content": prompt,
        "timestamp": timestamp
    })

    # Display user message
    with st.chat_message("user"):
        st.write(f"{prompt}\n\n_{timestamp}_")

    # Generate and display response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # Prepare conversation history
            conversation = ""
            for msg in st.session_state.messages:
                if msg["role"] == "user":
                    conversation += f"Human: {msg['content']}\n"
                else:
                    conversation += f"Assistant: {msg['content']}\n"
            conversation += "Assistant:"

            response = generate_response(
                conversation,
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p
            )

            if response:
                timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                st.write(f"{response}\n\n_{timestamp}_")

                # Add assistant response to chat history
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": response,
                    "timestamp": timestamp
                })
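# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py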