import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
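# To try this locally (assuming the file is saved as app.py, the usual
# entrypoint name for a Streamlit app): install streamlit, transformers,
# torch, and accelerate, then run `streamlit run app.py`.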
# Set Streamlit page configuration
st.set_page_config(
    page_title="Qwen2.5-Coder Chat",
    page_icon="💬",
    layout="wide",
)
# Title of the app
st.title("π¬ Qwen2.5-Coder Chat Interface")
# Initialize session state for messages (store conversation history)
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
# Load the model and tokenizer
@st.cache_resource
def load_model():
    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # replace with a local path if needed
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16, device_map="auto"
    )
    return tokenizer, model
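# Note (rough estimate, not from the original code): a 32B-parameter model in
# float16 needs on the order of 65 GB for the weights alone, so
# device_map="auto" (backed by accelerate) will shard across GPUs or offload
# to CPU when a single GPU cannot hold it.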
# Load tokenizer and model
with st.spinner("Loading model... This may take a while..."):
tokenizer, model = load_model()
# Function to generate model response
def generate_response(user_input, max_tokens=150, temperature=0.7, top_p=0.9):
    # Tokenize the user input
    inputs = tokenizer.encode(user_input, return_tensors="pt").to(model.device)
    # Generate a response; max_new_tokens bounds the reply itself, rather than
    # the combined prompt-plus-reply length that max_length would limit
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            num_return_sequences=1,
        )
    # Decode only the newly generated tokens, skipping the echoed prompt;
    # slicing by token count is more reliable than trimming the decoded string
    response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
    return response.strip()
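# Optional sketch, not wired into the UI below: Qwen2.5-Coder-Instruct is a
# chat-tuned model, so multi-turn prompts are normally built with the
# tokenizer's chat template rather than raw encoding. This variant (a
# hypothetical helper, not part of the original app) takes the full message
# history in the same {'role': ..., 'content': ...} format stored in
# st.session_state['messages'].
def generate_chat_response(history, max_tokens=150, temperature=0.7, top_p=0.9):
    inputs = tokenizer.apply_chat_template(
        history, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
        )
    # Decode only the newly generated tokens
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True).strip()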
# Sidebar: generation settings. These are defined before the chat handler so
# the slider values exist when a message is submitted; st.sidebar renders to
# the sidebar regardless of where it is called, so the extra column this app
# previously created for it was never populated.
st.sidebar.header("Settings")
max_tokens = st.sidebar.slider(
    "Maximum Tokens",
    min_value=64,
    max_value=4096,
    value=512,
    step=64,
    help="Maximum number of new tokens in the model's response."
)
temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.1,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Controls the randomness of the model's output."
)
top_p = st.sidebar.slider(
    "Top-p (Nucleus Sampling)",
    min_value=0.1,
    max_value=1.0,
    value=0.9,
    step=0.1,
    help="Controls the diversity of the model's output."
)
if st.sidebar.button("Clear Chat"):
    st.session_state['messages'] = []
    st.rerun()

# Main area: display the conversation so far
for message in st.session_state['messages']:
    if message['role'] == 'user':
        st.markdown(f"**You:** {message['content']}")
    else:
        st.markdown(f"**Qwen2.5-Coder:** {message['content']}")

# Input area for the user's message
with st.form(key='chat_form', clear_on_submit=True):
    user_input = st.text_area("You:", height=100)
    submit_button = st.form_submit_button(label='Send')

if submit_button and user_input:
    # Append the user's message to the chat history
    st.session_state['messages'].append({'role': 'user', 'content': user_input})
    # Generate the model's response using the sidebar settings
    with st.spinner("Qwen2.5-Coder is typing..."):
        response = generate_response(user_input, max_tokens, temperature, top_p)
    # Append the response, then rerun so the new messages are displayed
    st.session_state['messages'].append({'role': 'assistant', 'content': response})
    st.rerun()