Spaces:

Avinash109
/

qwen2.5

Sleeping

App Files Files Community

qwen2.5 / app.py

Avinash109

Create app.py

bba8253 verified 7 months ago

raw

history blame

4.22 kB

	import streamlit as st
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch

	# Page-level configuration: browser-tab title and icon, wide layout.
	st.set_page_config(page_title="Qwen2.5-Coder Chat", page_icon="💬", layout="wide")

	# Heading rendered at the top of the page.
	st.title("💬 Qwen2.5-Coder Chat Interface")

	# The conversation is kept in session state as a list of
	# {'role': ..., 'content': ...} dicts; create it if it does not exist yet.
	if "messages" not in st.session_state:
	    st.session_state["messages"] = []

	# Function to load the model
	@st.cache_resource
	def load_model(model_name="Qwen/Qwen2.5-Coder-32B-Instruct"):
	    """Load the tokenizer and causal language model for ``model_name``.

	    The function is decorated with ``st.cache_resource`` so Streamlit can
	    reuse the returned objects instead of loading them again on every
	    script rerun.

	    Args:
	        model_name: Hugging Face Hub id or local path of the checkpoint to
	            load. Defaults to the checkpoint this app was written for, so
	            calling ``load_model()`` with no arguments behaves as before.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        torch_dtype=torch.float16,  # load the weights in half precision
	        device_map='auto',  # let the loader choose device placement (GPU/CPU)
	    )
	    return tokenizer, model

	# Load the tokenizer/model pair, showing a spinner while the call runs.
	with st.spinner("Loading model... This may take a while..."):
	    loaded = load_model()
	tokenizer, model = loaded

	# Function to generate model response
	def generate_response(prompt, max_tokens=2048, temperature=0.7, top_p=0.9):
	    """Sample a completion for ``prompt`` from the loaded model.

	    Uses the module-level ``tokenizer`` and ``model``.

	    Args:
	        prompt: Text to send to the model.
	        max_tokens: Upper bound on the number of *newly generated* tokens
	            (the prompt's own tokens do not count against it).
	        temperature: Sampling temperature passed to ``model.generate``.
	        top_p: Nucleus-sampling threshold passed to ``model.generate``.

	    Returns:
	        The generated text only (prompt excluded), stripped of surrounding
	        whitespace.
	    """
	    inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)

	    # Inference only: no gradients are needed.
	    with torch.no_grad():
	        outputs = model.generate(
	            inputs,
	            # max_new_tokens limits the response itself; max_length would
	            # also count the prompt tokens and could leave no room to answer.
	            max_new_tokens=max_tokens,
	            temperature=temperature,
	            top_p=top_p,
	            do_sample=True,
	            num_return_sequences=1,
	        )

	    # The output sequence starts with the prompt tokens. Drop them by token
	    # count rather than by character count of the decoded text, which is
	    # only correct when decoding reproduces the prompt exactly.
	    new_tokens = outputs[0][inputs.shape[-1]:]
	    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


	def _rerun():
	    """Restart the script run so the updated chat state is rendered.

	    Prefers ``st.rerun`` and falls back to ``st.experimental_rerun`` on
	    Streamlit versions that only provide the older name.
	    """
	    rerun = getattr(st, "rerun", None) or st.experimental_rerun
	    rerun()


	# Sidebar settings. These are created before the chat handler below so that
	# their current values are available when a message is submitted.
	st.sidebar.header("Settings")
	max_tokens = st.sidebar.slider(
	    "Maximum Tokens",
	    min_value=512,
	    max_value=4096,
	    value=2048,
	    step=256,
	    help="Set the maximum number of tokens for the model's response.",
	)

	temperature = st.sidebar.slider(
	    "Temperature",
	    min_value=0.1,
	    max_value=1.0,
	    value=0.7,
	    step=0.1,
	    help="Controls the randomness of the model's output.",
	)

	top_p = st.sidebar.slider(
	    "Top-p (Nucleus Sampling)",
	    min_value=0.1,
	    max_value=1.0,
	    value=0.9,
	    step=0.1,
	    help="Controls the diversity of the model's output.",
	)

	if st.sidebar.button("Clear Chat"):
	    st.session_state['messages'] = []
	    _rerun()

	# Layout: a wide chat column plus a narrow second column. The second column
	# is intentionally left empty; the settings widgets live in st.sidebar.
	chat_col, sidebar_col = st.columns([4, 1])

	with chat_col:
	    # Display chat messages
	    for message in st.session_state['messages']:
	        if message['role'] == 'user':
	            st.markdown(f"You: {message['content']}")
	        else:
	            st.markdown(f"Qwen2.5-Coder: {message['content']}")

	    # Input area for user
	    with st.form(key='chat_form', clear_on_submit=True):
	        user_input = st.text_area("You:", height=100)
	        submit_button = st.form_submit_button(label='Send')

	    # Ignore empty or whitespace-only submissions.
	    if submit_button and user_input.strip():
	        # Append user message
	        st.session_state['messages'].append({'role': 'user', 'content': user_input})

	        # Generate the reply using the current sidebar settings.
	        with st.spinner("Qwen2.5-Coder is typing..."):
	            response = generate_response(
	                user_input,
	                max_tokens=max_tokens,
	                temperature=temperature,
	                top_p=top_p,
	            )
	        st.session_state['messages'].append({'role': 'assistant', 'content': response})

	        # Rerun to display new messages
	        _rerun()