import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Set Streamlit page configuration
st.set_page_config(
    page_title="Qwen2.5-Coder Chat",
    page_icon="💬",
    layout="wide",
)

# Title of the app
st.title("💬 Qwen2.5-Coder Chat Interface")

# Initialize session state for messages (store conversation history)
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
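
# Each history entry follows the common chat format, {'role': 'user' | 'assistant',
# 'content': '...'}, which is also what the tokenizer's chat template expects below.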

# Load the model and tokenizer
@st.cache_resource
def load_model():
    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with the correct model path
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
    return tokenizer, model
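
# Note: the 32B model in float16 needs roughly 64 GB of GPU memory. If that is
# not available, one option is a 4-bit quantized load (a sketch, assuming the
# optional bitsandbytes dependency is installed):
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name,
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       device_map="auto",
#   )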

# Load tokenizer and model
with st.spinner("Loading model... This may take a while..."):
    tokenizer, model = load_model()

# Function to generate the model's response from the conversation history
def generate_response(messages, max_tokens=150, temperature=0.7, top_p=0.9):
    # Build the prompt with the model's chat template so the Instruct model
    # sees the full conversation in the format it was trained on
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # Generate a response; max_new_tokens bounds only the reply, whereas
    # max_length would also count the prompt tokens
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            num_return_sequences=1,
        )

    # Decode only the newly generated tokens, dropping the prompt
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True).strip()
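
# For token-by-token output instead of waiting on the full reply, transformers'
# TextIteratorStreamer can be paired with st.write_stream (a sketch, not wired
# into the form logic below):
#
#   from threading import Thread
#   from transformers import TextIteratorStreamer
#   streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
#   Thread(target=model.generate,
#          kwargs=dict(inputs=inputs, streamer=streamer, max_new_tokens=max_tokens)).start()
#   st.write_stream(streamer)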

# Sidebar: generation settings. These are defined before the chat area so the
# slider values already exist when a submitted message is processed.
st.sidebar.header("Settings")
max_tokens = st.sidebar.slider(
    "Maximum Tokens",
    min_value=64,
    max_value=4096,
    value=512,
    step=64,
    help="Set the maximum number of tokens for the model's response."
)

temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.1,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Controls the randomness of the model's output."
)

top_p = st.sidebar.slider(
    "Top-p (Nucleus Sampling)",
    min_value=0.1,
    max_value=1.0,
    value=0.9,
    step=0.1,
    help="Controls the diversity of the model's output."
)

if st.sidebar.button("Clear Chat"):
    st.session_state['messages'] = []
    st.rerun()

# Main area: display the chat history
for message in st.session_state['messages']:
    if message['role'] == 'user':
        st.markdown(f"**You:** {message['content']}")
    else:
        st.markdown(f"**Qwen2.5-Coder:** {message['content']}")

# Input area for the user's message
with st.form(key='chat_form', clear_on_submit=True):
    user_input = st.text_area("You:", height=100)
    submit_button = st.form_submit_button(label='Send')

if submit_button and user_input:
    # Append the user's message to the chat history
    st.session_state['messages'].append({'role': 'user', 'content': user_input})

    # Generate the model's response from the full history and the sidebar settings
    with st.spinner("Qwen2.5-Coder is typing..."):
        response = generate_response(
            st.session_state['messages'], max_tokens, temperature, top_p
        )

    # Append the model's response to the chat history
    st.session_state['messages'].append({'role': 'assistant', 'content': response})

    # Rerun the app to display the new messages
    st.rerun()
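
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py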