Update app.py

app.py CHANGED
@@ -29,7 +29,7 @@ model_info = {
     },
 }
 
-def format_prompt(message, custom_instructions=None):
+def format_prompt(message, conversation_history, custom_instructions=None):
     prompt = ""
     if custom_instructions:
         prompt += f"\[INST\] {custom_instructions} \[/INST\]"
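The diff truncates the body of format_prompt, so beyond the signature and the custom_instructions branch everything in the sketch below is an assumption: one plausible way a function with this signature interleaves prior turns using the Mistral-style [INST]/[/INST] markers the visible line uses (the committed code writes them with escaped brackets, \[INST\]).

    # Hypothetical sketch -- only the signature and the custom_instructions
    # branch appear in the diff; the history handling is assumed.
    def format_prompt(message, conversation_history, custom_instructions=None):
        prompt = ""
        if custom_instructions:
            prompt += f"[INST] {custom_instructions} [/INST]"
        # The caller appends the current message to the history before calling,
        # so the last entry is skipped here and re-added explicitly at the end.
        for role, content in conversation_history[:-1]:
            if role == "user":
                prompt += f"[INST] {content} [/INST]"
            else:
                prompt += f" {content}"
        prompt += f"[INST] {message} [/INST]"
        return prompt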
@@ -104,7 +104,7 @@ if prompt := st.chat_input(f"Hi I'm {selected_model}, How can I help you today?"
     st.session_state.messages.append({"role": "user", "content": prompt})
     conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
 
-    formatted_text = format_prompt(prompt, custom_instruction)
+    formatted_text = format_prompt(prompt, conversation_history, custom_instruction)
 
     max_tokens = {
         "LegacyLift🚀": 32000,
@@ -113,14 +113,18 @@ if prompt := st.chat_input(f"Hi I'm {selected_model}, How can I help you today?"
     }
 
     # Calculate available tokens for new content
-    input_tokens = len(prompt.split())
-    max_new_tokens =
+    input_tokens = len(formatted_text.split())
+    max_new_tokens = max_tokens[selected_model] - input_tokens
+
+    # Ensure max_new_tokens is within the model's limit
+    if selected_model == "RetroRecode🔄":
+        max_new_tokens = min(max_new_tokens, 3000)
 
     with st.chat_message("assistant"):
         client = InferenceClient(
             model=model_links[selected_model], )
         output = client.text_generation(
-            prompt,
+            formatted_text,
             temperature=temp_values, # 0.5
             max_new_tokens=max_new_tokens,
             stream=True
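A caveat on the budget arithmetic above: len(formatted_text.split()) counts whitespace-separated words, which is only a rough proxy for model tokens, since subword tokenizers usually emit more tokens than words and the computed budget can therefore overshoot the real context window. A sketch of an exact count, assuming transformers is installed and that model_links[selected_model] holds a Hub repo id with a tokenizer:

    # Assumed alternative, not what this commit does: count real tokens with
    # the model's own tokenizer instead of splitting on whitespace.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(model_links[selected_model])
    input_tokens = len(tokenizer.encode(formatted_text))
    max_new_tokens = max_tokens[selected_model] - input_tokens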
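Since huggingface_hub's text_generation returns an iterator of text chunks when stream=True, the assistant block presumably renders the reply incrementally. The rendering code is not part of this diff; a common Streamlit pattern for consuming the stream looks like this:

    # Sketch: stream the reply into the chat bubble as chunks arrive.
    # `output` is the iterator returned by client.text_generation(stream=True).
    response = ""
    placeholder = st.empty()
    for chunk in output:
        response += chunk
        placeholder.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})

On Streamlit 1.31 or newer, the manual loop can be replaced with response = st.write_stream(output).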