Commit c15c518 · 1 Parent(s): fd97c8c
fixing model loading
app.py CHANGED
@@ -131,7 +131,7 @@ with st.sidebar:
         else:
             synthesis_num_beams = st.slider("Num Beams", 1, 4, 1, key='synthesis_num_beams')
 
-        synthesis_max_new_tokens = st.slider("Max New Tokens", 100, 2000,
+        synthesis_max_new_tokens = st.slider("Max New Tokens", 100, 2000, 1500, step=50, key='synthesis_max_new_tokens')
     else:
         # Temperature
         synthesis_api_temperature = st.slider("Temperature", 0.0, .3, .5, help="Defines the randomness in the next token prediction. Lower: More predictable and focused. Higher: More adventurous and diverse.", key='a2t')
@@ -300,6 +300,15 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
     # synthesis responses
     #-------------------------
     if st.session_state.synthesis_model in ["LLaMA-3.2-3B", "LLaMA-3.2-11B"]:
+
+        if st.session_state.expert_model == "LLaMA-3.2-11B":
+            model_s = st.session_state.llama_model
+            tokenizer_s = st.session_state.llama_tokenizer
+
+        elif st.session_state.expert_model == "LLaMA-3.2-3B":
+            model_s = st.session_state.llama_model_3B
+            tokenizer_s = st.session_state.llama_tokenizer_3B
+
         synthesis_prompt = f"""
         Question:
         {st.session_state.question}
@@ -318,8 +327,8 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
         ]
 
         synthesis_answer = generate_response(
-            model=
-            tokenizer=
+            model=model_s,
+            tokenizer=tokenizer_s,
             messages=messages,
             tokenizer_max_length=30000,
             do_sample=synthesis_do_sample,
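
The commit assumes the LLaMA models and tokenizers are already cached in st.session_state (llama_model / llama_tokenizer and the _3B counterparts) and only selects between them before calling generate_response. A minimal sketch of how such session-state entries could be populated is shown below; the checkpoint ID, loading flags, and helper name are assumptions for illustration, not the Space's actual loading code.

# Sketch only: how st.session_state.llama_model_3B / llama_tokenizer_3B (and, analogously,
# the 11B entries) might be filled once per session. Repo ID and dtype/device settings
# are assumptions.
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

@st.cache_resource  # keep the weights in memory across Streamlit reruns
def load_causal_lm(repo_id: str):
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForCausalLM.from_pretrained(
        repo_id, torch_dtype=torch.bfloat16, device_map="auto"
    )
    return model, tokenizer

if "llama_model_3B" not in st.session_state:
    # assumed checkpoint; the 11B entries would be loaded the same way
    st.session_state.llama_model_3B, st.session_state.llama_tokenizer_3B = load_causal_lm(
        "meta-llama/Llama-3.2-3B-Instruct"
    )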
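
The diff also shows which keyword arguments generate_response now receives (model, tokenizer, messages, tokenizer_max_length, do_sample, plus the sidebar sampling settings). The helper's real implementation lives elsewhere in app.py; the following is only a hypothetical sketch of how those arguments would typically be consumed with a Hugging Face chat model.

# Hypothetical sketch of generate_response; signature and defaults are assumptions.
def generate_response(model, tokenizer, messages, tokenizer_max_length,
                      do_sample=False, num_beams=1, temperature=1.0, max_new_tokens=1500):
    # Render the chat messages with the tokenizer's chat template and truncate the prompt.
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        truncation=True,
        max_length=tokenizer_max_length,
    ).to(model.device)

    output_ids = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        num_beams=num_beams,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Drop the prompt tokens and decode only the newly generated answer.
    return tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)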