Update app.py
Browse files
app.py
CHANGED
@@ -14,14 +14,16 @@ st.set_page_config(
|
|
14 |
layout="centered"
|
15 |
)
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
#"amiguel/LoraPEFT"
|
20 |
-
# "amiguel/classItem-FT-llama-3-1-8b-instruct"
|
21 |
|
22 |
# Title with rocket emojis
|
23 |
st.title("🚀 WizNerd Insp 🚀")
|
24 |
|
|
|
|
|
|
|
|
|
25 |
# Sidebar configuration
|
26 |
with st.sidebar:
|
27 |
st.header("Authentication π")
|
@@ -113,7 +115,7 @@ def generate_with_kv_cache(prompt, file_context, use_cache=True):
|
|
113 |
# Display chat messages
|
114 |
for message in st.session_state.messages:
|
115 |
try:
|
116 |
-
avatar =
|
117 |
with st.chat_message(message["role"], avatar=avatar):
|
118 |
st.markdown(message["content"])
|
119 |
except:
|
@@ -139,7 +141,7 @@ if prompt := st.chat_input("Ask your inspection question..."):
|
|
139 |
tokenizer = st.session_state.tokenizer
|
140 |
|
141 |
# Add user message
|
142 |
-
with st.chat_message("user", avatar=
|
143 |
st.markdown(prompt)
|
144 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
145 |
|
@@ -149,7 +151,7 @@ if prompt := st.chat_input("Ask your inspection question..."):
|
|
149 |
# Generate response with KV caching
|
150 |
if model and tokenizer:
|
151 |
try:
|
152 |
-
with st.chat_message("assistant", avatar=
|
153 |
start_time = time.time()
|
154 |
streamer = generate_with_kv_cache(prompt, file_context, use_cache=True)
|
155 |
|
@@ -161,9 +163,24 @@ if prompt := st.chat_input("Ask your inspection question..."):
|
|
161 |
full_response += cleaned_chunk + " "
|
162 |
response_container.markdown(full_response + "▌", unsafe_allow_html=True)
|
163 |
|
164 |
-
#
|
165 |
end_time = time.time()
|
166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
response_container.markdown(full_response)
|
169 |
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
|
|
14 |
layout="centered"
|
15 |
)
|
16 |
|
17 |
+
# Model name
|
18 |
+
MODEL_NAME = "amiguel/classFinetuned_deepSeek"
|
|
|
|
|
19 |
|
20 |
# Title with rocket emojis
|
21 |
st.title("🚀 WizNerd Insp 🚀")
|
22 |
|
23 |
+
# Configure Avatars
|
24 |
+
USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
|
25 |
+
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
|
26 |
+
|
27 |
# Sidebar configuration
|
28 |
with st.sidebar:
|
29 |
st.header("Authentication π")
|
|
|
115 |
# Display chat messages
|
116 |
for message in st.session_state.messages:
|
117 |
try:
|
118 |
+
avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
|
119 |
with st.chat_message(message["role"], avatar=avatar):
|
120 |
st.markdown(message["content"])
|
121 |
except:
|
|
|
141 |
tokenizer = st.session_state.tokenizer
|
142 |
|
143 |
# Add user message
|
144 |
+
with st.chat_message("user", avatar=USER_AVATAR):
|
145 |
st.markdown(prompt)
|
146 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
147 |
|
|
|
151 |
# Generate response with KV caching
|
152 |
if model and tokenizer:
|
153 |
try:
|
154 |
+
with st.chat_message("assistant", avatar=BOT_AVATAR):
|
155 |
start_time = time.time()
|
156 |
streamer = generate_with_kv_cache(prompt, file_context, use_cache=True)
|
157 |
|
|
|
163 |
full_response += cleaned_chunk + " "
|
164 |
response_container.markdown(full_response + "▌", unsafe_allow_html=True)
|
165 |
|
166 |
+
# Calculate performance metrics
|
167 |
end_time = time.time()
|
168 |
+
input_tokens = len(tokenizer(prompt)["input_ids"])
|
169 |
+
output_tokens = len(tokenizer(full_response)["input_ids"])
|
170 |
+
speed = output_tokens / (end_time - start_time)
|
171 |
+
|
172 |
+
# Calculate costs (hypothetical pricing model)
|
173 |
+
input_cost = (input_tokens / 1000000) * 5 # $5 per million input tokens
|
174 |
+
output_cost = (output_tokens / 1000000) * 15 # $15 per million output tokens
|
175 |
+
total_cost_usd = input_cost + output_cost
|
176 |
+
total_cost_aoa = total_cost_usd * 1160 # Convert to AOA (Angolan Kwanza)
|
177 |
+
|
178 |
+
# Display metrics
|
179 |
+
st.caption(
|
180 |
+
f"π Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
|
181 |
+
f"🚀 Speed: {speed:.1f}t/s | 💰 Cost (USD): ${total_cost_usd:.4f} | "
|
182 |
+
f"💵 Cost (AOA): {total_cost_aoa:.4f}"
|
183 |
+
)
|
184 |
|
185 |
response_container.markdown(full_response)
|
186 |
st.session_state.messages.append({"role": "assistant", "content": full_response})
|