amiguel committed on
Commit
a454488
·
verified ·
1 Parent(s): d8d4c3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -9
app.py CHANGED
@@ -14,14 +14,16 @@ st.set_page_config(
14
  layout="centered"
15
  )
16
 
17
- MODEL_NAME = "amiguel/SmolLM2-360M-concise-reasoning"
18
- #"amiguel/optimizedModelListing6.1"
19
- #"amiguel/LoraPEFT"
20
- # "amiguel/classItem-FT-llama-3-1-8b-instruct"
21
 
22
  # Title with rocket emojis
23
  st.title("🚀 WizNerd Insp 🚀")
24
 
 
 
 
 
25
  # Sidebar configuration
26
  with st.sidebar:
27
  st.header("Authentication 🔒")
@@ -113,7 +115,7 @@ def generate_with_kv_cache(prompt, file_context, use_cache=True):
113
  # Display chat messages
114
  for message in st.session_state.messages:
115
  try:
116
- avatar = "👤" if message["role"] == "user" else "🤖"
117
  with st.chat_message(message["role"], avatar=avatar):
118
  st.markdown(message["content"])
119
  except:
@@ -139,7 +141,7 @@ if prompt := st.chat_input("Ask your inspection question..."):
139
  tokenizer = st.session_state.tokenizer
140
 
141
  # Add user message
142
- with st.chat_message("user", avatar="👤"):
143
  st.markdown(prompt)
144
  st.session_state.messages.append({"role": "user", "content": prompt})
145
 
@@ -149,7 +151,7 @@ if prompt := st.chat_input("Ask your inspection question..."):
149
  # Generate response with KV caching
150
  if model and tokenizer:
151
  try:
152
- with st.chat_message("assistant", avatar="🤖"):
153
  start_time = time.time()
154
  streamer = generate_with_kv_cache(prompt, file_context, use_cache=True)
155
 
@@ -161,9 +163,24 @@ if prompt := st.chat_input("Ask your inspection question..."):
161
  full_response += cleaned_chunk + " "
162
  response_container.markdown(full_response + "▌", unsafe_allow_html=True)
163
 
164
- # Display metrics
165
  end_time = time.time()
166
- st.caption(f"Generated in {end_time - start_time:.2f}s using KV caching")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
  response_container.markdown(full_response)
169
  st.session_state.messages.append({"role": "assistant", "content": full_response})
 
14
  layout="centered"
15
  )
16
 
17
+ # Model name
18
+ MODEL_NAME = "amiguel/classFinetuned_deepSeek"
 
 
19
 
20
  # Title with rocket emojis
21
  st.title("🚀 WizNerd Insp 🚀")
22
 
23
+ # Configure Avatars
24
+ USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
25
+ BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
26
+
27
  # Sidebar configuration
28
  with st.sidebar:
29
  st.header("Authentication 🔒")
 
115
  # Display chat messages
116
  for message in st.session_state.messages:
117
  try:
118
+ avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
119
  with st.chat_message(message["role"], avatar=avatar):
120
  st.markdown(message["content"])
121
  except:
 
141
  tokenizer = st.session_state.tokenizer
142
 
143
  # Add user message
144
+ with st.chat_message("user", avatar=USER_AVATAR):
145
  st.markdown(prompt)
146
  st.session_state.messages.append({"role": "user", "content": prompt})
147
 
 
151
  # Generate response with KV caching
152
  if model and tokenizer:
153
  try:
154
+ with st.chat_message("assistant", avatar=BOT_AVATAR):
155
  start_time = time.time()
156
  streamer = generate_with_kv_cache(prompt, file_context, use_cache=True)
157
 
 
163
  full_response += cleaned_chunk + " "
164
  response_container.markdown(full_response + "▌", unsafe_allow_html=True)
165
 
166
+ # Calculate performance metrics
167
  end_time = time.time()
168
+ input_tokens = len(tokenizer(prompt)["input_ids"])
169
+ output_tokens = len(tokenizer(full_response)["input_ids"])
170
+ speed = output_tokens / (end_time - start_time)
171
+
172
+ # Calculate costs (hypothetical pricing model)
173
+ input_cost = (input_tokens / 1000000) * 5 # $5 per million input tokens
174
+ output_cost = (output_tokens / 1000000) * 15 # $15 per million output tokens
175
+ total_cost_usd = input_cost + output_cost
176
+ total_cost_aoa = total_cost_usd * 1160 # Convert to AOA (Angolan Kwanza)
177
+
178
+ # Display metrics
179
+ st.caption(
180
+ f"🔑 Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
181
+ f"🕒 Speed: {speed:.1f}t/s | 💰 Cost (USD): ${total_cost_usd:.4f} | "
182
+ f"💵 Cost (AOA): {total_cost_aoa:.4f}"
183
+ )
184
 
185
  response_container.markdown(full_response)
186
  st.session_state.messages.append({"role": "assistant", "content": full_response})