Spaces:

amiguel
/

amiguel-fintune_naming_model

Sleeping

App Files Community

amiguel committed on Mar 2

Commit

a454488

verified ·

1 Parent(s): d8d4c3a

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -9

app.py CHANGED Viewed

@@ -14,14 +14,16 @@ st.set_page_config(
     layout="centered"
 )
-MODEL_NAME = "amiguel/SmolLM2-360M-concise-reasoning"
-#"amiguel/optimizedModelListing6.1"
-#"amiguel/LoraPEFT"
-# "amiguel/classItem-FT-llama-3-1-8b-instruct"
 # Title with rocket emojis
 st.title("🚀 WizNerd Insp 🚀")
 # Sidebar configuration
 with st.sidebar:
     st.header("Authentication 🔒")
@@ -113,7 +115,7 @@ def generate_with_kv_cache(prompt, file_context, use_cache=True):
 # Display chat messages
 for message in st.session_state.messages:
     try:
-        avatar = "👤" if message["role"] == "user" else "🤖"
         with st.chat_message(message["role"], avatar=avatar):
             st.markdown(message["content"])
     except:
@@ -139,7 +141,7 @@ if prompt := st.chat_input("Ask your inspection question..."):
     tokenizer = st.session_state.tokenizer
     # Add user message
-    with st.chat_message("user", avatar="👤"):
         st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
@@ -149,7 +151,7 @@ if prompt := st.chat_input("Ask your inspection question..."):
     # Generate response with KV caching
     if model and tokenizer:
         try:
-            with st.chat_message("assistant", avatar="🤖"):
                 start_time = time.time()
                 streamer = generate_with_kv_cache(prompt, file_context, use_cache=True)
@@ -161,9 +163,24 @@ if prompt := st.chat_input("Ask your inspection question..."):
                     full_response += cleaned_chunk + " "
                     response_container.markdown(full_response + "▌", unsafe_allow_html=True)
-                # Display metrics
                 end_time = time.time()
-                st.caption(f"Generated in {end_time - start_time:.2f}s using KV caching")
                 response_container.markdown(full_response)
                 st.session_state.messages.append({"role": "assistant", "content": full_response})

     layout="centered"
 )
+# Model name
+MODEL_NAME = "amiguel/classFinetuned_deepSeek"
 # Title with rocket emojis
 st.title("🚀 WizNerd Insp 🚀")
+# Configure Avatars
+USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
+BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
 # Sidebar configuration
 with st.sidebar:
     st.header("Authentication 🔒")
 # Display chat messages
 for message in st.session_state.messages:
     try:
+        avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
         with st.chat_message(message["role"], avatar=avatar):
             st.markdown(message["content"])
     except:
     tokenizer = st.session_state.tokenizer
     # Add user message
+    with st.chat_message("user", avatar=USER_AVATAR):
         st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
     # Generate response with KV caching
     if model and tokenizer:
         try:
+            with st.chat_message("assistant", avatar=BOT_AVATAR):
                 start_time = time.time()
                 streamer = generate_with_kv_cache(prompt, file_context, use_cache=True)
                     full_response += cleaned_chunk + " "
                     response_container.markdown(full_response + "▌", unsafe_allow_html=True)
+                # Calculate performance metrics
                 end_time = time.time()
+                input_tokens = len(tokenizer(prompt)["input_ids"])
+                output_tokens = len(tokenizer(full_response)["input_ids"])
+                speed = output_tokens / (end_time - start_time)
+                # Calculate costs (hypothetical pricing model)
+                input_cost = (input_tokens / 1000000) * 5  # $5 per million input tokens
+                output_cost = (output_tokens / 1000000) * 15  # $15 per million output tokens
+                total_cost_usd = input_cost + output_cost
+                total_cost_aoa = total_cost_usd * 1160  # Convert to AOA (Angolan Kwanza)
+                # Display metrics
+                st.caption(
+                    f"🔑 Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
+                    f"🕒 Speed: {speed:.1f}t/s | 💰 Cost (USD): ${total_cost_usd:.4f} | "
+                    f"💵 Cost (AOA): {total_cost_aoa:.4f}"
+                )
                 response_container.markdown(full_response)
                 st.session_state.messages.append({"role": "assistant", "content": full_response})