Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Chat turn: ensure the model is loaded, echo the user prompt, then stream a
# generated reply while tracking token / latency / cost metrics.
#
# NOTE(review): this is a mid-file chunk — `model_type`, `selected_model`,
# `hf_token`, `prompt`, `uploaded_file`, `USER_AVATAR`, `BOT_AVATAR`,
# `load_model`, `process_file` and `generate_with_kv_cache` are defined
# earlier in the file. Emoji below were reconstructed from mojibake in the
# scraped source — confirm against the original app.py.
# ---------------------------------------------------------------------------

# Hypothetical pricing model, hoisted so the rates live in one place.
USD_PER_MILLION_INPUT_TOKENS = 5    # $5 per million input tokens
USD_PER_MILLION_OUTPUT_TOKENS = 15  # $15 per million output tokens
USD_TO_AOA_RATE = 1160              # Angolan Kwanza — TODO confirm rate source

# (Re)load the model when none is cached or the user switched model types.
if "model" not in st.session_state or st.session_state.get("model_type") != model_type:
    model_data = load_model(hf_token, model_type, selected_model)
    if model_data is None:
        st.error("Failed to load model. Please check your token and try again.")
        st.stop()

    st.session_state.model, st.session_state.tokenizer = model_data
    st.session_state.model_type = model_type

model = st.session_state.model
tokenizer = st.session_state.tokenizer

# Echo the user message and record it in the conversation history.
with st.chat_message("user", avatar=USER_AVATAR):
    st.markdown(prompt)
st.session_state.messages.append({"role": "user", "content": prompt})

# Extract text context from the (optional) uploaded file.
file_context = process_file(uploaded_file)

# Generate the assistant response with KV caching.
if model and tokenizer:
    try:
        with st.chat_message("assistant", avatar=BOT_AVATAR):
            start_time = time.time()
            streamer = generate_with_kv_cache(
                prompt, file_context, model, tokenizer, use_cache=True
            )

            response_container = st.empty()
            full_response = ""

            for chunk in streamer:
                # Drop chain-of-thought markers but keep the chunk's own
                # whitespace. FIX: the original `.strip()`-ed every chunk and
                # re-joined with single spaces, destroying newlines and
                # intra-word spacing in the streamed output.
                full_response += chunk.replace("<think>", "").replace("</think>", "")
                # "▌" is the streaming-cursor glyph; plain markdown suffices.
                # FIX: the original passed unsafe_allow_html=True, which lets
                # untrusted model output inject raw HTML into the page.
                response_container.markdown(full_response + "▌")

            # Performance metrics.
            end_time = time.time()
            # FIX: guard against division by zero on a (near-)instant reply.
            elapsed = max(end_time - start_time, 1e-9)
            input_tokens = len(tokenizer(prompt)["input_ids"])
            output_tokens = len(tokenizer(full_response)["input_ids"])
            speed = output_tokens / elapsed

            # Hypothetical cost estimate (see constants above).
            input_cost = (input_tokens / 1_000_000) * USD_PER_MILLION_INPUT_TOKENS
            output_cost = (output_tokens / 1_000_000) * USD_PER_MILLION_OUTPUT_TOKENS
            total_cost_usd = input_cost + output_cost
            total_cost_aoa = total_cost_usd * USD_TO_AOA_RATE

            # Display metrics.
            st.caption(
                f"📊 Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
                f"🚀 Speed: {speed:.1f}t/s | 💰 Cost (USD): ${total_cost_usd:.4f} | "
                f"💵 Cost (AOA): {total_cost_aoa:.4f}"
            )

            # Replace the streaming cursor with the final text and persist it.
            response_container.markdown(full_response)
            st.session_state.messages.append(
                {"role": "assistant", "content": full_response}
            )

    except Exception as e:  # surface generation failures to the user
        st.error(f"⚡ Generation error: {str(e)}")
else:
    st.error("🤖 Model not loaded!")