re: two models
app.py CHANGED
@@ -12,19 +12,20 @@ from keras.layers import Layer
 from transformers import DebertaV2Tokenizer, TFAutoModel
 import streamlit as st
 from google import genai
-import pandas as pd
-
 torch.classes.__path__ = []
 # Download tokenizer data once
 nltk.download('punkt', quiet=True)
 
 # === Cleaning Function ===
 def clean_response(text: str) -> str:
+    # Simple markdown cleaner
     text = re.sub(r"[*_`#>\-\[\]()]", "", text)
     text = re.sub(r"\s+", " ", text)
     return text.strip()
 
-# ===
+# === Gemini API ===
+
+
 def get_response_from_gemini(prompt: str, key) -> str:
     gemini_client = genai.Client(api_key=key)
     response = gemini_client.models.generate_content(
@@ -33,6 +34,7 @@ def get_response_from_gemini(prompt: str, key) -> str:
     )
     return response.text.strip()
 
+# === DeepSeek API ===
 def get_response_from_deepseek(prompt: str, key) -> str:
     response = requests.post(
         url="https://openrouter.ai/api/v1/chat/completions",
@@ -44,36 +46,6 @@ def get_response_from_deepseek(prompt: str, key) -> str:
     )
     return response.json()["choices"][0]["message"]["content"]
 
-def get_response_from_llamafourscout(prompt: str, key) -> str:
-    response = requests.post(
-        url="https://openrouter.ai/api/v1/chat/completions",
-        headers={"Authorization": f"Bearer {key}"},
-        data=json.dumps({
-            "model": "meta-llama/llama-4-scout:free",
-            "messages": [{"role": "user", "content": prompt}]
-        })
-    )
-    return response.json()["choices"][0]["message"]["content"]
-
-def get_response_from_mistralsmall(prompt: str, key) -> str:
-    response = requests.post(
-        url="https://openrouter.ai/api/v1/chat/completions",
-        headers={"Authorization": f"Bearer {key}"},
-        data=json.dumps({
-            "model": "mistralai/mistral-small-3.1-24b-instruct:free",
-            "messages": [{"role": "user", "content": prompt}]
-        })
-    )
-    return response.json()["choices"][0]["message"]["content"]
-
-# === Model Function Mapping ===
-MODEL_MAP = {
-    "Gemini": get_response_from_gemini,
-    "DeepSeek": get_response_from_deepseek,
-    "LLaMA 4 Scout": get_response_from_llamafourscout,
-    "Mistral Small": get_response_from_mistralsmall,
-}
-
 # === Metrics ===
 def calculate_entropy(text: str) -> float:
     try:
@@ -90,7 +62,8 @@ def calculate_entropy(text: str) -> float:
 def calculate_ttr(text: str) -> float:
     try:
         tokens = [token.lower() for token in word_tokenize(text) if token.isalnum()]
-        return len(set(tokens
+        return len(set(tokens
+        )) / len(tokens) if tokens else -999999
     except:
         return -999999
 
@@ -146,13 +119,10 @@ def preprocess_inputs(prompt: str, response_a: str, response_b: str, tokenizer,
     ]).reshape(1, -1).astype(np.float32)
     return encoded["input_ids"], encoded["attention_mask"], metrics
 
-# === History Buffer ===
-if "history" not in st.session_state:
-    st.session_state.history = []
-
 # === Streamlit UI ===
 st.set_page_config(page_title="LMSYS Demo", layout="wide")
 
+# Optional styling (vintage theme)
 st.markdown(
     """
     <style>
@@ -182,45 +152,42 @@ st.markdown(
 st.title("Predicting Human Preference : Gemini vs DeepSeek")
 st.write("As part of this demo, we make use of two SOTA LLMs : [Gemini 2.5 Pro](https://deepmind.google/technologies/gemini/pro/) and [DeepSeek R1](https://api-docs.deepseek.com/news/news250120) and make them compete against each other on a given prompt (to be entered through the sidebar)")
 st.write("Using our proposed hybrid model, we predict which response is more suited to be preferred by a human user.")
-
 st.sidebar.title("Ask a Question!")
-model_choices = list(MODEL_MAP.keys())
-model_a_name = st.sidebar.selectbox("Choose Model A", model_choices, index=0)
-model_b_name = st.sidebar.selectbox("Choose Model B", model_choices, index=1)
 question = st.sidebar.text_area("Enter your question:", key="prompt_input")
 
+# Init session state
 if "generated" not in st.session_state:
     st.session_state["generated"] = False
 
+# Generate responses
 if st.sidebar.button("Generate Responses") and question:
     with st.spinner("Generating LLM responses..."):
-        raw_a =
-        raw_b =
-
-        st.session_state
-
-
-
-            "response_b_clean": clean_response(raw_b),
-            "generated": True,
-            "prediction": None,
-            "model_a_name": model_a_name,
-            "model_b_name": model_b_name
-        })
+        raw_a = get_response_from_gemini(question, st.secrets["GEMINI_API_KEY"])
+        raw_b = get_response_from_deepseek(question, st.secrets["OPENROUTER_API_KEY"])
+
+        st.session_state["response_a_raw"] = raw_a
+        st.session_state["response_b_raw"] = raw_b
+        st.session_state["response_a_clean"] = clean_response(raw_a)
+        st.session_state["response_b_clean"] = clean_response(raw_b)
 
+        st.session_state["generated"] = True
+        st.session_state["prediction"] = None
+
+# Display and interact
 if st.session_state["generated"]:
-    tab1, tab2, tab3
+    tab1, tab2, tab3 = st.tabs(["Predictions","Model Architecture", "📈 Metric Curves"])
 
     with tab1:
         st.subheader("Model Responses")
         col1, col2 = st.columns(2)
         with col1:
-            st.markdown(
+            st.markdown("#### Gemini")
             st.markdown(st.session_state["response_a_raw"])
         with col2:
-            st.markdown(
+            st.markdown("#### DeepSeek")
            st.markdown(st.session_state["response_b_raw"])
 
+
         if st.button("Predict Winner"):
             with st.spinner("Running model..."):
                 input_ids, attention_mask, num_features = preprocess_inputs(
@@ -231,23 +198,9 @@ if st.session_state["generated"]:
                 )
                 predictions = model.predict([input_ids, attention_mask, num_features], verbose=0)
                 predicted_class = np.argmax(predictions, axis=-1)[0]
-                label_map = {0:
+                label_map = {0: "Gemini!", 1: "DeepSeek!", 2: "Tie!"}
                 st.session_state["prediction"] = label_map[predicted_class]
 
-                # Add to history
-                st.session_state.history.append({
-                    "Prompt": question,
-                    "Model A": st.session_state['model_a_name'],
-                    "Model B": st.session_state['model_b_name'],
-                    "Response A": st.session_state["response_a_raw"],
-                    "Response B": st.session_state["response_b_raw"],
-                    "Prediction": label_map[predicted_class],
-                    "FRES_A": num_features[0][0], "FRES_B": num_features[0][1],
-                    "DC_A": num_features[0][2], "DC_B": num_features[0][3],
-                    "TTR_A": num_features[0][4], "TTR_B": num_features[0][5],
-                    "Entropy_A": num_features[0][6], "Entropy_B": num_features[0][7]
-                })
-
         if st.session_state.get("prediction"):
             st.success(f"🤖 Model Prediction: {st.session_state['prediction']}")
 
@@ -261,54 +214,34 @@
         st.markdown("### RNN")
         col1, col2 = st.columns(2)
         with col1:
-            st.image("images/plots/rnn_baseline_acc.png", caption="Accuracy - RNN",
+            st.image("images/plots/rnn_baseline_acc.png", caption="Accuracy - RNN", use_container_width=True)
         with col2:
-            st.image("images/plots/rnn_baseline_loss.png", caption="Log Loss - RNN",
+            st.image("images/plots/rnn_baseline_loss.png", caption="Log Loss - RNN", use_container_width=True)
 
         st.markdown("### LSTM")
         col1, col2 = st.columns(2)
         with col1:
-            st.image("images/plots/lstm_baseline_acc.png", caption="Accuracy - LSTM",
+            st.image("images/plots/lstm_baseline_acc.png", caption="Accuracy - LSTM", use_container_width=True)
         with col2:
-            st.image("images/plots/lstm_baseline_loss.png", caption="Log Loss - LSTM",
+            st.image("images/plots/lstm_baseline_loss.png", caption="Log Loss - LSTM", use_container_width=True)
 
         st.markdown("### Bi-LSTM")
         col1, col2 = st.columns(2)
         with col1:
-            st.image("images/plots/bilstm_baseline_acc.png", caption="Accuracy - Bi-LSTM",
+            st.image("images/plots/bilstm_baseline_acc.png", caption="Accuracy - Bi-LSTM", use_container_width=True)
         with col2:
-            st.image("images/plots/bilstm_baseline_loss.png", caption="Log Loss - Bi-LSTM",
+            st.image("images/plots/bilstm_baseline_loss.png", caption="Log Loss - Bi-LSTM", use_container_width=True)
 
         st.markdown("### Hybrid (Dual-LSTM)")
         col1, col2 = st.columns(2)
        with col1:
-            st.image("images/plots/duallstm_hybrid_acc.png", caption="Accuracy - Hybrid (Dual-LSTM)",
+            st.image("images/plots/duallstm_hybrid_acc.png", caption="Accuracy - Hybrid (Dual-LSTM)", use_container_width=True)
         with col2:
-            st.image("images/plots/duallstm_hybrid_loss.png", caption="Log Loss - Hybrid (Dual-LSTM)",
+            st.image("images/plots/duallstm_hybrid_loss.png", caption="Log Loss - Hybrid (Dual-LSTM)", use_container_width=True)
 
         st.markdown("### Hybrid (Bi-LSTM)")
         col1, col2 = st.columns(2)
         with col1:
-            st.image("images/plots/bilstm_hybrid_acc.png", caption="Accuracy - Hybrid (Bi-LSTM)",
+            st.image("images/plots/bilstm_hybrid_acc.png", caption="Accuracy - Hybrid (Bi-LSTM)", use_container_width=True)
         with col2:
-            st.image("images/plots/bilstm_hybrid_loss.png", caption="Log Loss - Hybrid (Bi-LSTM)",
-
-    with tab4:
-        st.subheader("History of Predictions")
-        if st.session_state.history:
-            for i, item in enumerate(reversed(st.session_state.history), 1):
-                with st.expander(f"Prediction #{len(st.session_state.history) - i + 1}: {item['Prediction']}"):
-                    st.markdown(f"**Prompt:** {item['Prompt']}")
-                    st.markdown(f"**Model A ({item['Model A']}):**")
-                    st.markdown(item['Response A'])
-                    st.markdown(f"**Model B ({item['Model B']}):**")
-                    st.markdown(item['Response B'])
-                    st.markdown("**Numerical Features:**")
-                    st.json({
-                        "FRES_A": item['FRES_A'], "FRES_B": item['FRES_B'],
-                        "DC_A": item['DC_A'], "DC_B": item['DC_B'],
-                        "TTR_A": item['TTR_A'], "TTR_B": item['TTR_B'],
-                        "Entropy_A": item['Entropy_A'], "Entropy_B": item['Entropy_B']
-                    })
-        else:
-            st.info("No history yet. Run a prediction first.")
+            st.image("images/plots/bilstm_hybrid_loss.png", caption="Log Loss - Hybrid (Bi-LSTM)", use_container_width=True)
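Note on the calculate_ttr change above: joined onto one line, the new return reads "return len(set(tokens)) / len(tokens) if tokens else -999999", so an empty token list now falls back to the same sentinel the except branch uses. A standalone sketch of that type-token-ratio logic, separate from app.py (the sample sentence and expected value are illustrative only):

    # Standalone sketch of the fixed type-token ratio (TTR) from the diff above;
    # mirrors the app's logic, not the full app.py. Needs the same 'punkt' data.
    import nltk
    from nltk.tokenize import word_tokenize

    nltk.download('punkt', quiet=True)

    def calculate_ttr(text: str) -> float:
        try:
            tokens = [token.lower() for token in word_tokenize(text) if token.isalnum()]
            return len(set(tokens)) / len(tokens) if tokens else -999999
        except:
            return -999999

    print(calculate_ttr("the quick brown fox jumps over the lazy dog the fox"))  # 8 unique / 11 tokens, about 0.73
    print(calculate_ttr(""))  # -999999 sentinel for empty input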
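The rewritten generate block reads both API keys from st.secrets under the names GEMINI_API_KEY and OPENROUTER_API_KEY. Assuming the standard Streamlit secrets location (this file is not part of the commit, and the values are placeholders), the expected entries would look like:

    # .streamlit/secrets.toml (or the Space's secret settings) -- placeholders only
    GEMINI_API_KEY = "your-gemini-api-key"
    OPENROUTER_API_KEY = "your-openrouter-api-key"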
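With the model selectors and MODEL_MAP removed, the prediction labels are hard-coded to the two remaining models plus a tie class. For reference, how the argmax step maps the classifier's three-way output onto those labels (the probability vector below is made up, not a real model output):

    import numpy as np

    # label_map exactly as hard-coded in the diff; predictions is a fabricated
    # softmax output for a batch of one, just to show the argmax -> label step.
    label_map = {0: "Gemini!", 1: "DeepSeek!", 2: "Tie!"}
    predictions = np.array([[0.21, 0.64, 0.15]])
    predicted_class = np.argmax(predictions, axis=-1)[0]
    print(label_map[predicted_class])  # DeepSeek!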