added multi-models again
app.py CHANGED
@@ -7,25 +7,26 @@ from nltk.tokenize import word_tokenize
 import textstat
 import json
 import requests
+import concurrent.futures
+
 import tensorflow as tf
 from keras.layers import Layer
 from transformers import DebertaV2Tokenizer, TFAutoModel
 import streamlit as st
 from google import genai
+import pandas as pd
+
 torch.classes.__path__ = []
 # Download tokenizer data once
 nltk.download('punkt', quiet=True)
 
 # === Cleaning Function ===
 def clean_response(text: str) -> str:
-    # Simple markdown cleaner
     text = re.sub(r"[*_`#>\-\[\]()]", "", text)
     text = re.sub(r"\s+", " ", text)
     return text.strip()
 
-# ===
-
-
+# === Model APIs ===
 def get_response_from_gemini(prompt: str, key) -> str:
     gemini_client = genai.Client(api_key=key)
     response = gemini_client.models.generate_content(
@@ -34,7 +35,6 @@ def get_response_from_gemini(prompt: str, key) -> str:
     )
     return response.text.strip()
 
-# === DeepSeek API ===
 def get_response_from_deepseek(prompt: str, key) -> str:
     response = requests.post(
         url="https://openrouter.ai/api/v1/chat/completions",
@@ -46,6 +46,36 @@ def get_response_from_deepseek(prompt: str, key) -> str:
     )
     return response.json()["choices"][0]["message"]["content"]
 
+def get_response_from_llamafourscout(prompt: str, key) -> str:
+    response = requests.post(
+        url="https://openrouter.ai/api/v1/chat/completions",
+        headers={"Authorization": f"Bearer {key}"},
+        data=json.dumps({
+            "model": "meta-llama/llama-4-scout:free",
+            "messages": [{"role": "user", "content": prompt}]
+        })
+    )
+    return response.json()["choices"][0]["message"]["content"]
+
+def get_response_from_mistralsmall(prompt: str, key) -> str:
+    response = requests.post(
+        url="https://openrouter.ai/api/v1/chat/completions",
+        headers={"Authorization": f"Bearer {key}"},
+        data=json.dumps({
+            "model": "mistralai/mistral-small-3.1-24b-instruct:free",
+            "messages": [{"role": "user", "content": prompt}]
+        })
+    )
+    return response.json()["choices"][0]["message"]["content"]
+
+# === Model Function Mapping ===
+MODEL_MAP = {
+    "Gemini": get_response_from_gemini,
+    "DeepSeek": get_response_from_deepseek,
+    "LLaMA 4 Scout": get_response_from_llamafourscout,
+    "Mistral Small": get_response_from_mistralsmall,
+}
+
 # === Metrics ===
 def calculate_entropy(text: str) -> float:
     try:
@@ -62,8 +92,7 @@ def calculate_entropy(text: str) -> float:
 def calculate_ttr(text: str) -> float:
     try:
         tokens = [token.lower() for token in word_tokenize(text) if token.isalnum()]
-        return len(set(tokens
-        )) / len(tokens) if tokens else -999999
+        return len(set(tokens)) / len(tokens) if tokens else -999999
     except:
         return -999999
 
@@ -119,10 +148,13 @@ def preprocess_inputs(prompt: str, response_a: str, response_b: str, tokenizer,
     ]).reshape(1, -1).astype(np.float32)
     return encoded["input_ids"], encoded["attention_mask"], metrics
 
+# === History Buffer ===
+if "history" not in st.session_state:
+    st.session_state.history = []
+
 # === Streamlit UI ===
 st.set_page_config(page_title="LMSYS Demo", layout="wide")
 
-# Optional styling (vintage theme)
 st.markdown(
     """
     <style>
@@ -152,28 +184,42 @@ st.markdown(
 st.title("Predicting Human Preference : Gemini vs DeepSeek")
 st.write("As part of this demo, we make use of two SOTA LLMs : [Gemini 2.5 Pro](https://deepmind.google/technologies/gemini/pro/) and [DeepSeek R1](https://api-docs.deepseek.com/news/news250120) and make them compete against each other on a given prompt (to be entered through the sidebar)")
 st.write("Using our proposed hybrid model, we predict which response is more suited to be preferred by a human user.")
+
 st.sidebar.title("Ask a Question!")
+model_choices = list(MODEL_MAP.keys())
+model_a_name = st.sidebar.selectbox("Choose Model A", model_choices, index=0)
+model_b_name = st.sidebar.selectbox("Choose Model B", model_choices, index=1)
 question = st.sidebar.text_area("Enter your question:", key="prompt_input")
 
-# Init session state
 if "generated" not in st.session_state:
     st.session_state["generated"] = False
 
-
-if st.sidebar.button("Generate Responses") and question:
-    with st.spinner("Generating LLM responses..."):
-        raw_a = get_response_from_gemini(question, st.secrets["GEMINI_API_KEY"])
-        raw_b = get_response_from_deepseek(question, st.secrets["OPENROUTER_API_KEY"])
-
-        st.session_state["response_a_raw"] = raw_a
-        st.session_state["response_b_raw"] = raw_b
-        st.session_state["response_a_clean"] = clean_response(raw_a)
-        st.session_state["response_b_clean"] = clean_response(raw_b)
+import concurrent.futures
 
-
-
+if st.sidebar.button("Generate Responses") and question:
+    with st.spinner("Generating LLM responses"):
+
+        def fetch_model_response(model_name):
+            api_key = st.secrets["GEMINI_API_KEY"] if model_name == "Gemini" else st.secrets["OPENROUTER_API_KEY"]
+            return MODEL_MAP[model_name](question, api_key)
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            future_a = executor.submit(fetch_model_response, model_a_name)
+            future_b = executor.submit(fetch_model_response, model_b_name)
+            raw_a = future_a.result()
+            raw_b = future_b.result()
+
+        st.session_state.update({
+            "response_a_raw": raw_a,
+            "response_b_raw": raw_b,
+            "response_a_clean": clean_response(raw_a),
+            "response_b_clean": clean_response(raw_b),
+            "generated": True,
+            "prediction": None,
+            "model_a_name": model_a_name,
+            "model_b_name": model_b_name
+        })
 
-# Display and interact
 if st.session_state["generated"]:
     tab1, tab2, tab3 = st.tabs(["Predictions","Model Architecture", "📈 Metric Curves"])
 
@@ -181,13 +227,12 @@ if st.session_state["generated"]:
         st.subheader("Model Responses")
         col1, col2 = st.columns(2)
         with col1:
-            st.markdown("####
+            st.markdown(f"#### {st.session_state['model_a_name']}")
             st.markdown(st.session_state["response_a_raw"])
         with col2:
-            st.markdown("####
+            st.markdown(f"#### {st.session_state['model_b_name']}")
            st.markdown(st.session_state["response_b_raw"])
 
-
         if st.button("Predict Winner"):
             with st.spinner("Running model..."):
                 input_ids, attention_mask, num_features = preprocess_inputs(
@@ -198,9 +243,11 @@ if st.session_state["generated"]:
                 )
                 predictions = model.predict([input_ids, attention_mask, num_features], verbose=0)
                 predicted_class = np.argmax(predictions, axis=-1)[0]
-                label_map = {0: "
+                label_map = {0: f"{st.session_state['model_a_name']}!", 1: f"{st.session_state['model_b_name']}!", 2: "Tie!"}
                 st.session_state["prediction"] = label_map[predicted_class]
 
+
+
         if st.session_state.get("prediction"):
             st.success(f"🤖 Model Prediction: {st.session_state['prediction']}")
 
@@ -245,3 +292,4 @@ if st.session_state["generated"]:
             st.image("images/plots/bilstm_hybrid_acc.png", caption="Accuracy - Hybrid (Bi-LSTM)", use_container_width=True)
         with col2:
             st.image("images/plots/bilstm_hybrid_loss.png", caption="Log Loss - Hybrid (Bi-LSTM)", use_container_width=True)
+
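Note on the pattern this commit introduces: every non-Gemini helper issues the same OpenRouter chat-completions request with only the model slug changing, and the two models picked in the sidebar are fetched through MODEL_MAP with a two-thread executor. Below is a minimal standalone sketch of that pattern, not code from app.py: it assumes a plain OPENROUTER_API_KEY environment variable instead of st.secrets, a generic helper name, a lambda-based registry, and it adds a timeout and raise_for_status as defensive extras that the committed helpers omit.

# Minimal sketch of the MODEL_MAP + concurrent-fetch pattern (illustrative only).
import concurrent.futures
import json
import os

import requests


def get_response_from_openrouter(prompt: str, key: str, model_slug: str) -> str:
    # Same request shape as the new LLaMA/Mistral helpers, parameterised by slug.
    response = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={"Authorization": f"Bearer {key}"},
        data=json.dumps({
            "model": model_slug,
            "messages": [{"role": "user", "content": prompt}],
        }),
        timeout=60,  # defensive extra; the committed helpers have no timeout
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]


# Registry in the style of MODEL_MAP: display name -> callable(prompt, key).
MODEL_MAP = {
    "LLaMA 4 Scout": lambda p, k: get_response_from_openrouter(p, k, "meta-llama/llama-4-scout:free"),
    "Mistral Small": lambda p, k: get_response_from_openrouter(p, k, "mistralai/mistral-small-3.1-24b-instruct:free"),
}

if __name__ == "__main__":
    key = os.environ["OPENROUTER_API_KEY"]  # assumed env var; app.py reads st.secrets instead
    question = "Explain the difference between precision and recall in two sentences."
    model_a, model_b = "LLaMA 4 Scout", "Mistral Small"

    # Fetch both candidate responses in parallel, as the Streamlit handler does.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_a = executor.submit(MODEL_MAP[model_a], question, key)
        future_b = executor.submit(MODEL_MAP[model_b], question, key)
        raw_a, raw_b = future_a.result(), future_b.result()

    print(f"--- {model_a} ---\n{raw_a}\n\n--- {model_b} ---\n{raw_b}")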