coldn00dl3s committed
Commit 8a8619b · verified · 1 Parent(s): 02c3a11

added multi-models again

Files changed (1)
  1. app.py +74 -26
app.py CHANGED
@@ -7,25 +7,26 @@ from nltk.tokenize import word_tokenize
 import textstat
 import json
 import requests
+import concurrent.futures
+
 import tensorflow as tf
 from keras.layers import Layer
 from transformers import DebertaV2Tokenizer, TFAutoModel
 import streamlit as st
 from google import genai
+import pandas as pd
+
 torch.classes.__path__ = []
 # Download tokenizer data once
 nltk.download('punkt', quiet=True)
 
 # === Cleaning Function ===
 def clean_response(text: str) -> str:
-    # Simple markdown cleaner
     text = re.sub(r"[*_`#>\-\[\]()]", "", text)
     text = re.sub(r"\s+", " ", text)
     return text.strip()
 
-# === Gemini API ===
-
-
+# === Model APIs ===
 def get_response_from_gemini(prompt: str, key) -> str:
     gemini_client = genai.Client(api_key=key)
     response = gemini_client.models.generate_content(
@@ -34,7 +35,6 @@ def get_response_from_gemini(prompt: str, key) -> str:
     )
     return response.text.strip()
 
-# === DeepSeek API ===
 def get_response_from_deepseek(prompt: str, key) -> str:
     response = requests.post(
         url="https://openrouter.ai/api/v1/chat/completions",
@@ -46,6 +46,36 @@ def get_response_from_deepseek(prompt: str, key) -> str:
     )
     return response.json()["choices"][0]["message"]["content"]
 
+def get_response_from_llamafourscout(prompt: str, key) -> str:
+    response = requests.post(
+        url="https://openrouter.ai/api/v1/chat/completions",
+        headers={"Authorization": f"Bearer {key}"},
+        data=json.dumps({
+            "model": "meta-llama/llama-4-scout:free",
+            "messages": [{"role": "user", "content": prompt}]
+        })
+    )
+    return response.json()["choices"][0]["message"]["content"]
+
+def get_response_from_mistralsmall(prompt: str, key) -> str:
+    response = requests.post(
+        url="https://openrouter.ai/api/v1/chat/completions",
+        headers={"Authorization": f"Bearer {key}"},
+        data=json.dumps({
+            "model": "mistralai/mistral-small-3.1-24b-instruct:free",
+            "messages": [{"role": "user", "content": prompt}]
+        })
+    )
+    return response.json()["choices"][0]["message"]["content"]
+
+# === Model Function Mapping ===
+MODEL_MAP = {
+    "Gemini": get_response_from_gemini,
+    "DeepSeek": get_response_from_deepseek,
+    "LLaMA 4 Scout": get_response_from_llamafourscout,
+    "Mistral Small": get_response_from_mistralsmall,
+}
+
 # === Metrics ===
 def calculate_entropy(text: str) -> float:
     try:
@@ -62,8 +92,7 @@ def calculate_entropy(text: str) -> float:
 def calculate_ttr(text: str) -> float:
     try:
         tokens = [token.lower() for token in word_tokenize(text) if token.isalnum()]
-        return len(set(tokens
-        )) / len(tokens) if tokens else -999999
+        return len(set(tokens)) / len(tokens) if tokens else -999999
     except:
         return -999999
 
@@ -119,10 +148,13 @@ def preprocess_inputs(prompt: str, response_a: str, response_b: str, tokenizer,
     ]).reshape(1, -1).astype(np.float32)
     return encoded["input_ids"], encoded["attention_mask"], metrics
 
+# === History Buffer ===
+if "history" not in st.session_state:
+    st.session_state.history = []
+
 # === Streamlit UI ===
 st.set_page_config(page_title="LMSYS Demo", layout="wide")
 
-# Optional styling (vintage theme)
 st.markdown(
     """
     <style>
@@ -152,28 +184,42 @@ st.markdown(
 st.title("Predicting Human Preference : Gemini vs DeepSeek")
 st.write("As part of this demo, we make use of two SOTA LLMs : [Gemini 2.5 Pro](https://deepmind.google/technologies/gemini/pro/) and [DeepSeek R1](https://api-docs.deepseek.com/news/news250120) and make them compete against each other on a given prompt (to be entered through the sidebar)")
 st.write("Using our proposed hybrid model, we predict which response is more suited to be preferred by a human user.")
+
 st.sidebar.title("Ask a Question!")
+model_choices = list(MODEL_MAP.keys())
+model_a_name = st.sidebar.selectbox("Choose Model A", model_choices, index=0)
+model_b_name = st.sidebar.selectbox("Choose Model B", model_choices, index=1)
 question = st.sidebar.text_area("Enter your question:", key="prompt_input")
 
-# Init session state
 if "generated" not in st.session_state:
     st.session_state["generated"] = False
 
-# Generate responses
-if st.sidebar.button("Generate Responses") and question:
-    with st.spinner("Generating LLM responses..."):
-        raw_a = get_response_from_gemini(question, st.secrets["GEMINI_API_KEY"])
-        raw_b = get_response_from_deepseek(question, st.secrets["OPENROUTER_API_KEY"])
-
-        st.session_state["response_a_raw"] = raw_a
-        st.session_state["response_b_raw"] = raw_b
-        st.session_state["response_a_clean"] = clean_response(raw_a)
-        st.session_state["response_b_clean"] = clean_response(raw_b)
+import concurrent.futures
 
-        st.session_state["generated"] = True
-        st.session_state["prediction"] = None
+if st.sidebar.button("Generate Responses") and question:
+    with st.spinner("Generating LLM responses"):
+
+        def fetch_model_response(model_name):
+            api_key = st.secrets["GEMINI_API_KEY"] if model_name == "Gemini" else st.secrets["OPENROUTER_API_KEY"]
+            return MODEL_MAP[model_name](question, api_key)
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            future_a = executor.submit(fetch_model_response, model_a_name)
+            future_b = executor.submit(fetch_model_response, model_b_name)
+            raw_a = future_a.result()
+            raw_b = future_b.result()
+
+        st.session_state.update({
+            "response_a_raw": raw_a,
+            "response_b_raw": raw_b,
+            "response_a_clean": clean_response(raw_a),
+            "response_b_clean": clean_response(raw_b),
+            "generated": True,
+            "prediction": None,
+            "model_a_name": model_a_name,
+            "model_b_name": model_b_name
+        })
 
-# Display and interact
 if st.session_state["generated"]:
     tab1, tab2, tab3 = st.tabs(["Predictions","Model Architecture", "📈 Metric Curves"])
 
@@ -181,13 +227,12 @@ if st.session_state["generated"]:
         st.subheader("Model Responses")
         col1, col2 = st.columns(2)
         with col1:
-            st.markdown("#### Gemini")
+            st.markdown(f"#### {st.session_state['model_a_name']}")
             st.markdown(st.session_state["response_a_raw"])
         with col2:
-            st.markdown("#### DeepSeek")
+            st.markdown(f"#### {st.session_state['model_b_name']}")
             st.markdown(st.session_state["response_b_raw"])
 
-
         if st.button("Predict Winner"):
             with st.spinner("Running model..."):
                 input_ids, attention_mask, num_features = preprocess_inputs(
@@ -198,9 +243,11 @@ if st.session_state["generated"]:
                 )
                 predictions = model.predict([input_ids, attention_mask, num_features], verbose=0)
                 predicted_class = np.argmax(predictions, axis=-1)[0]
-                label_map = {0: "Gemini!", 1: "DeepSeek!", 2: "Tie!"}
+                label_map = {0: f"{st.session_state['model_a_name']}!", 1: f"{st.session_state['model_b_name']}!", 2: "Tie!"}
                 st.session_state["prediction"] = label_map[predicted_class]
 
+
+
         if st.session_state.get("prediction"):
             st.success(f"🤖 Model Prediction: {st.session_state['prediction']}")
 
@@ -245,3 +292,4 @@ if st.session_state["generated"]:
             st.image("images/plots/bilstm_hybrid_acc.png", caption="Accuracy - Hybrid (Bi-LSTM)", use_container_width=True)
         with col2:
             st.image("images/plots/bilstm_hybrid_loss.png", caption="Log Loss - Hybrid (Bi-LSTM)", use_container_width=True)
+
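For reference, the MODEL_MAP dispatch plus ThreadPoolExecutor fan-out added in this commit can be exercised on its own. A minimal standalone sketch, assuming hypothetical placeholder fetchers in place of the real Gemini/OpenRouter wrappers and run outside Streamlit, looks like this:

import concurrent.futures

# Hypothetical stand-ins for the real API wrappers (get_response_from_gemini, etc.).
def fetch_from_model_x(prompt: str, key: str) -> str:
    return f"Model X answer to: {prompt}"

def fetch_from_model_y(prompt: str, key: str) -> str:
    return f"Model Y answer to: {prompt}"

# Name -> callable dispatch table, mirroring MODEL_MAP in app.py.
MODEL_MAP = {
    "Model X": fetch_from_model_x,
    "Model Y": fetch_from_model_y,
}

def fetch_pair(question: str, name_a: str, name_b: str, key: str):
    # Submit both requests up front; result() then blocks until each finishes,
    # so total wall time is roughly the slower call rather than the sum of both.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_a = executor.submit(MODEL_MAP[name_a], question, key)
        future_b = executor.submit(MODEL_MAP[name_b], question, key)
        return future_a.result(), future_b.result()

if __name__ == "__main__":
    raw_a, raw_b = fetch_pair("What is entropy?", "Model X", "Model Y", key="dummy")
    print(raw_a)
    print(raw_b)

Submitting both calls before waiting on either result is what keeps the sidebar's "Generate Responses" step from paying for the two model latencies back to back.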