coldn00dl3s committed
Commit 02c3a11 · verified · 1 Parent(s): b7688a8

re: two models

Files changed (1): app.py (+36, -103)
app.py CHANGED
@@ -12,19 +12,20 @@ from keras.layers import Layer
 from transformers import DebertaV2Tokenizer, TFAutoModel
 import streamlit as st
 from google import genai
-import pandas as pd
-
 torch.classes.__path__ = []
 # Download tokenizer data once
 nltk.download('punkt', quiet=True)
 
 # === Cleaning Function ===
 def clean_response(text: str) -> str:
+    # Simple markdown cleaner
     text = re.sub(r"[*_`#>\-\[\]()]", "", text)
     text = re.sub(r"\s+", " ", text)
     return text.strip()
 
-# === Model APIs ===
+# === Gemini API ===
+
+
 def get_response_from_gemini(prompt: str, key) -> str:
     gemini_client = genai.Client(api_key=key)
     response = gemini_client.models.generate_content(
@@ -33,6 +34,7 @@ def get_response_from_gemini(prompt: str, key) -> str:
     )
     return response.text.strip()
 
+# === DeepSeek API ===
 def get_response_from_deepseek(prompt: str, key) -> str:
     response = requests.post(
         url="https://openrouter.ai/api/v1/chat/completions",
@@ -44,36 +46,6 @@ def get_response_from_deepseek(prompt: str, key) -> str:
     )
     return response.json()["choices"][0]["message"]["content"]
 
-def get_response_from_llamafourscout(prompt: str, key) -> str:
-    response = requests.post(
-        url="https://openrouter.ai/api/v1/chat/completions",
-        headers={"Authorization": f"Bearer {key}"},
-        data=json.dumps({
-            "model": "meta-llama/llama-4-scout:free",
-            "messages": [{"role": "user", "content": prompt}]
-        })
-    )
-    return response.json()["choices"][0]["message"]["content"]
-
-def get_response_from_mistralsmall(prompt: str, key) -> str:
-    response = requests.post(
-        url="https://openrouter.ai/api/v1/chat/completions",
-        headers={"Authorization": f"Bearer {key}"},
-        data=json.dumps({
-            "model": "mistralai/mistral-small-3.1-24b-instruct:free",
-            "messages": [{"role": "user", "content": prompt}]
-        })
-    )
-    return response.json()["choices"][0]["message"]["content"]
-
-# === Model Function Mapping ===
-MODEL_MAP = {
-    "Gemini": get_response_from_gemini,
-    "DeepSeek": get_response_from_deepseek,
-    "LLaMA 4 Scout": get_response_from_llamafourscout,
-    "Mistral Small": get_response_from_mistralsmall,
-}
-
 # === Metrics ===
 def calculate_entropy(text: str) -> float:
     try:
@@ -90,7 +62,8 @@ def calculate_entropy(text: str) -> float:
 def calculate_ttr(text: str) -> float:
     try:
         tokens = [token.lower() for token in word_tokenize(text) if token.isalnum()]
-        return len(set(tokens)) / len(tokens) if tokens else -999999
+        return len(set(tokens
+        )) / len(tokens) if tokens else -999999
     except:
         return -999999
 
@@ -146,13 +119,10 @@ def preprocess_inputs(prompt: str, response_a: str, response_b: str, tokenizer,
     ]).reshape(1, -1).astype(np.float32)
     return encoded["input_ids"], encoded["attention_mask"], metrics
 
-# === History Buffer ===
-if "history" not in st.session_state:
-    st.session_state.history = []
-
 # === Streamlit UI ===
 st.set_page_config(page_title="LMSYS Demo", layout="wide")
 
+# Optional styling (vintage theme)
 st.markdown(
     """
     <style>
@@ -182,45 +152,42 @@ st.markdown(
 st.title("Predicting Human Preference : Gemini vs DeepSeek")
 st.write("As part of this demo, we make use of two SOTA LLMs : [Gemini 2.5 Pro](https://deepmind.google/technologies/gemini/pro/) and [DeepSeek R1](https://api-docs.deepseek.com/news/news250120) and make them compete against each other on a given prompt (to be entered through the sidebar)")
 st.write("Using our proposed hybrid model, we predict which response is more suited to be preferred by a human user.")
-
 st.sidebar.title("Ask a Question!")
-model_choices = list(MODEL_MAP.keys())
-model_a_name = st.sidebar.selectbox("Choose Model A", model_choices, index=0)
-model_b_name = st.sidebar.selectbox("Choose Model B", model_choices, index=1)
 question = st.sidebar.text_area("Enter your question:", key="prompt_input")
 
+# Init session state
 if "generated" not in st.session_state:
     st.session_state["generated"] = False
 
+# Generate responses
 if st.sidebar.button("Generate Responses") and question:
     with st.spinner("Generating LLM responses..."):
-        raw_a = MODEL_MAP[model_a_name](question, st.secrets["GEMINI_API_KEY"] if model_a_name == "Gemini" else st.secrets["OPENROUTER_API_KEY"])
-        raw_b = MODEL_MAP[model_b_name](question, st.secrets["GEMINI_API_KEY"] if model_b_name == "Gemini" else st.secrets["OPENROUTER_API_KEY"])
-
-        st.session_state.update({
-            "response_a_raw": raw_a,
-            "response_b_raw": raw_b,
-            "response_a_clean": clean_response(raw_a),
-            "response_b_clean": clean_response(raw_b),
-            "generated": True,
-            "prediction": None,
-            "model_a_name": model_a_name,
-            "model_b_name": model_b_name
-        })
+        raw_a = get_response_from_gemini(question, st.secrets["GEMINI_API_KEY"])
+        raw_b = get_response_from_deepseek(question, st.secrets["OPENROUTER_API_KEY"])
+
+        st.session_state["response_a_raw"] = raw_a
+        st.session_state["response_b_raw"] = raw_b
+        st.session_state["response_a_clean"] = clean_response(raw_a)
+        st.session_state["response_b_clean"] = clean_response(raw_b)
 
+        st.session_state["generated"] = True
+        st.session_state["prediction"] = None
+
+# Display and interact
 if st.session_state["generated"]:
-    tab1, tab2, tab3, tab4 = st.tabs(["Predictions","Model Architecture", "📈 Metric Curves", "📜 History"])
+    tab1, tab2, tab3 = st.tabs(["Predictions","Model Architecture", "📈 Metric Curves"])
 
     with tab1:
         st.subheader("Model Responses")
         col1, col2 = st.columns(2)
         with col1:
-            st.markdown(f"#### {st.session_state['model_a_name']}")
+            st.markdown("#### Gemini")
             st.markdown(st.session_state["response_a_raw"])
         with col2:
-            st.markdown(f"#### {st.session_state['model_b_name']}")
+            st.markdown("#### DeepSeek")
            st.markdown(st.session_state["response_b_raw"])
 
+
     if st.button("Predict Winner"):
         with st.spinner("Running model..."):
             input_ids, attention_mask, num_features = preprocess_inputs(
@@ -231,23 +198,9 @@ if st.session_state["generated"]:
             )
             predictions = model.predict([input_ids, attention_mask, num_features], verbose=0)
             predicted_class = np.argmax(predictions, axis=-1)[0]
-            label_map = {0: f"{st.session_state['model_a_name']}!", 1: f"{st.session_state['model_b_name']}!", 2: "Tie!"}
+            label_map = {0: "Gemini!", 1: "DeepSeek!", 2: "Tie!"}
             st.session_state["prediction"] = label_map[predicted_class]
 
-            # Add to history
-            st.session_state.history.append({
-                "Prompt": question,
-                "Model A": st.session_state['model_a_name'],
-                "Model B": st.session_state['model_b_name'],
-                "Response A": st.session_state["response_a_raw"],
-                "Response B": st.session_state["response_b_raw"],
-                "Prediction": label_map[predicted_class],
-                "FRES_A": num_features[0][0], "FRES_B": num_features[0][1],
-                "DC_A": num_features[0][2], "DC_B": num_features[0][3],
-                "TTR_A": num_features[0][4], "TTR_B": num_features[0][5],
-                "Entropy_A": num_features[0][6], "Entropy_B": num_features[0][7]
-            })
-
     if st.session_state.get("prediction"):
         st.success(f"🤖 Model Prediction: {st.session_state['prediction']}")
 
@@ -261,54 +214,34 @@ if st.session_state["generated"]:
         st.markdown("### RNN")
         col1, col2 = st.columns(2)
         with col1:
-            st.image("images/plots/rnn_baseline_acc.png", caption="Accuracy - RNN", use_column_width=True)
+            st.image("images/plots/rnn_baseline_acc.png", caption="Accuracy - RNN", use_container_width=True)
         with col2:
-            st.image("images/plots/rnn_baseline_loss.png", caption="Log Loss - RNN", use_column_width=True)
+            st.image("images/plots/rnn_baseline_loss.png", caption="Log Loss - RNN", use_container_width=True)
 
         st.markdown("### LSTM")
         col1, col2 = st.columns(2)
         with col1:
-            st.image("images/plots/lstm_baseline_acc.png", caption="Accuracy - LSTM", use_column_width=True)
+            st.image("images/plots/lstm_baseline_acc.png", caption="Accuracy - LSTM", use_container_width=True)
         with col2:
-            st.image("images/plots/lstm_baseline_loss.png", caption="Log Loss - LSTM", use_column_width=True)
+            st.image("images/plots/lstm_baseline_loss.png", caption="Log Loss - LSTM", use_container_width=True)
 
         st.markdown("### Bi-LSTM")
         col1, col2 = st.columns(2)
         with col1:
-            st.image("images/plots/bilstm_baseline_acc.png", caption="Accuracy - Bi-LSTM", use_column_width=True)
+            st.image("images/plots/bilstm_baseline_acc.png", caption="Accuracy - Bi-LSTM", use_container_width=True)
         with col2:
-            st.image("images/plots/bilstm_baseline_loss.png", caption="Log Loss - Bi-LSTM", use_column_width=True)
+            st.image("images/plots/bilstm_baseline_loss.png", caption="Log Loss - Bi-LSTM", use_container_width=True)
 
         st.markdown("### Hybrid (Dual-LSTM)")
         col1, col2 = st.columns(2)
         with col1:
-            st.image("images/plots/duallstm_hybrid_acc.png", caption="Accuracy - Hybrid (Dual-LSTM)", use_column_width=True)
+            st.image("images/plots/duallstm_hybrid_acc.png", caption="Accuracy - Hybrid (Dual-LSTM)", use_container_width=True)
         with col2:
-            st.image("images/plots/duallstm_hybrid_loss.png", caption="Log Loss - Hybrid (Dual-LSTM)", use_column_width=True)
+            st.image("images/plots/duallstm_hybrid_loss.png", caption="Log Loss - Hybrid (Dual-LSTM)", use_container_width=True)
 
         st.markdown("### Hybrid (Bi-LSTM)")
         col1, col2 = st.columns(2)
         with col1:
-            st.image("images/plots/bilstm_hybrid_acc.png", caption="Accuracy - Hybrid (Bi-LSTM)", use_column_width=True)
+            st.image("images/plots/bilstm_hybrid_acc.png", caption="Accuracy - Hybrid (Bi-LSTM)", use_container_width=True)
         with col2:
-            st.image("images/plots/bilstm_hybrid_loss.png", caption="Log Loss - Hybrid (Bi-LSTM)", use_column_width=True)
-
-    with tab4:
-        st.subheader("History of Predictions")
-        if st.session_state.history:
-            for i, item in enumerate(reversed(st.session_state.history), 1):
-                with st.expander(f"Prediction #{len(st.session_state.history) - i + 1}: {item['Prediction']}"):
-                    st.markdown(f"**Prompt:** {item['Prompt']}")
-                    st.markdown(f"**Model A ({item['Model A']}):**")
-                    st.markdown(item['Response A'])
-                    st.markdown(f"**Model B ({item['Model B']}):**")
-                    st.markdown(item['Response B'])
-                    st.markdown("**Numerical Features:**")
-                    st.json({
-                        "FRES_A": item['FRES_A'], "FRES_B": item['FRES_B'],
-                        "DC_A": item['DC_A'], "DC_B": item['DC_B'],
-                        "TTR_A": item['TTR_A'], "TTR_B": item['TTR_B'],
-                        "Entropy_A": item['Entropy_A'], "Entropy_B": item['Entropy_B']
-                    })
-        else:
-            st.info("No history yet. Run a prediction first.")
+            st.image("images/plots/bilstm_hybrid_loss.png", caption="Log Loss - Hybrid (Bi-LSTM)", use_container_width=True)