ash-98 committed on
Commit
67137a1
·
1 Parent(s): 6913a64
Files changed (1) hide show
  1. app.py +77 -134
app.py CHANGED
@@ -44,7 +44,7 @@ def provider_change(provider, selected_type, all_types=["text", "vision", "video
44
  return new_models if new_models else all_models
45
 
46
  # --------------------------
47
- # Estimate Cost Function (Updated)
48
  # --------------------------
49
  def estimate_cost(num_alerts, input_size, output_size, model_id):
50
  pricing = st.session_state.get("pricing", {})
@@ -80,35 +80,68 @@ if "data_loaded" not in st.session_state:
80
  with st.sidebar:
81
  st.image("https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
82
  use_container_width=True)
83
- st.markdown(
84
- """ Visit: [https://www.priam.ai](https://www.priam.ai)
85
- """
86
- )
87
  st.divider()
88
  st.sidebar.title("LLM Pricing Calculator")
89
 
90
- # Track active tab in session state
91
- if "active_tab" not in st.session_state:
92
- st.session_state.active_tab = "Model Selection"
93
-
94
- def switch_tab(tab_name):
95
- st.session_state.active_tab = tab_name
96
- st.rerun()
97
-
98
-
99
  # --------------------------
100
- # Main Content Layout (Tabs)
101
  # --------------------------
102
- tab_labels = ["Model Selection", "On Premise Estimator", "About"]
103
- tab_index = tab_labels.index(st.session_state.active_tab)
104
- tabs = st.tabs(tab_labels)
105
- tab1, tab2, tab3 = tabs
 
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
- # ----- Tab 1: Model Selection -----
109
- with tab1:
 
 
 
110
  st.header("LLM Pricing App")
111
-
112
  # --- Row 1: Provider/Type and Model Selection ---
113
  col_left, col_right = st.columns(2)
114
  with col_left:
@@ -118,50 +151,27 @@ with tab1:
118
  index=st.session_state["providers"].index("azure") if "azure" in st.session_state["providers"] else 0
119
  )
120
  selected_type = st.radio("Select type", options=["text", "image"], index=0)
121
-
122
  with col_right:
123
- # Filter models based on the selected provider and type
124
  filtered_models = provider_change(selected_provider, selected_type)
125
-
126
  if filtered_models:
127
- # Force "gpt-4-turbo" as default if available; otherwise, default to the first model.
128
  default_model = "o1" if "o1" in filtered_models else filtered_models[0]
129
- selected_model = st.selectbox(
130
- "Select a model",
131
- options=filtered_models,
132
- index=filtered_models.index(default_model)
133
- )
134
  else:
135
  selected_model = None
136
  st.write("No models available")
137
-
138
  # --- Row 2: Alert Stats ---
139
  col1, col2, col3 = st.columns(3)
140
  with col1:
141
- num_alerts = st.number_input(
142
- "Security Alerts Per Day",
143
- value=100,
144
- min_value=1,
145
- step=1,
146
- help="Number of security alerts to analyze daily"
147
- )
148
  with col2:
149
- input_size = st.number_input(
150
- "Alert Content Size (characters)",
151
- value=1000,
152
- min_value=1,
153
- step=1,
154
- help="Include logs, metadata, and context per alert"
155
- )
156
  with col3:
157
- output_size = st.number_input(
158
- "Analysis Output Size (characters)",
159
- value=500,
160
- min_value=1,
161
- step=1,
162
- help="Expected length of security analysis and recommendations"
163
- )
164
-
165
  # --- Row 3: Buttons ---
166
  btn_col1, btn_col2 = st.columns(2)
167
  with btn_col1:
@@ -178,99 +188,34 @@ with tab1:
178
  st.session_state["pricing"] = pricing
179
  st.session_state["providers"] = providers
180
  st.success("Pricing data refreshed!")
181
-
182
  st.divider()
183
- # --- Display Results ---
184
  st.markdown("### Results")
185
  if "result" in st.session_state:
186
  st.write(st.session_state["result"])
187
  else:
188
  st.write("Use the buttons above to estimate costs.")
189
-
190
- # --- Clear Button Below Results ---
191
  if st.button("Clear"):
192
  st.session_state.pop("result", None)
193
- st.rerun()
194
-
195
-
196
- # ----- Tab 2: On Premise Estimator -----
197
def format_analysis_report(analysis_result: dict) -> str:
    """Convert the raw analysis_result dict into a human-readable Markdown report.

    Args:
        analysis_result: Dict produced by the on-premise model analyzer. May
            contain an "error" key; otherwise expected keys include
            "model_id", "parameter_size", "precision", "vram_requirements",
            "compatible_gpus", "largest_compatible_gpu", and
            "gpu_performance" — all read defensively with .get() defaults.

    Returns:
        A single Markdown string suitable for st.markdown(), or a short
        error message when the analysis failed.
    """
    # A failed analysis is reported verbatim instead of a full report.
    if "error" in analysis_result:
        return f"**Error:** {analysis_result['error']}"

    lines = []
    lines.append(f"### Model Analysis Report for `{analysis_result.get('model_id', 'Unknown Model')}`\n")
    lines.append(f"**Parameter Size:** {analysis_result.get('parameter_size', 'N/A')} Billion parameters\n")
    lines.append(f"**Precision:** {analysis_result.get('precision', 'N/A')}\n")

    # VRAM breakdown: each component defaults to 0 GB when absent.
    vram = analysis_result.get("vram_requirements", {})
    lines.append("#### VRAM Requirements:")
    lines.append(f"- Model Size: {vram.get('model_size_gb', 0):.2f} GB")
    lines.append(f"- KV Cache: {vram.get('kv_cache_gb', 0):.2f} GB")
    lines.append(f"- Activations: {vram.get('activations_gb', 0):.2f} GB")
    lines.append(f"- Overhead: {vram.get('overhead_gb', 0):.2f} GB")
    lines.append(f"- **Total VRAM:** {vram.get('total_vram_gb', 0):.2f} GB\n")

    compatible_gpus = analysis_result.get("compatible_gpus", [])
    lines.append("#### Compatible GPUs:")
    if compatible_gpus:
        for gpu in compatible_gpus:
            lines.append(f"- {gpu}")
    else:
        lines.append("- None found")
    lines.append(f"\n**Largest Compatible GPU:** {analysis_result.get('largest_compatible_gpu', 'N/A')}\n")

    # Per-GPU throughput estimates; section is omitted in favor of "N/A"
    # when the analyzer returned no performance data.
    gpu_perf = analysis_result.get("gpu_performance", {})
    if gpu_perf:
        lines.append("#### GPU Performance:")
        for gpu, perf in gpu_perf.items():
            lines.append(f"**{gpu}:**")
            lines.append(f" - Tokens per Second: {perf.get('tokens_per_second', 0):.2f}")
            lines.append(f" - FLOPs per Token: {perf.get('flops_per_token', 0):.2f}")
            lines.append(f" - Effective TFLOPS: {perf.get('effective_tflops', 0):.2f}\n")
    else:
        lines.append("#### GPU Performance: N/A\n")

    # NOTE: a commented-out "Model Information" section (description, tags,
    # downloads, library) was removed here as dead code; reintroduce it as
    # live code if that data is ever surfaced in the UI.
    return "\n".join(lines)
250
-
251
 
252
- # ----- Tab 2: On Premise Estimator -----
253
- with tab2:
254
  st.header("On Premise Estimator")
255
  st.markdown("Enter a Hugging Face model ID to perform an on premise analysis using the provided estimator.")
256
-
257
- # Input for model ID with a default value
258
  hf_model_id = st.text_input("Hugging Face Model ID", value="facebook/opt-1.3b")
259
 
260
  if st.button("Analyze Model"):
261
- st.session_state.active_tab = "On Premise Estimator"
262
  with st.spinner("Analyzing model..."):
263
  analysis_result = analyze_hf_model(hf_model_id)
264
- st.session_state.analysis_result = analysis_result
265
- st.rerun()
266
-
267
- # Render if analysis result exists
268
  if "analysis_result" in st.session_state:
269
- report = format_analysis_report(st.session_state.analysis_result)
270
  st.markdown(report)
271
 
272
- # ----- Tab 3: About -----
273
- with tab3:
274
  st.markdown(
275
  """
276
  ## About This App
@@ -279,7 +224,7 @@ with tab3:
279
 
280
  - The app downloads the latest pricing from the LiteLLM repository.
281
  - Using simple maths to estimate the total tokens.
282
- - helps you estimate hardware requirements for running open-source large language models (LLMs) on-premise using only the model ID from Hugging Face.
283
  - Version 0.1
284
 
285
  ---
@@ -288,12 +233,10 @@ with tab3:
288
 
289
  | Version | Release Date | Key Feature Updates |
290
  |--------|--------------|---------------------|
291
- | `v1.0` | 2025-03-26 | Initial release with basic total tokens estimation|
292
- | `v1.1` | 2025-04-06 | Added On premise Estimator Tab |
293
-
294
 
295
  ---
296
-
297
 
298
  Website: [https://www.priam.ai](https://www.priam.ai)
299
  """
@@ -304,4 +247,4 @@ with tab3:
304
 
305
  This app is for demonstration purposes only. Actual costs may vary based on usage patterns and other factors.
306
  """
307
- )
 
44
  return new_models if new_models else all_models
45
 
46
  # --------------------------
47
+ # Estimate Cost Function
48
  # --------------------------
49
  def estimate_cost(num_alerts, input_size, output_size, model_id):
50
  pricing = st.session_state.get("pricing", {})
 
80
  with st.sidebar:
81
  st.image("https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
82
  use_container_width=True)
83
+ st.markdown("Visit: [https://www.priam.ai](https://www.priam.ai)")
 
 
 
84
  st.divider()
85
  st.sidebar.title("LLM Pricing Calculator")
86
 
 
 
 
 
 
 
 
 
 
87
  # --------------------------
88
+ # Pills Navigation (Using st.pills)
89
  # --------------------------
90
+ # st.pills creates a pill-style selection widget.
91
+ page = st.pills("Head",
92
+ options=["Model Selection", "On Premise Estimator", "About"],selection_mode="single",default="Model Selection",label_visibility="hidden",
93
+ #index=0 # Change index if you want a different default
94
+ )
95
 
96
+ # --------------------------
97
+ # Helper: Format Analysis Report
98
+ # --------------------------
99
def format_analysis_report(analysis_result: dict) -> str:
    """Render the analysis_result dict as a Markdown report string.

    Returns a one-line error message when the dict carries an "error" key;
    otherwise assembles sections for model facts, VRAM requirements,
    compatible GPUs, and per-GPU performance. All keys are read with
    .get() defaults so a partial dict still renders.
    """
    if "error" in analysis_result:
        return f"**Error:** {analysis_result['error']}"

    get = analysis_result.get
    report = [
        f"### Model Analysis Report for `{get('model_id', 'Unknown Model')}`\n",
        f"**Parameter Size:** {get('parameter_size', 'N/A')} Billion parameters\n",
        f"**Precision:** {get('precision', 'N/A')}\n",
    ]

    # VRAM breakdown — one bullet per component, each defaulting to 0 GB.
    vram = get("vram_requirements", {})
    report.append("#### VRAM Requirements:")
    for label, key in (
        ("Model Size", "model_size_gb"),
        ("KV Cache", "kv_cache_gb"),
        ("Activations", "activations_gb"),
        ("Overhead", "overhead_gb"),
    ):
        report.append(f"- {label}: {vram.get(key, 0):.2f} GB")
    report.append(f"- **Total VRAM:** {vram.get('total_vram_gb', 0):.2f} GB\n")

    gpus = get("compatible_gpus", [])
    report.append("#### Compatible GPUs:")
    if gpus:
        report.extend(f"- {name}" for name in gpus)
    else:
        report.append("- None found")
    report.append(f"\n**Largest Compatible GPU:** {get('largest_compatible_gpu', 'N/A')}\n")

    # Per-GPU throughput estimates; collapse to "N/A" when absent.
    perf_by_gpu = get("gpu_performance", {})
    if perf_by_gpu:
        report.append("#### GPU Performance:")
        for name, perf in perf_by_gpu.items():
            report.append(f"**{name}:**")
            report.append(f" - Tokens per Second: {perf.get('tokens_per_second', 0):.2f}")
            report.append(f" - FLOPs per Token: {perf.get('flops_per_token', 0):.2f}")
            report.append(f" - Effective TFLOPS: {perf.get('effective_tflops', 0):.2f}\n")
    else:
        report.append("#### GPU Performance: N/A\n")

    return "\n".join(report)
138
 
139
+ # --------------------------
140
+ # Render Content Based on Selected Pill
141
+ # --------------------------
142
+ if page == "Model Selection":
143
+ st.divider()
144
  st.header("LLM Pricing App")
 
145
  # --- Row 1: Provider/Type and Model Selection ---
146
  col_left, col_right = st.columns(2)
147
  with col_left:
 
151
  index=st.session_state["providers"].index("azure") if "azure" in st.session_state["providers"] else 0
152
  )
153
  selected_type = st.radio("Select type", options=["text", "image"], index=0)
 
154
  with col_right:
 
155
  filtered_models = provider_change(selected_provider, selected_type)
 
156
  if filtered_models:
 
157
  default_model = "o1" if "o1" in filtered_models else filtered_models[0]
158
+ selected_model = st.selectbox("Select a model", options=filtered_models, index=filtered_models.index(default_model))
 
 
 
 
159
  else:
160
  selected_model = None
161
  st.write("No models available")
162
+
163
  # --- Row 2: Alert Stats ---
164
  col1, col2, col3 = st.columns(3)
165
  with col1:
166
+ num_alerts = st.number_input("Security Alerts Per Day", value=100, min_value=1, step=1,
167
+ help="Number of security alerts to analyze daily")
 
 
 
 
 
168
  with col2:
169
+ input_size = st.number_input("Alert Content Size (characters)", value=1000, min_value=1, step=1,
170
+ help="Include logs, metadata, and context per alert")
 
 
 
 
 
171
  with col3:
172
+ output_size = st.number_input("Analysis Output Size (characters)", value=500, min_value=1, step=1,
173
+ help="Expected length of security analysis and recommendations")
174
+
 
 
 
 
 
175
  # --- Row 3: Buttons ---
176
  btn_col1, btn_col2 = st.columns(2)
177
  with btn_col1:
 
188
  st.session_state["pricing"] = pricing
189
  st.session_state["providers"] = providers
190
  st.success("Pricing data refreshed!")
191
+
192
  st.divider()
 
193
  st.markdown("### Results")
194
  if "result" in st.session_state:
195
  st.write(st.session_state["result"])
196
  else:
197
  st.write("Use the buttons above to estimate costs.")
198
+
 
199
  if st.button("Clear"):
200
  st.session_state.pop("result", None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
+ elif page == "On Premise Estimator":
203
+ st.divider()
204
  st.header("On Premise Estimator")
205
  st.markdown("Enter a Hugging Face model ID to perform an on premise analysis using the provided estimator.")
 
 
206
  hf_model_id = st.text_input("Hugging Face Model ID", value="facebook/opt-1.3b")
207
 
208
  if st.button("Analyze Model"):
 
209
  with st.spinner("Analyzing model..."):
210
  analysis_result = analyze_hf_model(hf_model_id)
211
+ st.session_state["analysis_result"] = analysis_result
212
+
 
 
213
  if "analysis_result" in st.session_state:
214
+ report = format_analysis_report(st.session_state["analysis_result"])
215
  st.markdown(report)
216
 
217
+ elif page == "About":
218
+ st.divider()
219
  st.markdown(
220
  """
221
  ## About This App
 
224
 
225
  - The app downloads the latest pricing from the LiteLLM repository.
226
  - Using simple maths to estimate the total tokens.
227
+ - Helps you estimate hardware requirements for running open-source large language models (LLMs) on-premise using only the model ID from Hugging Face.
228
  - Version 0.1
229
 
230
  ---
 
233
 
234
  | Version | Release Date | Key Feature Updates |
235
  |--------|--------------|---------------------|
236
+ | `v1.0` | 2025-03-26 | Initial release with basic total tokens estimation |
237
+ | `v1.1` | 2025-04-06 | Added On Premise Estimator Tab |
 
238
 
239
  ---
 
240
 
241
  Website: [https://www.priam.ai](https://www.priam.ai)
242
  """
 
247
 
248
  This app is for demonstration purposes only. Actual costs may vary based on usage patterns and other factors.
249
  """
250
+ )