PILL
Browse files
app.py
CHANGED
@@ -44,7 +44,7 @@ def provider_change(provider, selected_type, all_types=["text", "vision", "video
|
|
44 |
return new_models if new_models else all_models
|
45 |
|
46 |
# --------------------------
|
47 |
-
# Estimate Cost Function
|
48 |
# --------------------------
|
49 |
def estimate_cost(num_alerts, input_size, output_size, model_id):
|
50 |
pricing = st.session_state.get("pricing", {})
|
@@ -80,35 +80,68 @@ if "data_loaded" not in st.session_state:
|
|
80 |
with st.sidebar:
|
81 |
st.image("https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
|
82 |
use_container_width=True)
|
83 |
-
st.markdown(
|
84 |
-
""" Visit: [https://www.priam.ai](https://www.priam.ai)
|
85 |
-
"""
|
86 |
-
)
|
87 |
st.divider()
|
88 |
st.sidebar.title("LLM Pricing Calculator")
|
89 |
|
90 |
-
# Track active tab in session state
|
91 |
-
if "active_tab" not in st.session_state:
|
92 |
-
st.session_state.active_tab = "Model Selection"
|
93 |
-
|
94 |
-
def switch_tab(tab_name):
|
95 |
-
st.session_state.active_tab = tab_name
|
96 |
-
st.rerun()
|
97 |
-
|
98 |
-
|
99 |
# --------------------------
|
100 |
-
#
|
101 |
# --------------------------
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
-
#
|
109 |
-
|
|
|
|
|
|
|
110 |
st.header("LLM Pricing App")
|
111 |
-
|
112 |
# --- Row 1: Provider/Type and Model Selection ---
|
113 |
col_left, col_right = st.columns(2)
|
114 |
with col_left:
|
@@ -118,50 +151,27 @@ with tab1:
|
|
118 |
index=st.session_state["providers"].index("azure") if "azure" in st.session_state["providers"] else 0
|
119 |
)
|
120 |
selected_type = st.radio("Select type", options=["text", "image"], index=0)
|
121 |
-
|
122 |
with col_right:
|
123 |
-
# Filter models based on the selected provider and type
|
124 |
filtered_models = provider_change(selected_provider, selected_type)
|
125 |
-
|
126 |
if filtered_models:
|
127 |
-
# Use "o1" as the default model if available; otherwise, default to the first model.
|
128 |
default_model = "o1" if "o1" in filtered_models else filtered_models[0]
|
129 |
-
selected_model = st.selectbox(
|
130 |
-
"Select a model",
|
131 |
-
options=filtered_models,
|
132 |
-
index=filtered_models.index(default_model)
|
133 |
-
)
|
134 |
else:
|
135 |
selected_model = None
|
136 |
st.write("No models available")
|
137 |
-
|
138 |
# --- Row 2: Alert Stats ---
|
139 |
col1, col2, col3 = st.columns(3)
|
140 |
with col1:
|
141 |
-
num_alerts = st.number_input(
|
142 |
-
|
143 |
-
value=100,
|
144 |
-
min_value=1,
|
145 |
-
step=1,
|
146 |
-
help="Number of security alerts to analyze daily"
|
147 |
-
)
|
148 |
with col2:
|
149 |
-
input_size = st.number_input(
|
150 |
-
|
151 |
-
value=1000,
|
152 |
-
min_value=1,
|
153 |
-
step=1,
|
154 |
-
help="Include logs, metadata, and context per alert"
|
155 |
-
)
|
156 |
with col3:
|
157 |
-
output_size = st.number_input(
|
158 |
-
|
159 |
-
|
160 |
-
min_value=1,
|
161 |
-
step=1,
|
162 |
-
help="Expected length of security analysis and recommendations"
|
163 |
-
)
|
164 |
-
|
165 |
# --- Row 3: Buttons ---
|
166 |
btn_col1, btn_col2 = st.columns(2)
|
167 |
with btn_col1:
|
@@ -178,99 +188,34 @@ with tab1:
|
|
178 |
st.session_state["pricing"] = pricing
|
179 |
st.session_state["providers"] = providers
|
180 |
st.success("Pricing data refreshed!")
|
181 |
-
|
182 |
st.divider()
|
183 |
-
# --- Display Results ---
|
184 |
st.markdown("### Results")
|
185 |
if "result" in st.session_state:
|
186 |
st.write(st.session_state["result"])
|
187 |
else:
|
188 |
st.write("Use the buttons above to estimate costs.")
|
189 |
-
|
190 |
-
# --- Clear Button Below Results ---
|
191 |
if st.button("Clear"):
|
192 |
st.session_state.pop("result", None)
|
193 |
-
st.rerun()
|
194 |
-
|
195 |
-
|
196 |
-
# ----- Tab 2: On Premise Estimator -----
|
197 |
-
def format_analysis_report(analysis_result: dict) -> str:
|
198 |
-
"""Convert the raw analysis_result dict into a human-readable report."""
|
199 |
-
if "error" in analysis_result:
|
200 |
-
return f"**Error:** {analysis_result['error']}"
|
201 |
-
|
202 |
-
lines = []
|
203 |
-
lines.append(f"### Model Analysis Report for `{analysis_result.get('model_id', 'Unknown Model')}`\n")
|
204 |
-
lines.append(f"**Parameter Size:** {analysis_result.get('parameter_size', 'N/A')} Billion parameters\n")
|
205 |
-
lines.append(f"**Precision:** {analysis_result.get('precision', 'N/A')}\n")
|
206 |
-
|
207 |
-
vram = analysis_result.get("vram_requirements", {})
|
208 |
-
lines.append("#### VRAM Requirements:")
|
209 |
-
lines.append(f"- Model Size: {vram.get('model_size_gb', 0):.2f} GB")
|
210 |
-
lines.append(f"- KV Cache: {vram.get('kv_cache_gb', 0):.2f} GB")
|
211 |
-
lines.append(f"- Activations: {vram.get('activations_gb', 0):.2f} GB")
|
212 |
-
lines.append(f"- Overhead: {vram.get('overhead_gb', 0):.2f} GB")
|
213 |
-
lines.append(f"- **Total VRAM:** {vram.get('total_vram_gb', 0):.2f} GB\n")
|
214 |
-
|
215 |
-
compatible_gpus = analysis_result.get("compatible_gpus", [])
|
216 |
-
lines.append("#### Compatible GPUs:")
|
217 |
-
if compatible_gpus:
|
218 |
-
for gpu in compatible_gpus:
|
219 |
-
lines.append(f"- {gpu}")
|
220 |
-
else:
|
221 |
-
lines.append("- None found")
|
222 |
-
lines.append(f"\n**Largest Compatible GPU:** {analysis_result.get('largest_compatible_gpu', 'N/A')}\n")
|
223 |
-
|
224 |
-
gpu_perf = analysis_result.get("gpu_performance", {})
|
225 |
-
if gpu_perf:
|
226 |
-
lines.append("#### GPU Performance:")
|
227 |
-
for gpu, perf in gpu_perf.items():
|
228 |
-
lines.append(f"**{gpu}:**")
|
229 |
-
lines.append(f" - Tokens per Second: {perf.get('tokens_per_second', 0):.2f}")
|
230 |
-
lines.append(f" - FLOPs per Token: {perf.get('flops_per_token', 0):.2f}")
|
231 |
-
lines.append(f" - Effective TFLOPS: {perf.get('effective_tflops', 0):.2f}\n")
|
232 |
-
else:
|
233 |
-
lines.append("#### GPU Performance: N/A\n")
|
234 |
-
|
235 |
-
#model_info = analysis_result.get("model_info", {})
|
236 |
-
#lines.append("#### Model Information:")
|
237 |
-
#if model_info:
|
238 |
-
# if model_info.get("description"):
|
239 |
-
# lines.append(f"- Description: {model_info['description']}")
|
240 |
-
# if model_info.get("tags"):
|
241 |
-
# lines.append(f"- Tags: {', '.join(model_info['tags'])}")
|
242 |
-
#if model_info.get("downloads") is not None:
|
243 |
-
# lines.append(f"- Downloads: {model_info['downloads']}")
|
244 |
-
#if model_info.get("library"):
|
245 |
-
# lines.append(f"- Library: {model_info['library']}")
|
246 |
-
#else:
|
247 |
-
# lines.append("No additional model info available.")
|
248 |
-
|
249 |
-
return "\n".join(lines)
|
250 |
-
|
251 |
|
252 |
-
|
253 |
-
|
254 |
st.header("On Premise Estimator")
|
255 |
st.markdown("Enter a Hugging Face model ID to perform an on premise analysis using the provided estimator.")
|
256 |
-
|
257 |
-
# Input for model ID with a default value
|
258 |
hf_model_id = st.text_input("Hugging Face Model ID", value="facebook/opt-1.3b")
|
259 |
|
260 |
if st.button("Analyze Model"):
|
261 |
-
st.session_state.active_tab = "On Premise Estimator"
|
262 |
with st.spinner("Analyzing model..."):
|
263 |
analysis_result = analyze_hf_model(hf_model_id)
|
264 |
-
st.session_state
|
265 |
-
|
266 |
-
|
267 |
-
# Render if analysis result exists
|
268 |
if "analysis_result" in st.session_state:
|
269 |
-
report = format_analysis_report(st.session_state
|
270 |
st.markdown(report)
|
271 |
|
272 |
-
|
273 |
-
|
274 |
st.markdown(
|
275 |
"""
|
276 |
## About This App
|
@@ -279,7 +224,7 @@ with tab3:
|
|
279 |
|
280 |
- The app downloads the latest pricing from the LiteLLM repository.
|
281 |
- Using simple maths to estimate the total tokens.
|
282 |
-
-
|
283 |
- Version 0.1
|
284 |
|
285 |
---
|
@@ -288,12 +233,10 @@ with tab3:
|
|
288 |
|
289 |
| Version | Release Date | Key Feature Updates |
|
290 |
|--------|--------------|---------------------|
|
291 |
-
| `v1.0` | 2025-03-26 | Initial release with basic total tokens estimation|
|
292 |
-
| `v1.1` | 2025-04-06
|
293 |
-
|
294 |
|
295 |
---
|
296 |
-
|
297 |
|
298 |
Website: [https://www.priam.ai](https://www.priam.ai)
|
299 |
"""
|
@@ -304,4 +247,4 @@ with tab3:
|
|
304 |
|
305 |
This app is for demonstration purposes only. Actual costs may vary based on usage patterns and other factors.
|
306 |
"""
|
307 |
-
)
|
|
|
44 |
return new_models if new_models else all_models
|
45 |
|
46 |
# --------------------------
|
47 |
+
# Estimate Cost Function
|
48 |
# --------------------------
|
49 |
def estimate_cost(num_alerts, input_size, output_size, model_id):
|
50 |
pricing = st.session_state.get("pricing", {})
|
|
|
80 |
with st.sidebar:
|
81 |
st.image("https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
|
82 |
use_container_width=True)
|
83 |
+
st.markdown("Visit: [https://www.priam.ai](https://www.priam.ai)")
|
|
|
|
|
|
|
84 |
st.divider()
|
85 |
st.sidebar.title("LLM Pricing Calculator")
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
# --------------------------
|
88 |
+
# Pills Navigation (Using st.pills)
|
89 |
# --------------------------
|
90 |
+
# st.pills creates a pill-style selection widget.
|
91 |
+
page = st.pills("Head",
|
92 |
+
options=["Model Selection", "On Premise Estimator", "About"],selection_mode="single",default="Model Selection",label_visibility="hidden",
|
93 |
+
#index=0 # Change index if you want a different default
|
94 |
+
)
|
95 |
|
96 |
+
# --------------------------
|
97 |
+
# Helper: Format Analysis Report
|
98 |
+
# --------------------------
|
99 |
+
def _format_metric(value) -> str:
    """Render *value* with two decimals, or ``"N/A"`` when it cannot be formatted."""
    try:
        return format(value, ".2f")
    except (TypeError, ValueError):
        # None, strings and other payloads that reject the "f" format code
        # must not abort rendering of the whole report.
        return "N/A"


def format_analysis_report(analysis_result: dict) -> str:
    """Convert the raw analysis_result dict into a human-readable report.

    Args:
        analysis_result: Mapping read with the keys ``error``, ``model_id``,
            ``parameter_size``, ``precision``, ``vram_requirements``,
            ``compatible_gpus``, ``largest_compatible_gpu`` and
            ``gpu_performance``. Every key is optional.

    Returns:
        A Markdown string. When ``error`` is present only the error line is
        returned; otherwise the report sections are joined with newlines.
        Metrics that are missing default to ``0.00``; metrics that are present
        but not formattable as numbers (for example ``None``) are shown as
        ``N/A`` instead of raising.
    """
    if "error" in analysis_result:
        return f"**Error:** {analysis_result['error']}"

    lines = [
        f"### Model Analysis Report for `{analysis_result.get('model_id', 'Unknown Model')}`\n",
        f"**Parameter Size:** {analysis_result.get('parameter_size', 'N/A')} Billion parameters\n",
        f"**Precision:** {analysis_result.get('precision', 'N/A')}\n",
    ]

    # `or {}` also covers a key that is present but explicitly set to None.
    vram = analysis_result.get("vram_requirements") or {}
    lines.append("#### VRAM Requirements:")
    lines.append(f"- Model Size: {_format_metric(vram.get('model_size_gb', 0))} GB")
    lines.append(f"- KV Cache: {_format_metric(vram.get('kv_cache_gb', 0))} GB")
    lines.append(f"- Activations: {_format_metric(vram.get('activations_gb', 0))} GB")
    lines.append(f"- Overhead: {_format_metric(vram.get('overhead_gb', 0))} GB")
    lines.append(f"- **Total VRAM:** {_format_metric(vram.get('total_vram_gb', 0))} GB\n")

    compatible_gpus = analysis_result.get("compatible_gpus", [])
    lines.append("#### Compatible GPUs:")
    if compatible_gpus:
        lines.extend(f"- {gpu}" for gpu in compatible_gpus)
    else:
        lines.append("- None found")
    lines.append(f"\n**Largest Compatible GPU:** {analysis_result.get('largest_compatible_gpu', 'N/A')}\n")

    gpu_perf = analysis_result.get("gpu_performance", {})
    if gpu_perf:
        lines.append("#### GPU Performance:")
        for gpu, perf in gpu_perf.items():
            # A GPU listed without measurements falls back to the defaults.
            perf = perf or {}
            lines.append(f"**{gpu}:**")
            lines.append(f" - Tokens per Second: {_format_metric(perf.get('tokens_per_second', 0))}")
            lines.append(f" - FLOPs per Token: {_format_metric(perf.get('flops_per_token', 0))}")
            lines.append(f" - Effective TFLOPS: {_format_metric(perf.get('effective_tflops', 0))}\n")
    else:
        lines.append("#### GPU Performance: N/A\n")

    return "\n".join(lines)
|
138 |
|
139 |
+
# --------------------------
|
140 |
+
# Render Content Based on Selected Pill
|
141 |
+
# --------------------------
|
142 |
+
if page == "Model Selection":
|
143 |
+
st.divider()
|
144 |
st.header("LLM Pricing App")
|
|
|
145 |
# --- Row 1: Provider/Type and Model Selection ---
|
146 |
col_left, col_right = st.columns(2)
|
147 |
with col_left:
|
|
|
151 |
index=st.session_state["providers"].index("azure") if "azure" in st.session_state["providers"] else 0
|
152 |
)
|
153 |
selected_type = st.radio("Select type", options=["text", "image"], index=0)
|
|
|
154 |
with col_right:
|
|
|
155 |
filtered_models = provider_change(selected_provider, selected_type)
|
|
|
156 |
if filtered_models:
|
|
|
157 |
default_model = "o1" if "o1" in filtered_models else filtered_models[0]
|
158 |
+
selected_model = st.selectbox("Select a model", options=filtered_models, index=filtered_models.index(default_model))
|
|
|
|
|
|
|
|
|
159 |
else:
|
160 |
selected_model = None
|
161 |
st.write("No models available")
|
162 |
+
|
163 |
# --- Row 2: Alert Stats ---
|
164 |
col1, col2, col3 = st.columns(3)
|
165 |
with col1:
|
166 |
+
num_alerts = st.number_input("Security Alerts Per Day", value=100, min_value=1, step=1,
|
167 |
+
help="Number of security alerts to analyze daily")
|
|
|
|
|
|
|
|
|
|
|
168 |
with col2:
|
169 |
+
input_size = st.number_input("Alert Content Size (characters)", value=1000, min_value=1, step=1,
|
170 |
+
help="Include logs, metadata, and context per alert")
|
|
|
|
|
|
|
|
|
|
|
171 |
with col3:
|
172 |
+
output_size = st.number_input("Analysis Output Size (characters)", value=500, min_value=1, step=1,
|
173 |
+
help="Expected length of security analysis and recommendations")
|
174 |
+
|
|
|
|
|
|
|
|
|
|
|
175 |
# --- Row 3: Buttons ---
|
176 |
btn_col1, btn_col2 = st.columns(2)
|
177 |
with btn_col1:
|
|
|
188 |
st.session_state["pricing"] = pricing
|
189 |
st.session_state["providers"] = providers
|
190 |
st.success("Pricing data refreshed!")
|
191 |
+
|
192 |
st.divider()
|
|
|
193 |
st.markdown("### Results")
|
194 |
if "result" in st.session_state:
|
195 |
st.write(st.session_state["result"])
|
196 |
else:
|
197 |
st.write("Use the buttons above to estimate costs.")
|
198 |
+
|
|
|
199 |
if st.button("Clear"):
|
200 |
st.session_state.pop("result", None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
+
elif page == "On Premise Estimator":
|
203 |
+
st.divider()
|
204 |
st.header("On Premise Estimator")
|
205 |
st.markdown("Enter a Hugging Face model ID to perform an on premise analysis using the provided estimator.")
|
|
|
|
|
206 |
hf_model_id = st.text_input("Hugging Face Model ID", value="facebook/opt-1.3b")
|
207 |
|
208 |
if st.button("Analyze Model"):
|
|
|
209 |
with st.spinner("Analyzing model..."):
|
210 |
analysis_result = analyze_hf_model(hf_model_id)
|
211 |
+
st.session_state["analysis_result"] = analysis_result
|
212 |
+
|
|
|
|
|
213 |
if "analysis_result" in st.session_state:
|
214 |
+
report = format_analysis_report(st.session_state["analysis_result"])
|
215 |
st.markdown(report)
|
216 |
|
217 |
+
elif page == "About":
|
218 |
+
st.divider()
|
219 |
st.markdown(
|
220 |
"""
|
221 |
## About This App
|
|
|
224 |
|
225 |
- The app downloads the latest pricing from the LiteLLM repository.
|
226 |
- Using simple maths to estimate the total tokens.
|
227 |
+
- Helps you estimate hardware requirements for running open-source large language models (LLMs) on-premise using only the model ID from Hugging Face.
|
228 |
- Version 0.1
|
229 |
|
230 |
---
|
|
|
233 |
|
234 |
| Version | Release Date | Key Feature Updates |
|
235 |
|--------|--------------|---------------------|
|
236 |
+
| `v1.0` | 2025-03-26 | Initial release with basic total tokens estimation |
|
237 |
+
| `v1.1` | 2025-04-06 | Added On Premise Estimator Tab |
|
|
|
238 |
|
239 |
---
|
|
|
240 |
|
241 |
Website: [https://www.priam.ai](https://www.priam.ai)
|
242 |
"""
|
|
|
247 |
|
248 |
This app is for demonstration purposes only. Actual costs may vary based on usage patterns and other factors.
|
249 |
"""
|
250 |
+
)
|