Update app.py
app.py
CHANGED
@@ -63,6 +63,7 @@ def retry_api_request(max_retries=3, wait_time=10):
    return decorator

# --- Single model request function for Hugging Face ---
+
@retry_api_request()
def make_hf_request(model_name, messages, temperature, max_tokens, token=None):
    """
@@ -804,6 +805,51 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
    print(f"Unresponsive models during this run: {unresponsive_models}")
    return results, cumulative_avg_rank, s_t

+def check_model_availability(models, token):
+    """Test if models are available with the provided token"""
+    availability_results = {}
+
+    for model_name in models:
+        st.write(f"Testing availability of {model_name}...")
+        try:
+            # Create a simple test prompt
+            test_prompt = "Hello, are you available?"
+
+            # Use a short timeout to quickly test connectivity
+            client = InferenceClient(model=model_name, token=token)
+            response = client.text_generation(
+                test_prompt,
+                max_new_tokens=10,
+                temperature=0.7,
+                do_sample=True
+            )
+
+            availability_results[model_name] = {
+                "available": True,
+                "response": response[:50] + "..." if len(response) > 50 else response
+            }
+            st.success(f"✅ {model_name} is available")
+
+        except Exception as e:
+            error_msg = str(e)
+            availability_results[model_name] = {
+                "available": False,
+                "error": error_msg
+            }
+
+            if "401" in error_msg or "unauthorized" in error_msg.lower():
+                st.error(f"❌ {model_name}: Authentication error. Check your API token.")
+            elif "404" in error_msg or "not found" in error_msg.lower():
+                st.error(f"❌ {model_name}: Model not found. It may not exist or you may not have access.")
+            elif "429" in error_msg or "rate limit" in error_msg.lower():
+                st.error(f"❌ {model_name}: Rate limit exceeded. Try again later.")
+            else:
+                st.error(f"❌ {model_name}: Unknown error: {error_msg}")
+
+        time.sleep(1)  # Add delay between checks
+
+    return availability_results
+
# Streamlit UI
st.title("LLM Benchmark")
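For anyone who wants to exercise the new availability probe outside the Space, the same InferenceClient call can be run from a plain script. This is only a minimal sketch under assumptions not in the commit: huggingface_hub is installed, an HF_TOKEN environment variable holds a valid token, and the model IDs below are placeholders.

# Minimal standalone sketch of the availability probe (not part of the commit).
# Assumptions: huggingface_hub installed, HF_TOKEN set; model IDs are examples only.
import os
from huggingface_hub import InferenceClient

def probe_model(model_name: str, token: str) -> bool:
    """Return True if a tiny text_generation call succeeds for this model."""
    client = InferenceClient(model=model_name, token=token)
    try:
        client.text_generation("Hello, are you available?", max_new_tokens=10)
        return True
    except Exception as err:  # 401/404/429 errors from the Inference API land here
        print(f"{model_name}: {err}")
        return False

if __name__ == "__main__":
    token = os.environ.get("HF_TOKEN", "")
    for name in ("mistralai/Mistral-7B-Instruct-v0.2", "HuggingFaceH4/zephyr-7b-beta"):
        print(name, "->", "available" if probe_model(name, token) else "unavailable")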
@@ -848,6 +894,35 @@ model_config = {}
for model in selected_models:
    model_config[model] = {"name": model, "role": "both"}

+if st.sidebar.button("Test Selected Models"):
+    if not hf_token:
+        st.error("Please enter your Hugging Face API token")
+    elif not selected_models:
+        st.error("Please select at least one model")
+    else:
+        with st.spinner("Testing model availability..."):
+            availability = check_model_availability(selected_models, hf_token)
+
+            # Show results in a table
+            availability_df = pd.DataFrame([
+                {
+                    "Model": model,
+                    "Available": info["available"],
+                    "Status": "Available" if info["available"] else "Error",
+                    "Details": info.get("response", "") if info["available"] else info.get("error", "")
+                }
+                for model, info in availability.items()
+            ])
+
+            st.dataframe(availability_df)
+
+            # Check if we have enough models to run the benchmark
+            available_models = [m for m, info in availability.items() if info["available"]]
+            if len(available_models) >= 2:
+                st.success(f"{len(available_models)} models are available for benchmarking")
+            else:
+                st.error("You need at least 2 available models to run the benchmark")
+
# Start benchmark button
if st.sidebar.button("Start Benchmark"):
    if not hf_token:
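The results table and the two-model gate added above can also be checked offline, without Streamlit or network access. The sketch below mirrors that logic on a fabricated availability dict; the model names and messages are invented purely for illustration.

# Offline sketch of the summary table and the >= 2 models gate (not part of the commit).
# The availability dict below is fabricated to show the expected shape.
import pandas as pd

availability = {
    "example-org/model-a": {"available": True, "response": "Hello! Yes, I am..."},
    "example-org/model-b": {"available": False, "error": "404 Client Error: Not Found"},
}

availability_df = pd.DataFrame([
    {
        "Model": model,
        "Available": info["available"],
        "Status": "Available" if info["available"] else "Error",
        "Details": info.get("response", "") if info["available"] else info.get("error", ""),
    }
    for model, info in availability.items()
])
print(availability_df)

available_models = [m for m, info in availability.items() if info["available"]]
print("Enough models to benchmark:", len(available_models) >= 2)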