PeterKruger committed
Commit 1bc57b4 · verified · 1 Parent(s): 1a66bbf

Update app.py

Files changed (1):
  1. app.py (+75, -0)
app.py CHANGED
@@ -63,6 +63,7 @@ def retry_api_request(max_retries=3, wait_time=10):
     return decorator
 
 # --- Single model request function for Hugging Face ---
+
 @retry_api_request()
 def make_hf_request(model_name, messages, temperature, max_tokens, token=None):
     """
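
The retry_api_request decorator itself ends just above this hunk; only its closing return decorator line is visible as context. For readers of the diff, here is a minimal sketch of what a decorator with the signature retry_api_request(max_retries=3, wait_time=10) typically looks like. The body below is an assumption for illustration, not the file's actual implementation.

import functools
import time

def retry_api_request(max_retries=3, wait_time=10):
    # Assumed reconstruction: the real body is not visible in this diff.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    if attempt == max_retries:
                        raise  # out of retries, surface the error to the caller
                    time.sleep(wait_time)  # back off before the next attempt
        return wrapper
    return decorator
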
@@ -804,6 +805,51 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
     print(f"Unresponsive models during this run: {unresponsive_models}")
     return results, cumulative_avg_rank, s_t
 
+def check_model_availability(models, token):
+    """Test if models are available with the provided token"""
+    availability_results = {}
+
+    for model_name in models:
+        st.write(f"Testing availability of {model_name}...")
+        try:
+            # Create a simple test prompt
+            test_prompt = "Hello, are you available?"
+
+            # Send a tiny generation request to quickly test connectivity
+            client = InferenceClient(model=model_name, token=token)
+            response = client.text_generation(
+                test_prompt,
+                max_new_tokens=10,
+                temperature=0.7,
+                do_sample=True
+            )
+
+            availability_results[model_name] = {
+                "available": True,
+                "response": response[:50] + "..." if len(response) > 50 else response
+            }
+            st.success(f"✅ {model_name} is available")
+
+        except Exception as e:
+            error_msg = str(e)
+            availability_results[model_name] = {
+                "available": False,
+                "error": error_msg
+            }
+
+            if "401" in error_msg or "unauthorized" in error_msg.lower():
+                st.error(f"❌ {model_name}: Authentication error. Check your API token.")
+            elif "404" in error_msg or "not found" in error_msg.lower():
+                st.error(f"❌ {model_name}: Model not found. It may not exist or you may not have access.")
+            elif "429" in error_msg or "rate limit" in error_msg.lower():
+                st.error(f"❌ {model_name}: Rate limit exceeded. Try again later.")
+            else:
+                st.error(f"❌ {model_name}: Unknown error: {error_msg}")
+
+        time.sleep(1)  # Add delay between checks
+
+    return availability_results
+
 # Streamlit UI
 st.title("LLM Benchmark")
 
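
The new helper returns a plain dict keyed by model name, which the sidebar handler in the next hunk turns into a table. Roughly, the return value has this shape; the model IDs and message texts below are placeholders for illustration, not real output from the commit.

# Illustrative return value of check_model_availability (example values only).
availability = {
    "some-org/model-a": {"available": True, "response": "Hello! Yes, I'm here..."},
    "some-org/model-b": {"available": False, "error": "404 Client Error: Not Found"},
}
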
@@ -848,6 +894,35 @@ model_config = {}
 for model in selected_models:
     model_config[model] = {"name": model, "role": "both"}
 
+if st.sidebar.button("Test Selected Models"):
+    if not hf_token:
+        st.error("Please enter your Hugging Face API token")
+    elif not selected_models:
+        st.error("Please select at least one model")
+    else:
+        with st.spinner("Testing model availability..."):
+            availability = check_model_availability(selected_models, hf_token)
+
+        # Show results in a table
+        availability_df = pd.DataFrame([
+            {
+                "Model": model,
+                "Available": info["available"],
+                "Status": "Available" if info["available"] else "Error",
+                "Details": info.get("response", "") if info["available"] else info.get("error", "")
+            }
+            for model, info in availability.items()
+        ])
+
+        st.dataframe(availability_df)
+
+        # Check if we have enough models to run the benchmark
+        available_models = [m for m, info in availability.items() if info["available"]]
+        if len(available_models) >= 2:
+            st.success(f"{len(available_models)} models are available for benchmarking")
+        else:
+            st.error("You need at least 2 available models to run the benchmark")
+
 # Start benchmark button
 if st.sidebar.button("Start Benchmark"):
     if not hf_token:
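
One Streamlit detail worth keeping in mind with this change: the script reruns from the top on every widget interaction, so the availability dict computed inside the Test Selected Models handler no longer exists by the time Start Benchmark is clicked. If the benchmark were ever meant to reuse those results, they would have to be persisted, for example in st.session_state. The lines below are a sketch of that idea under that assumption, not part of this commit.

# Sketch only (assumption, not in this commit): keep the test results across reruns
# so a later "Start Benchmark" click could skip models that failed the check.
st.session_state["availability"] = availability

previous = st.session_state.get("availability", {})
usable_models = [name for name, info in previous.items() if info["available"]]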