	reject duplicate submission
app.py CHANGED

@@ -15,7 +15,21 @@ H4_TOKEN = os.environ.get("H4_TOKEN", None)
 LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
 
+
+def get_all_requested_models(requested_models_dir):
+    depth = 1
+    file_names = []
+
+    for root, dirs, files in os.walk(requested_models_dir):
+        current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
+        if current_depth == depth:
+            file_names.extend([os.path.join(root, file) for file in files])
+
+    return set([file_name.lower().split("./evals/")[1] for file_name in file_names])
+
+
 repo = None
+requested_models = None
 if H4_TOKEN:
     print("pulling repo")
     # try:
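
For context: the new get_all_requested_models helper collects only files that sit exactly one directory level below requested_models_dir (the <org>/<file>.json layout of the request queue), by comparing separator counts on the paths os.walk yields, then normalizes each hit by lowercasing it and stripping everything up to "./evals/". A minimal, self-contained sketch of the same depth arithmetic against a throwaway tree (all names here are hypothetical):

import os
import tempfile

# Throwaway tree mirroring the queue layout:
#   <root>/org_a/model_1.json      depth 1 -> collected
#   <root>/org_b/nested/deep.json  depth 2 -> ignored
with tempfile.TemporaryDirectory() as root:
    os.makedirs(os.path.join(root, "org_a"))
    os.makedirs(os.path.join(root, "org_b", "nested"))
    open(os.path.join(root, "org_a", "model_1.json"), "w").close()
    open(os.path.join(root, "org_b", "nested", "deep.json"), "w").close()

    depth = 1
    hits = []
    for current_dir, _dirs, files in os.walk(root):
        # Depth below the starting root = extra path separators; this assumes
        # the root path carries no trailing separator, as in the app code.
        if current_dir.count(os.sep) - root.count(os.sep) == depth:
            hits.extend(os.path.join(current_dir, f) for f in files)

    print(hits)  # only .../org_a/model_1.json is at depth 1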

@@ -31,6 +45,9 @@ if H4_TOKEN:
     )
     repo.git_pull()
 
+    requested_models_dir = "./evals/eval_requests"
+    requested_models = get_all_requested_models(requested_models_dir)
+
 
 # parse the results
 BENCHMARKS = ["arc_challenge", "hellaswag", "hendrycks", "truthfulqa_mc"]
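
The snapshot is taken once at boot, right after the evaluations repo is pulled, so add_new_eval later checks against an in-memory set rather than re-scanning disk. A condensed sketch of that startup flow, assuming a huggingface_hub Repository clone into ./evals/ (the constructor arguments are placeholders, and get_all_requested_models is the helper defined above):

import os
from huggingface_hub import Repository

H4_TOKEN = os.environ.get("H4_TOKEN", None)

repo = None
requested_models = None
if H4_TOKEN:
    # Clone or reuse a local checkout of the evaluations dataset (placeholder args)
    repo = Repository(
        local_dir="./evals/",
        clone_from="HuggingFaceH4/lmeh_evaluations",
        use_auth_token=H4_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()

    # One-time snapshot of everything already in the request queue
    requested_models = get_all_requested_models("./evals/eval_requests")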

@@ -110,7 +127,7 @@ def get_leaderboard():
 
     dataframe = pd.DataFrame.from_records(all_data)
     dataframe = dataframe.sort_values(by=["Average ⬆️"], ascending=False)
-    print(dataframe)
+    # print(dataframe)
     dataframe = dataframe[COLS]
     return dataframe

@@ -187,12 +204,12 @@ def add_new_eval(
     if is_delta_weight and not is_model_on_hub(base_model, revision):
         error_message = f'Base model "{base_model}" was not found on hub!'
         print(error_message)
-        return f"<p style='color: red; font-size: 
+        return f"<p style='color: red; font-size: 20px; text-align: center;'>{error_message}</p>"
 
     if not is_model_on_hub(model, revision):
         error_message = f'Model "{model}"was not found on hub!'
         print(error_message)
-        return f"<p style='color: red; font-size: 
+        return f"<p style='color: red; font-size: 20px; text-align: center;'>{error_message}</p>"
 
     print("adding new eval")
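
add_new_eval reports status back to the Gradio UI as inline-styled HTML paragraphs; this commit fills in the red error returns with explicit 20px, centered styling (the removed lines are truncated in this view) and reuses the same pattern for the new orange duplicate warning and the green success message below. Since the three variants differ only in color and text, a small hypothetical helper (not part of the commit) shows the shared shape:

def styled_message(message: str, color: str) -> str:
    """Render a status string the way the app's returns do: a colored,
    centered, 20px paragraph that Gradio displays as HTML."""
    return f"<p style='color: {color}; font-size: 20px; text-align: center;'>{message}</p>"

# Illustrative usage matching the three cases in add_new_eval:
styled_message("Model not found on hub!", "red")
styled_message("This model has been already submitted.", "orange")
styled_message("Your request has been submitted to the evaluation queue!", "green")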

@@ -216,6 +233,11 @@ def add_new_eval(
     os.makedirs(OUT_DIR, exist_ok=True)
     out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{is_8_bit_eval}_{is_delta_weight}.json"
 
+    # Check for duplicate submission
+    if out_path.lower() in requested_models:
+        duplicate_request_message = "This model has been already submitted."
+        return f"<p style='color: orange; font-size: 20px; text-align: center;'>{duplicate_request_message}</p>"
+
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
     LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
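
The gate itself is a constant-time set-membership test on the lowercased output path, run before the request file is written. One caveat worth noting: the set's keys were normalized by splitting on "./evals/", so the comparison only fires when out_path (built from OUT_DIR, which is defined outside this hunk) is expressed under the same prefix convention. A toy reproduction of the check under that assumption (paths hypothetical):

# Keys as stored by get_all_requested_models: lowercased, "./evals/" prefix stripped
requested_models = {"eval_requests/org_a/model_1_eval_request_false_false_false.json"}

# A resubmission differing only in case still collides after .lower()
out_path = "eval_requests/org_a/Model_1_eval_request_False_False_False.json"

if out_path.lower() in requested_models:
    print("This model has been already submitted.")  # rejected as duplicate
else:
    print("writing request file")  # new request proceeds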

@@ -230,7 +252,7 @@ def add_new_eval(
     )
 
     success_message = "Your request has been submitted to the evaluation queue!"
-    return f"<p style='color: green; font-size: 
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{success_message}</p>"
 
 
 def refresh():
utils.py CHANGED

@@ -133,4 +133,4 @@ def get_eval_results_dicts(is_public=True) -> List[Dict]:
     return [e.to_dict() for e in eval_results]
 
 eval_results_dict = get_eval_results_dicts()
-print(eval_results_dict)
+# print(eval_results_dict)