Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -11,6 +11,7 @@ import base64 | |
| 11 | 
             
            import os
         | 
| 12 | 
             
            from huggingface_hub import login
         | 
| 13 | 
             
            import spaces
         | 
|  | |
| 14 |  | 
| 15 | 
             
            # Read token and login
         | 
| 16 | 
             
            hf_token = os.getenv("HF_TOKEN_READ_WRITE")
         | 
| @@ -188,13 +189,31 @@ def run_evaluation(): | |
| 188 |  | 
| 189 | 
             
                return f"Accuracy: {accuracy:.2f}", full_html
         | 
| 190 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 191 | 
             
            # ---------------------------------------------------------------------------
         | 
| 192 | 
             
            # 6. Gradio Interface
         | 
| 193 | 
             
            # ---------------------------------------------------------------------------
         | 
| 194 | 
             
            with gr.Blocks() as demo:
         | 
| 195 | 
             
                gr.Markdown("# Mistral-7B Math Evaluation Demo")
         | 
| 196 | 
             
                gr.Markdown("""
         | 
| 197 | 
            -
                This demo evaluates Mistral-7B on  | 
| 198 | 
             
                Press the button below to run the evaluation.
         | 
| 199 | 
             
                """)
         | 
| 200 |  | 
| @@ -208,4 +227,13 @@ with gr.Blocks() as demo: | |
| 208 | 
             
                    outputs=[output_text, output_plot]
         | 
| 209 | 
             
                )
         | 
| 210 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 211 | 
             
            demo.launch()
         | 
|  | |
| 11 | 
             
            import os
         | 
| 12 | 
             
            from huggingface_hub import login
         | 
| 13 | 
             
            import spaces
         | 
| 14 | 
            +
            from mmlu_eval import evaluate_mmlu
         | 
| 15 |  | 
| 16 | 
             
            # Read token and login
         | 
| 17 | 
             
            hf_token = os.getenv("HF_TOKEN_READ_WRITE")
         | 
|  | |
| 189 |  | 
| 190 | 
             
                return f"Accuracy: {accuracy:.2f}", full_html
         | 
| 191 |  | 
| 192 | 
            +
            # ---------------------------------------------------------------------------
         | 
| 193 | 
            +
            # 5. MMLU Evaluation call
         | 
| 194 | 
            +
            # ---------------------------------------------------------------------------
         | 
| 195 | 
            +
            def run_mmlu_evaluation(num_questions):
         | 
| 196 | 
            +
                """
         | 
| 197 | 
            +
                Runs the MMLU evaluation with the specified number of questions per task.
         | 
| 198 | 
            +
                """
         | 
| 199 | 
            +
                results = evaluate_mmlu(model, tokenizer, num_questions)
         | 
| 200 | 
            +
                
         | 
| 201 | 
            +
                report = (
         | 
| 202 | 
            +
                    f"Overall Accuracy: {results['overall_accuracy']:.2f}\n"
         | 
| 203 | 
            +
                    f"Min Accuracy: {results['min_accuracy_task'][1]:.2f} on {results['min_accuracy_task'][0]}\n"
         | 
| 204 | 
            +
                    f"Max Accuracy: {results['max_accuracy_task'][1]:.2f} on {results['max_accuracy_task'][0]}"
         | 
| 205 | 
            +
                )
         | 
| 206 | 
            +
                
         | 
| 207 | 
            +
                return report
         | 
| 208 | 
            +
             | 
| 209 | 
            +
             | 
| 210 | 
             
            # ---------------------------------------------------------------------------
         | 
| 211 | 
             
            # 6. Gradio Interface
         | 
| 212 | 
             
            # ---------------------------------------------------------------------------
         | 
| 213 | 
             
            with gr.Blocks() as demo:
         | 
| 214 | 
             
                gr.Markdown("# Mistral-7B Math Evaluation Demo")
         | 
| 215 | 
             
                gr.Markdown("""
         | 
| 216 | 
            +
                This demo evaluates Mistral-7B on three very simple math problems to get started.
         | 
| 217 | 
             
                Press the button below to run the evaluation.
         | 
| 218 | 
             
                """)
         | 
| 219 |  | 
|  | |
| 227 | 
             
                    outputs=[output_text, output_plot]
         | 
| 228 | 
             
                )
         | 
| 229 |  | 
| 230 | 
            +
                gr.Markdown("### MMLU Evaluation")
         | 
| 231 | 
            +
                num_questions_input = gr.Number(label="Questions per Task (there are 57 total Tasks)", value=5, precision=0)
         | 
| 232 | 
            +
                eval_mmlu_button = gr.Button("Run MMLU Evaluation")
         | 
| 233 | 
            +
                mmlu_output = gr.Textbox(label="MMLU Evaluation Results")
         | 
| 234 | 
            +
                
         | 
| 235 | 
            +
                eval_mmlu_button.click(fn=run_mmlu_evaluation, inputs=[num_questions_input], outputs=[mmlu_output])
         | 
| 236 | 
            +
                
         | 
| 237 | 
            +
             | 
| 238 | 
            +
             | 
| 239 | 
             
            demo.launch()
         | 
