bigcodebench-leaderboard

Running

App Files Files Community

terryyz committed on Nov 3, 2024

Commit

180a1c8

verified ·

1 Parent(s): eb84112

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -57

app.py CHANGED Viewed

@@ -523,71 +523,71 @@ with main_block as demo:
                 outputs=[task_id_output, code_completion, nl_instruction, test_cases, count_output, index_slider]
             )
-        with gr.TabItem("🛠️ Code Execution (Beta)", id=5):
-            gr.Markdown("""\
-### Hard Set Ground Truth Pass Rate: 100%
-### Full Set Ground Truth Pass Rate: 99.6%
-### Note: The code execution session is no longer maintained. Please `pip install -U bigcodebench` and refer to [BigCodeBench Repository](https://github.com/bigcode-project/bigcodebench).
-""")
-            with gr.Row():
-                jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
-                split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
-                subset = gr.Dropdown(choices=["hard", "full"], label="Subset", value="hard")
-            with gr.Row():
-                parallel = gr.Number(label="Parallel (optional)", precision=0)
-                min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
-                max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
-            with gr.Row():
-                max_data_limit = gr.Number(label="Max Data Limit", value=25*1024, precision=0)
-                max_stack_limit = gr.Number(label="Max Stack Limit", value=10, precision=0)
-                check_gt_only = gr.Checkbox(label="Check GT Only", value=False, visible=False)
-                no_gt = gr.Checkbox(label="No GT", value=False, visible=False)
-            command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
-            with gr.Row():
-                submit_btn = gr.Button("Run Evaluation")
-                download_btn = gr.DownloadButton(label="Download Result", visible=False)
-            log_output = gr.Textbox(label="Execution Logs", lines=20)
-            input_components = [
-                jsonl_file, split, subset, parallel,
-                min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
-                check_gt_only, no_gt
-            ]
-            for component in input_components:
-                component.change(generate_command, inputs=input_components, outputs=command_output)
-            def start_evaluation(command, jsonl_file, subset, split):
-                lock.acquire()
-                if jsonl_file is not None:
-                    result_path = os.path.basename(jsonl_file.name).replace(".jsonl", "_eval_results.json")
-                else:
-                    result_path = None
-                for log in stream_logs(command, jsonl_file):
-                    if jsonl_file is not None and jsonl_file.name.endswith(".jsonl"):
-                        yield log, gr.update(value=result_path, label=result_path, visible=True), gr.update(visible=False)
-                    else:
-                        yield log, gr.update(), gr.update()
-                lock.release()
-                result_file = find_result_file()
-                if result_file:
-                    return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
-                            # gr.Button(visible=False)#,
-                            # gr.DownloadButton(label="Download Result", value=result_file, visible=True))
-                else:
-                    return gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)
-                            # gr.Button("Run Evaluation", visible=True),
-                            # gr.DownloadButton(visible=False))
-            submit_btn.click(start_evaluation,
-                        inputs=[command_output, jsonl_file, subset, split],
-                        outputs=[log_output, download_btn, submit_btn])
         with gr.TabItem("🚀 Request", id=4):
             gr.Markdown(SUBMISSION_TEXT_3)

                 outputs=[task_id_output, code_completion, nl_instruction, test_cases, count_output, index_slider]
             )
+#         with gr.TabItem("🛠️ Code Execution (Beta)", id=5):
+#             gr.Markdown("""\
+# ### Hard Set Ground Truth Pass Rate: 100%
+# ### Full Set Ground Truth Pass Rate: 99.6%
+# ### Note: The code execution session is no longer maintained. Please `pip install -U bigcodebench` and refer to [BigCodeBench Repository](https://github.com/bigcode-project/bigcodebench).
+# """)
+#             with gr.Row():
+#                 jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
+#                 split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
+#                 subset = gr.Dropdown(choices=["hard", "full"], label="Subset", value="hard")
+#             with gr.Row():
+#                 parallel = gr.Number(label="Parallel (optional)", precision=0)
+#                 min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
+#                 max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
+#             with gr.Row():
+#                 max_data_limit = gr.Number(label="Max Data Limit", value=25*1024, precision=0)
+#                 max_stack_limit = gr.Number(label="Max Stack Limit", value=10, precision=0)
+#                 check_gt_only = gr.Checkbox(label="Check GT Only", value=False, visible=False)
+#                 no_gt = gr.Checkbox(label="No GT", value=False, visible=False)
+#             command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
+#             with gr.Row():
+#                 submit_btn = gr.Button("Run Evaluation")
+#                 download_btn = gr.DownloadButton(label="Download Result", visible=False)
+#             log_output = gr.Textbox(label="Execution Logs", lines=20)
+#             input_components = [
+#                 jsonl_file, split, subset, parallel,
+#                 min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
+#                 check_gt_only, no_gt
+#             ]
+#             for component in input_components:
+#                 component.change(generate_command, inputs=input_components, outputs=command_output)
+#             def start_evaluation(command, jsonl_file, subset, split):
+#                 lock.acquire()
+#                 if jsonl_file is not None:
+#                     result_path = os.path.basename(jsonl_file.name).replace(".jsonl", "_eval_results.json")
+#                 else:
+#                     result_path = None
+#                 for log in stream_logs(command, jsonl_file):
+#                     if jsonl_file is not None and jsonl_file.name.endswith(".jsonl"):
+#                         yield log, gr.update(value=result_path, label=result_path, visible=True), gr.update(visible=False)
+#                     else:
+#                         yield log, gr.update(), gr.update()
+#                 lock.release()
+#                 result_file = find_result_file()
+#                 if result_file:
+#                     return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
+#                             # gr.Button(visible=False)#,
+#                             # gr.DownloadButton(label="Download Result", value=result_file, visible=True))
+#                 else:
+#                     return gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)
+#                             # gr.Button("Run Evaluation", visible=True),
+#                             # gr.DownloadButton(visible=False))
+#             submit_btn.click(start_evaluation,
+#                         inputs=[command_output, jsonl_file, subset, split],
+#                         outputs=[log_output, download_btn, submit_btn])
         with gr.TabItem("🚀 Request", id=4):
             gr.Markdown(SUBMISSION_TEXT_3)