Spaces:
Running
Running
| import gradio as gr | |
| import json | |
| import time | |
| import traceback | |
| from validation import validate_json, validate_croissant, validate_records, generate_validation_report | |
| import requests | |
def process_file(file):
    """Run the validation stages on an uploaded Croissant file.

    The stages run in order -- JSON parsing, Croissant schema validation,
    record generation -- and the function returns early when the JSON or the
    schema stage fails.

    Args:
        file: Uploaded file object; only its ``name`` attribute (the path of
            the file on disk) is read.

    Returns:
        A ``(results, report)`` tuple. ``results`` is a list of
        ``(stage_name, passed, message)`` tuples, one per stage that ran.
        ``report`` is whatever ``generate_validation_report`` returns once
        the record stage has run (whether or not it passed), and ``None``
        when an earlier stage failed.
    """
    # Function-scope import so the block does not depend on the file header.
    import os

    def _drop_empty_checkmarks(message):
        # Remove lines that consist of a checkmark only.
        # NOTE(review): the glyph was garbled in the reviewed copy of this
        # file; "✓" is assumed here -- confirm it matches what the
        # validation module emits.
        return message.replace("\n✓\n", "\n")

    results = []

    # Only the base name goes into the report, never the temp-file path.
    # basename() also copes with platform-specific separators, which a plain
    # split("/") does not.
    filename = os.path.basename(file.name)

    # Check 1: JSON validation
    json_valid, json_message, json_data = validate_json(file.name)
    results.append(
        ("JSON Format Validation", json_valid, _drop_empty_checkmarks(json_message))
    )
    if not json_valid:
        return results, None

    # Check 2: Croissant validation
    croissant_valid, croissant_message = validate_croissant(json_data)
    results.append(
        (
            "Croissant Schema Validation",
            croissant_valid,
            _drop_empty_checkmarks(croissant_message),
        )
    )
    if not croissant_valid:
        return results, None

    # Check 3: Records validation (a failure here still produces a report)
    records_valid, records_message = validate_records(json_data)
    results.append(
        (
            "Records Generation Test",
            records_valid,
            _drop_empty_checkmarks(records_message),
        )
    )

    report = generate_validation_report(filename, json_data, results)
    return results, report
def create_ui():
    """Build the Croissant validator interface.

    The interface has two input tabs (file upload and URL), a status line, an
    expandable list of validation results and a collapsible section holding
    the generated report as text and as a downloadable file.

    Returns:
        The ``gr.Blocks`` app. It is not launched here.
    """
    # Function-scope imports so the block does not depend on the file header.
    import html
    import re

    def _status(text):
        """Wrap ``text`` in the status-line markup, HTML-escaped."""
        return f'<div class="progress-status">{html.escape(text)}</div>'

    def _status_only(text):
        """Outputs that show ``text`` as status and hide results and report."""
        return [
            _status(text),
            gr.update(visible=False),  # validation_results
            gr.update(visible=False),  # report_group
            None,  # report_text
            None,  # report_md
        ]

    def _report_filename(json_data):
        """Return the report file name for the dataset in ``json_data``.

        The dataset name comes from user-supplied JSON, so every run of
        characters other than word characters, ``.`` and ``-`` is replaced by
        ``_`` to keep path separators out of the file name.
        """
        name = "unnamed"
        if isinstance(json_data, dict):
            name = json_data.get("name", "unnamed")
        safe_name = re.sub(r"[^\w.-]+", "_", str(name)) or "unnamed"
        return f"report_croissant-validation_{safe_name}.md"

    def _write_report(report, json_data):
        """Write ``report`` to the working directory and return its file name."""
        report_filename = _report_filename(json_data)
        # Explicit encoding: the report may contain non-ASCII characters.
        with open(report_filename, "w", encoding="utf-8") as f:
            f.write(report)
        return report_filename

    with gr.Blocks(theme=gr.themes.Soft()) as app:
        # NOTE(review): the characters after "#" look like mis-decoded emoji
        # (the second is consistent with a croissant); the first could not be
        # recovered, so the literal is left untouched.
        gr.Markdown("# ππ₯ Croissant Validator for NeurIPS D&B")
        gr.Markdown(
            "\n"
            "Upload your Croissant JSON-LD file or enter a URL to validate if it meets the requirements for NeurIPS submission.\n"
            "The validator will check:\n"
            "1. If the file is valid JSON\n"
            "2. If it passes Croissant schema validation\n"
            "3. If records can be generated within a reasonable time\n"
        )

        # Track the active tab for conditional UI updates
        active_tab = gr.State("upload")  # Default to upload tab

        # Container for the entire input section
        with gr.Group():
            with gr.Tabs() as tabs:
                with gr.TabItem("Upload File", id="upload_tab"):
                    file_input = gr.File(
                        label="Upload Croissant JSON-LD File",
                        file_types=[".json", ".jsonld"],
                    )
                    validate_btn = gr.Button("Validate Uploaded File", variant="primary")
                with gr.TabItem("URL Input", id="url_tab"):
                    url_input = gr.Textbox(
                        label="Enter Croissant JSON-LD URL",
                        placeholder="e.g. https://huggingface.co/api/datasets/facebook/natural_reasoning/croissant",
                    )
                    fetch_btn = gr.Button("Fetch and Validate", variant="primary")

            # Initial message matches the upload tab, which is shown first
            upload_progress = gr.HTML(
                """<div class="progress-status">Ready for upload</div>""",
                visible=True,
            )

        # Validation results live in a separate group
        with gr.Group():
            validation_results = gr.HTML(visible=False)
            validation_progress = gr.HTML(visible=False)

            # Collapsible report section
            with gr.Accordion(
                "Download full validation report", visible=False, open=False
            ) as report_group:
                with gr.Column():
                    report_md = gr.File(
                        label="Download Report",
                        visible=True,
                        file_types=[".md"],
                    )
                    report_text = gr.Textbox(
                        label="Report Content",
                        visible=True,
                        show_copy_button=True,
                        lines=10,
                    )

        # CSS for the validation UI
        gr.HTML("""
        <style>
        /* Set max width and center the app */
        .gradio-container {
            max-width: 750px !important;
            margin: 0 auto !important;
        }
        /* Make basic containers transparent */
        .gr-group, .gr-box, .gr-panel, .gradio-box, .gradio-group {
            background-color: var(--body-background-fill) !important;
            border: none !important;
            box-shadow: none !important;
        }
        /* Style for expandable validation steps */
        .validation-step {
            margin-bottom: 12px;
            border: 1px solid var(--border-color-primary, rgba(128, 128, 128, 0.2));
            border-radius: 8px;
            overflow: hidden;
        }
        .step-header {
            padding: 10px 15px;
            display: flex;
            align-items: center;
            justify-content: space-between;
            cursor: pointer;
            background-color: rgba(0, 0, 0, 0.03) !important;
        }
        .step-left {
            display: flex;
            align-items: center;
            gap: 10px;
        }
        /* Force text color to white in status indicators */
        .step-status {
            width: 24px;
            height: 24px;
            border-radius: 50%;
            display: flex;
            align-items: center;
            justify-content: center;
            font-weight: bold;
            color: white !important;
        }
        .status-success {
            background-color: #4caf50 !important;
        }
        .status-error {
            background-color: #f44336 !important;
        }
        .step-details {
            padding: 12px 15px;
            background-color: var(--body-background-fill) !important;
        }
        /* User hints styling - italic, smaller, better positioned */
        .progress-status {
            font-style: italic;
            font-size: 0.9em;
            color: var(--body-text-color-subdued);
            padding: 8px 0;
            margin-top: 5px;
            width: 100%;
            background: none !important;
            border: none !important;
            text-align: center;
        }
        /* Override input containers to match page background */
        .gr-input-container, .gr-form, .gr-input, .gr-box, .gr-panel,
        .file-preview, .file-preview > div {
            background-color: var(--body-background-fill) !important;
        }
        /* Ensure buttons have proper styling */
        button.primary, button[data-testid="primary-button"] {
            background-color: var(--primary-500) !important;
            color: white !important;
        }
        /* Arrow indicator for expandable sections */
        .arrow-indicator {
            font-size: 14px;
            transition: transform 0.3s ease;
            transform: rotate(0deg); /* Point right by default */
        }
        .arrow-down {
            transform: rotate(90deg); /* Point down when expanded */
        }
        /* Loading animation */
        .loading-spinner {
            display: inline-block;
            width: 20px;
            height: 20px;
            border: 3px solid rgba(0, 0, 0, 0.1);
            border-radius: 50%;
            border-top-color: var(--primary-500);
            animation: spin 1s ease-in-out infinite;
            margin-right: 10px;
        }
        @keyframes spin {
            to { transform: rotate(360deg); }
        }
        .validation-progress {
            display: flex;
            align-items: center;
            justify-content: center;
            padding: 10px;
            margin: 10px 0;
            background-color: var(--background-fill-secondary);
            border-radius: 8px;
        }
        /* Override Gradio's default accordion arrow */
        .gr-accordion {
            position: relative;
        }
        .gr-accordion > .label-wrap {
            display: flex;
            align-items: center;
            gap: 8px;
            padding-right: 32px; /* Make room for the arrow */
        }
        .gr-accordion > .label-wrap::after {
            content: "▶";
            position: absolute;
            right: 16px;
            top: 50%;
            transform: translateY(-50%);
            transition: transform 0.3s ease;
            font-size: 0.8em;
        }
        .gr-accordion[data-open=true] > .label-wrap::after {
            transform: translateY(-50%) rotate(90deg);
        }
        /* Consistent arrow styling for both validation steps and accordion */
        .validation-step .step-header,
        .gr-accordion > .label-wrap {
            position: relative;
            display: flex;
            align-items: center;
            gap: 8px;
        }
        .validation-step .arrow-indicator,
        .gr-accordion > .label-wrap::after {
            content: "▶";
            font-size: 0.8em;
            margin-left: 8px;
            transition: transform 0.3s ease;
        }
        /* Remove absolute positioning and right alignment for accordion arrow */
        .gr-accordion > .label-wrap {
            padding-right: 0; /* Remove extra padding */
        }
        .gr-accordion > .label-wrap::after {
            position: static; /* Remove absolute positioning */
            right: auto;
            transform: none;
        }
        /* Consistent rotation for expanded state */
        .validation-step .arrow-down,
        .gr-accordion[data-open=true] > .label-wrap::after {
            transform: rotate(90deg);
        }
        </style>
        """)

        def on_tab_change(evt: gr.SelectData):
            """Reset the UI when the user switches tabs.

            ``evt.value`` is compared with the upload tab's label; any other
            value is treated as the URL tab. Returns eight outputs: the
            active-tab marker, the status line, hidden results, hidden report
            group, and cleared report text, report file, file input and URL
            input.
            """
            if evt.value == "Upload File":
                tab_key, hint = "upload", "Ready for upload"
            else:
                tab_key, hint = "url", "Enter a URL to fetch"
            return [
                tab_key,
                _status(hint),
                gr.update(visible=False),  # validation_results
                gr.update(visible=False),  # Hide report group
                None,  # Clear report text
                None,  # Clear report file
                None,  # Clear file input
                gr.update(value=""),  # Clear URL input
            ]

        # NOTE: on_copy_click and on_download_click are not connected to any
        # event in this function.
        def on_copy_click(report):
            """Return ``report`` unchanged."""
            return report

        def on_download_click(report, file_name):
            """Write ``report`` to ``report_<file_name>.md`` and return that path."""
            report_file = f"report_{file_name}.md"
            with open(report_file, "w", encoding="utf-8") as f:
                f.write(report)
            return report_file

        def on_file_upload(file):
            """Update the status line after the file input changes.

            Results and report are hidden and cleared in both cases; only the
            status text depends on whether a file is present.
            """
            if file is None:
                return _status_only("Ready for upload")
            return _status_only("✅ File uploaded successfully")

        def build_results_html(results):
            """Render ``(test_name, passed, message)`` tuples as expandable steps.

            Returns a ``gr.update`` that sets the markup and makes the target
            component visible. Test names and messages are HTML-escaped, so
            text such as ``<module>`` from an error message is displayed
            instead of being parsed as a tag.
            """
            steps = []
            for i, (test_name, passed, message) in enumerate(results):
                status_class = "status-success" if passed else "status-error"
                status_icon = "✓" if passed else "✗"
                title = html.escape(str(test_name))
                # Prefix the message with a pass/fail emoji
                detail = ("✅ " if passed else "❌ ") + html.escape(str(message))
                steps.append(f'''
                <div class="validation-step" id="step-{i}">
                <div class="step-header" onclick="
                var details = document.getElementById('details-{i}');
                var arrow = document.getElementById('arrow-{i}');
                if(details.style.display === 'none') {{
                details.style.display = 'block';
                arrow.classList.add('arrow-down');
                }} else {{
                details.style.display = 'none';
                arrow.classList.remove('arrow-down');
                }}">
                <div class="step-left">
                <div class="step-status {status_class}">{status_icon}</div>
                <span class="step-title">{title}</span>
                <span class="arrow-indicator" id="arrow-{i}">▶</span>
                </div>
                </div>
                <div class="step-details" id="details-{i}" style="display: none;">
                {detail}
                </div>
                </div>
                ''')
            markup = '<div class="validation-results">' + "".join(steps) + "</div>"
            return gr.update(value=markup, visible=True)

        def _fetch_and_validate(url):
            """Download ``url``, validate the JSON and build the five outputs."""
            # NOTE(review): this fetches an arbitrary user-supplied URL from
            # the server; consider restricting schemes/hosts if that matters
            # for the deployment.
            try:
                response = requests.get(url, timeout=10)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                return _status_only(f"Error fetching URL: {e}")

            # Parsed in its own try block: the decode error raised by
            # Response.json() can itself be a RequestException subclass, so a
            # shared try would report invalid JSON as a fetch error.
            try:
                json_data = response.json()
            except ValueError as e:
                return _status_only(f"URL did not return valid JSON: {e}")

            fetched = _status("✅ JSON fetched successfully from URL")
            results = [
                ("JSON Format Validation", True, "The URL returned valid JSON.")
            ]

            croissant_valid, croissant_message = validate_croissant(json_data)
            results.append(
                ("Croissant Schema Validation", croissant_valid, croissant_message)
            )
            if not croissant_valid:
                return [
                    fetched,
                    build_results_html(results),
                    gr.update(visible=False),
                    None,
                    None,
                ]

            records_valid, records_message = validate_records(json_data)
            results.append(("Records Generation Test", records_valid, records_message))

            report = generate_validation_report(url.split("/")[-1], json_data, results)
            if not report:
                # Nothing was written, so there is no file to offer.
                return [
                    fetched,
                    build_results_html(results),
                    gr.update(visible=False),
                    None,
                    None,
                ]
            return [
                fetched,
                build_results_html(results),
                gr.update(visible=True),
                report,
                _write_report(report, json_data),
            ]

        def fetch_from_url(url):
            """Validate the Croissant JSON found at ``url``.

            Returns five outputs: status line, results markup, report-group
            visibility, report text and report file path. Any failure is
            reported in the status line with results and report hidden.
            """
            if not url:
                return _status_only("Please enter a URL")
            try:
                return _fetch_and_validate(url)
            except Exception as e:
                # Event-handler boundary: show the problem in the status line
                # rather than letting the handler fail.
                return _status_only(f"Unexpected error: {e}")

        def on_validate(file):
            """Validate the uploaded file and build the five result outputs.

            Returns updates for validation_results, validation_progress,
            report_group, report_text and report_md, in that order.
            """
            if file is None:
                return [
                    gr.update(visible=False),  # validation_results
                    gr.update(visible=False),  # validation_progress
                    gr.update(visible=False),  # report_group
                    None,  # report_text
                    None,  # report_md
                ]

            results, report = process_file(file)
            if not report:
                return [
                    build_results_html(results),  # validation_results
                    gr.update(visible=False),  # validation_progress
                    gr.update(visible=False),  # report_group
                    None,  # report_text
                    None,  # report_md
                ]

            # Re-read the JSON only to pick up the dataset name for the report
            # file name; fall back to the default name if that fails.
            try:
                with open(file.name, "r", encoding="utf-8") as f:
                    json_data = json.load(f)
            except (OSError, ValueError):
                json_data = None

            return [
                build_results_html(results),  # validation_results
                gr.update(visible=False),  # validation_progress
                gr.update(visible=True),  # report_group
                report,  # report_text
                _write_report(report, json_data),  # report_md
            ]

        def show_progress():
            """Show the spinner and hide any previous results and report."""
            progress_html = """
            <div class="validation-progress">
            <div class="loading-spinner"></div>
            <span>Validating file...</span>
            </div>
            """
            return [
                gr.update(visible=False),  # validation_results
                gr.update(visible=True, value=progress_html),  # validation_progress
                gr.update(visible=False),  # report_group
                None,  # report_text
                None,  # report_md
            ]

        # Connect UI events to functions
        tabs.select(
            on_tab_change,
            None,
            [
                active_tab,
                upload_progress,
                validation_results,
                report_group,
                report_text,
                report_md,
                file_input,
                url_input,
            ],
        )
        file_input.change(
            on_file_upload,
            inputs=file_input,
            outputs=[upload_progress, validation_results, report_group, report_text, report_md],
        )
        # Show the spinner first (unqueued), then run the validation.
        validate_btn.click(
            fn=show_progress,
            inputs=None,
            outputs=[validation_results, validation_progress, report_group, report_text, report_md],
            queue=False,
        ).then(
            fn=on_validate,
            inputs=file_input,
            outputs=[validation_results, validation_progress, report_group, report_text, report_md],
        )
        fetch_btn.click(
            fetch_from_url,
            inputs=url_input,
            outputs=[upload_progress, validation_results, report_group, report_text, report_md],
        )

        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 20px;">
        <p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant format</a>.</p>
        </div>
        """)
    return app
# Script entry point: build the interface and start serving it. Nothing runs
# when the module is merely imported.
if __name__ == "__main__":
    app = create_ui()
    app.launch()