Spaces:
Paused
Paused
import gradio as gr | |
from typing import List, Dict, Any | |
from config import DATASET_NAME | |
from arxiv_retrieval_service import ArxivRetrievalService | |
from dataset_management_service import DatasetManagementService | |
# Initialize services
# Both service objects are created once, at import time, and are shared by the
# handler functions defined in this file. The ArXiv service is constructed
# with no arguments; the dataset service is constructed with DATASET_NAME
# imported from config.
arxiv_service = ArxivRetrievalService()
dataset_service = DatasetManagementService(DATASET_NAME)
def handle_metadata_extraction(query: str, max_results: int) -> str:
    """Fetch ArXiv metadata for ``query`` and add it to the dataset.

    Args:
        query: Search text forwarded to ``arxiv_service.fetch_metadata``.
            Surrounding whitespace is stripped before the call.
        max_results: Result limit forwarded to ``arxiv_service.fetch_metadata``.
            It is coerced with ``int()`` because a Gradio slider can deliver
            its value as a float.

    Returns:
        Whatever ``dataset_service.update_dataset`` returns on success, or a
        human-readable message when the query is blank, when the fetch
        produced nothing, or when any step raised an exception.
    """
    cleaned_query = (query or "").strip()
    if not cleaned_query:
        # Guard clause: do not call the ArXiv service with a blank search.
        return "Please enter a non-empty ArXiv query."

    try:
        # Fetch metadata from ArXiv
        metadata_list = arxiv_service.fetch_metadata(cleaned_query, int(max_results))
        if not metadata_list:
            return "No metadata found for the given query."
        # Update the dataset with new metadata
        return dataset_service.update_dataset(metadata_list)
    except Exception as e:
        # UI boundary: report the failure as text rather than raising into
        # the Gradio event handler.
        return f"An error occurred: {str(e)}"
def handle_dataset_view() -> List[Dict[str, Any]]:
    """Return the dataset records for display in the UI.

    Returns:
        The value of ``dataset_service.get_dataset_records()``. If that call
        raises, a one-element list holding a dict with a single ``"error"``
        key describing the failure is returned instead.
    """
    try:
        records = dataset_service.get_dataset_records()
    except Exception as exc:
        failure = {"error": f"Error loading dataset: {exc!s}"}
        return [failure]
    return records
# Define Gradio interface
# The app is a single gr.Blocks container named `demo` holding an intro
# Markdown element and two tabs, each with one button wired to a handler.
with gr.Blocks() as demo:
    # Intro text. The f-string interpolates DATASET_NAME twice: as the link
    # label and inside the huggingface.co dataset viewer URL.
    gr.Markdown(
        f"""Extract metadata from ArXiv papers and update the dataset.
\n\nCurrently leverages the following dataset:
\n- [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME}/viewer)
"""
    )
    # Tab 1: free-text query plus a result-count slider (1 to 100, default 10,
    # step 1); the handler's return value is shown in the "Result" textbox.
    with gr.Tab("Extract Metadata"):
        query_input = gr.Textbox(label="ArXiv Query")
        max_results = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Max Results")
        submit_button = gr.Button("Extract Metadata")
        output = gr.Textbox(label="Result")
        # Clicking the button calls handle_metadata_extraction(query, max_results).
        submit_button.click(
            fn=handle_metadata_extraction,
            inputs=[query_input, max_results],
            outputs=output
        )
    # Tab 2: a refresh button that loads the dataset records into a JSON view.
    with gr.Tab("View Dataset"):
        refresh_button = gr.Button("Refresh Dataset Info")
        dataset_info = gr.JSON(label="Dataset Info")
        # The handler takes no inputs; its return value fills the JSON component.
        refresh_button.click(
            fn=handle_dataset_view,
            inputs=[],
            outputs=dataset_info
        )
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()