Spaces:
Paused
Paused
import gradio as gr | |
from typing import List, Dict, Any | |
from config import DATASET_NAME | |
from arxiv_retrieval_service import ArxivRetrievalService | |
from dataset_management_service import DatasetManagementService | |
import logging | |
# Configure the root logger once at import time: INFO level, with a
# timestamp and level name in front of every message.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level service instances used by the handler functions in this file.
arxiv_service = ArxivRetrievalService()
dataset_service = DatasetManagementService(DATASET_NAME)
def handle_metadata_extraction(query: str, max_results: int) -> str:
    """Fetch ArXiv metadata for a query and push it into the dataset.

    Args:
        query: Search query forwarded to ``arxiv_service.fetch_metadata``.
            Empty or whitespace-only queries are rejected before any
            service call is made.
        max_results: Maximum number of results to request. The value is
            coerced with ``int()`` because UI widgets may deliver a float
            or a numeric string.

    Returns:
        The value returned by ``dataset_service.update_dataset`` on success;
        otherwise a human-readable message (blank query, nothing found, or
        the text of the error). Any ``Exception`` raised along the way is
        caught, logged with its traceback, and reported as a message rather
        than propagated.
    """
    # Guard first: a blank query should not trigger a fetch or a dataset update.
    if not query or not query.strip():
        return "Please enter a non-empty ArXiv query."

    try:
        max_results = int(max_results)
        logging.info(
            "Fetching metadata for query: %s, max_results: %s", query, max_results
        )
        metadata_list = arxiv_service.fetch_metadata(query, max_results)
        if not metadata_list:
            return "No metadata found for the given query."

        result = dataset_service.update_dataset(metadata_list)
        logging.info("Dataset update result: %s", result)
        return result
    except Exception as e:
        error_msg = f"An error occurred during metadata extraction: {e}"
        # logging.exception records the traceback as well as the message,
        # which a plain logging.error call would drop.
        logging.exception(error_msg)
        return error_msg
def handle_dataset_view(page: int = 1, page_size: int = 10) -> Dict[str, Any]:
    """Return one page of dataset records together with paging information.

    Args:
        page: Page number to show. Treated as 1-based here, following the
            default of 1. NOTE(review): confirm that
            ``dataset_service.get_dataset_records`` uses the same convention.
        page_size: Number of records per page; must be at least 1.

    Returns:
        On success, a dict with the keys ``total_records``, ``current_page``,
        ``page_size`` and ``records``. On invalid arguments or any
        ``Exception`` raised while reading the dataset, a dict with the
        single key ``error`` holding a human-readable message.
    """
    logging.info(
        "handle_dataset_view called with page=%s, page_size=%s", page, page_size
    )

    # Input widgets can deliver None (cleared field), floats, or values
    # below 1; reject those before touching the dataset service.
    try:
        page = int(page)
        page_size = int(page_size)
        arguments_ok = page >= 1 and page_size >= 1
    except (TypeError, ValueError, OverflowError):
        arguments_ok = False
    if not arguments_ok:
        error_msg = (
            "Error loading dataset: page and page_size must be positive integers."
        )
        logging.error(error_msg)
        return {"error": error_msg}

    try:
        total_records = dataset_service.get_dataset_size()
        logging.info("Total records: %s", total_records)

        records = dataset_service.get_dataset_records(page, page_size)
        logging.info("Records type: %s", type(records))
        logging.info("Number of records returned: %s", len(records))

        result = {
            "total_records": total_records,
            "current_page": page,
            "page_size": page_size,
            "records": records,
        }
        logging.info("Returning result: %s", result)
        return result
    except Exception as e:
        error_msg = f"Error loading dataset: {e}"
        # Keep the traceback in the log; the caller only gets the message.
        logging.exception(error_msg)
        return {"error": error_msg}
# Build the two-tab Gradio interface. ``demo`` is the app object that is
# launched when this file is run as a script.
with gr.Blocks() as demo:
    # Page header with a link to the dataset viewer on the Hugging Face Hub.
    gr.Markdown(
        "# ArXiv Metadata Extraction and Dataset Management\n"
        "This application extracts metadata from ArXiv papers and manages the dataset:\n"
        f"[{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME}/viewer)\n"
    )

    # Tab 1: run an ArXiv query and update the dataset with the results.
    with gr.Tab("Extract Metadata"):
        query_input = gr.Textbox(label="ArXiv Query")
        max_results = gr.Slider(
            label="Max Results",
            minimum=1,
            maximum=100,
            step=1,
            value=10,
        )
        submit_button = gr.Button("Extract Metadata")
        output = gr.Textbox(label="Result")

        submit_button.click(
            fn=handle_metadata_extraction,
            inputs=[query_input, max_results],
            outputs=output,
        )

    # Tab 2: browse the stored records one page at a time.
    with gr.Tab("View Dataset"):
        page_number = gr.Number(label="Page Number", value=1, precision=0)
        page_size = gr.Slider(
            label="Page Size",
            minimum=5,
            maximum=50,
            step=5,
            value=10,
        )
        refresh_button = gr.Button("Refresh Dataset View")
        dataset_info = gr.JSON(label="Dataset Info")

        refresh_button.click(
            fn=handle_dataset_view,
            inputs=[page_number, page_size],
            outputs=dataset_info,
        )


if __name__ == "__main__":
    demo.launch()