Spaces:
Paused
Paused
import gradio as gr | |
from arxiv_metadata_service import ArxivMetadataService | |
import traceback | |
import logging | |
from config import DATASET_NAME | |
from datasets import load_dataset | |
# Configure the root logger once at import time: INFO and above, each record
# prefixed with a timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Single module-level service instance used by the extraction callback below.
arxiv_service = ArxivMetadataService()
def extract_metadata(query: str, max_results: int):
    """Run a metadata extraction for *query* and report the outcome.

    The work is delegated to ``arxiv_service.extract_and_update``; whatever it
    returns is logged at INFO level and handed back unchanged.

    Args:
        query: Search query forwarded as-is to the service.
        max_results: Result limit forwarded as-is to the service.

    Returns:
        The service's result on success. If anything raises ``Exception``,
        a string holding the exception message followed by the formatted
        traceback is returned instead (and logged at ERROR level); the
        exception is not re-raised.
    """
    try:
        outcome = arxiv_service.extract_and_update(query, max_results)
        # Logged inside the try so a failure while formatting the result is
        # reported through the same error path as a failed extraction.
        logging.info(f"Extraction result: {outcome}")
    except Exception as exc:
        details = str(exc)
        stack = traceback.format_exc()
        failure_report = "An error occurred: {}\n\nTraceback:\n{}".format(details, stack)
        logging.error(failure_report)
        return failure_report
    return outcome
def load_dataset_info():
    """Report how many records the dataset's ``train`` split holds.

    Loads ``DATASET_NAME`` with ``split="train"`` on every call and counts
    its records with ``len``.

    Returns:
        ``"Dataset contains N records."`` on success, or
        ``"Error loading dataset: <message>"`` if loading or counting raises
        ``Exception``. The exception is not re-raised.
    """
    try:
        record_count = len(load_dataset(DATASET_NAME, split="train"))
    except Exception as exc:
        return "Error loading dataset: {}".format(str(exc))
    return f"Dataset contains {record_count} records."
# Build the Gradio UI: an intro blurb followed by two tabs, one that runs an
# extraction and one that reports the dataset size.
with gr.Blocks() as demo:
    # Intro text. DATASET_NAME is interpolated twice: as the link label and
    # into the Hugging Face dataset-viewer URL. The continuation lines of the
    # literal are kept flush-left so no indentation ends up in the string.
    gr.Markdown(
        f"""Extract metadata from ArXiv papers and update the dataset.
\n\nCurrently leverages the following datasets:
\n- [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME}/viewer) dataset.
"""
    )
    with gr.Tab("Extract Metadata"):
        query_input = gr.Textbox(label="ArXiv Query")
        # Whole-number slider from 1 to 100, starting at 10.
        max_results = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Max Results")
        submit_button = gr.Button("Extract Metadata")
        output = gr.Textbox(label="Result")
        # Clicking calls extract_metadata(query, max_results) and shows its
        # return value (result or error text) in the "Result" box.
        submit_button.click(
            fn=extract_metadata,
            inputs=[query_input, max_results],
            outputs=output
        )
    with gr.Tab("View Dataset"):
        refresh_button = gr.Button("Refresh Dataset Info")
        dataset_info = gr.Textbox(label="Dataset Info")
        # Clicking calls load_dataset_info() with no inputs and shows the
        # returned summary (or error text) in the "Dataset Info" box.
        refresh_button.click(
            fn=load_dataset_info,
            inputs=[],
            outputs=dataset_info
        )

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()