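"""Gradio app for extracting arXiv paper metadata and updating a Hugging Face dataset."""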
import logging
import traceback

import gradio as gr
from datasets import load_dataset

from arxiv_metadata_service import ArxivMetadataService
from config import DATASET_NAME

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

arxiv_service = ArxivMetadataService()
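# NOTE: ArxivMetadataService and DATASET_NAME are defined in local modules of this Space
# (arxiv_metadata_service.py and config.py, not shown here); extract_and_update() is
# expected to query arXiv and push the resulting metadata to the configured dataset.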
def extract_metadata(query: str, max_results: int):
    """Run the extraction for `query` and return the result message (or an error string)."""
    try:
        result = arxiv_service.extract_and_update(query, max_results)
        logging.info(f"Extraction result: {result}")
        return result
    except Exception as e:
        error_msg = f"An error occurred: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
        logging.error(error_msg)
        return error_msg
def load_dataset_info():
    """Load the dataset from the Hub and report how many records it contains."""
    try:
        dataset = load_dataset(DATASET_NAME, split="train")
        return f"Dataset contains {len(dataset)} records."
    except Exception as e:
        return f"Error loading dataset: {str(e)}"
with gr.Blocks() as demo:
    gr.Markdown(
        f"""Extract metadata from arXiv papers and update the dataset.
        \n\nCurrently leverages the following dataset:
        \n- [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME}/viewer)
        """
    )
with gr.Tab("Extract Metadata"):
query_input = gr.Textbox(label="ArXiv Query")
max_results = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Max Results")
submit_button = gr.Button("Extract Metadata")
output = gr.Textbox(label="Result")
submit_button.click(
fn=extract_metadata,
inputs=[query_input, max_results],
outputs=output
)
with gr.Tab("View Dataset"):
refresh_button = gr.Button("Refresh Dataset Info")
dataset_info = gr.Textbox(label="Dataset Info")
refresh_button.click(
fn=load_dataset_info,
inputs=[],
outputs=dataset_info
)
if __name__ == "__main__":
    demo.launch()