File size: 1,988 Bytes
edd8809
 
 
19ab6fa
 
 
 
 
edd8809
 
 
 
 
19ab6fa
 
 
edd8809
 
19ab6fa
edd8809
 
19ab6fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edd8809
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
from arxiv_metadata_service import ArxivMetadataService
import traceback
import logging
from config import DATASET_NAME
from datasets import load_dataset

# Root logger: emit INFO and above, one record per line as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Single service instance created at import time; extract_metadata() below
# routes every extraction request through it.
arxiv_service = ArxivMetadataService()

def extract_metadata(query: str, max_results: int):
    """Run an ArXiv metadata extraction and return text for the result box.

    Delegates to ``arxiv_service.extract_and_update`` and hands its return
    value back unchanged so the UI can display it.

    Args:
        query: Search query entered by the user. Leading and trailing
            whitespace is ignored.
        max_results: Maximum number of results to request. The value comes
            from a UI slider, so it is coerced to ``int`` before being passed
            to the service.

    Returns:
        Whatever ``extract_and_update`` returns on success; a short prompt
        when the query is empty or blank; otherwise an error message that
        includes the exception text and the full traceback.
    """
    # A missing or whitespace-only query cannot describe a search, so answer
    # right away instead of sending it to the service.
    if not query or not query.strip():
        return "Please enter an ArXiv query."

    try:
        result = arxiv_service.extract_and_update(query.strip(), int(max_results))
        logging.info("Extraction result: %s", result)
        return result
    except Exception as e:
        # This is the UI boundary: report the failure as text rather than
        # letting the exception propagate, and log the same message.
        error_msg = f"An error occurred: {e}\n\nTraceback:\n{traceback.format_exc()}"
        logging.error(error_msg)
        return error_msg

def load_dataset_info():
    """Return a one-line summary of the dataset's "train" split.

    Loads ``DATASET_NAME`` with ``load_dataset`` and reports how many records
    the split holds.

    Returns:
        ``"Dataset contains N records."`` on success, or
        ``"Error loading dataset: <message>"`` if loading or counting raises.
    """
    try:
        record_count = len(load_dataset(DATASET_NAME, split="train"))
    except Exception as exc:
        # Report the failure as text so the UI textbox can show it.
        return "Error loading dataset: " + str(exc)
    return f"Dataset contains {record_count} records."

# Build the two-tab UI. Components are created in the same order they should
# appear on the page.
with gr.Blocks() as demo:
    # Intro text with a link to the dataset viewer for DATASET_NAME.
    gr.Markdown(
        f"""Extract metadata from ArXiv papers and update the dataset.
        \n\nCurrently leverages the following datasets:
        \n- [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME}/viewer) dataset.
        """
    )

    # Tab 1: run an extraction for a query and show the returned text.
    with gr.Tab("Extract Metadata"):
        query_input = gr.Textbox(label="ArXiv Query")
        max_results = gr.Slider(
            label="Max Results",
            minimum=1,
            maximum=100,
            step=1,
            value=10,
        )
        submit_button = gr.Button("Extract Metadata")
        output = gr.Textbox(label="Result")

        # Button press -> extract_metadata(query, max_results) -> result box.
        submit_button.click(extract_metadata, [query_input, max_results], output)

    # Tab 2: show the current record count of the dataset on demand.
    with gr.Tab("View Dataset"):
        refresh_button = gr.Button("Refresh Dataset Info")
        dataset_info = gr.Textbox(label="Dataset Info")

        # No inputs: the callback only reads the configured dataset name.
        refresh_button.click(load_dataset_info, [], dataset_info)

# Launch the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()