File size: 2,113 Bytes
edd8809
d7cecb4
19ab6fa
97e8d87
 
19ab6fa
97e8d87
 
 
edd8809
97e8d87
d7cecb4
97e8d87
 
d7cecb4
 
97e8d87
 
 
19ab6fa
edd8809
97e8d87
edd8809
97e8d87
19ab6fa
97e8d87
19ab6fa
97e8d87
19ab6fa
d7cecb4
19ab6fa
 
 
97e8d87
 
19ab6fa
d7cecb4
19ab6fa
 
 
 
 
 
 
 
d7cecb4
19ab6fa
 
 
 
 
 
d7cecb4
19ab6fa
 
d7cecb4
19ab6fa
 
 
edd8809
 
97e8d87
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import gradio as gr
from typing import List, Dict, Any
from config import DATASET_NAME
from arxiv_retrieval_service import ArxivRetrievalService
from dataset_management_service import DatasetManagementService

# Initialize services
# Both objects are created once, at import time, and are used by the handler
# functions in this module.
# Used by handle_metadata_extraction to fetch paper metadata from ArXiv.
arxiv_service = ArxivRetrievalService()
# Constructed with DATASET_NAME (imported from config); used to update and
# read the dataset.
dataset_service = DatasetManagementService(DATASET_NAME)

def handle_metadata_extraction(query: str, max_results: int) -> str:
    """Fetch ArXiv metadata for *query* and add it to the dataset.

    Args:
        query: Search string forwarded to ``arxiv_service.fetch_metadata``.
            Leading and trailing whitespace is ignored.
        max_results: Upper bound on the number of results to request. It is
            coerced to ``int`` because the UI slider may deliver the value
            as a float.

    Returns:
        The value returned by ``dataset_service.update_dataset`` on success,
        otherwise a human-readable message: when the query is blank, when no
        metadata was found, or when an exception was raised.
    """
    try:
        # Reject blank input up front so no request is made for an empty query.
        cleaned_query = (query or "").strip()
        if not cleaned_query:
            return "Please enter an ArXiv query."

        # Fetch metadata from ArXiv
        metadata_list = arxiv_service.fetch_metadata(
            cleaned_query, int(max_results)
        )
        if not metadata_list:
            return "No metadata found for the given query."

        # Update the dataset with new metadata
        return dataset_service.update_dataset(metadata_list)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"An error occurred: {str(e)}"

def handle_dataset_view() -> List[Dict[str, Any]]:
    """Return the dataset records for display in the UI.

    Delegates to ``dataset_service.get_dataset_records``. If that call raises,
    the exception is not propagated; a one-element list containing a dict
    with an ``"error"`` message is returned instead.
    """
    try:
        records = dataset_service.get_dataset_records()
    except Exception as exc:
        failure = {"error": f"Error loading dataset: {exc!s}"}
        return [failure]
    else:
        return records

# Define Gradio interface
# The UI has a header and two tabs: one runs a metadata extraction, the other
# shows the dataset records.
with gr.Blocks() as demo:
    # Header text with a link to the dataset's viewer page on huggingface.co.
    gr.Markdown(
        f"""Extract metadata from ArXiv papers and update the dataset.
        \n\nCurrently leverages the following dataset:
        \n- [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME}/viewer)
        """
    )
    
    # Tab 1: take a query and a result limit, then run the extraction.
    with gr.Tab("Extract Metadata"):
        query_input = gr.Textbox(label="ArXiv Query")
        # Result limit: 1 to 100 in steps of 1, starting at 10.
        max_results = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Max Results")
        submit_button = gr.Button("Extract Metadata")
        output = gr.Textbox(label="Result")
        
        # Wire the button to handle_metadata_extraction: the textbox and
        # slider are its inputs, and the "Result" textbox is its output.
        submit_button.click(
            fn=handle_metadata_extraction,
            inputs=[query_input, max_results],
            outputs=output
        )
    
    # Tab 2: show what handle_dataset_view returns, on demand.
    with gr.Tab("View Dataset"):
        refresh_button = gr.Button("Refresh Dataset Info")
        dataset_info = gr.JSON(label="Dataset Info")
        
        # handle_dataset_view takes no arguments, hence the empty inputs list;
        # the JSON component is its output.
        refresh_button.click(
            fn=handle_dataset_view,
            inputs=[],
            outputs=dataset_info
        )

# Launch the Gradio app only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    demo.launch()