File size: 3,241 Bytes
edd8809
d7cecb4
19ab6fa
97e8d87
 
79cf287
 
 
19ab6fa
97e8d87
 
edd8809
97e8d87
d7cecb4
79cf287
97e8d87
d7cecb4
 
97e8d87
 
79cf287
19ab6fa
edd8809
79cf287
 
 
edd8809
79cf287
 
19ab6fa
79cf287
 
 
 
 
 
 
 
 
 
e66e19e
79cf287
 
 
 
19ab6fa
79cf287
 
 
19ab6fa
 
 
79cf287
 
 
 
19ab6fa
d7cecb4
19ab6fa
 
 
 
 
 
 
 
d7cecb4
19ab6fa
 
 
 
 
79cf287
e66e19e
79cf287
d7cecb4
19ab6fa
 
d7cecb4
e66e19e
19ab6fa
 
edd8809
 
97e8d87
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
from typing import List, Dict, Any
from config import DATASET_NAME
from arxiv_retrieval_service import ArxivRetrievalService
from dataset_management_service import DatasetManagementService
import logging

# Root-logger configuration for the whole app: timestamped, INFO-level output.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Module-level service singletons shared by every Gradio event handler below.
arxiv_service = ArxivRetrievalService()
# DATASET_NAME (from config) identifies the Hugging Face dataset to manage.
dataset_service = DatasetManagementService(DATASET_NAME)

def handle_metadata_extraction(query: str, max_results: int) -> str:
    """Fetch ArXiv metadata for *query* and merge it into the managed dataset.

    Args:
        query: ArXiv search query string.
        max_results: Maximum number of papers to fetch.

    Returns:
        A human-readable status message (update summary, "no results" notice,
        or error text) — shown directly in the Gradio output textbox.
    """
    try:
        # Lazy %-style args: the message is only formatted if INFO is enabled.
        logging.info("Fetching metadata for query: %s, max_results: %s", query, max_results)
        metadata_list = arxiv_service.fetch_metadata(query, max_results)
        if not metadata_list:
            return "No metadata found for the given query."

        result = dataset_service.update_dataset(metadata_list)
        logging.info("Dataset update result: %s", result)
        return result
    except Exception as e:
        # Broad catch is deliberate: this is a UI boundary, so surface the
        # failure to the user instead of crashing the Gradio app.
        error_msg = f"An error occurred during metadata extraction: {str(e)}"
        # logging.exception records the full traceback at ERROR level.
        logging.exception(error_msg)
        return error_msg

def handle_dataset_view(page: int = 1, page_size: int = 10) -> Dict[str, Any]:
    """Return one page of dataset records plus paging metadata.

    Args:
        page: 1-based page number. The Gradio Number widget may deliver a
            float or an out-of-range value; it is coerced and clamped.
        page_size: Number of records per page; clamped to at least 1.

    Returns:
        On success: ``{"total_records", "current_page", "page_size", "records"}``.
        On failure: ``{"error": <message>}`` so the JSON panel shows the cause.
    """
    logging.info("handle_dataset_view called with page=%s, page_size=%s", page, page_size)
    try:
        # Defensive normalization: gr.Number can hand back floats (or None on
        # a cleared field); keep coercion inside the try so bad input is
        # reported via the error path instead of raising out of the handler.
        page = max(1, int(page))
        page_size = max(1, int(page_size))

        total_records = dataset_service.get_dataset_size()
        logging.info("Total records: %s", total_records)

        records = dataset_service.get_dataset_records(page, page_size)
        logging.info("Records type: %s", type(records))
        logging.info("Number of records returned: %s", len(records))

        result = {
            "total_records": total_records,
            "current_page": page,
            "page_size": page_size,
            "records": records
        }
        logging.info("Returning result: %s", result)
        return result
    except Exception as e:
        # UI boundary: report failure in the JSON panel rather than crash.
        error_msg = f"Error loading dataset: {str(e)}"
        # logging.exception records the full traceback at ERROR level.
        logging.exception(error_msg)
        return {"error": error_msg}

# Gradio UI definition. Two tabs share the module-level service singletons:
# one triggers extraction + dataset update, the other paginates the dataset.
with gr.Blocks() as demo:
    # Header with a live link to the dataset on the Hugging Face Hub viewer.
    gr.Markdown(
        f"""# ArXiv Metadata Extraction and Dataset Management
        
        This application extracts metadata from ArXiv papers and manages the dataset:
        [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME}/viewer)
        """
    )
    
    # Tab 1: run an ArXiv search and append the results to the dataset.
    with gr.Tab("Extract Metadata"):
        query_input = gr.Textbox(label="ArXiv Query")
        max_results = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Max Results")
        submit_button = gr.Button("Extract Metadata")
        output = gr.Textbox(label="Result")  # status string from the handler
        
        submit_button.click(
            fn=handle_metadata_extraction,
            inputs=[query_input, max_results],
            outputs=output
        )
    
    # Tab 2: paginated read-only view of the current dataset contents.
    with gr.Tab("View Dataset"):
        # precision=0 asks Gradio for an integer page number.
        page_number = gr.Number(value=1, label="Page Number", precision=0)
        page_size = gr.Slider(minimum=5, maximum=50, value=10, step=5, label="Page Size")
        refresh_button = gr.Button("Refresh Dataset View")
        dataset_info = gr.JSON(label="Dataset Info")  # renders the handler's dict
        
        refresh_button.click(
            fn=handle_dataset_view,
            inputs=[page_number, page_size],
            outputs=dataset_info
        )

# Start the Gradio server only when run as a script (not when imported,
# e.g. by a Hugging Face Space runner that launches `demo` itself).
if __name__ == "__main__":
    demo.launch()