File size: 5,916 Bytes
eb07418
 
 
 
 
 
 
 
 
15bfd9b
 
 
eb07418
 
 
 
0ba64e8
eb07418
 
0ba64e8
 
eb07418
 
 
0ba64e8
eb07418
0ba64e8
eb07418
 
 
 
 
7224322
0ba64e8
eb07418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a51627
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import gradio as gr
import csv
import os
from Bio import Entrez
import xml.etree.ElementTree as ET
import time
import pandas as pd
from datetime import datetime

# NCBI etiquette: Entrez requires a contact e-mail, and an API key raises the
# request-rate limit. Both are read from environment variables here (each will
# be None if the corresponding variable is unset).
Entrez.email = os.environ.get("EMAIL")
Entrez.api_key = os.environ.get("NCBI_API_KEY")

def search_pubmed(query, max_results=100):
    """Search PubMed for *query* via Entrez.esearch.

    Returns the raw esearch record — a dict containing "IdList" plus the
    history-server keys "WebEnv" and "QueryKey" that fetch_details needs —
    or an error string on failure so the UI can display it.
    """
    try:
        # usehistory="y" stores the result set server-side; the returned
        # WebEnv/QueryKey let efetch pull details without resending PMIDs.
        handle = Entrez.esearch(db="pubmed", term=query,
                                retmax=max_results, usehistory="y")
        record = Entrez.read(handle)
        handle.close()
        return record
    except Exception as e:
        # Broad catch on purpose: network/NCBI failures of any kind become
        # a user-visible message instead of crashing the app.
        return f"Error during search: {str(e)}"

def _parse_pubmed_articles(xml_text):
    """Parse PubMed efetch XML into a list of article dicts.

    Each dict has the keys PMID/Title/Authors/Abstract/Year/Journal, with
    "N/A" standing in for any element missing from a record.
    """
    root = ET.fromstring(xml_text)
    articles = []
    for article in root.findall(".//PubmedArticle"):
        # Resolve the PMID before anything else so the error log below can
        # always name the offending record. (The original looked it up last,
        # so a failure in an earlier field made the except clause reference
        # a possibly-unbound variable.)
        pmid_node = article.find(".//PMID")
        pmid = pmid_node.text if pmid_node is not None else "N/A"
        try:
            title_node = article.find(".//ArticleTitle")
            title = title_node.text if title_node is not None else "N/A"

            author_list = []
            for author in article.findall(".//Author"):
                last_name = author.find("LastName")
                initials = author.find("Initials")
                full_name = "{} {}".format(
                    last_name.text if last_name is not None else "",
                    initials.text if initials is not None else "",
                ).strip()
                author_list.append(full_name)
            authors_str = "; ".join(author_list) if author_list else "N/A"

            abstract = article.find(".//AbstractText")
            abstract_text = abstract.text if abstract is not None else "N/A"

            year_node = article.find(".//PubDate/Year")
            pub_year = year_node.text if year_node is not None else "N/A"

            journal_node = article.find(".//Journal/Title")
            journal = journal_node.text if journal_node is not None else "N/A"

            articles.append({
                "PMID": pmid,
                "Title": title,
                "Authors": authors_str,
                "Abstract": abstract_text,
                "Year": pub_year,
                "Journal": journal,
            })
        except Exception as e:
            # Skip a malformed record but keep processing the rest.
            print(f"Error processing article with PMID {pmid}: {e}")
            continue
    return articles


def fetch_details(search_res):
    """Fetch and parse details for the PMIDs in an esearch record.

    *search_res* is the dict returned by search_pubmed. Returns a list of
    article dicts (possibly empty), or an error string on fetch failure.
    """
    pmids = search_res['IdList']
    # No hits — or an upstream error string — means nothing to fetch.
    if not pmids or isinstance(pmids, str):
        return []

    try:
        # One batched request via the NCBI history server (WebEnv/QueryKey)
        # instead of one request per PMID.
        handle = Entrez.efetch(db="pubmed", rettype="medline", retmode="xml",
                               id=",".join(pmids),
                               webenv=search_res['WebEnv'],
                               query_key=search_res['QueryKey'])
        records = handle.read()
        handle.close()
        return _parse_pubmed_articles(records)
    except Exception as e:
        return f"Error fetching details: {str(e)}"

def save_to_csv(articles, filename="pubmed_results.csv"):
    """Write the article dicts to *filename* as CSV and return that path.

    Returns None when there is nothing to save — an empty result list, or
    an error string handed down from an upstream step.
    """
    if isinstance(articles, str) or not articles:
        return None

    fieldnames = ["PMID", "Title", "Authors", "Abstract", "Year", "Journal"]
    with open(filename, "w", newline="", encoding="utf-8") as out:
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(articles)
    return filename

def search_and_display(query, max_results):
    """Run a PubMed search; return a (table, csv_path, status) tuple.

    The tuple order matches the Gradio wiring outputs=[output_table,
    output_file, output_text], so error messages are returned in the THIRD
    slot (the Status textbox). The original returned them in the first slot,
    which the UI feeds to the DataFrame component, leaving Status empty.
    """
    if not query:
        return None, None, "Please enter a search query."

    try:
        max_results = int(max_results)
        if max_results <= 0:
            return None, None, "Max results must be a positive number."
    except (ValueError, TypeError):
        # TypeError covers None coming from an empty gr.Number field.
        return None, None, "Max results must be a valid number."

    # Run the search; a plain string signals an error from search_pubmed.
    search_res = search_pubmed(query, max_results)
    if isinstance(search_res, str):
        return None, None, search_res

    # The esearch record is a non-empty dict even for zero hits, so test the
    # IdList inside it (the original tested the dict itself — a dead branch).
    if not search_res.get("IdList"):
        return None, None, "No results found."

    # Fetch details; again a string return signals an error.
    articles = fetch_details(search_res)
    if isinstance(articles, str):
        return None, None, articles

    if not articles:
        return None, None, "No valid articles retrieved."

    # DataFrame for on-screen display.
    df = pd.DataFrame(articles)

    # Timestamped filename so repeated searches don't overwrite each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_path = save_to_csv(articles, f"pubmed_results_{timestamp}.csv")

    return df, csv_path, f"Found {len(articles)} articles."

# Gradio UI. Component creation order inside the Blocks context defines the
# on-screen layout, so the statement order below is significant.
with gr.Blocks() as demo:
    gr.Markdown("# PubMed Search App")
    gr.Markdown("Enter a PubMed search query and the maximum number of results to retrieve. Results will be displayed in a table and available for download as a CSV file.")
    
    # Inputs: free-text query plus a bounded result count (1..100).
    with gr.Row():
        query_input = gr.Textbox(label="Search Query", placeholder="e.g., breast cancer AND 2020[PDAT]")
        max_results_input = gr.Number(label="Max Results", value=10, minimum=1, maximum=100)
    
    search_button = gr.Button("Search")
    
    # Outputs: status line, results table, and a downloadable CSV file.
    output_text = gr.Textbox(label="Status")
    output_table = gr.DataFrame(label="Search Results")
    output_file = gr.File(label="Download CSV")
    
    # Wire the button to the handler; the outputs list order must match the
    # order of search_and_display's return tuple.
    search_button.click(
        fn=search_and_display,
        inputs=[query_input, max_results_input],
        outputs=[output_table, output_file, output_text]
    )

# Launch the Gradio app.
# NOTE(review): mcp_server=True presumably enables Gradio's MCP server mode —
# confirm against the installed Gradio version's launch() documentation.
if __name__ == "__main__":
    demo.launch(mcp_server=True)