Spaces:
Running
Running
import gradio as gr | |
import csv | |
import os | |
from Bio import Entrez | |
import xml.etree.ElementTree as ET | |
import time | |
import pandas as pd | |
from datetime import datetime | |
# Contact email and API key required by NCBI, read from environment variables
# so that credentials are never hard-coded in the source.
Entrez.email = os.getenv("EMAIL")
Entrez.api_key = os.getenv("NCBI_API_KEY")
def search_pubmed(query, max_results=100):
    """Run an ESearch query against PubMed.

    Args:
        query: PubMed search expression, sent to ESearch as ``term``.
        max_results: Upper bound on the number of IDs requested (``retmax``).

    Returns:
        The parsed ESearch record on success. The search is issued with
        ``usehistory="y"`` and the whole record (not just its ID list) is
        returned. On any failure a string beginning with
        ``"Error during search: "`` is returned instead of raising, so
        callers must check ``isinstance(result, str)``.
    """
    try:
        handle = Entrez.esearch(
            db="pubmed", term=query, retmax=max_results, usehistory="y"
        )
        try:
            return Entrez.read(handle)
        finally:
            # Release the connection even when parsing the response fails.
            handle.close()
    except Exception as e:
        # Best-effort boundary: the UI shows this text rather than crashing.
        return f"Error during search: {str(e)}"
def _element_text(element, default="N/A"):
    """Return the full text of *element*, or *default* if missing or empty.

    ``itertext`` is used rather than ``.text`` so that text following inline
    markup (for example ``<i>`` or ``<sup>`` inside a title) is not dropped.
    """
    if element is None:
        return default
    text = "".join(element.itertext()).strip()
    return text or default


def _format_author(author):
    """Return ``"LastName Initials"`` for an ``Author`` element.

    Falls back to ``CollectiveName`` (used for consortia and study groups)
    and returns ``""`` when the element carries no usable name at all.
    """
    last_name = _element_text(author.find("LastName"), "")
    initials = _element_text(author.find("Initials"), "")
    name = f"{last_name} {initials}".strip()
    return name or _element_text(author.find("CollectiveName"), "")


def _abstract_text(article):
    """Return the abstract of a ``PubmedArticle`` with every section joined."""
    container = article.find(".//Abstract")
    if container is None:
        container = article.find(".//OtherAbstract")
    if container is None:
        return "N/A"
    sections = []
    for section in container.findall("AbstractText"):
        text = _element_text(section, "")
        if not text:
            continue
        # Structured abstracts label each section (BACKGROUND, METHODS, ...).
        label = section.get("Label")
        if label and label.upper() != "UNLABELLED":
            text = f"{label}: {text}"
        sections.append(text)
    return " ".join(sections) if sections else "N/A"


def _publication_year(article):
    """Return the publication year, falling back to ``MedlineDate``."""
    year = article.find(".//PubDate/Year")
    if year is not None:
        return _element_text(year)
    # Some records only carry a free-form date such as "1998 Dec-1999 Jan".
    medline_date = _element_text(article.find(".//PubDate/MedlineDate"), "")
    for token in medline_date.split():
        candidate = token[:4]
        if len(candidate) == 4 and candidate.isdigit():
            return candidate
    return "N/A"


def _parse_pubmed_article(article):
    """Convert one ``PubmedArticle`` element into the flat result dict."""
    names = [_format_author(author) for author in article.findall(".//Author")]
    names = [name for name in names if name]
    return {
        "PMID": _element_text(article.find(".//PMID")),
        "Title": _element_text(article.find(".//ArticleTitle")),
        "Authors": "; ".join(names) if names else "N/A",
        "Abstract": _abstract_text(article),
        "Year": _publication_year(article),
        "Journal": _element_text(article.find(".//Journal/Title")),
    }


def fetch_details(search_res):
    """Fetch and parse the PubMed records referenced by a search result.

    Args:
        search_res: Mapping with ``IdList``, ``WebEnv`` and ``QueryKey``
            entries (the record produced by the search step).

    Returns:
        A list of dicts with the keys ``PMID``, ``Title``, ``Authors``,
        ``Abstract``, ``Year`` and ``Journal``; missing fields are ``"N/A"``.
        Returns ``[]`` when ``IdList`` is empty or is a string. On a fetch or
        XML-parse failure, returns a string beginning with
        ``"Error fetching details: "`` instead of raising.
    """
    pmids = search_res['IdList']
    if not pmids or isinstance(pmids, str):
        return []
    try:
        # Fetch all records in a single request.
        handle = Entrez.efetch(
            db="pubmed",
            rettype="medline",
            retmode="xml",
            id=",".join(pmids),
            webenv=search_res['WebEnv'],
            query_key=search_res['QueryKey'],
        )
        try:
            records = handle.read()
        finally:
            # Close the connection even if reading the response fails.
            handle.close()

        root = ET.fromstring(records)
        articles = []
        for article in root.findall(".//PubmedArticle"):
            try:
                articles.append(_parse_pubmed_article(article))
            except Exception as e:
                # Best effort: skip a malformed record, keep the rest. The
                # PMID is looked up here so the log never refers to an
                # unbound name or to the previous article's PMID.
                pmid = _element_text(article.find(".//PMID"))
                print(f"Error processing article with PMID {pmid}: {e}")
                continue
        return articles
    except Exception as e:
        return f"Error fetching details: {str(e)}"
def save_to_csv(articles, filename="pubmed_results.csv"):
    """Write article records to a CSV file and return the file path.

    Args:
        articles: List of dicts keyed by ``PMID``, ``Title``, ``Authors``,
            ``Abstract``, ``Year`` and ``Journal``.
        filename: Path of the CSV file to create or overwrite.

    Returns:
        ``filename`` once the file has been written, or ``None`` when
        ``articles`` is empty or is a string (nothing is written then).
    """
    nothing_to_write = isinstance(articles, str) or not articles
    if nothing_to_write:
        return None

    columns = ["PMID", "Title", "Authors", "Abstract", "Year", "Journal"]
    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="", encoding="utf-8") as out:
        writer = csv.DictWriter(out, fieldnames=columns)
        writer.writeheader()
        writer.writerows(articles)
    return filename
def search_and_display(query, max_results):
    """Search PubMed and build the values shown by the UI.

    Args:
        query: PubMed search expression typed by the user.
        max_results: Requested maximum number of results; anything that
            ``int()`` accepts.

    Returns:
        A tuple ``(table, csv_path, status)``. On success ``table`` is a
        DataFrame of the articles, ``csv_path`` is the path of the written
        CSV file and ``status`` reports how many articles were found. On
        invalid input or a failed step, ``table`` and ``csv_path`` are
        ``None`` and ``status`` carries the message, so the message is
        always in the same position as the success status.
    """
    if not query or not str(query).strip():
        return None, None, "Please enter a search query."
    try:
        max_results = int(max_results)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers an empty number field (None).
        return None, None, "Max results must be a valid number."
    if max_results <= 0:
        return None, None, "Max results must be a positive number."

    # Run the search; a string result is an error message.
    search_res = search_pubmed(query, max_results)
    if isinstance(search_res, str):
        return None, None, search_res
    # The search record itself is never empty, so test its ID list.
    if not search_res or not search_res["IdList"]:
        return None, None, "No results found."

    # Fetch the article details; a string result is an error message.
    articles = fetch_details(search_res)
    if isinstance(articles, str):
        return None, None, articles
    if not articles:
        return None, None, "No valid articles retrieved."

    # DataFrame for on-screen display.
    df = pd.DataFrame(articles)

    # Timestamped file name so successive searches do not overwrite each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f"pubmed_results_{timestamp}.csv"
    csv_path = save_to_csv(articles, csv_filename)
    return df, csv_path, f"Found {len(articles)} articles."
# Gradio interface
# NOTE(review): the original indentation was lost; this assumes only the two
# input fields share the row and that launch() sits outside the Blocks
# context. Confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("# PubMed Search App")
    gr.Markdown(
        "Enter a PubMed search query and the maximum number of results to "
        "retrieve. Results will be displayed in a table and available for "
        "download as a CSV file."
    )

    with gr.Row():
        query_input = gr.Textbox(
            label="Search Query",
            placeholder="e.g., breast cancer AND 2020[PDAT]",
        )
        max_results_input = gr.Number(
            label="Max Results",
            value=10,
            minimum=1,
            maximum=100,
        )

    search_button = gr.Button("Search")
    output_text = gr.Textbox(label="Status")
    output_table = gr.DataFrame(label="Search Results")
    output_file = gr.File(label="Download CSV")

    # The outputs order must match the tuple returned by search_and_display.
    search_button.click(
        fn=search_and_display,
        inputs=[query_input, max_results_input],
        outputs=[output_table, output_file, output_text],
    )

# Launch the Gradio app
demo.launch()