jibsn's picture
Update app.py
7224322 verified
raw
history blame
5.87 kB
import gradio as gr
import csv
import os
from Bio import Entrez
import xml.etree.ElementTree as ET
import time
import pandas as pd
from datetime import datetime
# Configure the e-mail address and API key that NCBI requires; both values
# are read from environment variables (None when the variable is unset).
Entrez.email = os.getenv("EMAIL")
Entrez.api_key = os.getenv("NCBI_API_KEY")
def search_pubmed(query, max_results=100):
    """Search PubMed and return the parsed ESearch result.

    Args:
        query: Search expression, passed to ``Entrez.esearch`` as ``term``.
        max_results: Value passed as ``retmax`` to cap the number of IDs.

    Returns:
        On success, the record produced by ``Entrez.read`` for the search
        (issued with ``usehistory="y"``); ``fetch_details`` reads its
        ``IdList``, ``WebEnv`` and ``QueryKey`` entries. On any failure, a
        string starting with ``"Error during search: "`` -- callers tell the
        two cases apart with ``isinstance(result, str)``.
    """
    try:
        handle = Entrez.esearch(
            db="pubmed", term=query, retmax=max_results, usehistory="y"
        )
        try:
            return Entrez.read(handle)
        finally:
            # Release the handle even when parsing the response raises.
            handle.close()
    except Exception as e:
        return f"Error during search: {str(e)}"
def _element_text(element, default="N/A"):
    """Return all text inside *element* (nested markup included), or *default*."""
    if element is None:
        return default
    # ``.text`` stops at the first child element, so titles or abstracts that
    # contain inline markup would be truncated; ``itertext`` walks everything.
    text = "".join(element.itertext()).strip()
    return text or default


def _format_author(author):
    """Return 'LastName Initials' for an Author element, or its CollectiveName."""
    last_name = _element_text(author.find("LastName"), "")
    initials = _element_text(author.find("Initials"), "")
    name = f"{last_name} {initials}".strip()
    if not name:
        # Group authors carry a CollectiveName instead of personal name parts.
        name = _element_text(author.find("CollectiveName"), "")
    return name


def _abstract_text(article):
    """Join every AbstractText section of an article, prefixing section labels."""
    nodes = article.findall(".//Abstract/AbstractText") or article.findall(".//AbstractText")
    sections = []
    for node in nodes:
        body = _element_text(node, "")
        if not body:
            continue
        label = node.get("Label")
        sections.append(f"{label}: {body}" if label else body)
    return " ".join(sections) if sections else "N/A"


def _publication_year(article):
    """Return the PubDate year, falling back to a leading year in MedlineDate."""
    year = _element_text(article.find(".//PubDate/Year"), "")
    if year:
        return year
    medline_date = _element_text(article.find(".//PubDate/MedlineDate"), "")
    leading = medline_date[:4]
    return leading if len(leading) == 4 and leading.isdigit() else "N/A"


def _parse_article(article, pmid):
    """Convert one PubmedArticle element into the row dict used by the app."""
    author_names = [
        name for name in map(_format_author, article.findall(".//Author")) if name
    ]
    return {
        "PMID": pmid,
        "Title": _element_text(article.find(".//ArticleTitle")),
        "Authors": "; ".join(author_names) if author_names else "N/A",
        "Abstract": _abstract_text(article),
        "Year": _publication_year(article),
        "Journal": _element_text(article.find(".//Journal/Title")),
    }


def fetch_details(search_res):
    """Fetch and parse the PubMed records referenced by a search result.

    Args:
        search_res: Mapping with an ``IdList`` entry (sequence of PMID
            strings) plus the ``WebEnv`` and ``QueryKey`` entries, which are
            forwarded to ``Entrez.efetch``.

    Returns:
        * ``[]`` when ``IdList`` is empty or is a plain string.
        * A list of dicts with the keys ``PMID``, ``Title``, ``Authors``,
          ``Abstract``, ``Year`` and ``Journal``; missing values are ``"N/A"``.
        * A string starting with ``"Error fetching details: "`` when the
          request or the XML parsing fails.

    An article that cannot be converted is reported on stdout and skipped;
    it does not abort the rest of the batch.
    """
    pmids = search_res['IdList']
    if not pmids or isinstance(pmids, str):
        return []
    try:
        # Fetch all article records in a single request.
        handle = Entrez.efetch(
            db="pubmed",
            rettype="medline",
            retmode="xml",
            id=",".join(pmids),
            webenv=search_res['WebEnv'],
            query_key=search_res['QueryKey'],
        )
        try:
            records = handle.read()
        finally:
            # Close the handle even if reading the response fails.
            handle.close()

        root = ET.fromstring(records)
        articles = []
        for article in root.findall(".//PubmedArticle"):
            # Resolve the PMID before the guarded section so the error message
            # below always names the article that actually failed.
            pmid = _element_text(article.find(".//PMID"))
            try:
                articles.append(_parse_article(article, pmid))
            except Exception as e:
                print(f"Error processing article with PMID {pmid}: {e}")
                continue
        return articles
    except Exception as e:
        return f"Error fetching details: {str(e)}"
def save_to_csv(articles, filename="pubmed_results.csv"):
    """Write the article dicts to a UTF-8 CSV file and return its path.

    Args:
        articles: List of row dicts keyed by the column names below. An empty
            value or a string (an upstream error message) is not written.
        filename: Destination path of the CSV file.

    Returns:
        ``filename`` once the file has been written, or ``None`` when there
        was nothing to write.
    """
    if isinstance(articles, str) or not articles:
        return None

    columns = ["PMID", "Title", "Authors", "Abstract", "Year", "Journal"]
    with open(filename, "w", newline="", encoding="utf-8") as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()
        sheet.writerows(articles)
    return filename
def search_and_display(query, max_results):
    """Run a PubMed search and build the values for the three UI outputs.

    The Gradio click handler maps the returned tuple onto
    ``[output_table, output_file, output_text]``, so every return path uses
    the order ``(table, csv_path, status_message)``. Failure paths return
    ``None`` for the table and the file so the message lands in the status
    box instead of the results table.

    Args:
        query: PubMed search expression typed by the user.
        max_results: Requested number of results; anything ``int()`` accepts.

    Returns:
        ``(DataFrame, csv_path, status)`` on success, or
        ``(None, None, message)`` when the input is invalid, the search or
        fetch fails, or nothing is found.
    """
    if not query or not query.strip():
        return None, None, "Please enter a search query."

    try:
        max_results = int(max_results)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None (an emptied number field); OverflowError
        # covers infinities.
        return None, None, "Max results must be a valid number."
    if max_results <= 0:
        return None, None, "Max results must be a positive number."

    # Run the search; a string result is an error message from search_pubmed.
    search_res = search_pubmed(query, max_results)
    if isinstance(search_res, str):
        return None, None, search_res
    # The search result is a mapping, which is truthy even when no IDs
    # matched, so the ID list itself has to be inspected.
    if not search_res or not search_res.get("IdList"):
        return None, None, "No results found."

    # Fetch the details; a string result is an error message.
    articles = fetch_details(search_res)
    if isinstance(articles, str):
        return None, None, articles
    if not articles:
        return None, None, "No valid articles retrieved."

    # DataFrame for on-screen display.
    df = pd.DataFrame(articles)

    # Timestamped CSV file for download.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f"pubmed_results_{timestamp}.csv"
    csv_path = save_to_csv(articles, csv_filename)
    return df, csv_path, f"Found {len(articles)} articles."
# Gradio interface.
# NOTE(review): the original indentation was lost; this layout assumes only
# the two inputs sit inside the Row and that launch() runs at module level --
# confirm against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("# PubMed Search App")
    gr.Markdown(
        "Enter a PubMed search query and the maximum number of results to retrieve. Results will be displayed in a table and available for download as a CSV file."
    )

    with gr.Row():
        query_input = gr.Textbox(
            label="Search Query",
            placeholder="e.g., breast cancer AND 2020[PDAT]",
        )
        max_results_input = gr.Number(
            label="Max Results", value=10, minimum=1, maximum=100
        )

    search_button = gr.Button("Search")
    output_text = gr.Textbox(label="Status")
    output_table = gr.DataFrame(label="Search Results")
    output_file = gr.File(label="Download CSV")

    # The handler's return values are assigned to the outputs in list order:
    # table first, then the CSV file, then the status text.
    search_button.click(
        search_and_display,
        [query_input, max_results_input],
        [output_table, output_file, output_text],
    )

# Start the Gradio app.
demo.launch()