Spaces:

jibsn
/

pubmed-mcp-server

Running

App Files Files Community

pubmed-mcp-server / app.py

jibsn

Update app.py

7224322 verified 2 months ago

raw

history blame

5.87 kB

	import gradio as gr
	import csv
	import os
	from Bio import Entrez
	import xml.etree.ElementTree as ET
	import time
	import pandas as pd
	from datetime import datetime

	# Contact e-mail and API key that NCBI asks clients to send; both are read
	# from environment variables (None when the variable is not set).
	Entrez.email = os.getenv("EMAIL")
	Entrez.api_key = os.getenv("NCBI_API_KEY")

	def search_pubmed(query, max_results=100):
	    """Run a PubMed search and return the parsed ESearch record.

	    Args:
	        query: PubMed search expression, passed to ESearch as ``term``.
	        max_results: Upper bound on returned PMIDs, passed as ``retmax``.

	    Returns:
	        The record produced by ``Entrez.read`` (other code in this file reads
	        its "IdList", "WebEnv" and "QueryKey" entries), or, on any failure,
	        a string starting with "Error during search: ".
	    """
	    try:
	        handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results, usehistory="y")
	        try:
	            return Entrez.read(handle)
	        finally:
	            # Release the connection even when parsing the response fails.
	            handle.close()
	    except Exception as e:
	        # Errors are reported as a string; callers distinguish them with isinstance(..., str).
	        return f"Error during search: {str(e)}"

	def _node_text(node, default="N/A"):
	    """Return an element's text, including text nested in child tags, or `default`.

	    `default` is returned when the element is missing or holds no non-blank text.
	    """
	    if node is None:
	        return default
	    # .text alone stops at the first child tag (e.g. <i>, <sub>), so collect
	    # every text fragment below the element instead.
	    text = "".join(node.itertext()).strip()
	    return text or default


	def _parse_article(article, pmid):
	    """Flatten one <PubmedArticle> element into the dict used for display and CSV export."""
	    author_names = []
	    for author in article.findall(".//Author"):
	        last_name = _node_text(author.find("LastName"), "")
	        initials = _node_text(author.find("Initials"), "")
	        name = f"{last_name} {initials}".strip()
	        if not name:
	            # Group authors have no personal name parts, only a CollectiveName.
	            name = _node_text(author.find("CollectiveName"), "")
	        if name:
	            author_names.append(name)

	    # Structured abstracts are split into several labelled <AbstractText>
	    # sections; keep all of them. Fall back to any AbstractText in the record
	    # when the main <Abstract> element is absent.
	    abstract_nodes = article.findall(".//Abstract/AbstractText") or article.findall(".//AbstractText")
	    sections = []
	    for section in abstract_nodes:
	        body = _node_text(section, "")
	        if not body:
	            continue
	        label = section.get("Label")
	        sections.append(f"{label}: {body}" if label else body)

	    year = _node_text(article.find(".//PubDate/Year"), "")
	    if not year:
	        # Some records carry a free-form MedlineDate (e.g. "2020 Jan-Feb")
	        # instead of a Year element; use its leading four digits when present.
	        medline_date = _node_text(article.find(".//PubDate/MedlineDate"), "")
	        year = medline_date[:4] if medline_date[:4].isdigit() else "N/A"

	    return {
	        "PMID": pmid,
	        "Title": _node_text(article.find(".//ArticleTitle")),
	        "Authors": "; ".join(author_names) if author_names else "N/A",
	        "Abstract": " ".join(sections) if sections else "N/A",
	        "Year": year,
	        "Journal": _node_text(article.find(".//Journal/Title")),
	    }


	def fetch_details(search_res):
	    """Fetch and parse the PubMed records for a search result.

	    Args:
	        search_res: Mapping with an "IdList" entry and, optionally, "WebEnv"
	            and "QueryKey" entries (as returned by search_pubmed). An error
	            string from a failed search is also accepted.

	    Returns:
	        A list of dicts with the keys "PMID", "Title", "Authors", "Abstract",
	        "Year" and "Journal" (missing values are "N/A"); an empty list when
	        there is nothing to fetch; or a string starting with
	        "Error fetching details: " when the request or XML parsing fails.
	    """
	    # A failed search is reported as a plain string, which has no IdList to index.
	    if not search_res or isinstance(search_res, str):
	        return []
	    pmids = search_res.get("IdList")
	    if not pmids or isinstance(pmids, str):
	        return []

	    try:
	        # Fetch all records in a single request.
	        request = {
	            "db": "pubmed",
	            "rettype": "medline",
	            "retmode": "xml",
	            "id": ",".join(pmids),
	        }
	        # Forward the history-server handles only when the search supplied both.
	        if search_res.get("WebEnv") and search_res.get("QueryKey"):
	            request["webenv"] = search_res["WebEnv"]
	            request["query_key"] = search_res["QueryKey"]

	        handle = Entrez.efetch(**request)
	        try:
	            payload = handle.read()
	        finally:
	            # Release the connection even when reading the response fails.
	            handle.close()

	        root = ET.fromstring(payload)
	        articles = []
	        for article in root.findall(".//PubmedArticle"):
	            # Resolve the PMID before parsing so a failure is attributed to
	            # this record rather than to an unbound or previous value.
	            pmid = _node_text(article.find(".//PMID"))
	            try:
	                articles.append(_parse_article(article, pmid))
	            except Exception as e:
	                # Best effort: skip a record that cannot be parsed, keep the rest.
	                print(f"Error processing article with PMID {pmid}: {e}")
	                continue

	        return articles
	    except Exception as e:
	        return f"Error fetching details: {str(e)}"

	def save_to_csv(articles, filename="pubmed_results.csv"):
	    """Write the article dicts to a CSV file and return the path written.

	    Returns None, without creating a file, when `articles` is empty or is a
	    string (error results in this file are passed around as strings).
	    """
	    nothing_to_write = isinstance(articles, str) or not articles
	    if nothing_to_write:
	        return None

	    columns = ["PMID", "Title", "Authors", "Abstract", "Year", "Journal"]
	    with open(filename, mode="w", newline="", encoding="utf-8") as out:
	        table = csv.DictWriter(out, fieldnames=columns)
	        table.writeheader()
	        table.writerows(articles)
	    return filename

	def search_and_display(query, max_results):
	    """Validate the inputs, run the search and build the three UI outputs.

	    Args:
	        query: Search expression entered by the user.
	        max_results: Requested number of results; any value int() accepts.

	    Returns:
	        A ``(table, csv_path, status)`` tuple. On success `table` is a
	        DataFrame of the articles, `csv_path` the CSV file written for
	        download and `status` a summary message. On any failure `table` and
	        `csv_path` are None and `status` carries the error message.
	    """

	    def failure(message):
	        # Keep the message in the third (status) slot so failures use the
	        # same tuple layout as the success path.
	        return None, None, message

	    if not query or not query.strip():
	        return failure("Please enter a search query.")

	    try:
	        max_results = int(max_results)
	    except (TypeError, ValueError, OverflowError):
	        # TypeError covers None (an empty number field); OverflowError covers infinity.
	        return failure("Max results must be a valid number.")
	    if max_results <= 0:
	        return failure("Max results must be a positive number.")

	    # Run the search.
	    search_res = search_pubmed(query.strip(), max_results)
	    if isinstance(search_res, str):
	        return failure(search_res)

	    # The search returns a whole record, so emptiness must be judged on its ID list.
	    if not search_res or not search_res.get("IdList"):
	        return failure("No results found.")

	    # Fetch the article details.
	    articles = fetch_details(search_res)
	    if isinstance(articles, str):
	        return failure(articles)
	    if not articles:
	        return failure("No valid articles retrieved.")

	    # DataFrame for on-screen display.
	    df = pd.DataFrame(articles)

	    # Timestamped CSV file for download.
	    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	    csv_path = save_to_csv(articles, f"pubmed_results_{timestamp}.csv")

	    return df, csv_path, f"Found {len(articles)} articles."

	# Gradio interface: heading and description, one row of inputs, the search
	# button, then the status, table and download outputs.
	with gr.Blocks() as demo:
	    gr.Markdown(value="# PubMed Search App")
	    gr.Markdown(
	        value="Enter a PubMed search query and the maximum number of results to retrieve. Results will be displayed in a table and available for download as a CSV file."
	    )

	    with gr.Row():
	        query_input = gr.Textbox(
	            label="Search Query",
	            placeholder="e.g., breast cancer AND 2020[PDAT]",
	        )
	        max_results_input = gr.Number(
	            label="Max Results",
	            value=10,
	            minimum=1,
	            maximum=100,
	        )

	    search_button = gr.Button(value="Search")

	    output_text = gr.Textbox(label="Status")
	    output_table = gr.DataFrame(label="Search Results")
	    output_file = gr.File(label="Download CSV")

	    # The handler's return values are assigned to these outputs positionally.
	    search_button.click(
	        search_and_display,
	        [query_input, max_results_input],
	        [output_table, output_file, output_text],
	    )

	# Start the Gradio app.
	demo.launch()