import requests
from transformers import pipeline
from nltk.tokenize import sent_tokenize
import nltk
from config import MY_PUBMED_EMAIL, MAX_PUBMED_RESULTS, SUMMARIZATION_CHUNK_SIZE
# Ensure the Punkt sentence tokenizer data is available (no-op if already downloaded)
nltk.download("punkt")
# Summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def search_pubmed(query, max_results=MAX_PUBMED_RESULTS):
    """
    Search PubMed for articles matching the query and return a list of PMIDs.
    """
    # NCBI E-utilities ESearch endpoint; retmode=json returns the ID list as JSON.
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
        "tool": "AdvancedMedicalAI",
        "email": MY_PUBMED_EMAIL,
    }
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    data = response.json()
    return data.get("esearchresult", {}).get("idlist", [])
def fetch_abstract(pmid):
    """
    Fetch the abstract of a single PubMed ID as plain text.
    """
    # EFetch with rettype=abstract and retmode=text returns a plain-text abstract.
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {
        "db": "pubmed",
        "id": pmid,
        "retmode": "text",
        "rettype": "abstract",
        "tool": "AdvancedMedicalAI",
        "email": MY_PUBMED_EMAIL,
    }
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    return response.text.strip()
def fetch_pubmed_abstracts(pmids):
    """
    Fetch abstracts for a list of PMIDs, returning a dict of PMID -> abstract text.
    """
    results = {}
    for pmid in pmids:
        try:
            abstract = fetch_abstract(pmid)
            results[pmid] = abstract
        except Exception as e:
            # Record the failure for this PMID instead of aborting the whole batch.
            results[pmid] = f"Error fetching PMID {pmid}: {e}"
    return results
def summarize_text(text, chunk_size=SUMMARIZATION_CHUNK_SIZE):
    """
    Summarize long text by splitting it into sentence-aligned chunks,
    summarizing each chunk, and joining the partial summaries.
    """
    sentences = sent_tokenize(text)
    chunks = []
    current_chunk = []
    current_length = 0
    for sentence in sentences:
        tokens = len(sentence.split())
        # Start a new chunk once adding this sentence would exceed the word budget;
        # the guard on current_chunk avoids appending an empty first chunk.
        if current_chunk and current_length + tokens > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += tokens
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    summaries = []
    for chunk in chunks:
        summary = summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
        summaries.append(summary)
    return " ".join(summaries)
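# Illustrative usage sketch: chains the helpers above into a simple
# query -> fetch -> summarize flow. The query string is only an example;
# config.py is assumed to define MY_PUBMED_EMAIL, MAX_PUBMED_RESULTS, and
# SUMMARIZATION_CHUNK_SIZE as imported at the top of this module.
if __name__ == "__main__":
    query = "metformin cardiovascular outcomes"
    pmids = search_pubmed(query, max_results=3)
    print(f"Found {len(pmids)} PMIDs for '{query}': {pmids}")

    abstracts = fetch_pubmed_abstracts(pmids)
    for pmid, abstract in abstracts.items():
        print(f"\n=== PMID {pmid} ===")
        # Skip summarization for entries that recorded a fetch error.
        if abstract.startswith("Error fetching"):
            print(abstract)
        else:
            print(summarize_text(abstract))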