import requests
from transformers import pipeline
from nltk.tokenize import sent_tokenize
import nltk
from config import MY_PUBMED_EMAIL, MAX_PUBMED_RESULTS, SUMMARIZATION_CHUNK_SIZE
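
# NOTE (assumption): config.py is not shown in this file; it is expected to
# define MY_PUBMED_EMAIL (contact email sent to the NCBI E-utilities),
# MAX_PUBMED_RESULTS (default number of PMIDs returned by search_pubmed), and
# SUMMARIZATION_CHUNK_SIZE (approximate word budget per summarization chunk).
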
nltk.download("punkt")
# Summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


def search_pubmed(query, max_results=MAX_PUBMED_RESULTS):
    """
    Search PubMed for articles matching the query.

    Returns a list of PubMed IDs (PMIDs) from the NCBI ESearch endpoint.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
        "tool": "AdvancedMedicalAI",
        "email": MY_PUBMED_EMAIL,
    }
    response = requests.get(url, params=params)
    response.raise_for_status()
    data = response.json()
    return data.get("esearchresult", {}).get("idlist", [])


def fetch_abstract(pmid):
    """
    Fetch the abstract of a given PubMed ID as plain text via EFetch.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {
        "db": "pubmed",
        "id": pmid,
        "retmode": "text",
        "rettype": "abstract",
        "tool": "AdvancedMedicalAI",
        "email": MY_PUBMED_EMAIL,
    }
    response = requests.get(url, params=params)
    response.raise_for_status()
    return response.text.strip()


def fetch_pubmed_abstracts(pmids):
    """
    Fetch abstracts for a list of PMIDs, mapping each PMID to its abstract
    (or to an error message if the fetch fails).
    """
    results = {}
    for pmid in pmids:
        try:
            results[pmid] = fetch_abstract(pmid)
        except Exception as e:
            results[pmid] = f"Error fetching PMID {pmid}: {e}"
    return results


def summarize_text(text, chunk_size=SUMMARIZATION_CHUNK_SIZE):
    """
    Summarize long text by splitting it into sentence-aligned chunks of at most
    chunk_size words, summarizing each chunk, and joining the partial summaries.
    """
    sentences = sent_tokenize(text)
    chunks = []
    current_chunk = []
    current_length = 0
    for sentence in sentences:
        tokens = len(sentence.split())
        # Close the current chunk when adding this sentence would exceed the budget;
        # the current_chunk check avoids emitting an empty chunk when the very first
        # sentence is already longer than chunk_size.
        if current_chunk and current_length + tokens > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += tokens
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    summaries = [
        summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(summaries)