File size: 2,498 Bytes
455290d f3774c1 3e78ff5 455290d 3e78ff5 455290d f3774c1 455290d 3e78ff5 455290d f3774c1 455290d f3774c1 455290d f3774c1 3e78ff5 455290d f3774c1 455290d 3e78ff5 455290d f3774c1 455290d f3774c1 455290d f3774c1 455290d f3774c1 455290d 3e78ff5 455290d f3774c1 455290d f3774c1 455290d f3774c1 455290d 3e78ff5 455290d 3e78ff5 455290d 3e78ff5 455290d f3774c1 3e78ff5 455290d f3774c1 3e78ff5 455290d 3e78ff5 f3774c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from transformers import pipeline
from config import PUBMED_EMAIL, CHUNK_SIZE
# Summarization pipeline: HuggingFace BART fine-tuned for abstractive
# summarization (facebook/bart-large-cnn). Loaded once at import time —
# first run downloads the model, so module import can be slow.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def search_pubmed(query, max_results=5, timeout=10):
    """
    Search PubMed for PMIDs matching the query.

    Args:
        query: Search term passed to the NCBI esearch endpoint.
        max_results: Maximum number of PMIDs to return.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        List of PMID strings (possibly empty).

    Raises:
        requests.HTTPError: If NCBI returns an error status code.
        requests.Timeout: If the request exceeds ``timeout`` seconds.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
        "tool": "MedicalAI",
        "email": PUBMED_EMAIL,
    }
    # Without an explicit timeout, requests can block indefinitely if the
    # NCBI endpoint stalls — a common cause of hung worker threads.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    # Missing keys fall back to an empty list rather than raising.
    return response.json().get("esearchresult", {}).get("idlist", [])
def fetch_abstract(pmid, timeout=10):
    """
    Fetch the plain-text abstract for a given PubMed ID.

    Args:
        pmid: PubMed ID to fetch (string or int accepted by the API).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The abstract text with surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If NCBI returns an error status code.
        requests.Timeout: If the request exceeds ``timeout`` seconds.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {
        "db": "pubmed",
        "id": pmid,
        "retmode": "text",
        "rettype": "abstract",
        "tool": "MedicalAI",
        "email": PUBMED_EMAIL,
    }
    # Explicit timeout so a stalled NCBI response cannot hang the caller
    # (these calls run inside a thread pool in fetch_pubmed_abstracts).
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.text.strip()
def fetch_pubmed_abstracts(pmids, max_workers=5):
    """
    Fetch multiple PubMed abstracts concurrently.

    Args:
        pmids: Iterable of PubMed IDs to fetch.
        max_workers: Size of the thread pool (default 5, matching the
            original hard-coded value; keep modest to respect NCBI
            rate limits).

    Returns:
        Dict mapping each PMID to its abstract text, or to an error
        message string if the fetch for that PMID failed. Per-PMID
        failures never abort the batch.
    """
    results = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_pmid = {executor.submit(fetch_abstract, pmid): pmid for pmid in pmids}
        for future in as_completed(future_to_pmid):
            pmid = future_to_pmid[future]
            try:
                results[pmid] = future.result()
            except Exception as e:
                # Best-effort: record the failure instead of propagating,
                # so one bad PMID doesn't lose the rest of the batch.
                results[pmid] = f"Error fetching PMID {pmid}: {str(e)}"
    return results
def summarize_text(text, chunk_size=CHUNK_SIZE):
    """
    Summarize long text using a chunking strategy.

    The text is split into sentence groups of at most roughly
    ``chunk_size`` words, each group is summarized independently, and
    the partial summaries are joined.

    Args:
        text: The text to summarize.
        chunk_size: Approximate maximum number of words per chunk.

    Returns:
        The concatenated chunk summaries, or "" for empty/blank input.
    """
    # Guard: "".split(". ") yields [""], which would send an empty
    # string to the summarizer.
    if not text or not text.strip():
        return ""
    # NOTE(review): splitting on ". " drops the periods when sentences
    # are re-joined with " " below — preserved from the original; confirm
    # whether the summarizer should instead receive ". "-joined chunks.
    chunks = _chunk_sentences(text.split(". "), chunk_size)
    summaries = [
        summarizer(chunk, max_length=100, min_length=30)[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(summaries)


def _chunk_sentences(sentences, chunk_size):
    """Group sentences into word-count-bounded chunks.

    A single sentence longer than ``chunk_size`` words becomes its own
    chunk. Never emits an empty chunk.
    """
    chunks = []
    current_chunk = []
    current_length = 0
    for sentence in sentences:
        tokens = len(sentence.split())
        # Flush only a non-empty chunk: the original code appended
        # " ".join([]) == "" when the very first sentence already
        # exceeded chunk_size, producing an empty chunk that would
        # crash or confuse the summarizer.
        if current_chunk and current_length + tokens > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += tokens
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
|