File size: 2,498 Bytes
455290d
 
f3774c1
3e78ff5
455290d
3e78ff5
 
455290d
f3774c1
 
455290d
3e78ff5
455290d
f3774c1
455290d
 
 
 
 
f3774c1
 
455290d
f3774c1
 
3e78ff5
455290d
f3774c1
 
455290d
3e78ff5
455290d
f3774c1
455290d
 
f3774c1
455290d
 
f3774c1
 
455290d
f3774c1
 
 
 
455290d
 
 
3e78ff5
455290d
f3774c1
 
 
455290d
 
 
f3774c1
455290d
f3774c1
 
 
455290d
3e78ff5
455290d
3e78ff5
455290d
3e78ff5
455290d
 
 
f3774c1
 
3e78ff5
 
455290d
 
 
f3774c1
3e78ff5
455290d
 
 
 
3e78ff5
f3774c1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from transformers import pipeline
from config import PUBMED_EMAIL, CHUNK_SIZE

# Summarization pipeline, loaded once at import time and shared by
# summarize_text() below. NOTE(review): this downloads/loads the BART model
# on first import, which is slow and memory-heavy — presumably intentional
# so the model is warm before requests arrive; confirm this module is not
# imported in contexts that never summarize.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


def search_pubmed(query, max_results=5, timeout=10):
    """
    Search PubMed for PMIDs matching the query.

    Args:
        query: Free-text search term passed to the ESearch `term` parameter.
        max_results: Maximum number of PMIDs to return (ESearch `retmax`).
        timeout: Seconds to wait for the HTTP response before aborting.

    Returns:
        A list of PMID strings (possibly empty if nothing matched).

    Raises:
        requests.HTTPError: If PubMed returns a non-2xx status.
        requests.Timeout: If the request exceeds `timeout` seconds.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
        # NCBI asks API consumers to identify themselves via tool/email.
        "tool": "MedicalAI",
        "email": PUBMED_EMAIL,
    }
    # Explicit timeout: requests has NO default timeout, so without this a
    # stalled connection would block the calling thread forever.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json().get("esearchresult", {}).get("idlist", [])


def fetch_abstract(pmid, timeout=10):
    """
    Fetch the plain-text abstract for a given PubMed ID via EFetch.

    Args:
        pmid: PubMed identifier (string or int) to fetch.
        timeout: Seconds to wait for the HTTP response before aborting.

    Returns:
        The abstract text with surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If PubMed returns a non-2xx status.
        requests.Timeout: If the request exceeds `timeout` seconds.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {
        "db": "pubmed",
        "id": pmid,
        "retmode": "text",
        "rettype": "abstract",
        # NCBI asks API consumers to identify themselves via tool/email.
        "tool": "MedicalAI",
        "email": PUBMED_EMAIL,
    }
    # Explicit timeout: requests has NO default timeout; a hung connection
    # here would otherwise wedge a ThreadPoolExecutor worker indefinitely.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.text.strip()


def fetch_pubmed_abstracts(pmids):
    """
    Fetch multiple PubMed abstracts concurrently.

    Each PMID is fetched on a worker thread; failures for one PMID do not
    abort the others — the error message is stored in its place instead.

    Args:
        pmids: Iterable of PubMed IDs to fetch.

    Returns:
        Dict mapping each PMID to its abstract text, or to an error string
        of the form "Error fetching PMID <pmid>: <reason>" on failure.
    """
    abstracts = {}
    with ThreadPoolExecutor(max_workers=5) as pool:
        pending = {}
        for pmid in pmids:
            pending[pool.submit(fetch_abstract, pmid)] = pmid
        for done in as_completed(pending):
            pmid = pending[done]
            try:
                text = done.result()
            except Exception as exc:
                abstracts[pmid] = f"Error fetching PMID {pmid}: {str(exc)}"
            else:
                abstracts[pmid] = text
    return abstracts


def summarize_text(text, chunk_size=CHUNK_SIZE):
    """
    Summarize long text using a sentence-based chunking strategy.

    The text is split into sentences, sentences are packed greedily into
    chunks of at most `chunk_size` whitespace-delimited words, each chunk is
    summarized independently, and the chunk summaries are concatenated.

    Args:
        text: The text to summarize.
        chunk_size: Approximate maximum number of words per chunk.

    Returns:
        The concatenated chunk summaries, or "" for empty/blank input.
    """
    # Robustness: the original fed empty input straight to the model.
    if not text or not text.strip():
        return ""

    sentences = text.split(". ")
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sentences:
        tokens = len(sentence.split())
        # Bug fix: only flush when the chunk is non-empty. Previously, a
        # first sentence longer than chunk_size appended "" (join of an
        # empty list) as a chunk, which was then sent to the summarizer.
        if current_chunk and current_length + tokens > chunk_size:
            chunks.append(". ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += tokens

    if current_chunk:
        chunks.append(". ".join(current_chunk))

    # Bug fix: rejoin with ". " (the separator split() removed) instead of
    # " ", so chunks keep their sentence-ending periods — the summarizer
    # otherwise receives run-on text with punctuation stripped.
    summaries = [summarizer(chunk, max_length=100, min_length=30)[0]["summary_text"] for chunk in chunks]
    return " ".join(summaries)