Spaces:

hhschu
/

elna

Sleeping

App Files Files Community

David Chu committed on May 19

Commit

5bb1986

unverified ·

1 Parent(s): a874450

feat: tool to search dailymed

Browse files

Files changed (5) hide show

main.py +65 -152
tools/__init__.py +0 -0
tools/dailymed.py +44 -0
tools/pubmed.py +24 -0
tools/semantic_scholar.py +53 -0

main.py CHANGED Viewed

@@ -1,151 +1,90 @@
 import os
-import time
-import httpx
 import streamlit as st
 from google import genai
-from pydantic import BaseModel
-class Article(BaseModel):
-    id: str
-    title: str
-    summary: str | None
-    abstract: str | None
-    venue: str
-    year: int
-    citations: int | None
-class Statement(BaseModel):
-    text: str
-    source_ids: list[str] | None
-def improve_prompt(client: genai.Client, prompt: str) -> str:
-    response = client.models.generate_content(
-        model="gemini-2.0-flash-lite",
-        contents=f"Rewrite the following medical question to maximize clarity and specificity for optimal search results.\n\n<query>{prompt}</query>\n\nReturn only the improved query.",
-    )
-    return response.text or ""
-def format_sources(articles: list[Article]) -> str:
-    sources = []
-    for article in articles:
-        source = f"<source id={article.id}><title>{article.title}</title>"
-        if article.abstract:
-            source += f"<abstract>{article.abstract}</abstract>"
-        if article.summary:
-            source += f"<summary>{article.summary}</summary>"
-        source += "</source>"
-        sources.append(source)
-    return "\n".join(sources)
-ANSWER_INSTRUCTION = """\
 You are a medical research expert.
-Please answer the user's query clearly and concisely, using no more than 100 words.
 Base every claim or statement strictly on the provided sources. For each claim, include a citation referencing the source's ID (do not include the citation in the `text` field). A claim may be supported by one or multiple sources, but only cite sources that directly support the claim. Do not add unnecessary citations.
 If none of the sources contain relevant information to answer the query, politely inform the user that an answer cannot be provided, and do not use any citations.
 If the query is not related to medicine, politely decline to answer.
 <query>{query}</query>
-<sources>
-{sources}
-</sources>
 """
-def generate_answer(
-    client: genai.Client, query: str, articles: list[Article]
-) -> list[Statement]:
     response = client.models.generate_content(
         model="gemini-2.5-flash-preview-04-17",
-        contents=ANSWER_INSTRUCTION.format(
-            query=query, sources=format_sources(articles)
-        ),
-        config={
-            "response_mime_type": "application/json",
-            "response_schema": list[Statement],
-        },
     )
-    return response.parsed
-def semantic_scholar(
-    client: httpx.Client, query: str, top_k: int = 10
-) -> list[Article]:
-    max_retries = 5
-    for attempt in range(max_retries):
-        try:
-            resp = client.get(
-                "https://api.semanticscholar.org/graph/v1/paper/search",
-                params={
-                    "query": query,
-                    "limit": top_k,
-                    "fields": "title,tldr,abstract,externalIds,url,venue,year,citationCount",
-                    "fieldsOfStudy": "Medicine,Biology",
-                    "minCitationCount": 20,
-                },
-                timeout=10.0,
-            )
-            resp.raise_for_status()
-            break
-        except (httpx.HTTPStatusError, httpx.TimeoutException) as err:
-            if attempt < max_retries - 1:
-                time.sleep(1)
-            else:
-                raise err
-    else:
-        raise RuntimeError()
-    results = resp.json()
-    articles = []
-    for i, article in enumerate(results.get("data", []), 1):
-        article = Article(
-            id=article["externalIds"].get("DOI", str(i)),
-            title=article["title"],
-            summary=article["tldr"]["text"] if article["tldr"] else "",
-            abstract=article["abstract"],
-            venue=article["venue"],
-            year=article["year"],
-            citations=article["citationCount"],
-        )
-        articles.append(article)
-    return articles
-def pubmed(query: str, top_k: int = 10, db: str = "pubmed"):
-    resp = httpx.get(
-        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
-        params={
-            "term": query,
-            "db": db,
-            "retmax": top_k,
-            "retmode": "json",
-        },
-    )
-    id_list = resp.json()["esearchresult"]["idlist"]
-    resp = httpx.get(
-        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
-        params={
-            "db": db,
-            "id": ",".join(id_list),
-            "retmode": "json",
-        },
-    )
-    return resp.json()
 def main():
-    semantic_scholar_client = httpx.Client(transport=httpx.HTTPTransport(retries=3))
     gemini_client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
     st.title("Ask ~~Jeeves~~ Elna")
@@ -155,36 +94,10 @@ def main():
         response = st.empty()
         if submit:
-            with st.spinner("Finding papers...", show_time=True):
-                papers = semantic_scholar(semantic_scholar_client, query, top_k=30)
-            if papers:
-                with st.spinner("Thinking...", show_time=True):
-                    paper_map = {paper.id: paper for paper in papers}
-                    sentences = []
-                    citations = {}
-                    statements = generate_answer(gemini_client, query, papers)
-                    for statement in statements:
-                        sentence = statement.text
-                        for source_id in statement.source_ids or []:
-                            if not (citation_id := citations.get(source_id)):
-                                citation_id = len(citations) + 1
-                                citations[source_id] = citation_id
-                            sentence += f"[^{citation_id}] "
-                        sentences.append(sentence.strip())
-                    answer = " ".join(sentences)
-                    footnotes = ""
-                    if citations:
-                        footnotes = "\n".join(
-                            f"[^{v}]: :grey-badge[:material/attribution: {paper_map[k].citations}] [{paper_map[k].title}](https://doi.org/{paper_map[k].id}). _{paper_map[k].venue}_ {paper_map[k].year}."
-                            for k, v in citations.items()
-                        )
-                    result = f"{answer}\n\n{footnotes}"
-            else:
-                result = "No relevant papers found."
-            response.markdown(result)
 if __name__ == "__main__":

+import json
 import os
+import re
 import streamlit as st
 from google import genai
+from google.genai import types
+from tools import dailymed, semantic_scholar
+INSTRUCTION = """\
 You are a medical research expert.
+Please answer the user's query clearly and concisely, using no more than 250 words.
 Base every claim or statement strictly on the provided sources. For each claim, include a citation referencing the source's ID (do not include the citation in the `text` field). A claim may be supported by one or multiple sources, but only cite sources that directly support the claim. Do not add unnecessary citations.
+You can use markdown format, such as **, to highlight the key part of the answer. But do not return the response in a markdown code block.
 If none of the sources contain relevant information to answer the query, politely inform the user that an answer cannot be provided, and do not use any citations.
 If the query is not related to medicine, politely decline to answer.
 <query>{query}</query>
+Produce JSON matching this specification:
+Source = {{ "title": string, "url": str }}
+Statement = {{ "text": string, "sources": array<Source> }}
+Return: array<Statement>
+Do not return the response in a markdown code block.
 """
+def respond(client: genai.Client, query: str) -> str:
+    config = types.GenerateContentConfig(
+        tools=[
+            dailymed.find_drug_set_ids,
+            dailymed.find_drug_instruction,
+            semantic_scholar.search_journal_articles,
+        ],
+    )
     response = client.models.generate_content(
         model="gemini-2.5-flash-preview-04-17",
+        contents=INSTRUCTION.format(query=query),
+        config=config,
     )
+    return response.text or ""
+def format_output(response: str) -> tuple[str, str]:
+    response = response.strip()
+    if response.startswith("```"):
+        # Extract content inside the first markdown code block (``` or ```json)
+        match = re.match(r"^```(?:json)?\s*([\s\S]*?)\s*```", response)
+        if match:
+            response = match.group(1).strip()
+    try:
+        statements = json.loads(response.strip())
+    except json.decoder.JSONDecodeError as err:
+        print(err)
+        return response, ""
+    try:
+        answer = ""
+        citations = {}
+        for statement in statements:
+            answer += statement["text"]
+            for source in statement.get("sources", []):
+                source_str = f"[{source['title']}]({source['url']})"
+                if not (citation_id := citations.get(source_str)):
+                    citation_id = len(citations) + 1
+                    citations[source_str] = citation_id
+                answer += f"[^{citation_id}]"
+            answer += " "
+    except KeyError as err:
+        print(err)
+        return response, ""
+    footnotes = "\n".join(f"[^{id}]: {citation}" for citation, id in citations.items())
+    return answer, footnotes
 def main():
     gemini_client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
     st.title("Ask ~~Jeeves~~ Elna")
         response = st.empty()
         if submit:
+            with st.spinner("Thinking...", show_time=True):
+                output = respond(gemini_client, query)
+                answer, footnotes = format_output(output)
+            response.markdown(f"{answer}\n\n{footnotes}")
 if __name__ == "__main__":

tools/__init__.py ADDED Viewed

File without changes

tools/dailymed.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import httpx
+def find_drug_set_ids(name: str) -> list[dict]:
+    """Get the Set IDs of drugs by a name.
+    The Set ID can be used to look up a drug's instruction.
+    Args:
+        name: Generic or brand name of a drug.
+    Returns:
+        A list of drug names and their Set ID.
+    """
+    resp = httpx.get(
+        "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
+        params={"drug_name": name},
+    )
+    return [
+        {
+            "name": row["title"],
+            "set_id": row["setid"],
+            "url": f"https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid={row['setid']}",
+        }
+        for row in resp.json()["data"]
+    ]
+def find_drug_instruction(set_id: str) -> str:
+    """Get the instruction of a drug from the FDA database.
+    The instruction includes dosage, contraindications, adverse
+    reactions, drug interactions, etc.
+    Args:
+        set_id: Set ID of the drug to look up.
+    Returns:
+        Full package instruction in XML format.
+    """
+    resp = httpx.get(
+        f"https://dailymed.nlm.nih.gov/dailymed/services/v2/spls/{set_id}.xml"
+    )
+    return resp.text

tools/pubmed.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import httpx
+def search(query: str, top_k: int = 10, db: str = "pubmed"):
+    resp = httpx.get(
+        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
+        params={
+            "term": query,
+            "db": db,
+            "retmax": top_k,
+            "retmode": "json",
+        },
+    )
+    id_list = resp.json()["esearchresult"]["idlist"]
+    resp = httpx.get(
+        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
+        params={
+            "db": db,
+            "id": ",".join(id_list),
+            "retmode": "json",
+        },
+    )
+    return resp.json()

tools/semantic_scholar.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import time
+import httpx
+def search_journal_articles(query: str) -> str | list[dict]:
+    """Get abstracts and summaries of related medical journal articles.
+    Args:
+        query: Search query for medical articles.
+    Returns:
+        A list of papers and their title, summary, published
+        venue and year, and the number of citations.
+    """
+    max_retries = 5
+    for attempt in range(max_retries):
+        try:
+            resp = httpx.get(
+                "https://api.semanticscholar.org/graph/v1/paper/search",
+                params={
+                    "query": query,
+                    "limit": 20,
+                    "fields": "title,tldr,abstract,externalIds,url,venue,year,citationCount",
+                    "fieldsOfStudy": "Medicine,Biology",
+                    "minCitationCount": 20,
+                },
+                timeout=10.0,
+            )
+            resp.raise_for_status()
+            break
+        except (httpx.HTTPStatusError, httpx.TimeoutException) as err:
+            if attempt < max_retries - 1:
+                time.sleep(1)
+            else:
+                raise err
+    else:
+        return "No related articles found."
+    results = resp.json()
+    articles = []
+    for article in results.get("data", []):
+        article = {
+            "url": f"https://doi.org/{article['externalIds'].get('DOI')}",
+            "title": article["title"],
+            "summary": article["tldr"]["text"] if article["tldr"] else "",
+            "abstract": article["abstract"],
+            "venue": article["venue"],
+            "year": article["year"],
+            "citations": article["citationCount"],
+        }
+        articles.append(article)
+    return articles