David Chu
committed on
Commit
·
3e78ada
unverified
·
0
Parent(s):
first commit
Browse files- .gitignore +10 -0
- .sesskey +1 -0
- README.md +0 -0
- main.py +150 -0
- pyproject.toml +36 -0
- uv.lock +0 -0
.gitignore
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python-generated files
|
2 |
+
__pycache__/
|
3 |
+
*.py[oc]
|
4 |
+
build/
|
5 |
+
dist/
|
6 |
+
wheels/
|
7 |
+
*.egg-info
|
8 |
+
|
9 |
+
# Virtual environments
|
10 |
+
.venv
|
.sesskey
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3ae1db93-9b8c-44d1-a84f-7fdc45d0dc4d
|
README.md
ADDED
File without changes
|
main.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import httpx
|
4 |
+
import streamlit as st
|
5 |
+
from google import genai
|
6 |
+
from pydantic import BaseModel
|
7 |
+
|
8 |
+
|
9 |
+
class Article(BaseModel):
    # One retrieved paper, reduced to the fields the prompt and the
    # citation list need.
    id: str  # key used to look the paper up again when rendering citations
    title: str
    summary: str  # short "tldr" text; "" when the API returned none
    abstract: str  # "" when the API returned no abstract
    url: str
|
15 |
+
|
16 |
+
|
17 |
+
class Citation(BaseModel):
    # Pointer from a generated statement back to one of the supplied sources.
    source_id: str  # expected to equal the `id` of a source given to the model
|
19 |
+
|
20 |
+
|
21 |
+
class Statement(BaseModel):
    # One piece of the generated answer together with its optional source.
    text: str
    citation: Citation | None  # None when the statement cites no source
|
24 |
+
|
25 |
+
|
26 |
+
class Answer(BaseModel):
    # Structured shape requested from the model: the answer as an ordered
    # list of statements.
    sentences: list[Statement]
|
28 |
+
|
29 |
+
|
30 |
+
def improve_prompt(client: genai.Client, prompt: str) -> str:
    """Ask Gemini to rewrite a medical question into a sharper search query.

    Args:
        client: google-genai client used to issue the request.
        prompt: The user's original question; embedded verbatim in the
            instruction sent to the model.

    Returns:
        The rewritten query, or "" when the response carries no text.
    """
    instruction = f"Rewrite the following medical question to maximize clarity and specificity for optimal search results.\n\n<query>{prompt}</query>\n\nReturn only the improved query."
    result = client.models.generate_content(
        model="gemini-2.0-flash-lite",
        contents=instruction,
    )
    rewritten = result.text
    return rewritten if rewritten else ""
|
36 |
+
|
37 |
+
|
38 |
+
def format_sources(articles: list[Article]) -> str:
    """Serialise articles into the `<source>` markup embedded in the prompt.

    Titles, abstracts and summaries come from an external API and routinely
    contain characters such as "<" (e.g. "p < 0.05"). They are escaped so
    that article text can never open or close a tag and break the
    surrounding structure, and the id attribute is quoted.

    Args:
        articles: Papers to include, in the order they should appear.

    Returns:
        One `<source>` element per article, joined by newlines; "" when
        `articles` is empty. Empty abstracts and summaries are omitted.
    """
    from html import escape

    sources = []
    for article in articles:
        parts = [
            f'<source id="{escape(article.id, quote=True)}">',
            f"<title>{escape(article.title, quote=False)}</title>",
        ]
        if article.abstract:
            parts.append(
                f"<abstract>{escape(article.abstract, quote=False)}</abstract>"
            )
        if article.summary:
            parts.append(
                f"<summary>{escape(article.summary, quote=False)}</summary>"
            )
        parts.append("</source>")
        sources.append("".join(parts))
    return "\n".join(sources)
|
49 |
+
|
50 |
+
|
51 |
+
def generate_answer(
    client: genai.Client, query: str, articles: list[Article]
) -> list[Statement]:
    """Generate a short, cited answer to `query` from the given articles.

    The model is asked for JSON matching the `Answer` schema, so each
    returned statement may carry a citation pointing at a source id.

    Args:
        client: google-genai client used to issue the request.
        query: The user's question.
        articles: Sources the answer must be based on.

    Returns:
        The answer's statements in order, or an empty list when the reply
        could not be parsed into an `Answer`.
    """
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=f"Answer the query based solely on the provided sources. The answer should be less than 100 words. Justify the answer by citing from the sources. Refuse to answer non-medical related query.\n\n<query>{query}</query>\n\n<sources>{format_sources(articles)}</sources>",
        config={
            "response_mime_type": "application/json",
            "response_schema": Answer,
        },
    )
    answer = response.parsed
    # `parsed` is None when the reply is missing or is not valid JSON for the
    # schema; dereferencing it blindly would raise AttributeError.
    if answer is None:
        return []
    return answer.sentences
|
63 |
+
|
64 |
+
|
65 |
+
def semantic_scholar(query: str, top_k: int = 10) -> list[Article]:
    """Search Semantic Scholar for medicine/biology papers matching `query`.

    Args:
        query: Free-text search query.
        top_k: Maximum number of papers to request.

    Returns:
        The matching papers as `Article` objects with ids "1", "2", ... taken
        from their position in the result list; empty when nothing matched.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status
            (for example when rate limited).
    """
    resp = httpx.get(
        # The search term is sent via `params`; the URL must not carry its
        # own dangling "?query" fragment.
        "https://api.semanticscholar.org/graph/v1/paper/search",
        params={
            "query": query,
            "limit": top_k,
            "fields": "title,tldr,abstract,externalIds,url",
            "fieldsOfStudy": "Medicine,Biology",
            "minCitationCount": 20,
        },
    )
    # Fail with a clear HTTP error instead of a KeyError on an error body.
    resp.raise_for_status()
    results = resp.json()
    if not results.get("total"):
        return []
    return [
        Article(
            id=str(position),
            # Any requested field may come back as null; Article needs str.
            title=paper.get("title") or "",
            summary=(paper.get("tldr") or {}).get("text") or "",
            abstract=paper.get("abstract") or "",
            url=paper.get("url") or "",
        )
        for position, paper in enumerate(results.get("data") or [], 1)
        if paper
    ]
|
92 |
+
|
93 |
+
|
94 |
+
def pubmed(query: str, top_k: int = 10, db: str = "pubmed") -> dict:
    """Search an NCBI Entrez database and fetch summaries for the hits.

    Runs an `esearch` request to obtain matching ids, then an `esummary`
    request for those ids.

    Args:
        query: Search term passed to esearch.
        top_k: Maximum number of ids to request (`retmax`).
        db: Entrez database name.

    Returns:
        The decoded JSON body of the esummary response, unmodified.

    Raises:
        httpx.HTTPStatusError: If either request answers with a 4xx/5xx
            status.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

    search_resp = httpx.get(
        f"{base_url}/esearch.fcgi",
        params={
            "term": query,
            "db": db,
            "retmax": top_k,
            "retmode": "json",
        },
    )
    # Surface HTTP failures directly rather than as a KeyError below.
    search_resp.raise_for_status()
    id_list = search_resp.json()["esearchresult"]["idlist"]

    # NOTE(review): when nothing matched, `id` is sent as an empty string;
    # confirm what esummary returns for that before relying on the result.
    summary_resp = httpx.get(
        f"{base_url}/esummary.fcgi",
        params={
            "db": db,
            "id": ",".join(id_list),
            "retmode": "json",
        },
    )
    summary_resp.raise_for_status()
    return summary_resp.json()
|
115 |
+
|
116 |
+
|
117 |
+
def _render_answer(
    statements: list[Statement], papers_by_id: dict[str, Article]
) -> tuple[str, dict[str, int]]:
    """Join statements into answer text with stable, 1-based citation marks.

    Each distinct source gets the next number the first time it is cited and
    keeps it on every later citation. Citations whose source id is not in
    `papers_by_id` are dropped, since they could not be listed afterwards.

    Returns:
        The answer text (each statement followed by a space) and a mapping
        of source id to citation number, in first-citation order.
    """
    numbers: dict[str, int] = {}
    pieces: list[str] = []
    for statement in statements:
        piece = statement.text
        citation = statement.citation
        if citation and citation.source_id in papers_by_id:
            # Start at 1: a 0 would be falsy and indistinguishable from
            # "not seen yet" in truthiness checks.
            number = numbers.setdefault(citation.source_id, len(numbers) + 1)
            piece += f"[{number}]"
        pieces.append(piece + " ")
    return "".join(pieces), numbers


def main():
    """Run the Streamlit app: ask a question, show a cited answer.

    Reads the Gemini API key from the GOOGLE_API_KEY environment variable,
    searches Semantic Scholar for the submitted question, asks the model for
    an answer based on the papers found, and renders the answer followed by
    the list of cited papers.

    Raises:
        KeyError: If GOOGLE_API_KEY is not set.
    """
    client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

    st.title("Ask ~~Jeeves~~ Elna")
    with st.form("search", border=False):
        query = st.text_input("Your medical question")
        submit = st.form_submit_button("Ask")

    if not submit:
        return
    if not query.strip():
        # Do not send an empty search to the API.
        st.warning("Please enter a question.")
        return

    papers = semantic_scholar(query, top_k=2)
    if not papers:
        st.info("No matching papers were found for this question.")
        return

    papers_by_id = {paper.id: paper for paper in papers}
    statements = generate_answer(client, query, papers)
    answer_str, citations = _render_answer(statements, papers_by_id)

    st.write(answer_str)
    if citations:
        st.subheader("Citations")
        for source_id, number in citations.items():
            paper = papers_by_id[source_id]
            st.write(f"[{number}] [{paper.title}]({paper.url})")


if __name__ == "__main__":
    main()
|
pyproject.toml
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "elna"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Add your description here"
|
5 |
+
readme = "README.md"
|
6 |
+
requires-python = ">=3.12"
|
7 |
+
dependencies = [
|
8 |
+
"google-genai>=1.14.0",
|
9 |
+
"httpx>=0.28.1",
|
10 |
+
"pydantic>=2.11.4",
|
11 |
+
"python-fasthtml>=0.12.15",
|
12 |
+
"streamlit>=1.45.0",
|
13 |
+
]
|
14 |
+
|
15 |
+
[tool.ruff.lint]
|
16 |
+
preview = true
|
17 |
+
select = [
|
18 |
+
"E", # pycodestyle
|
19 |
+
"W", # pycodestyle warnings
|
20 |
+
"F", # Pyflakes
|
21 |
+
"I", # isort
|
22 |
+
"B", # flake8-bugbear
|
23 |
+
"UP", # pyupgrade
|
24 |
+
"SIM", # flake8-simplify
|
25 |
+
"C4", # flake8-comprehensions
|
26 |
+
"FURB", # refurb
|
27 |
+
"RUF", # ruff
|
28 |
+
]
|
29 |
+
ignore = [
|
30 |
+
"F401", # Module imported but unused
|
31 |
+
"E501", # Line too long
|
32 |
+
]
|
33 |
+
|
34 |
+
[tool.pyright]
|
35 |
+
venv = ".venv"
|
36 |
+
venvPath = "."
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|