David Chu committed on
Commit
3e78ada
·
unverified ·
0 Parent(s):

first commit

Browse files
Files changed (6) hide show
  1. .gitignore +10 -0
  2. .sesskey +1 -0
  3. README.md +0 -0
  4. main.py +150 -0
  5. pyproject.toml +36 -0
  6. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
.sesskey ADDED
@@ -0,0 +1 @@
 
 
1
+ 3ae1db93-9b8c-44d1-a84f-7fdc45d0dc4d
README.md ADDED
File without changes
main.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import httpx
4
+ import streamlit as st
5
+ from google import genai
6
+ from pydantic import BaseModel
7
+
8
+
9
class Article(BaseModel):
    # One search hit, in the shape the prompt builder and the citation list
    # consume. Documented with comments rather than a docstring so the
    # generated schema stays unchanged.

    # Identifier that citations refer back to.
    id: str
    # Title of the paper.
    title: str
    # Short summary text; empty string when none is available.
    summary: str
    # Abstract text; empty string when none is available.
    abstract: str
    # Link rendered next to the title in the citation list.
    url: str
15
+
16
+
17
class Citation(BaseModel):
    # Pointer from a statement to the source that supports it. Comments are
    # used instead of a docstring so the generated schema stays unchanged.

    # Looked up against Article.id when the citation list is rendered.
    source_id: str
19
+
20
+
21
class Statement(BaseModel):
    # One piece of answer text plus the source backing it, if any. Comments
    # are used instead of a docstring so the generated schema stays unchanged.

    # Text appended verbatim to the rendered answer.
    text: str
    # None when the statement is not tied to a source.
    citation: Citation | None
24
+
25
+
26
class Answer(BaseModel):
    # Structured response schema requested from the model. Comments are used
    # instead of a docstring so the generated schema stays unchanged.

    # Statements in the order they should be displayed.
    sentences: list[Statement]
28
+
29
+
30
def improve_prompt(client: genai.Client, prompt: str) -> str:
    """Ask the model to rewrite a medical question into a sharper search query.

    Args:
        client: Gemini client used to issue the request.
        prompt: The question as typed by the user.

    Returns:
        The text of the model response, or an empty string when the response
        carries no text.
    """
    instruction = (
        "Rewrite the following medical question to maximize clarity and specificity for optimal search results."
        f"\n\n<query>{prompt}</query>\n\n"
        "Return only the improved query."
    )
    rewritten = client.models.generate_content(
        model="gemini-2.0-flash-lite",
        contents=instruction,
    ).text
    return rewritten if rewritten else ""
36
+
37
+
38
def format_sources(articles: list[Article]) -> str:
    """Render articles as newline-separated ``<source>`` elements for a prompt.

    Every element carries the article id and title. The ``<abstract>`` and
    ``<summary>`` children are emitted only when the corresponding field is
    non-empty.

    Args:
        articles: Articles to expose to the model as citable sources.

    Returns:
        One ``<source>`` element per article, joined with newlines; an empty
        string when ``articles`` is empty.
    """

    def render(article: Article) -> str:
        # Collect the fragments and join once instead of growing a string.
        parts = [f"<source id={article.id}><title>{article.title}</title>"]
        if article.abstract:
            parts.append(f"<abstract>{article.abstract}</abstract>")
        if article.summary:
            parts.append(f"<summary>{article.summary}</summary>")
        parts.append("</source>")
        return "".join(parts)

    return "\n".join(render(article) for article in articles)
49
+
50
+
51
def generate_answer(
    client: genai.Client, query: str, articles: list[Article]
) -> list[Statement]:
    """Generate a short, cited answer to ``query`` from the given articles.

    The model is asked for JSON matching the ``Answer`` schema, so each
    returned statement may carry a citation that points at one of the
    ``<source>`` ids produced by ``format_sources``.

    Args:
        client: Gemini client used to issue the request.
        query: The user's question.
        articles: Sources the model is instructed to rely on exclusively.

    Returns:
        The statements of the parsed answer, or an empty list when the
        response could not be parsed into an ``Answer``.
    """
    prompt = (
        "Answer the query based solely on the provided sources. The answer should be less than 100 words. Justify the answer by citing from the sources. Refuse to answer non-medical related query."
        f"\n\n<query>{query}</query>"
        f"\n\n<sources>{format_sources(articles)}</sources>"
    )
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        config={
            "response_mime_type": "application/json",
            "response_schema": Answer,
        },
    )
    # `parsed` is not guaranteed to be populated (e.g. blocked or malformed
    # output); dereferencing it blindly raised AttributeError.
    parsed = response.parsed
    if not isinstance(parsed, Answer):
        return []
    return parsed.sentences
63
+
64
+
65
def semantic_scholar(query: str, top_k: int = 10) -> list[Article]:
    """Search Semantic Scholar for medicine/biology papers matching ``query``.

    Args:
        query: Free-text search query.
        top_k: Maximum number of papers to request.

    Returns:
        One ``Article`` per non-empty hit, with ids numbered from 1 by
        position in the response. Empty when the search matched nothing.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
        httpx.HTTPError: On transport-level failures.
    """
    resp = httpx.get(
        # The query string is built entirely from `params`; the endpoint URL
        # itself must not carry a dangling "?query".
        "https://api.semanticscholar.org/graph/v1/paper/search",
        params={
            "query": query,
            "limit": top_k,
            "fields": "title,tldr,abstract,externalIds,url",
            "fieldsOfStudy": "Medicine,Biology",
            "minCitationCount": 20,
        },
    )
    # Error payloads have no "total"/"data" keys, so fail with the real HTTP
    # error instead of a misleading KeyError further down.
    resp.raise_for_status()
    results = resp.json()
    if not results.get("total"):
        return []
    return [
        Article(
            id=str(position),
            title=paper.get("title") or "",
            # Any of these fields may be absent or null for a given paper.
            summary=(paper.get("tldr") or {}).get("text") or "",
            abstract=paper.get("abstract") or "",
            url=paper.get("url") or "",
        )
        for position, paper in enumerate(results.get("data") or [], 1)
        if paper
    ]
92
+
93
+
94
def pubmed(query: str, top_k: int = 10, db: str = "pubmed"):
    """Search an NCBI Entrez database and fetch summaries for the hits.

    Issues an ESearch request for ``query``, then passes the returned id
    list to ESummary.

    Args:
        query: Search term sent to ESearch.
        top_k: Value sent as ``retmax`` on the search request.
        db: Entrez database name used for both requests.

    Returns:
        The decoded JSON body of the ESummary response.
    """
    eutils = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

    search_params = {
        "term": query,
        "db": db,
        "retmax": top_k,
        "retmode": "json",
    }
    found = httpx.get(f"{eutils}/esearch.fcgi", params=search_params).json()
    ids = found["esearchresult"]["idlist"]

    summary_params = {
        "db": db,
        "id": ",".join(ids),
        "retmode": "json",
    }
    return httpx.get(f"{eutils}/esummary.fcgi", params=summary_params).json()
115
+
116
+
117
def _number_citations(
    statements: list[Statement], known_ids: set[str]
) -> tuple[str, dict[str, int]]:
    """Join statement texts, appending a ``[n]`` marker after each cited one.

    Sources are numbered from 1 in order of first appearance, and a source
    cited several times keeps the same number. Citations whose ``source_id``
    is not in ``known_ids`` are ignored so that every marker in the text has
    a matching entry in the citation list.

    Returns:
        The answer text and a mapping of source id to its display number.
    """
    numbers: dict[str, int] = {}
    pieces = []
    for statement in statements:
        piece = statement.text
        citation = statement.citation
        if citation and citation.source_id in known_ids:
            # Membership test rather than truthiness of the stored number: a
            # falsy number (0) used to be treated as "not seen yet", which
            # renumbered the first source and made it collide with the second.
            if citation.source_id not in numbers:
                numbers[citation.source_id] = len(numbers) + 1
            piece += f"[{numbers[citation.source_id]}]"
        pieces.append(piece + " ")
    return "".join(pieces), numbers


def main():
    """Streamlit entry point: take a medical question and show a cited answer.

    Reads the Gemini API key from the ``GOOGLE_API_KEY`` environment variable
    (raises ``KeyError`` when it is unset), searches Semantic Scholar for the
    submitted question and renders the generated answer followed by the list
    of cited papers.
    """
    client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

    st.title("Ask ~~Jeeves~~ Elna")
    with st.form("search", border=False):
        query = st.text_input("Your medical question")
        submit = st.form_submit_button("Ask")

    # Nothing to do until the form is submitted with an actual question.
    if not submit or not query.strip():
        return

    papers = semantic_scholar(query, top_k=2)
    if not papers:
        return

    papers_by_id = {paper.id: paper for paper in papers}
    statements = generate_answer(client, query, papers)
    answer_str, citations = _number_citations(statements, set(papers_by_id))

    st.write(answer_str)
    if citations:
        st.subheader("Citations")
        for source_id, number in citations.items():
            paper = papers_by_id[source_id]
            st.write(f"[{number}] [{paper.title}]({paper.url})")


if __name__ == "__main__":
    main()
pyproject.toml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "elna"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "google-genai>=1.14.0",
9
+ "httpx>=0.28.1",
10
+ "pydantic>=2.11.4",
11
+ "python-fasthtml>=0.12.15",
12
+ "streamlit>=1.45.0",
13
+ ]
14
+
15
+ [tool.ruff.lint]
16
+ preview = true
17
+ select = [
18
+ "E", # pycodestyle
19
+ "W", # pycodestyle warnings
20
+ "F", # Pyflakes
21
+ "I", # isort
22
+ "B", # flake8-bugbear
23
+ "UP", # pyupgrade
24
+ "SIM", # flake8-simplify
25
+ "C4", # flake8-comprehensions
26
+ "FURB", # refurb
27
+ "RUF", # ruff
28
+ ]
29
+ ignore = [
30
+ "F401", # Module imported but unused
31
+ "E501", # Line too long
32
+ ]
33
+
34
+ [tool.pyright]
35
+ venv = ".venv"
36
+ venvPath = "."
uv.lock ADDED
The diff for this file is too large to render. See raw diff