mgbam committed on
Commit
bce8a18
·
verified ·
1 Parent(s): cfe029c

Delete backend

Browse files
backend/__init__.py DELETED
File without changes
backend/llm_utils.py DELETED
@@ -1,34 +0,0 @@
"""Utilities for loading the ZeroSearch simulation model and performing simulated searches."""
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import functools

# Hugging Face model id of the search-simulation checkpoint.
# NOTE(review): 14B parameters — loading is heavy; presumably requires a GPU
# (or sharding via device_map="auto") — confirm deployment target.
MODEL_NAME = "sunhaonlp/SearchSimulation_14B"
8
- @functools.lru_cache(maxsize=1)
9
- def _load_search_pipe():
10
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
11
- model = AutoModelForCausalLM.from_pretrained(
12
- MODEL_NAME,
13
- trust_remote_code=True,
14
- device_map="auto"
15
- )
16
- return pipeline(
17
- "text-generation",
18
- model=model,
19
- tokenizer=tokenizer,
20
- max_new_tokens=512,
21
- do_sample=False,
22
- temperature=0.0,
23
- )
24
-
25
- def simulate_search(query: str, k: int = 5):
26
- """Generate *k* synthetic documents for *query*."""
27
- pipe = _load_search_pipe()
28
- prompt = f"SearchSimulation:\nQuery: {query}\nDocuments:"
29
- outputs = pipe(prompt, num_return_sequences=k)
30
- docs = []
31
- for o in outputs:
32
- text = o["generated_text"]
33
- docs.append(text.split("Documents:")[-1].strip())
34
- return docs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/main.py DELETED
@@ -1,52 +0,0 @@
from fastapi import FastAPI
from pydantic import BaseModel
from .llm_utils import simulate_search
from .umls_linker import link_umls
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import functools

# Model used to produce final answers.
# NOTE(review): this is the same SearchSimulation checkpoint used for document
# simulation — presumably intentional (one model for both roles), but confirm
# it was not meant to be a separate answering model.
ANSWER_MODEL = "sunhaonlp/SearchSimulation_14B"
10
-
11
- @functools.lru_cache(maxsize=1)
12
- def _load_answer_pipe():
13
- tokenizer = AutoTokenizer.from_pretrained(ANSWER_MODEL)
14
- model = AutoModelForCausalLM.from_pretrained(
15
- ANSWER_MODEL,
16
- trust_remote_code=True,
17
- device_map="auto"
18
- )
19
- return pipeline(
20
- "text-generation",
21
- model=model,
22
- tokenizer=tokenizer,
23
- max_new_tokens=256,
24
- do_sample=False,
25
- temperature=0.0,
26
- )
27
-
28
- class Query(BaseModel):
29
- question: str
30
-
31
- app = FastAPI(
32
- title="ZeroSearch Medical Q&A API",
33
- description="Ask clinical questions; get answers with UMLS links, no external search APIs.",
34
- version="0.1.0",
35
- )
36
-
37
- @app.post("/ask")
38
- def ask(query: Query):
39
- docs = simulate_search(query.question, k=5)
40
- context = "\n\n".join(docs)
41
- prompt = (
42
- "Answer the medical question strictly based on the provided context.\n\n"
43
- f"Context:\n{context}\n\n"
44
- f"Question: {query.question}\nAnswer:"
45
- )
46
- answer_pipe = _load_answer_pipe()
47
- answer = (
48
- answer_pipe(prompt, num_return_sequences=1)[0]["generated_text"]
49
- .split("Answer:")[-1].strip()
50
- )
51
- umls = link_umls(answer)
52
- return {"answer": answer, "docs": docs, "umls": umls}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/test DELETED
File without changes
backend/umls_linker.py DELETED
@@ -1,19 +0,0 @@
"""Simple UMLS linker using SciSpacy."""
import spacy
from scispacy.linking import UmlsEntityLinker

# Import-time model load: heavy (large biomedical vectors) and raises at import
# if the en_core_sci_lg package is not installed.
nlp = spacy.load("en_core_sci_lg")
linker = UmlsEntityLinker(resolve_abbreviations=True, disambiguate=True)
# NOTE(review): nlp.add_pipe(component_instance) is the spaCy v2 API; spaCy v3
# requires a registered component name, e.g.
# nlp.add_pipe("scispacy_linker", config={"resolve_abbreviations": True, "linker_name": "umls"}).
# Confirm the pinned spacy/scispacy versions.
nlp.add_pipe(linker)
10
- def link_umls(text: str):
11
- doc = nlp(text)
12
- results = []
13
- for ent in doc.ents:
14
- for cui, score in ent._.kb_ents:
15
- results.append(
16
- {"text": ent.text, "cui": cui, "score": score}
17
- )
18
- break # take top candidate
19
- return results